Source code for finalfusion.metadata

"""
finalfusion metadata
"""
import struct
from os import PathLike
from typing import BinaryIO, Union

import toml

from finalfusion.io import Chunk, ChunkIdentifier, find_chunk, _read_required_binary,\
    _write_binary, FinalfusionFormatError


[docs]class Metadata(dict, Chunk): """ Embeddings metadata Metadata can be used as a regular Python dict. For serialization, the contents need to be serializable through `toml.dumps`. Finalfusion assumes metadata to be a TOML formatted string. Examples -------- >>> metadata = Metadata({'Some': 'value', 'number': 1}) >>> metadata {'Some': 'value', 'number': 1} >>> metadata['Some'] 'value' >>> metadata['Some'] = 'other value' >>> metadata['Some'] 'other value' """
[docs] @staticmethod def chunk_identifier() -> ChunkIdentifier: return ChunkIdentifier.Metadata
[docs] @staticmethod def read_chunk(file: BinaryIO) -> 'Metadata': chunk_header_size = struct.calcsize("<IQ") # place the file before the chunk header since the chunk size for # metadata the number of bytes that we need to read file.seek(-chunk_header_size, 1) chunk_id, chunk_len = _read_required_binary(file, "<IQ") assert ChunkIdentifier(chunk_id) == Metadata.chunk_identifier() buf = file.read(chunk_len) if len(buf) != chunk_len: raise FinalfusionFormatError( f'Could not read {chunk_len} bytes from file') return Metadata(toml.loads(buf.decode("utf-8")))
[docs] def write_chunk(self, file: BinaryIO): b_data = bytes(toml.dumps(self), "utf-8") _write_binary(file, "<IQ", int(self.chunk_identifier()), len(b_data)) file.write(b_data)
[docs]def load_metadata(file: Union[str, bytes, int, PathLike]) -> Metadata: """ Load a Metadata chunk from the given file. Parameters ---------- file : str, bytes, int, PathLike Finalfusion file with a metadata chunk. Returns ------- metadata : Metadata The Metadata from the file. Raises ------ ValueError If the file did not contain an Metadata chunk. """ with open(file, 'rb') as inf: chunk = find_chunk(inf, [ChunkIdentifier.Metadata]) if chunk is None: raise ValueError("File did not contain a Metadata chunk") if chunk == ChunkIdentifier.Metadata: return Metadata.read_chunk(inf) raise ValueError(f"unexpected chunk: {str(chunk)}")
__all__ = ['Metadata', 'load_metadata']