Source code for fairly.metadata

"""Metadata class module.

Metadata class is used to store metadata attributes in a standardized manner.

Usage example:

    >>> metadata = Metadata({"title": "Title", "DOI": "doi:xxx"})
    >>> metadata["authors"] = ["Doe, John"]

"""
from __future__ import annotations
from typing import Any, Dict, List, Callable
from collections.abc import MutableMapping

from .person import Person, PersonList

import re
import copy
import sys
import ruamel.yaml


[docs] class Metadata(MutableMapping): """Metadata class. Attributes: _attrs (Dict): Metadata attributes. _basis (Dict): Basis of metadata attributes. _normalize (Callable): Attribute normalization method. _serialize (Callable): Attribute serialization method. Class Attributes: REGEXP_DOI: Regular expression to validate DOI. """ REGEXP_DOI = re.compile(r"10\.\d{4,9}/[-._;()/:a-z\d]+", re.IGNORECASE) def __init__(self, normalize: Callable=None, serialize: Callable=None, **kwargs): """Initializes Metadata object. The corresponding default methods are not called if user-defined attribute value normalization and serialization methods are provided. Args: normalize: Attribute value normalization method (optional). serialize: Attribute value serialization method (optional). **kwargs: Metadata attributes. """ self._normalize = normalize if normalize else Metadata.normalize_value self._serialize = serialize if serialize else Metadata.serialize_value self._attrs = {} for key, val in kwargs.items(): if bool(val) or isinstance(val, (bool, int, float)): self._attrs[key] = self._normalize(key, val) self.rebase() def __setitem__(self, key, val): if bool(val) or isinstance(val, (bool, int, float)): self._attrs[key] = self._normalize(key, val) elif key in self._attrs: del self._attrs[key] def __getitem__(self, key): return self._attrs[key] def __delitem__(self, key): del self._attrs[key] def __iter__(self): return iter(self._attrs) def __len__(self): return len(self._attrs) def __str__(self): return str(self._attrs) def __repr__(self): return "Metadata({})".format(self._attrs)
[docs] def rebase(self) -> None: """Updates the basis of the metadata attributes.""" self._basis = copy.deepcopy(self._attrs)
@property def is_modified(self) -> bool: """Checks if metadata is modified. Returns: True is metadata is modified, False otherwise. """ return self._attrs != self._basis
[docs] @classmethod def normalize_value(cls, key: str, val) -> Any: """Normalizes metadata attribute value. Supported attributes: - doi - keywords - authors Args: key (str): Attribute key. val: Attribute value. Returns: Normalized attribute value. Raises: ValueError: If invalid attribute value. """ # Digital Object Identifier if key == "doi": if isinstance(val, str): val = val.lower() if val.startswith("doi:"): val = val[4:] elif val.startswith("http://doi.org/"): val = val[15:] elif val.startswith("https://doi.org/"): val = val[16:] if not re.fullmatch(Metadata.REGEXP_DOI, val): raise ValueError else: raise ValueError # Keywords elif key == "keywords": if isinstance(val, str): val = re.split(r"[,;\n]", val) try: val = [keyword.strip() for keyword in iter(val)] except TypeError: raise ValueError # Authors elif key == "authors": val = Person.get_persons(val) # Return normalized value return val
[docs] @classmethod def serialize_value(cls, key: str, val) -> Any: """Serializes metadata attribute value. Supported attributes: - Any attribute with a data type of `Person`. - Any attribute with a data type of `PersonList`. Args: key (str): Attribute key. val: Attribute value. Returns: Serialized attribute value. """ if isinstance(val, Person): return val.serialize() if isinstance(val, PersonList): return [person.serialize() for person in val] return copy.deepcopy(val)
[docs] def serialize(self) -> Dict: """Serializes metadata as a dictionary. Returns: Metadata dictionary. """ out = {} for key, val in self._attrs.items(): out[key] = self._serialize(key, val) return out
[docs] def autocomplete(self, overwrite: bool=False, attrs: List=None, **kwargs) -> Dict: """Completes missing metadata attributes by using the available information. Supported attributes: - Any attribute with a data type of `Person`. - Any attribute with a data type of `PersonList`. Args: overwrite (bool): Set True to overwrite existing attributes (default False). attrs (List): List of attributes to be completed (optional). **kwargs: Arguments for the specific autocomplete methods. Returns: A dictionary of attributes set by method. """ updated = {} for key, val in self._attrs.items(): if attrs and key not in attrs: continue if isinstance(val, Person): result = val.autocomplete(overwrite=overwrite, **kwargs) elif isinstance(val, PersonList): result = {} for index, person in enumerate(val): res = person.autocomplete(overwrite=overwrite, **kwargs) if res: result[key] = res else: continue if result: updated[key] = result return updated
def _remove_comments(self, var) -> None: """Removes comments from a YAML dictionary recursively. Args: var: YAML dictionary or a dictionary item, if called recursively. """ # REMARK: Based on https://stackoverflow.com/questions/60080325/how-to-delete-all-comments-in-ruamel-yaml if isinstance(var, dict): for key, val in var.items(): self._remove_comments(key) self._remove_comments(val) elif isinstance(var, list): for item in var: self._remove_comments(item) try: if isinstance(var, ruamel.yaml.scalarstring.ScalarString): attr = "comment" else: attr = ruamel.yaml.comments.Comment.attrib delattr(var, attr) except AttributeError: pass
[docs] def print(self) -> None: """Pretty prints metadata. Serializes metadata and prints as YAML without comments. """ yaml = ruamel.yaml.YAML() out = self.serialize() self._remove_comments(out) yaml.dump(out, sys.stdout)