Source code for fairly.person

"""Person class module.

Person class is used to store person (e.g. author) information in a standardized
manner.

Usage example:

    >>> person = Person("Doe, John")
    >>> person = Person(fullname="Doe, John", orcid_id="xxx")
    >>> person.institution = "fairly Community"

"""
from __future__ import annotations
from typing import List, Dict
from collections.abc import Iterable, MutableMapping

import fairly

import re
import requests
import copy

class Person(MutableMapping):
    """Class to handle person information, e.g. for authors, contributors, etc.

    Attributes are kept in the instance dictionary, so they are available
    both as mapping items (``person["name"]``) and as object attributes
    (``person.name``). Values set through the constructor or through item
    assignment are stored only if they are truthy or boolean/numeric; other
    empty values (``None``, ``""``, ``[]``, ...) are dropped.

    Class Attributes:
        REGEXP_ORCID_ID: Regular expression to validate ORCID identifier.
        REGEXP_EMAIL: Regular expression to validate e-mail address.
    """

    # TODO: Check the checksum digit
    # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
    REGEXP_ORCID_ID = re.compile(r"(\d{4}-){3}\d{3}(\d|X)")
    REGEXP_EMAIL = re.compile(r"[\w\.+-]+@([\w-]+\.)+[\w-]{2,}")

    def __init__(self, person: str | None = None, **kwargs):
        """Initializes Person object.

        Full name is obtained from name and surname, if required.
        Name and surname are obtained from full name, if required
        (see `parse()` method for details).

        Standard attributes:
            name (string): Name of the person.
            surname (string): Surname of the person.
            fullname (string): Full name of the person.
            email (string): E-mail address of the person.
            institution (string): Institution of the person.
            orcid_id (string): ORCID identifier of the person.

        Args:
            person: Person identifier.
            **kwargs: Person attributes.
        """
        attrs = Person.parse(person) if person else {}

        # Attributes parsed from an explicit full name take precedence over
        # the ones parsed from the identifier; explicit keyword arguments
        # take precedence over both.
        if kwargs.get("fullname"):
            attrs.update(Person.parse(kwargs["fullname"]))
        attrs.update(kwargs)

        if not attrs.get("fullname") and attrs.get("name") and attrs.get("surname"):
            attrs["fullname"] = (attrs["name"] + " " + attrs["surname"]).strip()

        # Item assignment applies the "skip empty values" rule.
        for key, val in attrs.items():
            self[key] = val

    @staticmethod
    def _has_value(val) -> bool:
        """Returns True if the value is worth storing (truthy, boolean or numeric)."""
        return bool(val) or isinstance(val, (bool, int, float))

    @staticmethod
    def _first(value):
        """Returns the first item of a list or tuple, the value itself otherwise."""
        if isinstance(value, (list, tuple)):
            return value[0] if value else None
        return value

    def __setitem__(self, key, val):
        # Assigning an empty value removes the attribute instead of storing it.
        if Person._has_value(val):
            self.__dict__[key] = val
        elif key in self.__dict__:
            del self.__dict__[key]

    def __getitem__(self, key):
        return self.__dict__[key]

    def __delitem__(self, key):
        del self.__dict__[key]

    def __iter__(self):
        return iter(self.__dict__)

    def __len__(self):
        return len(self.__dict__)

    def __str__(self):
        return str(self.__dict__)

    def __repr__(self):
        return f"Person({self.__dict__})"

    @classmethod
    def parse(cls, person: str) -> Dict:
        """Parses person identifier and extracts available person attributes.

        The following attributes might be extracted:
            - name
            - surname
            - fullname
            - orcid_id
            - email

        An identifier that is an ORCID identifier or an e-mail address
        yields only that attribute. Any other identifier is used as the
        full name; if it has the form "surname, name", surname and name are
        extracted as well.

        Args:
            person: Person identifier (e.g. fullname)

        Returns:
            Dictionary of person attributes.
        """
        person = person.strip()

        if cls.REGEXP_ORCID_ID.fullmatch(person):
            return {"orcid_id": person}

        if cls.REGEXP_EMAIL.fullmatch(person):
            return {"email": person}

        attrs = {"fullname": person}
        parts = [part.strip() for part in person.split(",")]
        if len(parts) == 2:
            attrs["surname"], attrs["name"] = parts

        return attrs

    @staticmethod
    def get_orcid_token(client_id: str | None = None, client_secret: str | None = None) -> str:
        """Retrieves ORCID access token by using ORCID client id and secret.

        ORCID access token is required to retrieve person information by
        using an ORCID ID.

        If not specified, `client_id` and `client_secret` are read from
        fairly configuration.

        Args:
            client_id: ORCID client id.
            client_secret: ORCID client secret.

        Returns:
            ORCID access token.

        Raises:
            ValueError("No client id"): If client id is not available.
            ValueError("No client secret"): If client secret is not available.
            ValueError("Invalid response"): If access token is not retrieved.
            requests.HTTPError: If the token request returns an error status.
        """
        # Configuration is consulted only when a credential is missing.
        if not client_id or not client_secret:
            config = fairly.get_config("fairly")
            if not client_id:
                client_id = config.get("orcid_client_id")
            if not client_secret:
                client_secret = config.get("orcid_client_secret")

        if not client_id:
            raise ValueError("No client id")
        if not client_secret:
            raise ValueError("No client secret")

        # Passing a dictionary lets requests form-encode the values, so
        # credentials with reserved characters are transmitted correctly.
        response = requests.post(
            "https://orcid.org/oauth/token",
            data={
                "client_id": client_id,
                "client_secret": client_secret,
                "grant_type": "client_credentials",
                "scope": "/read-public",
            },
            headers={
                "Accept": "application/json",
                "Content-Type": "application/x-www-form-urlencoded",
            },
        )
        response.raise_for_status()

        payload = response.json()
        if "access_token" not in payload:
            raise ValueError("Invalid response")

        return payload["access_token"]

    @staticmethod
    def from_orcid_id(orcid_id: str, token: str | None = None) -> "Person":
        """Retrieves person information from ORCID identifier.

        If not specified, `token` is read from fairly configuration. If it is
        also not available, it is retrieved by using `get_orcid_token()`
        method.

        Args:
            orcid_id: ORCID identifier.
            token: ORCID access token.

        Returns:
            Person object created from the first search result.

        Raises:
            ValueError("No access token"): If access token is not available.
            ValueError("Invalid ORCID Id"): If no record matches the ORCID
                identifier.
            requests.HTTPError: If the search request returns an error status.
        """
        # Get default access token if required
        if not token:
            token = fairly.get_config("fairly").get("orcid_token")
        if not token:
            try:
                token = Person.get_orcid_token()
            except Exception as err:
                raise ValueError("No access token") from err

        # Send request; query parameters are passed separately so that they
        # are URL-encoded by requests.
        fields = ",".join([
            "orcid",
            "email",
            "given-names",
            "family-name",
            "current-institution-affiliation-name",
        ])
        response = requests.get(
            "https://pub.orcid.org/v3.0/expanded-search/",
            params={"q": f"orcid:{orcid_id}", "fl": fields},
            headers={
                "Accept": "application/vnd.orcid+json",
                "Content-type": "application/vnd.orcid+json",
                "Authorization": f"Bearer {token}",
            },
        )
        response.raise_for_status()

        results = response.json().get("expanded-result")

        # Raise exception if no results
        if not results:
            raise ValueError("Invalid ORCID Id")

        # Return the first person matching the ORCID identifier.
        # List-valued fields are reduced to their first item, so that the
        # resulting attributes are plain values.
        result = results[0]
        return Person(
            orcid_id=result.get("orcid-id"),
            name=result.get("given-names"),
            surname=result.get("family-names"),
            email=Person._first(result.get("email")),
            institution=Person._first(result.get("institution-name")),
        )

    @staticmethod
    def get_persons(people) -> List["Person"]:
        """Returns standard person list from the people argument.

        A string, a single person (Person object or dictionary) or an
        iterable of persons are accepted as input. If input is a string, it
        is split using semicolon and line feed as separators.

        For the items of the iterable, the following are performed:
            - If it is a Person object, a copy is created.
            - If it is a string, it is parsed to a dictionary using parse().
            - If it is a dictionary, Person object is created.

        Empty items, including whitespace-only strings, are skipped.

        Args:
            people: People argument.

        Returns:
            List of person objects.

        Raises:
            ValueError: If people argument is invalid.
        """
        if not people:
            return PersonList()

        if isinstance(people, str):
            people = re.split(r"[;\n]", people)
        elif isinstance(people, (Person, dict)):
            # A single person is a mapping; iterating it would yield its
            # attribute names rather than persons.
            people = [people]
        elif not isinstance(people, Iterable):
            raise ValueError("Invalid people argument")

        persons = PersonList()
        for item in people:
            if isinstance(item, str):
                # Fragments such as the trailing part of "Doe, John; " must
                # not produce empty persons.
                item = item.strip()
            if not item:
                continue

            if isinstance(item, Person):
                person = copy.copy(item)
            elif isinstance(item, str):
                person = Person(**Person.parse(item))
            elif isinstance(item, dict):
                person = Person(**item)
            else:
                raise ValueError(f"Invalid person: {item!r}")

            persons.append(person)

        return persons

    def autocomplete(self, overwrite: bool = False, orcid_token: str | None = None) -> Dict:
        """Completes missing information by using the ORCID identifier.

        Nothing is done if the person has no ORCID identifier.

        Args:
            overwrite: If True existing attributes are overwritten.
            orcid_token: ORCID access token, passed to `from_orcid_id()`.

        Returns:
            A dictionary of attributes set by method.
        """
        if not self.get("orcid_id"):
            return {}

        person = Person.from_orcid_id(self["orcid_id"], token=orcid_token)

        updated = {}
        for key, val in person.__dict__.items():
            if overwrite or key not in self.__dict__:
                self.__dict__[key] = updated[key] = val

        return updated

    def serialize(self) -> Dict:
        """Serializes person as a dictionary.

        Returns:
            Person dictionary (a shallow copy of the stored attributes).
        """
        return self.__dict__.copy()
class PersonList(list):
    """List that holds Person objects.

    Items added through the constructor, item or slice assignment,
    `insert()`, `append()`, `extend()` or ``+=`` are converted to Person
    objects if they are given as strings or dictionaries.
    """

    def _person(self, item):
        """Returns the item as a Person object.

        Args:
            item: Person object, person identifier string, or dictionary of
                person attributes.

        Returns:
            The item itself if it is a Person object, a new Person object
            otherwise.

        Raises:
            ValueError: If the item cannot be converted to a Person object.
        """
        if isinstance(item, Person):
            return item
        if isinstance(item, str):
            return Person(item)
        if isinstance(item, dict):
            return Person(**item)
        raise ValueError(f"Invalid person: {item!r}")

    def __init__(self, iterable=None):
        """Initializes the list, converting the items of the iterable, if any."""
        if iterable:
            super().__init__(self._person(item) for item in iterable)

    def __setitem__(self, index, item):
        # Slice assignment receives an iterable of items rather than a
        # single item, so each of them is converted separately.
        if isinstance(index, slice):
            super().__setitem__(index, [self._person(entry) for entry in item])
        else:
            super().__setitem__(index, self._person(item))

    def __iadd__(self, other):
        # list.__iadd__ does not call extend(), so route ``+=`` through it
        # to keep the conversion.
        self.extend(other)
        return self

    def insert(self, index, item):
        """Inserts the item, converted to a Person object, before the index."""
        super().insert(index, self._person(item))

    def append(self, item):
        """Appends the item, converted to a Person object, to the list."""
        super().append(self._person(item))

    def extend(self, other):
        """Extends the list with the items of other, converted to Person objects."""
        if isinstance(other, type(self)):
            # Items of a person list are already Person objects.
            super().extend(other)
        else:
            super().extend(self._person(item) for item in other)