Source code for fairly.file.remote

"""RemoteFile class module.

RemoteFile class is used to perform operations on remote files.
"""
from typing import Dict

from . import File

import requests
import mimetypes
import os.path
from urllib.parse import urlparse
import logging


[docs] class RemoteFile(File): """RemoteFile class. Attributes: _url (str): URL address of the remote file. _id (str): Identifier of the remote file. _headers (Dict): HTTP headers of the remote file. """ def __init__(self, url: str, id: str=None, path: str=None, size: int=None, type: str=None, md5: str=None): """Initializes RemoteFile object. Args: url (str): URL address of the remote file. id (str): Identifier of the remote file (optional). path (str): Path of the remote file (optional). size (int): Size of the remote file in bytes (optional). type (str): Content type of the remote file (optional). md5 (str): MD5 checksum of the remote file (optional). """ self._url = url self._id = id self._headers = None self._path = path self._name = os.path.basename(path) if path else None self._size = size self._type = type self._md5 = md5 @property def url(self) -> str: """URL address of the remote file.""" return self._url @property def id(self) -> str: """Identifier of the remote file.""" return self._id @property def headers(self) -> Dict: """HTTP headers of the remote file.""" if self._headers is None: logging.info("Fetching HTTP headers from %s.", self.url) # TODO: Add error handling response = requests.head(self.url, allow_redirects=True) response.raise_for_status() logging.debug("Headers %s", response.headers) self._headers = response.headers return self._headers @property def name(self) -> str: """Name of the remote file.""" if self._name is None: parts = urlparse(self.url) self._name = os.path.basename(parts.path) return self._name @property def size(self) -> int: """Size of the remote file in bytes. Content-Length header is used to get the size. It is only calculated once and cached for subsequent calls. """ if self._size is None: self._size = self.headers.get("content-length") return self._size @property def type(self) -> str: """Content type of the remote file. Content type is guessed by using the URL address. If it fails, then Content-Type header is used to get the content type. It is only calculated once and cached for subsequent calls. """ if self._type is None: self._type, _ = mimetypes.guess_type(self.url) if self._type is None: self._type = self.headers.get("content-type") return self._type @property def md5(self) -> str: """MD5 checksum of the remote file. Content-MD5 header is used to get the MD5 checksum. It is only calculated once and cached for subsequent calls. """ if self._md5 is None: self._md5 = self.headers.get("content-md5") return self._md5
[docs] def match(self, val: str) -> bool: """Checks if remote file matches the specified file identifier. File URL address and id are compared with the specified identifier in addition to the properties checked by File.match(). Args: val (str): File identifier. Returns: True if file matches the specified file identifier, False otherwise. """ return True if self.url == val or self.id == val else super().match(val)