# Source code for owmeta_core.datasource_loader

# -*- coding: utf-8 -*-
'''
DataSourceLoaders take a DataSource and retrieve the primary data (e.g., CSV files,
electrode recordings) from some location (e.g., a file store, via a bittorrent tracker).

Each loader can treat the base_directory given as its own namespace and place directories
in there however it wants.
'''
from .utils import FCN
from os.path import exists, isdir, join as pth_join, isabs, realpath


class DataSourceDirLoader(object):
    '''
    Loads data files for a DataSource

    The loader is expected to organize files for each data source within the given base
    directory.

    .. automethod:: __call__
    '''

    def __init__(self, base_directory=None, directory_key=None):
        '''
        Parameters
        ----------
        base_directory : str, optional
            Directory that paths returned by `load` must fall under. Stored in
            `os.path.realpath`-resolved form (see the `base_directory` property)
        directory_key : str, optional
            Stored as `directory_key`. When falsy, the result of ``FCN(type(self))`` is
            used instead
        '''
        self.base_directory = base_directory
        self.directory_key = directory_key or FCN(type(self))

    def __call__(self, data_source):
        '''
        Load the data source. Calls `load`

        Parameters
        ----------
        data_source : .DataSource
            The data source to load files for

        Returns
        -------
        str
            A path to the loaded resource

        Raises
        ------
        LoadFailed
            If `load`:

            * throws an exception
            * doesn't return anything
            * returns a path that isn't under `base_directory`
            * returns a path that doesn't exist
            * returns a path that isn't a directory

            or if no `base_directory` has been set on this loader
        '''
        try:
            loaded = self.load(data_source)
        except LoadFailed:
            # The loader already explained the failure: don't bury its message under a generic one
            raise
        except Exception as err:
            # Give callers one exception type to handle, but keep the original as the cause
            raise LoadFailed(data_source, self, 'Loader erred') from err

        # NOTE: the value from `load` is used as returned; no `str()` coercion is applied here
        if not loaded:
            raise LoadFailed(data_source, self, 'Loader returned an empty string')

        base = self.base_directory
        if base is None:
            # Without a base directory there is nothing to resolve or validate the path against
            raise LoadFailed(data_source, self, 'Loader has no base directory')

        # N.B.: This logic is NOT intended as a security measure against directory traversal: it is only to make the
        # interface both flexible and unambiguous for implementers

        # Relative paths are allowed
        if not isabs(loaded):
            loaded = pth_join(base, loaded)

        # Make sure the loader isn't doing some nonsense with symlinks or non-portable paths
        rpath = realpath(loaded)

        # Compare against the base *plus a trailing separator* so that a sibling such as "<base>2" is not mistaken
        # for a descendant of "<base>". Joining with '' appends the separator (and is a no-op for a root directory)
        if rpath != base and not rpath.startswith(pth_join(base, '')):
            msg = 'Loader returned a file path, "{}",' \
                  ' outside of the base directory, "{}"'.format(rpath, base)
            raise LoadFailed(data_source, self, msg)

        if not exists(rpath):
            msg = 'Loader returned a non-existant file {}'.format(rpath)
            raise LoadFailed(data_source, self, msg)

        if not isdir(rpath):
            msg = 'Loader did not return a directory, but returned {}'.format(rpath)
            raise LoadFailed(data_source, self, msg)

        return rpath

    @property
    def base_directory(self):
        '''
        The `os.path.realpath`-resolved base directory, or `None` if none has been set
        '''
        try:
            return self.__base_directory
        except AttributeError:
            # Never assigned (e.g., a sub-class `__init__` that didn't call ours)
            return None

    @base_directory.setter
    def base_directory(self, base_directory):
        # Resolve once on assignment so later prefix comparisons are against a canonical path
        self.__base_directory = realpath(base_directory) if base_directory else None

    def load(self, data_source):
        '''
        Loads the files for the data source

        Parameters
        ----------
        data_source : .DataSource
            The data source to load files for

        Returns
        -------
        str
            A path to the loaded resource
        '''
        raise NotImplementedError()

    def can_load(self, data_source):
        '''
        Returns true if the `.DataSource` can be loaded by this loader

        This base implementation always returns `False`.

        Parameters
        ----------
        data_source : .DataSource
            The data source to load files for
        '''
        return False

    def __str__(self):
        return FCN(type(self)) + '()'
class LoadFailed(Exception):
    '''
    Thrown when loading fails for a .DataSourceDirLoader
    '''

    def __init__(self, data_source, loader, *args):
        '''
        Parameters
        ----------
        data_source : .DataSource
            The `.DataSource` on which loading was attempted
        loader : DataSourceDirLoader
            The loader that attempted to load the data source
        args[0] : str, optional
            Message explaining why loading failed. May be omitted or empty, in which case
            the exception message has no explanation suffix
        args[1:]
            Passed on to `Exception`
        '''
        # The explanation is optional: don't fail with IndexError while reporting a failure
        msg = args[0] if args else None
        # Format rather than concatenate so a non-string explanation (e.g., another exception) is accepted
        suffix = ': {}'.format(msg) if msg else ''
        mmsg = 'Failed to load {} data with loader {}{}'.format(data_source, loader, suffix)
        super(LoadFailed, self).__init__(mmsg, *args[1:])