Source code for biothings.hub.dataplugin.assistant

import abc
import os
import urllib.parse
from pathlib import Path
from typing import Optional, Union

import requests

from biothings import config as btconfig
from biothings.hub.dataplugin.loaders.loader import AdvancedPluginLoader, ManifestBasedPluginLoader
from biothings.hub.dataplugin.plugins import GitDataPlugin, ManualDataPlugin
from biothings.utils.common import (
    get_plugin_name_from_local_manifest,
    get_plugin_name_from_remote_manifest,
    parse_folder_name_from_url,
)
from biothings.utils.hub_db import get_data_plugin
from biothings.utils.loggers import get_logger


class AssistantException(Exception):
    """Error type defined for the data plugin assistants of this module."""
class BaseAssistant(abc.ABC):
    """
    Base class for assistants: objects that take a plugin URL and register
    the matching data plugin class with the hub.

    Subclasses set ``plugin_type``, implement ``plugin_name`` and
    ``can_handle``, and provide the ``get_classdef()`` method that
    ``handle()`` relies on.
    """

    plugin_type = None  # to be defined in subclass
    data_plugin_manager = None  # set by assistant manager
    dumper_manager = None  # set by assistant manager
    uploader_manager = None  # set by assistant manager
    keylookup = None  # set by assistant manager
    # known plugin loaders
    loaders = {
        "manifest": ManifestBasedPluginLoader,
        "advanced": AdvancedPluginLoader,
    }

    def __init__(self, url: str, plugin_name: Optional[str] = None, src_folder: Optional[Union[str, Path]] = None):
        """
        Store the plugin location and set up logging.

        :param url: URL pointing at the plugin
        :param plugin_name: optional explicit plugin name; when omitted the
            ``plugin_name`` property derives it
        :param src_folder: optional path to the folder holding the plugin
            source code
        """
        self.url = url
        self._plugin_name = plugin_name
        self._src_folder = src_folder
        self._loader = None
        self.logfile = None
        self.logger = None
        self.setup_log()

    def setup_log(self):
        """
        Set up the logger instance, storing it in ``self.logger`` and its
        log file in ``self.logfile``
        """
        self.logger, self.logfile = get_logger("assistant_%s" % self.__class__.plugin_type)

    def register_loader(self):
        """
        Record (upsert) the type of the selected loader in the data plugin
        collection, under the document identified by the plugin name
        """
        dp = get_data_plugin()
        dp.update(
            {"_id": self.plugin_name},
            {"$set": {"plugin.loader": self.loader.loader_type}},
            upsert=True,
        )

    @property
    def loader(self):
        """
        Return loader object able to interpret plugin's folder content.

        The result is cached once a loader is found; ``None`` is returned
        when none of the known loaders can load the plugin.
        """
        if not self._loader:
            # iterate over known loaders, the first one which can interpret plugin content is kept
            for klass in self.loaders.values():
                # propagate managers (set on the loader class itself, not on the instance)
                klass.dumper_manager = self.dumper_manager
                klass.uploader_manager = self.uploader_manager
                klass.data_plugin_manager = self.data_plugin_manager
                klass.keylookup = self.keylookup
                loader = klass(self.plugin_name)
                if loader.can_load_plugin():
                    # must be assigned before register_loader(), which reads self.loader
                    self._loader = loader
                    self.logger.debug(
                        'For plugin "%s", selecting loader class "%s"',
                        self.plugin_name,
                        self._loader.__class__.__name__,
                    )
                    self.register_loader()
                    break
                self.logger.debug('Loader %s cannot load plugin "%s"', loader, self.plugin_name)
        return self._loader

    def handle(self):
        """
        Access self.url and do whatever is necessary to bring code to life within the hub...
        (hint: that may involve creating a dumper on-the-fly and register that dumper to
        a manager...)

        :raises AssertionError: if ``data_plugin_manager`` has not been set on the class
        """
        # Raised explicitly (instead of using an ``assert`` statement) so the check
        # is still performed when Python runs with -O.
        if not self.__class__.data_plugin_manager:
            raise AssertionError("Please set data_plugin_manager attribute")
        klass = self.get_classdef()
        self.__class__.data_plugin_manager.register_classes([klass])

    @property
    @abc.abstractmethod
    def plugin_name(self) -> str:
        """
        Return plugin name, parsed from self.url and set self._src_folder as
        path to folder containing dataplugin source code
        """

    @abc.abstractmethod
    def can_handle(self) -> bool:
        """
        Return true if assistant can handle the code
        """
class GithubAssistant(BaseAssistant):
    """
    Assistant for plugins referenced by a URL served by github.com; it
    generates a ``GitDataPlugin`` subclass configured for that repository.
    """

    plugin_type = "github"

    # Seconds to wait for the HEAD request issued by can_handle(); without a
    # timeout an unresponsive host would block the call indefinitely.
    HEAD_REQUEST_TIMEOUT = 30

    @property
    def plugin_name(self):
        """
        Return the plugin name, resolving and caching it on first access.

        Resolution also sets ``self._src_folder`` to the plugin folder located
        under ``btconfig.DATA_PLUGIN_FOLDER``.
        """
        if not self._plugin_name:
            folder_name = parse_folder_name_from_url(self.url)
            src_folder = os.path.join(btconfig.DATA_PLUGIN_FOLDER, folder_name)
            self._src_folder = src_folder
            # Try to load the plugin name from the local manifest first: if it exists, we are
            # working with a plugin that has already been cloned.
            # If that yields nothing, the plugin has not been cloned locally yet, so we try to
            # fetch its name from the remote manifest.
            # Otherwise we use the folder name as the fallback.
            plugin_name = get_plugin_name_from_local_manifest(src_folder)
            if not plugin_name:
                plugin_name = get_plugin_name_from_remote_manifest(self.url)
            if not plugin_name:
                plugin_name = folder_name
            self._plugin_name = plugin_name
        return self._plugin_name

    def can_handle(self) -> bool:
        """
        Return True if a HEAD request on ``self.url`` reports a ``Server``
        header equal to "github.com" (case-insensitive).

        Any error raised while performing the request is logged and results
        in False.
        """
        # analyze headers to guess type of required assistant
        try:
            headers = requests.head(self.url, timeout=self.HEAD_REQUEST_TIMEOUT).headers
            # the Server header may be absent: treat that as "not GitHub"
            # rather than failing on None.lower()
            return (headers.get("server") or "").lower() == "github.com"
        except Exception as gen_exc:
            self.logger.exception(gen_exc)
            self.logger.error("%s plugin can't handle URL '%s'", self.plugin_type, self.url)
            return False

    def get_classdef(self):
        """
        Dynamically generate and return a ``GitDataPlugin`` subclass carrying
        the plugin name, repository URL and source folder as class attributes
        """
        # generate class dynamically and register
        confdict = {
            "SRC_NAME": self.plugin_name,
            "GIT_REPO_URL": self.url,
            "SRC_ROOT_FOLDER": self._src_folder,
        }
        # TODO: store confdict in hubconf collection
        k = type("AssistedGitDataPlugin_%s" % self.plugin_name, (GitDataPlugin,), confdict)
        return k
class LocalAssistant(BaseAssistant):
    """
    Assistant for plugins referenced with a ``local://<pluginname>`` URL; it
    generates a ``ManualDataPlugin`` subclass for the matching folder under
    ``btconfig.DATA_PLUGIN_FOLDER``.
    """

    plugin_type = "local"

    @property
    def plugin_name(self):
        """
        We attempt to derive the plugin name from the url as we expect the URL
        (for local plugins) to follow the structure local://<pluginname>,
        so the plugin folder name ends up in the network location part.

        (we leverage urlsplit over urlparse due to lack of need for parameter parsing)
        https://docs.python.org/3/library/urllib.parse.html#structured-parse-results

        If we discover a sub-directory we raise an error, as sub-directories
        are not supported at the moment. This is detected by checking the
        `path` value of the SplitResult:

        url -> local://plugin-name
        Supported:
        > SplitResult(scheme='local', netloc='plugin-name', path='', query='', fragment='')

        url -> local://sub-directory/plugin-name
        Unsupported:
        > SplitResult(scheme='local', netloc='sub-directory', path='/plugin-name', query='', fragment='')

        On first resolution ``self._src_folder`` is also set to the plugin
        folder under ``btconfig.DATA_PLUGIN_FOLDER``.

        :raises AssertionError: if the URL references a sub-directory
        """
        if not self._plugin_name:
            split = urllib.parse.urlsplit(self.url)
            # format local://pluginname so it's in hostname.
            # if path is set, it means format is local://subdir/pluginname
            # and we don't support that for import reason (we would need to
            # add .../plugins/subdir to sys.path, not impossible but might have side effects
            # so for now we stay on the safe side)
            # Raised explicitly (instead of using an ``assert`` statement) so this
            # input validation is still performed when Python runs with -O.
            if split.path:
                raise AssertionError(
                    "It seems URL '%s' references a sub-directory (%s)," % (self.url, split.hostname)
                    + " with plugin name '%s', sub-directories are not supported (yet)" % split.path.strip("/")
                )
            # don't use hostname here because it's lowercased, netloc isn't
            # (and we're matching directory names on the filesystem, it's case-sensitive)
            src_folder_name = os.path.basename(split.netloc)
            src_folder = os.path.join(btconfig.DATA_PLUGIN_FOLDER, src_folder_name)
            try:
                self._plugin_name = get_plugin_name_from_local_manifest(src_folder) or src_folder_name
            except Exception as ex:
                # best effort: fall back to the folder name if the manifest lookup fails
                self.logger.exception(ex)
                self._plugin_name = src_folder_name
            self._src_folder = src_folder
        return self._plugin_name

    def can_handle(self) -> bool:
        """
        Return True if ``self.url`` starts with the "local://" scheme prefix
        """
        return self.url.startswith(self.__class__.plugin_type + "://")

    def get_classdef(self):
        """
        Dynamically generate and return a ``ManualDataPlugin`` subclass carrying
        the plugin name and source folder as class attributes
        """
        # generate class dynamically and register
        confdict = {"SRC_NAME": self.plugin_name, "SRC_ROOT_FOLDER": self._src_folder}
        k = type("AssistedManualDataPlugin_%s" % self.plugin_name, (ManualDataPlugin,), confdict)
        return k