Source code for biothings.web.handlers.base

"""
Biothings Web Handlers

biothings.web.handlers.BaseHandler

    Supports:
    - access to biothings namespace
    - monitor exceptions with Sentry

biothings.web.handlers.BaseAPIHandler

    Additionally supports:
    - JSON and YAML payload in the request body
    - request arguments standardization
    - multi-type output (json, yaml, html, msgpack)
    - standardized error response (exception -> error template)
    - analytics and usage tracking (Google Analytics and AWS)
    - default common http headers (CORS and Cache Control)

"""
import logging

import orjson
import yaml
from tornado.web import HTTPError, RequestHandler

from biothings.utils import serializer
from biothings.web.analytics.events import Event
from biothings.web.analytics.notifiers import AnalyticsMixin
from biothings.web.options import OptionError, ReqArgs

logger = logging.getLogger(__name__)


class BaseHandler(RequestHandler):
    """Tornado request handler that exposes the biothings namespace."""

    @property
    def biothings(self):
        """Return the ``biothings`` object attached to the tornado application."""
        namespace = self.application.biothings
        return namespace
class BaseAPIHandler(BaseHandler, AnalyticsMixin):
    """
    Base class for API request handlers.

    Class attributes below are the defaults that subclasses override.
    """

    # Key used by _parse_args to look up this handler's optionset in
    # self.biothings.optionsets; a falsy value disables argument parsing.
    name = "__base__"

    # Output format used by write() until prepare() replaces it with
    # the value parsed from the request arguments.
    format = "json"

    # Handler-level cache max-age. None defers to the config default
    # (DEFAULT_CACHE_MAX_AGE); see initialize().
    cache = None

    # Template for the Cache-Control header value; "{cache}" is filled
    # in by set_cache_header().
    cache_control_template = "max-age={cache}, public"

    # Request argument definitions for this handler.
    # NOTE(review): "*" presumably applies the options to every HTTP
    # method -- confirm against biothings.web.options.
    kwargs = {
        "*": {
            "format": {
                "type": str,
                "default": "json",
                "enum": ("json", "yaml", "html", "msgpack"),
            }
        }
    }
def initialize(self, cache=None):
    """
    Set the Cache-Control header and reset the per-request state.

    The cache max-age is resolved, in increasing priority, from the
    DEFAULT_CACHE_MAX_AGE config value, the class-level ``cache``
    attribute, and the ``cache`` argument.

    :param cache: cache max-age override for this handler instance;
        ``None`` defers to the class attribute or the config default.
    """
    max_age = self.biothings.config.DEFAULT_CACHE_MAX_AGE
    for override in (self.cache, cache):
        if override is not None:
            max_age = override

    # The default headers have already been set (set_default_headers)
    # by the time this runs, so the cache header is written here to
    # make a custom cache age take effect.
    self.set_cache_header(max_age)

    self.args = {}  # processed args will be available here
    self.args_query = {}  # query parameters in the URL
    self.args_form = {}  # form-data and x-www-form-urlencoded
    self.args_json = {}  # application/json type body
    self.args_yaml = {}  # application/yaml type body
    self.event = Event()
# Do not assume the data types of some of the variables
# defined above. self.args can be a dotdict after
# processing. json/yaml can be any serializable objects.
# self.event may be replaced with one of its subclasses.
def prepare(self):
    """
    Parse the request body and standardize the request arguments.

    Populates ``self.args_json`` or ``self.args_yaml`` (depending on the
    Content-Type header), ``self.args_query`` and ``self.args_form``,
    then stores the standardized arguments on ``self.args`` and the
    requested output format on ``self.format``.

    :raises HTTPError: 400, propagated from the parsing helpers when the
        body is malformed or the arguments are invalid.
    """
    content_type = self.request.headers.get("Content-Type", "")
    if content_type.startswith("application/json"):
        self.args_json = self._parse_json()
    elif content_type.startswith("application/yaml"):
        # NOTE(review): the YAML body is stored on self.args_yaml but is
        # not passed to ReqArgs below -- confirm this is intended.
        self.args_yaml = self._parse_yaml()

    # one value per key, as returned by the tornado accessors
    self.args_query = {key: self.get_query_argument(key) for key in self.request.query_arguments}
    self.args_form = {key: self.get_body_argument(key) for key in self.request.body_arguments}

    reqargs = ReqArgs(
        ReqArgs.Path(args=self.path_args, kwargs=self.path_kwargs),
        query=self.args_query,
        form=self.args_form,
        json_=self.args_json,
    )
    # standardized request arguments
    self.args = self._parse_args(reqargs)

    # _parse_args returns a plain dict when argument parsing is disabled
    # (falsy ``name``); a plain dict has no ``format`` attribute, so keep
    # the current format instead of failing with AttributeError.
    self.format = getattr(self.args, "format", self.format)
def _parse_json(self): if not self.request.body: raise HTTPError( 400, reason=( "Empty body is not a valid JSON. " "Remove the content-type header, or " "provide an empty object in the body." ), ) try: return orjson.loads(self.request.body) except orjson.JSONDecodeError: raise HTTPError(400, reason="Invalid JSON body.") def _parse_yaml(self): try: return yaml.load(self.request.body, Loader=yaml.SafeLoader) except (yaml.scanner.ScannerError, yaml.parser.ParserError): raise HTTPError(400, reason="Invalid YAML body.") def _parse_args(self, reqargs): if not self.name: # feature disabled return {} # default value optionsets = self.biothings.optionsets optionset = optionsets.get(self.name) try: # uses biothings.web.options to standardize args args = optionset.parse(self.request.method, reqargs) except OptionError as err: args = err # for logging in "finally" clause raise HTTPError(400, None, err.info) else: # set on self.args return args finally: # one log message regardless of success logger.debug("%s %s\n%s\n%s", self.request.method, self.request.uri, reqargs, args)
def write(self, chunk):
    """
    Serialize ``chunk`` according to ``self.format`` and write it out.

    The matching Content-Type header is set for the json, yaml, msgpack
    and html formats. Any other format passes the chunk through as is.
    A failure during serialization is logged as a warning and whatever
    ``chunk`` holds at that point is still handed to the parent ``write``.

    :param chunk: the response payload.
    """
    try:
        output_format = self.format
        content_type = None
        if output_format == "json":
            chunk = serializer.to_json(chunk)
            content_type = "application/json; charset=UTF-8"
        elif output_format == "yaml":
            chunk = serializer.to_yaml(chunk)
            content_type = "text/x-yaml; charset=UTF-8"
        elif output_format == "msgpack":
            chunk = serializer.to_msgpack(chunk)
            content_type = "application/x-msgpack"
        elif output_format == "html":
            chunk = self.render_string("api.html", data=serializer.to_json(chunk))
            content_type = "text/html; charset=utf-8"

        if content_type is not None:
            self.set_header("Content-Type", content_type)
    except Exception as exc:
        # this is a low-level method, used in many places,
        # error handling should happen in the upper layers,
        logger.warning(exc)

    super().write(chunk)
def get_template_path(self):
    """
    Return the directory that holds the bundled templates.

    APIs should not normally need templating; this points at the
    location of the biothings.web.templates package, where api.html
    can be found.
    """
    import biothings.web.templates

    template_dirs = biothings.web.templates.__path__
    return next(iter(template_dirs))
def on_finish(self):
    """
    Tornado lifecycle hook, called when the request is finished.

    Logs the request's event at debug level. Override to provide
    tracking features.
    """
    event = self.event
    logger.debug(event)
    super().on_finish()
def write_error(self, status_code, **kwargs):
    """
    Finish the request with a standardized error message.

    Usage from a handler::

        from tornado.web import Finish, HTTPError

        raise HTTPError(404)
        raise HTTPError(404, reason="document not found")
        raise HTTPError(404, None, {"id": "-1"}, reason="document not found")
        ->
        {
          "code": 404,
          "success": False,
          "error": "document not found",
          "id": "-1"
        }

    :param status_code: the HTTP status code of the error response.
    :param kwargs: may carry ``reason`` (the error text, defaults to
        ``self._reason``) and ``exc_info`` (an exception info tuple);
        when the first argument of the exception is a dict, it is
        merged into the message.
    """
    # "reason" is a reserved tornado keyword
    # see RequestHandler.send_error
    reason = kwargs.pop("reason", self._reason)
    assert isinstance(reason, str)
    assert "\n" not in reason

    message = {"code": status_code, "success": False, "error": reason}

    # Merge exception info, best effort: every missing piece simply
    # leaves the message as is instead of relying on a blanket
    # try/except that would also hide unrelated errors.
    exc_info = kwargs.get("exc_info")
    exception = exc_info[1] if exc_info and len(exc_info) > 1 else None
    exc_args = getattr(exception, "args", None)
    if exc_args and isinstance(exc_args[0], dict):
        message.update(exc_args[0])

    self.finish(message)
def options(self, *args, **kwargs):
    """Answer an OPTIONS request with an empty 204 response."""
    no_content = 204
    self.set_status(no_content)
    self.finish()
def set_default_headers(self):
    """Set the default CORS headers on the response."""
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "*"),
        ("Access-Control-Allow-Headers", "*"),
        ("Access-Control-Allow-Credentials", "false"),
        ("Access-Control-Max-Age", "60"),
    )
    for header_name, header_value in cors_headers:
        self.set_header(header_name, header_value)
def set_cache_header(self, cache_value):
    """
    Set the Cache-Control header from an integer max-age.

    Non-integer values are ignored and leave the headers untouched.

    See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control

    To disable caching for a handler, set cls.cache to 0, or run
    self.clear_header('Cache-Control') in an HTTP method, or set the
    cache value on the config file:
    r"/api/query/?", "biothings.web.handlers.QueryHandler", {"biothing_type": "schema", "cache": 0}),

    :param cache_value: the max-age to put in the header.
    """
    if not isinstance(cache_value, int):
        return

    header_value = self.cache_control_template.format(cache=cache_value)
    self.set_header("Cache-Control", header_value)