Source code for biothings.web.api.es.handlers.base_handler

from biothings.web.api.helper import BaseHandler, BiothingParameterTypeError
from biothings.utils.common import dotdict, is_str
import re
import logging

class BaseESRequestHandler(BaseHandler):
    ''' Parent class of all Elasticsearch-based Request handlers, subclass of `BaseHandler`_.
    Contains handling for Elasticsearch-specific query params (like ``fields``, ``size``, etc).

    Child classes declare which URL keyword arguments they accept by overriding the four
    ``*_kwargs`` class attributes below; :meth:`get_cleaned_options` then sorts the incoming
    arguments into those categories.
    '''
    # override these in child class
    # Each maps an option name to a settings dict; get_cleaned_options() reads the
    # optional 'default' key of that settings dict.
    control_kwargs = {}
    es_kwargs = {}
    esqb_kwargs = {}
    transform_kwargs = {}

    def initialize(self, web_settings):
        ''' Tornado request handler initialization.

        Initializations common to all Elasticsearch-specific request handlers go here.

        :param web_settings: settings object forwarded unchanged to the parent ``initialize``.
        '''
        super(BaseESRequestHandler, self).initialize(web_settings)

    def _return_data_and_track(self, data, ga_event_data=None, rawquery=False, status_code=200,
                               _format='json'):
        ''' Small function to return a chunk of data and send a google analytics tracking request.

        :param data: payload to return; handed to ``return_raw_query_json`` when ``rawquery``
            is true, otherwise to ``return_json``.
        :param ga_event_data: dict passed to ``ga_event_object``; ``None`` means an empty dict.
        :param rawquery: if true, ``data`` is treated as a raw query rather than a result.
        :param status_code: HTTP status code forwarded to the return helper.
        :param _format: output format forwarded to the return helper.
        '''
        # A literal ``{}`` default would be one dict shared by every call, so build a
        # fresh one per call instead.
        if ga_event_data is None:
            ga_event_data = {}
        if rawquery:
            self.return_raw_query_json(data, status_code=status_code, _format=_format)
        else:
            self.return_json(data, status_code=status_code, _format=_format)
        self.ga_track(event=self.ga_event_object(ga_event_data))
        # NOTE(review): ga_event_object_ret is not set in this class; presumably it is
        # populated by ga_event_object() in the parent handler - confirm.
        self.self_track(data=self.ga_event_object_ret)

    def return_raw_query_json(self, query, status_code=200, _format='json'):
        '''Return valid JSON if `rawquery` option is selected.

        This is necessary as queries can span multiple lines (POST).

        :param query: mapping describing the query; its ``'body'`` entry is returned, or
            ``{'GET': query.get('bid')}`` when there is no ``'body'`` key.
        :param status_code: HTTP status code forwarded to ``return_json``.
        :param _format: output format forwarded to ``return_json``.
        '''
        _ret = query.get('body', {'GET': query.get('bid')})
        # A multi-line string body is wrapped in a dict so that a JSON object is returned.
        if is_str(_ret) and '\n' in _ret:
            _ret = {'body': _ret}
        self.return_json(_ret, status_code=status_code, _format=_format)

    def _should_sanitize(self, param, kwargs):
        ''' Return True when ``param`` was supplied and is a known entry of ``self.kwarg_settings``. '''
        return (param in kwargs) and (param in self.kwarg_settings)

    def _sanitize_source_param(self, kwargs):
        ''' Normalize ``_source``: drop it when empty, turn the single value ``all`` into ``True``. '''
        if self._should_sanitize('_source', kwargs):
            source = kwargs['_source']
            if not source:
                kwargs.pop('_source')
            elif len(source) == 1 and source[0].lower() == 'all':
                kwargs['_source'] = True
        return kwargs

    def _sanitize_sort_param(self, kwargs):
        ''' Convert ``sort`` field names into order dicts; a leading ``-`` means descending. '''
        if self._should_sanitize('sort', kwargs):
            kwargs['sort'] = [
                {field[1:]: {"order": "desc"}} if field.startswith('-') else {field: {"order": "asc"}}
                for field in kwargs['sort']
            ]
        return kwargs

    def _sanitize_size_param(self, kwargs):
        ''' Cap ``size`` at ``web_settings.ES_SIZE_CAP``. '''
        if self._should_sanitize('size', kwargs):
            kwargs['size'] = min(self.web_settings.ES_SIZE_CAP, kwargs['size'])
        return kwargs

    def _sanitize_aggs_param(self, kwargs):
        ''' Expand ``aggs`` field names into terms aggregations sized by a capped ``facet_size``. '''
        if self._should_sanitize('aggs', kwargs):
            facet_settings = self.kwarg_settings['facet_size']
            # Fall back to the configured default, then cap at the configured maximum
            # (1000 when the settings define no 'max').
            facet_size = min(kwargs.get('facet_size', facet_settings['default']),
                             facet_settings.get('max', 1000))
            kwargs['facet_size'] = facet_size
            kwargs['aggs'] = {
                field: {"terms": {"field": field, "size": facet_size}}
                for field in kwargs['aggs']
            }
        return kwargs

    def _sanitize_from_param(self, kwargs):
        ''' Reject a ``from`` value above ``web_settings.ES_RESULT_WINDOW_SIZE_CAP``.

        :raises BiothingParameterTypeError: when ``from`` exceeds the cap.
        '''
        window_cap = self.web_settings.ES_RESULT_WINDOW_SIZE_CAP
        if self._should_sanitize('from', kwargs) and kwargs['from'] > window_cap:
            raise BiothingParameterTypeError(
                '"from" parameter cannot be greater than {0}'.format(window_cap))
        return kwargs

    def get_cleaned_options(self, kwargs):
        ''' Separate URL keyword arguments into their functional category.

        Incoming kwargs are separated into one of 4 categories, depending on how the
        argument controls the pipeline:

        * ``control_kwargs`` - These are arguments that control the pipeline execution
          (**raw**, **rawquery**, etc)
        * ``es_kwargs`` - These are arguments that get passed directly to the Elasticsearch
          client during query
        * ``esqb_kwargs`` - These are arguments that go to the Elasticsearch query builder
          (**fields**, **size**, etc)
        * ``transform_kwargs`` - These are arguments that go to the Elasticsearch result
          transformer (**jsonld**, **dotfield**, etc)

        An option is included when the caller supplied it or when its settings define a
        non-``None`` default. If the ``userquery`` option is set, every kwarg whose name
        matches ``web_settings.USERQUERY_KWARG_REGEX`` is also collected under
        ``esqb_kwargs['userquery_kwargs']``, keyed by
        ``web_settings.USERQUERY_KWARG_TRANSFORM(name)``.

        :param kwargs: mapping of request keyword arguments.
        :returns: a ``dotdict`` with one ``dotdict`` per category.
        '''
        options = dotdict()
        for kwarg_category in ("control_kwargs", "es_kwargs", "esqb_kwargs", "transform_kwargs"):
            category_options = options.setdefault(kwarg_category, dotdict())
            for option, settings in getattr(self, kwarg_category, {}).items():
                # .get() rather than ['default']: an option supplied by the caller must
                # not raise KeyError just because its settings define no default.
                default = settings.get('default')
                if option in kwargs or default is not None:
                    category_options.setdefault(option, kwargs.get(option, default))

        # do userquery kwargs
        if options.esqb_kwargs.userquery:
            for key, value in kwargs.items():
                if re.match(self.web_settings.USERQUERY_KWARG_REGEX, key):
                    # Created lazily so the entry only exists when something matched.
                    userquery_kwargs = options['esqb_kwargs'].setdefault('userquery_kwargs', dotdict())
                    userquery_kwargs[self.web_settings.USERQUERY_KWARG_TRANSFORM(key)] = value
        return options

    def _sanitize_params(self, kwargs):
        ''' Run the parent sanitization, then each Elasticsearch-specific sanitizer in turn. '''
        kwargs = super(BaseESRequestHandler, self)._sanitize_params(kwargs)
        kwargs = self._sanitize_source_param(kwargs)
        kwargs = self._sanitize_size_param(kwargs)
        kwargs = self._sanitize_sort_param(kwargs)
        kwargs = self._sanitize_aggs_param(kwargs)
        kwargs = self._sanitize_from_param(kwargs)
        return kwargs

    def _get_es_index(self, options):
        ''' Override to change query index for this request. '''
        return self.web_settings.ES_INDEX

    def _get_es_doc_type(self, options):
        ''' Override to change doc_type for this request. '''
        return self.web_settings.ES_DOC_TYPE

    # The hooks below are pass-through extension points: each returns its last
    # argument unchanged until a child class overrides it.

    def _pre_query_builder_GET_hook(self, options):
        ''' Override me. '''
        return options

    def _pre_query_GET_hook(self, options, query):
        ''' Override me. '''
        return query

    def _pre_transform_GET_hook(self, options, res):
        ''' Override me. '''
        return res

    def _pre_finish_GET_hook(self, options, res):
        ''' Override me. '''
        return res

    def _pre_query_builder_POST_hook(self, options):
        ''' Override me. '''
        return options

    def _pre_query_POST_hook(self, options, query):
        ''' Override me. '''
        return query

    def _pre_transform_POST_hook(self, options, res):
        ''' Override me. '''
        return res

    def _pre_finish_POST_hook(self, options, res):
        ''' Override me. '''
        return res