Source code for biothings.web.analytics.events

import hashlib

# import smtplib
import uuid
from collections import UserDict
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from ipaddress import IPv4Address, IPv6Address, ip_address
from pprint import pformat
from random import randint
from typing import Union
from urllib.parse import urlencode


class Event(UserDict):
    """
    HTTP page view event.

    Request metadata is stored as a mapping under the ``"__request__"`` key
    and is exposed through attribute access. The fields read by this class
    are: user_agent, user_ip, host, path, referer.
    """

    def __getattr__(self, name):
        """
        Expose the fields under ``"__request__"`` as attributes.

        Falsy values (e.g. ``None``) are returned as an empty string.

        Raises:
            AttributeError: for ``data`` and double-underscore names.
            KeyError: when ``"__request__"`` or the requested field is absent.
        """
        # __getattr__ only runs after normal lookup has failed. Protocol
        # probes made by copy/pickle/hasattr (``__deepcopy__``,
        # ``__getstate__``, ``__setstate__`` ...) expect AttributeError, and
        # a missing ``data`` attribute (instance created without __init__,
        # e.g. while unpickling) would otherwise recurse forever through
        # UserDict.__getitem__.
        if name == "data" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        return self["__request__"][name] or ""

    def _cid_v1(self):
        """
        Return a 31-bit integer client id derived from the user agent.

        The user agent hash is XOR-ed with a random number, so the result
        differs between calls.
        """
        # Author: Cyrus Afrasiabi
        # Reference: https://github.com/kra3/py-ga-mob
        # More at: pyga.utils.generate_hash

        hash_val = 1

        if self.user_agent:
            hash_val = 0
            for ordinal in map(ord, self.user_agent[::-1]):
                hash_val = ((hash_val << 6) & 0xFFFFFFF) + ordinal + (ordinal << 14)
                left_most_7 = hash_val & 0xFE00000
                if left_most_7 != 0:
                    hash_val ^= left_most_7 >> 21

        return (randint(0, 0x7FFFFFFF) ^ hash_val) & 0x7FFFFFFF

    def _cid_v2(self):
        """
        Generate a somewhat unique, anonymized user ID.

        The client IP (``self.user_ip``) and user agent (``self.user_agent``)
        are fed to a salted BLAKE2b hash and the 16-byte digest is shaped into
        a string that conforms to a version 4 UUID (RFC 4122). It is not
        entirely random, but looks random enough to the outside, and the same
        IP and user agent always produce the same ID.

        If ``user_ip`` cannot be parsed as an IPv4/IPv6 address, four random
        bytes are hashed in its place, so the result is then not stable.

        Returns:
            str: the UUID in its canonical string form.
        """
        # Author: Zhongchao Qian
        # More at: https://github.com/biothings/biothings.api/pull/124
        try:
            rip: Union[IPv4Address, IPv6Address] = ip_address(self.user_ip)
            ip_packed = rip.packed
        except ValueError:  # in the weird case I don't get an IP
            ip_packed = randint(0, 0xFFFFFFFF).to_bytes(4, "big")  # nosec

        h = hashlib.blake2b(digest_size=16, salt=b"biothings")
        h.update(ip_packed)
        h.update(self.user_agent.encode("utf-8", errors="replace"))
        d = bytearray(h.digest())

        # truncating hash is not that bad, fixing some bits should be okay, too
        d[6] = 0x40 | (d[6] & 0x0F)  # set version
        d[8] = 0x80 | (d[8] & 0x3F)  # set variant
        return str(uuid.UUID(bytes=bytes(d)))

    def _cid(self, version):
        """
        Return the client id produced by the requested algorithm version.

        Raises:
            ValueError: if ``version`` is neither 1 nor 2.
        """
        if version == 1:
            return self._cid_v1()
        elif version == 2:
            return self._cid_v2()
        # this is a required GA field
        raise ValueError("CID Version.")

    def to_GA_payload(self, tracking_id, cid_version=1):
        """
        Build the Universal Analytics payload for this page view.

        Args:
            tracking_id: value sent as the ``tid`` parameter.
            cid_version: client id algorithm, see ``_cid``.

        Returns:
            list: a single URL-encoded ``pageview`` hit.
        """
        # by default implements
        # a GA PageView hit-type

        # In the future, consider adding additional
        # keys as customized dimensions or metrics.
        payload = {
            "v": 1,  # protocol version
            "t": "pageview",
            "tid": tracking_id,
            "cid": self._cid(cid_version),
            "uip": self.user_ip,
            "dh": self.host,
            "dp": self.path,
        }
        # add document referer
        referer = self.referer
        if isinstance(referer, str) and len(referer) <= 2048:  # GA Limit
            payload["dr"] = referer
        # add user_agent
        if self.user_agent:
            payload["ua"] = self.user_agent
        # this also escapes payload vals
        return [urlencode(payload)]

    def to_GA4_payload(self, measurement_id, cid_version=1):
        """
        Build the Google Analytics 4 ``page_view`` event for this page view.

        ``measurement_id`` and ``cid_version`` are accepted for signature
        parity with ``to_GA_payload`` but are not used here.

        Returns:
            list: a single dict with ``name`` and ``params`` keys.
        """
        # Document about page_view event:
        # https://support.google.com/analytics/answer/9964640#pageviews&zippy=%2Cin-this-article
        # GA4 does not support [Document path as UA]
        # (https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#dp)
        # so we use location instead of host and path
        # TODO consider to use request.uri as page_location
        payload = {
            "name": "page_view",
            "params": _clean(
                {
                    "page_location": f"{self.host}{self.path}",
                    "page_title": self.path.strip("/").replace("/", "-"),
                }
            ),
        }
        # add document referer
        referer = self.referer
        # Parameter values (including item parameter values) must be 100 character or fewer.
        if isinstance(referer, str) and len(referer) <= 100:
            payload["params"]["page_referrer"] = referer
        # add user_agent, truncated to the same 100 character limit
        if self.user_agent:
            payload["params"]["user_agent"] = self.user_agent[:100]
        return [payload]

    def __str__(self):  # to facilitate logging
        return f"{type(self).__name__}({pformat(self)})"
def _clean(dict): return {k: v for k, v in dict.items() if v}
class GAEvent(Event):
    """
    Page view that can additionally carry a Google Analytics event.

    The event fields are stored on the mapping itself, for example:

        {
            "category": "video",
            "action": "play",
            "label": "sample.mpg",
            "value": "60"
        }

    An event hit is only produced when both "category" and "action" are
    set. Further events may be listed under the "__secondary__" key; they
    are rendered with the request data of this event.
    """

    def to_GA_payload(self, tracking_id, cid_version=1):
        """Return the page view hit followed by any event hits, URL-encoded."""
        hits = super().to_GA_payload(tracking_id, cid_version)

        if self.get("category") and self.get("action"):
            event_hit = {
                "v": 1,  # protocol version
                "t": "event",
                "tid": tracking_id,
                "cid": self._cid(cid_version),
                "ec": self["category"],
                "ea": self["action"],
                "el": self.get("label", ""),
                "ev": self.get("value", ""),
            }
            # empty optional fields are stripped before encoding
            hits.append(urlencode(_clean(event_hit)))

        for child in self.get("__secondary__", []):
            child["__request__"] = self["__request__"]
            child_hits = child.to_GA_payload(tracking_id, cid_version)
            # the first hit is the page view, which was
            # already generated once above, so skip it
            hits.extend(child_hits[1:])

        return hits

    def to_GA4_payload(self, measurement_id, cid_version=1):
        """Return the page_view event followed by any custom events as dicts."""
        events = super().to_GA4_payload(measurement_id, cid_version)

        if self.get("category") and self.get("action"):
            # Following this article
            # https://support.google.com/analytics/answer/11091026?hl=en&ref_topic=11091421#zippy=%2Cin-this-article
            # <action> in the Universal Analytics property maps to <event_name>
            # in Google Analytics 4, and 'event_category', 'event_label' and
            # 'value' (along with their respective values) map to parameters.
            params = _clean(
                {
                    "event_category": self["category"],
                    "event_label": self.get("label", ""),
                    "value": self.get("value", ""),
                }
            )
            events.append({"name": self["action"], "params": params})

        for child in self.get("__secondary__", []):
            child["__request__"] = self["__request__"]
            child_events = child.to_GA4_payload(measurement_id, cid_version)
            # the first entry is the page view, which was
            # already generated once above, so skip it
            events.extend(child_events[1:])

        return events
class Message(Event):
    """
    Logical document that can be sent through services.

    Processable fields:
        title, body, url, url_text, image, image_altext

    Optionally define default field values below.
    """

    DEFAULTS = {
        "title": "Notification Message",
        "url_text": "View Details",
        "image_altext": "<IMAGE>",
    }

    def __getattr__(self, attr):
        """
        Resolve the processable fields as virtual attributes.

        A field stored on the mapping wins, then the class ``DEFAULTS``,
        and finally an empty string.

        Raises:
            AttributeError: for any name that is not a processable field.
        """
        # virtual attributes
        if attr in ("title", "body", "url", "url_text", "image", "image_altext"):
            if attr in self:
                return self[attr]
            if attr in self.DEFAULTS:
                return self.DEFAULTS[attr]
            return ""
        raise AttributeError(f"{type(self).__name__!r} object has no attribute {attr!r}")

    def to_ADF(self):
        """
        Generate ADF for Atlassian Jira payload.
        Overwrite this to build differently.
        https://developer.atlassian.com/cloud/jira/platform/apis/document/playground/

        Returns:
            dict: an ADF document with one paragraph for the body (if any)
            and one paragraph holding the link (if a url is set).
        """
        adf = {"version": 1, "type": "doc", "content": []}
        if self.body:
            adf["content"].append(
                {
                    "type": "paragraph",
                    "content": [{"type": "text", "text": self.body}],
                }
            )
        if self.url:
            adf["content"].append(
                {
                    "type": "paragraph",
                    "content": [
                        {
                            "type": "text",
                            "text": self.url_text,
                            "marks": [{"type": "link", "attrs": {"href": self.url}}],
                        }
                    ],
                }
            )
        return adf

    def to_slack_payload(self):
        """
        Generate slack webhook notification payload.
        https://api.slack.com/messaging/composing/layouts

        Returns:
            dict: a payload with a single attachment whose blocks are, in
            order and each only when the field is set: header and divider
            (title), body section with optional image accessory, link section.
        """
        blocks = []
        if self.title:
            blocks.append(
                {
                    "type": "header",
                    "text": {
                        "type": "plain_text",
                        "text": ":sparkles: " + self.title,
                        "emoji": True,
                    },
                }
            )
            blocks.append({"type": "divider"})
        if self.body:
            body = {
                "type": "section",
                "text": {"type": "mrkdwn", "text": self.body},
            }
            if self.image:
                body["accessory"] = {
                    "type": "image",
                    "image_url": self.image,
                    "alt_text": self.image_altext,
                }
            blocks.append(body)
        if self.url:
            blocks.append(
                {
                    "type": "section",
                    "text": {"type": "mrkdwn", "text": f"*<{self.url}|{self.url_text}>*"},
                }
            )
        return {"attachments": [{"blocks": blocks}]}

    def to_jira_payload(self, profile):
        """
        Combine notification message with project profile to
        generate jira issue tracking ticket request payload.

        Args:
            profile: object providing ``project_id``, ``issuetype_id``,
                ``assignee_id``, ``reporter_id`` and ``label`` attributes.

        Returns:
            dict: the issue ``fields`` with the title as summary and the
            ADF document from ``to_ADF`` as description.
        """
        return {
            "fields": {
                "project": {"id": profile.project_id},
                "summary": self.title,
                "issuetype": {"id": profile.issuetype_id},
                "assignee": {"id": profile.assignee_id},
                "reporter": {"id": profile.reporter_id},
                "priority": {"id": "3"},
                "labels": [profile.label],
                "description": self.to_ADF(),
            }
        }

    def to_email_payload(self, sendfrom, sendto):
        """
        Build a MIMEMultipart message that can be sent as an email.
        https://docs.aws.amazon.com/ses/latest/DeveloperGuide/examples-send-using-smtp.html

        Args:
            sendfrom: value of the ``From`` header.
            sendto: value of the ``To`` header.

        Returns:
            MIMEMultipart: a ``multipart/alternative`` message with a plain
            text part followed by an HTML part.
        """
        # Imported here so this method does not rely on a module-level import.
        from html import escape

        # Create message container - the correct MIME type is multipart/alternative.
        msg = MIMEMultipart("alternative")
        msg["Subject"] = self.title
        # msg['From'] = email.utils.formataddr((SENDERNAME, SENDER))
        msg["From"] = sendfrom
        msg["To"] = sendto
        # Comment or delete the next line if you are not using a configuration set
        # msg.add_header('X-SES-CONFIGURATION-SET',CONFIGURATION_SET)

        # title and body are plain text (they are also sent as-is in the
        # text/plain part), so they must be escaped before being embedded in
        # markup; otherwise "<", ">" or "&" would corrupt the HTML part.
        html_title = escape(self.title)
        html_body = escape(self.body)

        # Record the MIME types of both parts - text/plain and text/html.
        part1 = MIMEText(self.body, "plain")
        part2 = MIMEText(
            f"""
            <!DOCTYPE html>
            <html>
            <head>
                <title>{html_title}</title>
            </head>
            <body>
                <p>{html_body}</p>
            </body>
            </html>""",
            "html",
        )

        # Attach parts into message container.
        # According to RFC 2046, the last part of a multipart message, in this case
        # the HTML message, is best and preferred.
        msg.attach(part1)
        msg.attach(part2)

        return msg