Source code for biothings.utils.jsonschema

import json

import bson

from biothings.utils.common import is_str


def generate_json_schema(dmap):
    """Build a JSON-schema-like dict from a nested type map.

    The keys of ``dmap`` drive the result:

    * a string key is a field name: the resulting schema gets type
      ``"object"`` and the sub-schema generated from ``dmap[key]`` is stored
      under ``properties[key]``;
    * the ``list`` type: the resulting schema gets type ``"array"`` and the
      sub-schema generated from ``dmap[list]`` is stored under ``items``;
    * any other type is looked up in the scalar map below and contributes
      its JSON type name;
    * ``None`` sets the schema to ``{"type": None}``.

    When several kinds of keys are mixed at the same level, ``type`` becomes
    a list of distinct type names, sorted by their string form so the output
    is deterministic.

    Args:
        dmap: the type map. Anything that is not a ``dict`` yields ``{}``.

    Returns:
        dict: the generated schema (empty for a non-dict or empty ``dmap``).

    Raises:
        Exception: if a string key follows a scalar type at the same level,
            or if a key is neither a string, a type nor ``None``.
        AssertionError: if a string key is met while ``type`` is a list
            other than ``{"object", "array"}``.
        KeyError: if a type key other than ``list`` is not in the scalar map.
    """
    scalarmap = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        bson.int64.Int64: "number",
        None: "null",
    }

    def merge_type(typ1, typ2):
        """Return the sorted, de-duplicated union of two types or type lists."""
        merged = list(typ1) if isinstance(typ1, list) else [typ1]
        if isinstance(typ2, list):
            # the original built ``[typ1] + typ1`` here, losing typ2 (and
            # raising TypeError when typ1 is a string)
            merged.extend(typ2)
        else:
            merged.append(typ2)
        # key=str keeps the ordering stable even if None sits next to strings
        return sorted(set(merged), key=str)

    schema = {}
    if not isinstance(dmap, dict):
        return schema

    for k in dmap:
        if is_str(k):
            esch = generate_json_schema(dmap[k])
            if not schema:
                schema = {"type": "object", "properties": {}}
            elif schema["type"] == "object":
                # we just complete 'properties', key already defined previously
                pass
            elif schema["type"] == "array":
                if not schema.get("properties"):
                    schema["properties"] = {}
                schema["type"] = merge_type(schema["type"], "object")
            elif isinstance(schema["type"], list):
                # explicit raise so the check survives ``python -O``
                if set(schema["type"]) != {"object", "array"}:
                    raise AssertionError(
                        "Unexpected mixed schema types: %s" % schema["type"]
                    )
            else:
                raise Exception("Previous schema type not expected: %s" % schema["type"])
            schema["properties"][k] = esch
        elif isinstance(k, type):
            if k == list:
                if schema:
                    # already defined for this key, mixed types
                    schema["type"] = merge_type(schema["type"], "array")
                else:
                    schema = {"type": "array"}
                # recurse only once: the original computed this sub-schema
                # twice, doubling the work at every nesting level
                schema["items"] = generate_json_schema(dmap[k])
            elif schema:
                schema["type"] = merge_type(schema["type"], scalarmap[k])
            else:
                schema = {"type": scalarmap[k]}
        elif k is None:
            # NOTE(review): this replaces whatever was collected so far and
            # stores None rather than scalarmap[None] ("null"); kept as is
            # since existing expected schemas may rely on it -- confirm.
            schema = {"type": None}
        else:
            raise Exception("no not here, k: %s" % k)

    return schema
def test():
    """Run ad-hoc checks of ``generate_json_schema``.

    Small in-memory documents are inspected with ``inspect_docs`` and the
    generated schema is compared with a hand-written one. Three schema/map
    pairs are then loaded from ``biothings/tests/*.json`` and compared the
    same way. Those paths are relative, so this must be run from the app
    folder with ``biothings`` available as a symlink.

    Side effect: sets ``biothings.utils.jsondiff.UNORDERED_LIST`` to ``True``
    so that the order of entries in ``type`` lists does not matter.

    Raises:
        AssertionError: if a generated schema differs from the expected one.
    """
    # TODO: Move these test to tests folder, or maybe already moved over? Chunlei
    # can't use assert directly, as we can't ensure the order of types (for instance)
    import biothings.utils.jsondiff
    from biothings.utils.inspect import inspect_docs, typify_inspect_doc

    biothings.utils.jsondiff.UNORDERED_LIST = True
    jsondiff = biothings.utils.jsondiff.make

    def check_docs(docs, expected):
        """Inspect ``docs``, generate the schema and compare with ``expected``."""
        m = inspect_docs(docs, mode="type")["type"]
        gs = generate_json_schema(m)
        assert jsondiff(gs, expected) == [], "%s !=\n%s" % (gs, expected)

    def load_json(path):
        """Load a JSON file, closing the handle once parsed."""
        with open(path) as handle:
            return json.load(handle)

    def check_files(schema_path, map_path):
        """Compare a stored schema with the one generated from a stored map."""
        expected = load_json(schema_path)
        type_map = typify_inspect_doc(load_json(map_path))
        schema = generate_json_schema(type_map)
        assert jsondiff(expected, schema) == []

    # object
    td1 = {"i": {"a": 456}}
    s1 = {
        "properties": {
            "i": {
                "properties": {"a": {"type": "integer"}},
                "type": "object",
            }
        },
        "type": "object",
    }
    check_docs([td1], s1)

    td5 = {"i": [1, 2, 3]}
    s5 = {
        "properties": {
            "i": {
                "items": {"type": "integer"},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td5], s5)

    # array of object
    td2 = {"i": [{"a": 123}]}
    s2 = {
        "properties": {
            "i": {
                "items": {
                    "properties": {"a": {"type": "integer"}},
                    "type": "object",
                },
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td2], s2)

    # object in object
    td3 = {"i": {"a": {"b": 123}}}
    s3 = {
        "properties": {
            "i": {
                "properties": {
                    "a": {
                        "properties": {"b": {"type": "integer"}},
                        "type": "object",
                    }
                },
                "type": "object",
            }
        },
        "type": "object",
    }
    check_docs([td3], s3)

    # mixed str/float in array
    td6 = {"i": [1, 2, "a"]}
    s6 = {
        "properties": {
            "i": {
                "items": {"type": ["integer", "string"]},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td6], s6)

    # mixed array/object
    td1 = {"i": {"a": 456}}
    td2 = {"i": [{"a": 123}]}
    s12 = {
        "properties": {
            "i": {
                "items": {
                    "properties": {"a": {"type": "integer"}},
                    "type": "object",
                },
                "properties": {"a": {"type": "integer"}},
                "type": ["array", "object"],
            }
        },
        "type": "object",
    }
    check_docs([td1, td2], s12)

    # list of integer (list of things which are not objects)
    td4 = {"a": [5, 5, 3]}
    s4 = {
        "properties": {
            "a": {
                "items": {"type": "integer"},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td4], s4)

    td7 = {"i": {"a": 1, "b": 2}}
    s7 = {
        "type": "object",
        "properties": {
            "i": {
                "type": "object",
                "properties": {
                    "a": {"type": "integer"},
                    "b": {"type": "integer"},
                },
            }
        },
    }
    check_docs([td7], s7)

    # int or list of int (not a list of dict, testing scalar there)
    td81 = {"i": 1}
    td82 = {"i": [2, 3]}
    s812 = {
        "properties": {
            "i": {
                "items": {"type": "integer"},
                "type": ["array", "integer"],
            }
        },
        "type": "object",
    }
    check_docs([td81, td82], s812)

    # run from app folder, biothings as symlink
    # small real-life collection
    check_files("biothings/tests/cgi_schema.json", "biothings/tests/cgi_map.json")
    check_files("biothings/tests/clinvar_schema.json", "biothings/tests/clinvar_map.json")
    check_files("biothings/tests/mygene_schema.json", "biothings/tests/mygene_map.json")

    print("All test OK")
# Run the self-checks only when this file is executed as a script.
if "__main__" == __name__:
    test()