Source code for biothings.tests.hub

# DatabaseCollectionTesting

# Author: Amiteshk Sharma
# https://github.com/amiteshksharma/Database-Testing

import pymongo

# from jsonpath_ng import jsonpath, parse


class DatabaseCollectionTesting:
    """Sanity checks that run against a single MongoDB collection.

    Every ``test_*`` method issues one query against the collection selected
    at construction time and raises ``AssertionError`` when the check fails;
    on success the method simply returns ``None``.
    """

    def __init__(self, db_url, db, collection):
        """Connect to MongoDB and select the collection to check.

        Args:
            db_url: string, the MongoDB url to connect to.
            db: string, name of the database to use.
            collection: string, name of the collection inside ``db``.

        Raises:
            ValueError: if ``db_url`` does not contain the text "mongo".
        """
        if "mongo" not in db_url:
            raise ValueError("not a MongoDB url: %r" % (db_url,))
        self.database_url = db_url
        self.client = pymongo.MongoClient(db_url)
        self.db = self.client[db]
        self.collection = self.db[collection]

    def test_field_unique_id(self, _id):
        """Assert that exactly one document has the given ``_id``.

        Args:
            _id: the document _id to query.
        """
        matches = list(self.collection.find({"_id": _id}))
        assert len(matches) == 1, "expected exactly one document with _id %r, found %d" % (
            _id,
            len(matches),
        )

    def test_field_taxid(self, taxid):
        """Assert that at least one document carries the given ``taxid``.

        Args:
            taxid: the taxid of the documents to query.
        """
        # Existence is all that matters, so fetch a single document instead
        # of materializing every match.
        assert self.collection.find_one({"taxid": taxid}) is not None, (
            "no document found with taxid %r" % (taxid,)
        )

    def test_documents_taxid(self, taxid):
        """Assert that every document with ``taxid`` has the required keys.

        ``_id`` and ``taxid`` are required on each matched document.
        ``name``, ``ensembl`` and ``symbol`` may be absent and are therefore
        not checked.

        Args:
            taxid: the taxid of the documents to query.
        """
        required_keys = ("_id", "taxid")
        for doc in self.collection.find({"taxid": taxid}):
            missing = [key for key in required_keys if key not in doc]
            assert not missing, "document %r is missing required keys: %s" % (
                doc.get("_id"),
                ", ".join(missing),
            )

    def test_field_does_not_exist(self, _id):
        """Assert that no document has the given ``_id``.

        Args:
            _id: the document _id to query.
        """
        assert self.collection.find_one({"_id": _id}) is None, (
            "unexpected document found with _id %r" % (_id,)
        )

    def test_total_document_count(self, expected_count):
        """Assert that the collection holds exactly ``expected_count`` documents.

        Args:
            expected_count: the expected number of documents in the collection.
        """
        # Count on the server; loading every document into a list just to
        # take its length does not scale to large collections.
        actual_count = self.collection.count_documents({})
        assert actual_count == expected_count, "expected %r documents, found %r" % (
            expected_count,
            actual_count,
        )

    def test_database_index(self):
        """Check the index names defined on the collection.

        With a single index it must be ``_id_``; with three indices they must
        be ``_id_``, ``taxid_1`` and ``entrezgene_1``.
        """
        # index_information() is keyed by index name.
        index_names = set(self.collection.index_information())
        index_count = len(index_names)
        if index_count == 1:
            # if only size 1, then it only has the _id_ index
            assert "_id_" in index_names
        elif index_count == 3:
            assert {"_id_", "taxid_1", "entrezgene_1"} <= index_names
        # NOTE(review): any other number of indices passes without a check;
        # confirm whether that is intended.

    def test_document_name(self, sample_size=10):
        """Assert that randomly sampled documents all have a ``name`` key.

        The ``_id`` of each sampled document without a name is printed. The
        check also fails when fewer than ``sample_size`` documents come back
        from the sample, since the count of named documents cannot reach
        ``sample_size``.

        Args:
            sample_size: number of documents to sample (defaults to 10).
        """
        sampled_docs = self.collection.aggregate([{"$sample": {"size": sample_size}}])
        named_count = 0
        for doc in sampled_docs:
            if "name" in doc:
                named_count += 1
            else:
                # str() keeps the report working for non-string ids.
                print("_id for document with no name: " + str(doc["_id"]))
        assert named_count == sample_size, "%d of %d sampled documents have a name" % (
            named_count,
            sample_size,
        )
if __name__ == "__main__":
    # Manual smoke run: connect to the hard-coded host, database and
    # collection below, then check the documents stored for taxid 29302.
    checker = DatabaseCollectionTesting(
        "mongodb://su05:27017",
        "genedoc",
        "mygene_allspecies_20191111_eeesndlz",
    )
    checker.test_documents_taxid(29302)