# DatabaseCollectionTesting
# Author: Amiteshk Sharma
# https://github.com/amiteshksharma/Database-Testing
import pymongo
# from jsonpath_ng import jsonpath, parse
[docs]
class DatabaseCollectionTesting:
"""
Constructor that takes in three items
db_url - string - the mongoDB url to connect to
db - string - name of DB to use
collection - string - name of collection in db
"""
def __init__(self, db_url, db, collection):
if "mongo" not in db_url:
raise ValueError
self.database_url = db_url
self.client = pymongo.MongoClient(db_url)
self.db = self.client[db]
self.collection = self.db[collection]
# test to see if there exists only 1 item for an ID
# _id - the document _id to query
[docs]
def test_field_unique_id(self, _id):
get_item = self.collection.find({"_id": _id})
item = list(get_item)
assert len(item) == 1
# check for all items within a taxid
# taxid - the taxid of the document to query
[docs]
def test_field_taxid(self, taxid):
get_items = self.collection.find({"taxid": taxid})
item_list = list(get_items)
assert len(item_list) >= 1
# check all documents with the same taxid value
[docs]
def test_documents_taxid(self, taxid):
get_documents = self.collection.find({"taxid": taxid})
doc_list = list(get_documents)
sub_list = ["_id", "taxid", "name", "ensembl", "symbol"]
for doc in doc_list:
keys = doc.keys()
if not all(x in keys for x in sub_list):
# may not contain a name attribute
if "name" not in keys:
pass
# may not contain the ensembl attribute
elif "ensembl" not in keys:
pass
# may not contain the symbol attribute
elif "symbol" not in keys:
pass
else:
# assert False
raise AssertionError()
assert True
# check an _id and make sure it does not exist
# _id - the document _id to query
[docs]
def test_field_does_not_exist(self, _id):
get_items = self.collection.find({"_id": _id})
item_list = list(get_items)
assert len(item_list) == 0
# Check number of documents is correct
# expected count - the expected count of documents with specific taxid
[docs]
def test_total_document_count(self, expected_count):
get_all_document = self.collection.find()
document_list = list(get_all_document)
assert len(document_list) == expected_count
# check the indices for the mongoDB database
[docs]
def test_database_index(self):
get_indices = self.collection.index_information()
indices_list = list(get_indices)
size = len(indices_list)
# if only size 1, then it only has _id_ index
if size == 1:
assert all(x in indices_list for x in ["_id_"])
elif size == 3:
assert all(x in indices_list for x in ["_id_", "taxid_1", "entrezgene_1"])
# test the name attribute on randomly selected items in the database
[docs]
def test_document_name(self):
random_docs = self.collection.aggregate([{"$sample": {"size": 10}}])
count = 0
for doc in random_docs:
if "name" in doc:
count = count + 1
else:
print("_id for document with no name: " + doc["_id"])
assert count == 10
if __name__ == "__main__":
c = DatabaseCollectionTesting("mongodb://su05:27017", "genedoc", "mygene_allspecies_20191111_eeesndlz")
c.test_documents_taxid(29302)