""" Functions to return versions of things. """
import functools
import logging
# import pip
import os
import re
import shlex
import sys
from subprocess import DEVNULL, check_output
from git import GitCommandError, InvalidGitRepositoryError, NoSuchPathError, Repo
import biothings
from biothings.utils.dataload import dict_sweep
[docs]
def get_python_version():
    """List the installed Python packages as reported by ``pip list``.

    pip is invoked as a module of the current interpreter
    (``sys.executable``, or ``python3`` when that value is empty) and its
    stderr is discarded.

    Returns:
        A list of strings, one per line of pip's output, without the first
        two lines and without the last split element. An empty list is
        returned when running pip or decoding its output fails.
    """
    interpreter = sys.executable or "python3"
    command = [interpreter, "-m", "pip", "list"]
    try:
        raw = check_output(command, stderr=DEVNULL)
        text = raw.decode("utf-8").replace("\r", "")
    except Exception:
        # best effort: package listing is informational only
        return []
    # presumably the two skipped leading lines are pip's table header and the
    # last element is the empty string left after the trailing newline
    return text.split("\n")[2:-1]
[docs]
@functools.lru_cache()
def get_biothings_commit():
    """Return the biothings commit information stored in ``.git-info``.

    The ``.git-info`` file is read from the directory containing the
    ``biothings`` package. Its first three lines populate, in order, the
    "repository-url", "commit-hash" and "master-commits" entries.

    Returns:
        A dict with keys "repository-url", "commit-hash", "master-commits"
        and "version" (``biothings.__version__``). When the file cannot be
        read or holds fewer than three lines, the first three values are
        empty strings. The result is memoized by ``functools.lru_cache``.
    """
    try:
        info_path = os.path.join(os.path.dirname(biothings.__file__), ".git-info")
        with open(info_path, "r", encoding="utf-8") as git_file:
            # unpacking fails (and triggers the fallback) on fewer than 3 lines
            repo_url, commit_hash, master_commits = [ln.strip("\n") for ln in git_file][:3]
    except Exception:
        repo_url = commit_hash = master_commits = ""
    return {
        "repository-url": repo_url,
        "commit-hash": commit_hash,
        "master-commits": master_commits,
        "version": biothings.__version__,
    }
[docs]
def get_python_exec_version():
    """Return the version of the running Python interpreter.

    Returns:
        A dict with "version" (the full ``sys.version`` string) and
        "version_info", itself a dict holding the "major", "minor" and
        "micro" components of ``sys.version_info``.
    """
    major, minor, micro = sys.version_info[:3]
    version_info = {"major": major, "minor": minor, "micro": micro}
    return {"version": sys.version, "version_info": version_info}
[docs]
@functools.lru_cache()
def get_software_info(app_dir=None):
    """Return information about the current application and its runtime.

    Args:
        app_dir: forwarded to ``get_repository_information`` as ``app_dir``.

    Returns:
        A dict with keys "python-package-info", "codebase", "biothings" and
        "python-info", each filled by the matching helper. The result is
        memoized per ``app_dir`` value by ``functools.lru_cache``.
    """
    software_info = {}
    software_info["python-package-info"] = get_python_version()
    software_info["codebase"] = get_repository_information(app_dir=app_dir)
    software_info["biothings"] = get_biothings_commit()
    software_info["python-info"] = get_python_exec_version()
    return software_info
[docs]
def check_new_version(folder, max_commits=10):
    """
    Given a folder pointing to a Git repo, return a dict containing info
    about remote commits not applied yet to the repo.

    Args:
        folder: path to the local Git working copy to inspect.
        max_commits: maximum number of entries listed under "commits";
            "total" always counts every new commit found.

    Returns:
        - None when the check is skipped (folder is not a valid git
          repository, HEAD is detached, the current branch tracks no remote
          branch) or when the local HEAD hash equals the remote one.
        - An empty dict when the hashes differ but no new commit is found
          after fetching.
        - Otherwise a dict with "latest" (short hash of the first new
          commit), "commits" (list of dicts with "hash", "url", "date",
          "message") and "total".

    Raises:
        Any exception raised while querying or fetching the remote is logged
        as a warning and re-raised.
    """
    # from https://stackoverflow.com/questions/8290233/gitpython-get-list-of-remote-commits-not-yet-applied
    try:
        repo = Repo(folder)
    except InvalidGitRepositoryError:
        logging.warning("Not a valid git repository for folder '%s', skipped for checking new version.", folder)
        return None
    try:
        # Get URL from actual remote branch name that is being tracked.
        # more details: see comments in get_version
        remote_name = repo.active_branch.tracking_branch().remote_name
        url = repo.remote(remote_name).url
        repo_url = re.sub(r"\.git$", "", url)
    except Exception as err:
        logging.debug("Can't determine repository URL: %s", err)
        repo_url = None
    # URLs always use "/" as separator, so don't build them with os.path.join
    commit_url_base = f"{repo_url.rstrip('/')}/commit" if repo_url else None
    new_info = {}
    try:
        # we can't directly get the list of new commits without fetching them locally first
        # but we'd like to avoid fetching all the time just to check.
        # what we can do is a ls-remote and check the HEAD hash, if different, then fetch
        # (no pull) and inspect differences.
        try:
            head = repo.head.ref
        except TypeError as err:
            # cannot get the head reference, e.g. when HEAD is detached at certain commit point
            # For example, TypeError: HEAD is a detached symbolic reference as it points to '19ad50463d0cdd3329618789040c4b9012ccca24'
            logging.warning("%s, skipped for checking new version.", err)
            return None
        tracking = head.tracking_branch()
        if tracking is None:
            # local-only branch: there is no remote to compare against, so skip
            # the check the same way as for a detached HEAD instead of crashing
            logging.warning(
                "Branch '%s' is not tracking any remote branch, skipped for checking new version.", head.name
            )
            return None
        # inspect remote HEAD for that branch
        output = repo.git.ls_remote("--heads", tracking.remote_name, tracking.remote_head)
        remote_head_hexsha = output.split("\t")[0]
        if remote_head_hexsha == head.commit.hexsha:
            # hashes the same, we're up-to-date with the remote
            return None
        logging.info("HEAD on remote is different, new commit(s) available for '%s'", folder)
        logging.info("HEAD(remote): %s, HEAD(local): %s", remote_head_hexsha, head.commit.hexsha)
        # need to fetch new code locally
        # usually one remotes, but just in case...
        for remote in repo.remotes:
            remote.fetch()
        # now identify new commits
        new_commits = list(tracking.commit.iter_items(repo, f"{head.path}..{tracking.path}"))
        if new_commits:
            new_info = {
                "latest": new_commits[0].hexsha[:6],
                "commits": [
                    {
                        "hash": c.hexsha[:6],
                        "url": f"{commit_url_base}/{c.hexsha}" if commit_url_base else None,
                        "date": c.committed_datetime.isoformat(),
                        "message": c.message,
                    }
                    # only describe the commits that will actually be reported
                    for c in new_commits[:max_commits]
                ],
                "total": len(new_commits),
            }
    except Exception as err:
        logging.warning("Can't check for new version: %s", err)
        raise
    return new_info
[docs]
def get_version(folder):
    """Return revision information for the git repository at *folder*.

    Returns:
        None when *folder* is not a valid git repository; otherwise a dict
        with "branch" (active branch name, or "HEAD detached" when it cannot
        be read), "commit" (6-character hash or "unknown"), "date" (ISO
        formatted commit date or "unknown") and "giturl" (URL of the remote
        tracked by the active branch, or None).
    """
    try:
        repo = Repo(folder)  # app or lib dir
    except InvalidGitRepositoryError:
        logging.warning("Not a valid git repository for folder '%s', skipped for getting its version.", folder)
        return None

    # Resolve the URL through the remote actually tracked by the active
    # branch: "origin" may not exist, and the branch may track nothing at all
    try:
        upstream = repo.active_branch.tracking_branch()
        giturl = repo.remote(upstream.remote_name).url
    except Exception:  # pylint: disable=W0702
        giturl = None

    try:
        head_commit = repo.head.object
        short_hash = head_commit.hexsha[:6]
        commit_date = head_commit.committed_datetime.isoformat()
    except Exception as err:
        logging.warning("can't determine app commit hash: %s", err)
        short_hash = "unknown"
        commit_date = "unknown"

    try:
        branch = repo.active_branch.name
    except Exception as err:
        logging.warning("can't determine app version, assuming HEAD detached': %s", err)
        branch = "HEAD detached"

    return {
        "branch": branch,
        "commit": short_hash,
        "date": commit_date,
        "giturl": giturl,
    }
[docs]
def set_versions(config, app_folder):
    """
    Propagate versions (git branch name) in config module.

    Also set app and biothings folder paths (though not
    exposed as a config param since they are lower-cased,
    see biothings.__init__.py, regex PARAM_PAT).

    Args:
        config: object receiving the attributes APP_VERSION / app_folder and
            BIOTHINGS_VERSION / biothings_folder. A version attribute that
            is already present is kept, and its folder attribute is not set.
        app_folder: path to the application folder.

    Raises:
        FileNotFoundError: if the application folder, or the folder
            containing the biothings package, does not exist.
    """
    if not os.path.exists(app_folder):
        raise FileNotFoundError(f"'{app_folder}' application folder doesn't exist")

    # app_version: version of the API application
    if hasattr(config, "APP_VERSION"):
        logging.info("app_version '%s' forced in configuration file", config.APP_VERSION)
    else:
        config.APP_VERSION = get_version(app_folder)
        config.app_folder = app_folder

    # biothings_version: version of BioThings SDK
    if hasattr(config, "BIOTHINGS_VERSION"):
        logging.info("biothings_version '%s' forced in configuration file", config.BIOTHINGS_VERSION)
    else:
        import biothings

        # .../biothings.api/biothings/__init__.py
        package_dir = os.path.dirname(os.path.realpath(biothings.__file__))
        bt_folder, package_name = os.path.split(package_dir)
        if not os.path.exists(bt_folder):
            raise FileNotFoundError(f"'{bt_folder}' biothings folder doesn't exist")
        assert package_name == "biothings", "Expectig 'biothings' dir in biothings lib path"
        config.BIOTHINGS_VERSION = get_version(bt_folder)
        config.biothings_folder = bt_folder

    logging.info(
        "Running app_version=%s with biothings_version=%s", repr(config.APP_VERSION), repr(config.BIOTHINGS_VERSION)
    )
[docs]
def get_source_code_info(src_file):
    """
    Given a path to a source code, try to find information
    about repository, revision, URL pointing to that file, etc...
    Return None if nothing can be determined.

    Tricky cases:
    - src_file could refer to another repo, within current repo
      (namely a remote data plugin, cloned within the api's plugins folder)
    - src_file could point to a folder, when for instance a dataplugin is
      analyzed. This is because we can't point to an uploader file since
      it's dynamically generated

    Args:
        src_file: path to a source file or folder; it is made absolute
            before being used.

    Returns:
        None when no enclosing git repository is found, or when a
        GitCommandError or TypeError is raised while collecting the data.
        Otherwise the result of ``dict_sweep`` applied to a dict built with
        "repo", "commit", "branch" and either "folder" or "file", to which
        a "url" entry is added when the repository URL contains
        "github.com".
    """
    # need to be absolute to build proper github URL
    abs_src_file = os.path.abspath(src_file)
    try:
        # search_parent_directories: src_file is usually somewhere below the
        # repository root, not the root itself
        repo = Repo(abs_src_file, search_parent_directories=True)
    except (InvalidGitRepositoryError, NoSuchPathError):
        logging.exception("Can't find a github repository for file '%s'", src_file)
        return None
    try:
        gcmd = repo.git
        # presumably runs `git rev-list -1 <active branch> <path>`, i.e. asks
        # for the latest commit of the active branch touching that path
        _hash = gcmd.rev_list(-1, repo.active_branch, abs_src_file)
        # path relative to the repository working dir, without surrounding "/"
        rel_src_file = abs_src_file.replace(repo.working_dir, "").strip("/")
        if not _hash:
            # seems to be a repo cloned within a repo, change directory
            curdir = os.path.abspath(os.curdir)
            try:
                if os.path.isdir(abs_src_file):
                    os.chdir(abs_src_file)
                    _hash = gcmd.rev_list(-1, repo.active_branch)
                else:
                    dirname, filename = os.path.split(abs_src_file)
                    os.chdir(dirname)
                    _hash = gcmd.rev_list(-1, repo.active_branch, filename)
                rel_src_file = ""  # will point to folder by commit hash
            finally:
                # always restore the process-wide working directory
                os.chdir(curdir)
        if _hash:
            short_hash = gcmd.rev_parse(_hash, short=7)
        else:
            logging.warning("Couldn't determine commit hash for file '%s'", src_file)
            _hash = None
            short_hash = None
        # could have more than one URLs for origin, only take first
        # NOTE(review): repo.remote() is called without a name; presumably it
        # resolves to "origin" -- confirm behaviour when no such remote exists
        repo_url = next(repo.remote().urls)
        info = {
            "repo": repo_url,
            "commit": short_hash,
            "branch": repo.active_branch.name,
        }
        if os.path.isdir(src_file):
            info["folder"] = rel_src_file
        else:
            info["file"] = rel_src_file
        # NOTE(review): dict_sweep presumably drops empty values (e.g. an
        # empty rel_src_file) -- verify against biothings.utils.dataload
        info = dict_sweep(info)
        # rebuild URL to that file
        if "github.com" in repo_url:
            # NOTE(review): when _hash is None, os.path.join raises TypeError,
            # which the handler below turns into a None return
            info["url"] = os.path.join(re.sub(r"\.git$", "", repo_url), "tree", _hash, rel_src_file)
        return info
    except GitCommandError:
        logging.exception("Error while getting git information for file '%s'", src_file)
        return None
    except TypeError as err:
        # happens with biothings symlink, just ignore
        logging.debug("Can't determine source code info (but that's fine): %s", err)
        return None