from typing import Optional

import typer
from typing_extensions import Annotated

from biothings.cli import utils

logger = utils.get_logger("dataplugin")
short_help = (
    "[green]Test an individual data plugin locally and make simple queries to inspect your parsed data objects.[/green]"
)
long_help = (
    short_help
    + "\n\n[magenta] :sparkles: Go to your existing data plugin folder.[/magenta]"
    + "\n[magenta] :sparkles: Dump, upload, and inspect your data plugin.[/magenta]"
    + "\n[magenta] :sparkles: Serve your data as a web service for making simple queries.[/magenta]"
    + "\n\n[green] :point_right: Always run this command inside your data plugin folder.[/green]"
    + "\n[green] :point_right: Default traceback errors are kept minimal, but you can set the [bold]BTCLI_RICH_TRACEBACK=1[/bold][/green]"
    + "\n[green] ENV variable to enable full and pretty-formatted tracebacks,[/green]"
    + "\n[green] or set [bold]BTCLI_DEBUG=1[/bold] to enable even more debug logs for debugging purposes.[/green]"
    + "\n[green] :point_right: You can include a config.py in the working directory to override the default biothings.config settings.[/green]"
    + "\n :rocket::boom::sparkling_heart:"
)
app = typer.Typer(
    help=long_help,
    short_help=short_help,
    no_args_is_help=True,
    rich_markup_mode="rich",
)
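
# A minimal sketch of exercising this sub-app programmatically with Typer's
# test runner (useful for smoke tests; any command touching plugin data still
# assumes it runs inside a data plugin folder):
#
#     from typer.testing import CliRunner
#     result = CliRunner().invoke(app, ["--help"])
#     assert result.exit_code == 0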

@app.command(
    name="create",
    help="Create a new data plugin from the template",
)
def create_data_plugin(
    name: Annotated[
        str,
        typer.Option("--name", "-n", help="Provide a data plugin name", prompt="What's your data plugin name?"),
    ] = "",
    multi_uploaders: Annotated[
        Optional[bool],
        typer.Option("--multi-uploaders", help="If provided, the data plugin includes multiple uploaders"),
    ] = False,
    parallelizer: Annotated[
        Optional[bool],
        typer.Option("--parallelizer", help="If provided, the data plugin's upload step will run in parallel"),
    ] = False,
):
    """*create* command for creating a new data plugin from the template"""
    utils.do_create(name, multi_uploaders, parallelizer, logger=logger)
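
# Example invocation (assuming the package's "biothings-cli" entry point), run
# from the folder where the new plugin should be created:
#
#     biothings-cli dataplugin create --name my_plugin --multi-uploaders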

@app.command(
    name="dump",
    help="Download source data files locally",
)
def dump_data():
    """*dump* command for downloading source data files locally"""
    utils.do_dump(plugin_name=None, logger=logger)

@app.command(
    name="upload",
    help="Convert data downloaded by the dump step into JSON documents and upload them to the source database",
)
def upload_source(
    batch_limit: Annotated[
        Optional[int],
        typer.Option(
            "--batch-limit",
            help="The maximum number of batches that should be uploaded. Batch size is 1000 docs",
        ),
    ] = None,
):
    """*upload* command for converting data downloaded by the dump step into JSON documents and uploading them to the source database.
    A local SQLite database is used to store the uploaded data."""
    # pass the user-provided batch_limit through to the uploader
    utils.do_upload(plugin_name=None, batch_limit=batch_limit, logger=logger)
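
# Example invocation (assumed "biothings-cli" entry point); caps the upload at
# 2 batches, i.e. at most 2000 docs:
#
#     biothings-cli dataplugin upload --batch-limit 2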

@app.command(
    "dump_and_upload",
    help="Download the data source to a local folder, then convert it to JSON documents and upload them to the source database",
)
def dump_and_upload():
    """*dump_and_upload* command for downloading source data files locally, then converting them into JSON documents and uploading them to the source database.
    Two steps in one command."""
    utils.do_dump_and_upload(plugin_name=None, logger=logger)

@app.command(
    name="list",
    help="List dumped files or uploaded sources",
)
def listing(
    dump: Annotated[Optional[bool], typer.Option("--dump", help="List dumped files")] = False,
    upload: Annotated[Optional[bool], typer.Option("--upload", help="List uploaded sources")] = False,
    hubdb: Annotated[Optional[bool], typer.Option("--hubdb", help="List internal hubdb content")] = False,
):
    """*list* command for listing dumped files and/or uploaded sources"""
    utils.do_list(plugin_name=None, dump=dump, upload=upload, hubdb=hubdb, logger=logger)
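
# Example invocations (assumed "biothings-cli" entry point):
#
#     biothings-cli dataplugin list --dump            # show downloaded files
#     biothings-cli dataplugin list --upload --hubdb  # show uploaded sources and hubdb content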

@app.command(
    name="inspect",
    help="Give detailed information about the structure of documents coming from the parser",
)
def inspect_source(
    sub_source_name: Annotated[
        Optional[str], typer.Option("--sub-source-name", "-s", help="Your sub source name")
    ] = "",
    mode: Annotated[
        Optional[str],
        typer.Option(
            "--mode",
            "-m",
            help="""
            The inspect mode or list of modes (comma-separated), e.g. "type,mapping".\n
            Possible values are:\n
            - "type": explore documents and report the strict data structure\n
            - "mapping": same as "type" but also performs tests on the data to guess the best mapping\n
              (e.g. checks whether a string is splittable, etc.). Implies merge=True\n
            - "stats": explore documents and compute basic stats (count, min, max, sum)\n
            """,
        ),
    ] = "type,stats",
    limit: Annotated[
        Optional[int],
        typer.Option(
            "--limit",
            "-l",
            help="""
            Limit the inspection to the first N docs (None = no limit, inspect all)
            """,
        ),
    ] = None,
    # merge: Annotated[
    #     Optional[bool],
    #     typer.Option(
    #         "--merge",
    #         "-m",
    #         help="""Merge scalar into list when both exist (e.g. {"val": ...} and [{"val": ...}])""",
    #     ),
    # ] = False,
    output: Annotated[
        Optional[str],
        typer.Option(
            "--output",
            "-o",
            help="The local JSON file path (absolute or relative) for storing mapping info when run with mode 'mapping'",
        ),
    ] = None,
):
    """*inspect* command for giving detailed information about the structure of documents coming from the parser after the upload step"""
    utils.do_inspect(
        plugin_name=None,
        sub_source_name=sub_source_name,
        mode=mode,
        limit=limit,
        merge=False,
        output=output,
        logger=logger,
    )
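
# Example invocation (assumed "biothings-cli" entry point): inspect the first
# 1000 docs in "mapping" mode and save the guessed mapping to a local file:
#
#     biothings-cli dataplugin inspect --mode mapping --limit 1000 --output mapping.json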

@app.command(name="serve")
def serve(
    host: Annotated[
        Optional[str],
        typer.Option(
            "--host",
            help="The host name to run the test API server",
        ),
    ] = "localhost",
    port: Annotated[
        Optional[int],
        typer.Option(
            "--port",
            "-p",
            help="The port number to run the test API server",
        ),
    ] = 9999,
):
    """
    *serve* command runs a simple API server for serving documents from the source database.
    For example, after running 'dump_and_upload', we have a source_name = "test" with a document structure
    like this:
    doc = {"_id": "123", "key": {"a": {"b": "1"}, "x": [{"y": "3", "z": "4"}, "5"]}}.
    An API server will run at http://host:port/<your source name>/, like http://localhost:9999/test/:
    - You can see all available sources on the index page: http://localhost:9999/
    - You can list all docs: http://localhost:9999/test/ (default is to return the first 10 docs)
    - You can paginate the doc list: http://localhost:9999/test/?start=10&limit=10
    - You can retrieve a doc by id: http://localhost:9999/test/123
    - You can filter docs with one or multiple fielded terms:
        - http://localhost:9999/test/?q=key.a.b:1 (query by any field with dot notation, like key.a.b=1)
        - http://localhost:9999/test/?q=key.a.b:1%20AND%20key.x.y:3 (find all docs that match two fields)
        - http://localhost:9999/test/?q=key.x.z:4* (field values can contain the wildcards * or ?)
        - http://localhost:9999/test/?q=key.x:5&start=10&limit=10 (pagination also works)
    """
    utils.do_serve(plugin_name=None, host=host, port=port, logger=logger)
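
# Example invocation (assumed "biothings-cli" entry point), then query the
# server with any HTTP client:
#
#     biothings-cli dataplugin serve --port 9999
#     curl "http://localhost:9999/test/?q=key.a.b:1"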

@app.command(
    name="clean",
    help="Delete all dumped files and/or drop uploaded source tables",
    no_args_is_help=True,
)
def clean_data(
    dump: Annotated[Optional[bool], typer.Option("--dump", help="Delete all dumped files")] = False,
    upload: Annotated[Optional[bool], typer.Option("--upload", help="Drop uploaded source tables")] = False,
    clean_all: Annotated[
        Optional[bool],
        typer.Option(
            "--all",
            help="Delete all dumped files and drop uploaded source tables",
        ),
    ] = False,
):
    """*clean* command for deleting all dumped files and/or dropping uploaded source tables"""
    utils.do_clean(plugin_name=None, dump=dump, upload=upload, clean_all=clean_all, logger=logger)
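

# A minimal, runnable sketch for local experimentation (an assumption: normally
# this app is mounted as the "dataplugin" sub-command of the main biothings CLI
# rather than executed as a script):
if __name__ == "__main__":
    app()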