Source code for jina.helloworld.fashion

__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

import os
from pathlib import Path

from pkg_resources import resource_filename

from ..helper import (
    print_result,
    write_html,
    download_data,
    index_generator,
    query_generator,
)
from ...flow import Flow
from ...helper import countdown, colored


def hello_world(args):
    """Run Jina's Fashion-MNIST "Hello World" demo end to end.

    Usage:
        Invoke it from the CLI with :command:`jina hello-world`; see
        :command:`jina hello-world --help` for the available options.

    Description:
        The demo downloads the Fashion-MNIST data and label files into
        ``args.workdir``, indexes the images with the flow loaded from
        ``args.uses_index``, then sends ``args.num_query`` query images
        through the flow loaded from ``args.uses_query``. The results are
        finally rendered to ``hello-world.html`` inside the working directory.

    :param args: parsed CLI arguments. The attributes read here are
        ``workdir``, ``index_labels_url``, ``query_labels_url``,
        ``index_data_url``, ``query_data_url``, ``download_proxy``,
        ``shards``, ``parallel``, ``uses_index``, ``uses_query``,
        ``index_request_size``, ``query_request_size``, ``num_query``
        and ``top_k``.
    """
    workdir = args.workdir
    Path(workdir).mkdir(parents=True, exist_ok=True)

    # (target key, remote url, local file name inside the working directory)
    resources = (
        ('index-labels', args.index_labels_url, 'index-labels'),
        ('query-labels', args.query_labels_url, 'query-labels'),
        ('index', args.index_data_url, 'index-original'),
        ('query', args.query_data_url, 'query-original'),
    )
    targets = {
        key: {'url': url, 'filename': os.path.join(workdir, basename)}
        for key, url, basename in resources
    }

    # fetch every resource described in `targets`
    download_data(targets, args.download_proxy)

    # the index and query flow YAMLs refer to the environment variables below
    fashion_dir = resource_filename('jina', 'resources') + '/fashion/'
    os.environ['PATH'] = os.pathsep.join((os.environ['PATH'], fashion_dir))
    os.environ.update(
        {
            'SHARDS': str(args.shards),
            'PARALLEL': str(args.parallel),
            'HW_WORKDIR': workdir,
            # reduce the network load by using `fp16`, or even `uint8`
            'JINA_ARRAY_QUANT': 'fp16',
        }
    )

    # ---- indexing: build the flow from its YAML description and feed it ----
    index_flow = Flow.load_config(args.uses_index)
    with index_flow:
        # presumably `download_data` attached the loaded array under 'data'
        # -- verify against the helper
        num_index_docs = targets['index']['data'].shape[0]
        index_docs = index_generator(num_docs=num_index_docs, target=targets)
        index_flow.index(index_docs, request_size=args.index_request_size)

    # short pause before switching over to the query flow
    switch_banner = colored(
        'behold! im going to switch to query mode',
        'cyan',
        attrs=['underline', 'bold', 'reverse'],
    )
    countdown(8, reason=switch_banner)

    # ---- querying: a second flow, described by another YAML file ----
    query_flow = Flow.load_config(args.uses_query)
    with query_flow:
        query_docs = query_generator(
            num_docs=args.num_query, target=targets, with_groundtruth=True
        )
        query_flow.search(
            query_docs,
            shuffle=True,
            on_done=print_result,
            request_size=args.query_request_size,
            top_k=args.top_k,
        )

    # dump the collected results into an HTML page
    write_html(os.path.join(workdir, 'hello-world.html'))