Skip to content

Commit

Permalink
feat: add cli args to hello world
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Apr 18, 2020
1 parent 0112658 commit 59cbba2
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 52 deletions.
31 changes: 24 additions & 7 deletions jina/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,32 @@

__uptime__ = datetime.now().strftime('%Y%m%d%H%M%S')

__jina_env__ = ('JINA_PROFILING',
'JINA_WARN_UNNAMED',
'JINA_VCS_VERSION',
'JINA_CONTROL_PORT',
# how to update this tuple (on macOS):
# 1. clean this tuple,
# 2. grep -ohE "\'JINA_.*?\'" **/*.py | sort -u | sed "s/$/,/g"
# 3. copy all output lines EXCEPT the first (that one is the grep command itself, picked up from step 2 above)
__jina_env__ = ('JINA_ARRAY_QUANT',
'JINA_CONTRIB_MODULE',
'JINA_CONTRIB_MODULE_IS_LOADING',
'JINA_CONTROL_PORT',
'JINA_DEFAULT_HOST',
'JINA_EXECUTOR_WORKDIR',
'JINA_FULL_CLI',
'JINA_IPC_SOCK_TMP',
'JINA_LOG_FILE',
'JINA_LOG_LONG',
'JINA_LOG_NO_COLOR',
'JINA_LOG_PROFILING',
'JINA_LOG_SSE',
'JINA_LOG_VERBOSITY',
'JINA_PROFILING',
'JINA_SOCKET_HWM',
'JINA_ARRAY_QUANT')
'JINA_STACK_CONFIG',
'JINA_TEST_CONTAINER',
'JINA_TEST_PRETRAINED',
'JINA_VCS_VERSION',
'JINA_VERSION',
'JINA_WARN_UNNAMED',)

__default_host__ = os.environ.get('JINA_DEFAULT_HOST', '0.0.0.0')
__ready_msg__ = 'ready and listening'
Expand Down Expand Up @@ -191,7 +208,7 @@ def raise_nofile(nofile_atleast=4096):
if hard < soft:
hard = soft

default_logger.info('setting soft & hard ulimit -n {} {}'.format(soft, hard))
default_logger.debug('setting soft & hard ulimit -n {} {}'.format(soft, hard))
try:
res.setrlimit(res.RLIMIT_NOFILE, (soft, hard))
except (ValueError, res.error):
Expand All @@ -203,7 +220,7 @@ def raise_nofile(nofile_atleast=4096):
default_logger.warning('failed to set ulimit, giving up')
soft, hard = res.getrlimit(res.RLIMIT_NOFILE)

default_logger.info('ulimit -n soft,hard: {} {}'.format(soft, hard))
default_logger.debug('ulimit -n soft,hard: {} {}'.format(soft, hard))
return soft, hard


Expand Down
31 changes: 21 additions & 10 deletions jina/helloworld/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


def hello_world(args):
"""The hello world of Jina. Use it via CLI :command:`jina hello-world`
"""The hello world of Jina. Use it via CLI :command:`jina hello-world`.
It downloads Fashion-MNIST dataset and indexes 50,000 images via Jina search framework.
The index is stored into 4 *shards*. We then randomly sample 128 unseen images as *Queries*,
Expand All @@ -20,25 +20,36 @@ def hello_world(args):
"""
Path(args.workdir).mkdir(parents=True, exist_ok=True)

urls = [args.index_data_url, args.query_data_url]
targets = [os.path.join(args.workdir, 'index-original'), os.path.join(args.workdir, 'query-original')]
download_data(targets, urls)
targets = {
'index': {
'url': args.index_data_url,
'filename': os.path.join(args.workdir, 'index-original')
},
'query': {
'url': args.query_data_url,
'filename': os.path.join(args.workdir, 'query-original')
}
}

download_data(targets)

os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
os.environ['SHARDS'] = str(args.shards)
os.environ['REPLICAS'] = str(args.replicas)
os.environ['HW_WORKDIR'] = args.workdir
os.environ['WITH_LOGSERVER'] = str(args.logserver)

f = Flow().load_config(resource_filename('jina', '/'.join(('resources', 'helloworld.flow.index.yml'))))
f = Flow().load_config(args.index_yaml_path)
with f.build() as fl:
fl.index(raw_bytes=input_fn(targets[0]), batch_size=1024)
fl.index(raw_bytes=input_fn(targets['index']['filename']), batch_size=args.index_batch_size)

countdown(8, reason=colored('behold! im going to switch to query mode', color='yellow', attrs='bold'))
countdown(8, reason=colored('behold! im going to switch to query mode', 'cyan',
attrs=['underline', 'bold', 'reverse']))

f = Flow().load_config(resource_filename('jina', '/'.join(('resources', 'helloworld.flow.query.yml'))))
f = Flow().load_config(args.query_yaml_path)
with f.build() as fl:
fl.search(raw_bytes=input_fn(targets[1], index=False, num_doc=128),
callback=print_result, top_k=args.top_k, batch_size=32)
fl.search(raw_bytes=input_fn(targets['query']['filename'], index=False, num_doc=args.num_query),
callback=print_result, top_k=args.top_k, batch_size=args.query_batch_size)

html_path = os.path.join(args.workdir, 'hello-world.html')
write_html(html_path)
8 changes: 4 additions & 4 deletions jina/helloworld/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ def write_html(html_path):
f'🤩 Intrigued? Play with "jina hello-world --help" and learn more about Jina at {colored_url}')


def download_data(targets, urls):
def download_data(targets):
with ProgressBar(task_name='download fashion-mnist') as t:
for f, u in zip(targets, urls):
if not os.path.exists(f):
urllib.request.urlretrieve(u, f, reporthook=lambda *x: t.update(1))
for v in targets.values():
if not os.path.exists(v['filename']):
urllib.request.urlretrieve(v['url'], v['filename'], reporthook=lambda *x: t.update(1))
91 changes: 61 additions & 30 deletions jina/main/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,24 +76,46 @@ def set_hw_parser(parser=None):
if not parser:
parser = set_base_parser()
from ..helper import get_random_identity
parser.add_argument('--workdir', type=str, default=get_random_identity(),
help='the workdir for hello-world demo, all indices, output will be there')
parser.add_argument('--shards', type=int,
default=4,
help='number of shards when index and query')
parser.add_argument('--replicas', type=int,
default=4,
help='number of replicas when index and query')
parser.add_argument('--index-data-url', type=str,
default='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',
help='the url of index data (should be in idx3-ubyte.gz format)')
parser.add_argument('--query-data-url', type=str,
default='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz',
help='the url of query data (should be in idx3-ubyte.gz format)')
parser.add_argument('--num-query', type=int, default=128,
help='number of queries to visualize')
parser.add_argument('--top-k', type=int, default=50,
help='top-k results to retrieve and visualize')
from pkg_resources import resource_filename

gp = add_arg_group(parser, 'general arguments')
gp.add_argument('--workdir', type=str, default=get_random_identity(),
help='the workdir for hello-world demo, '
'all data, indices, shards and outputs will be saved there')
gp.add_argument('--logserver', action='store_true', default=False,
help='start a log server for the dashboard')
gp = add_arg_group(parser, 'scalability arguments')
gp.add_argument('--shards', type=int,
default=4,
help='number of shards when index and query')
gp.add_argument('--replicas', type=int,
default=4,
help='number of replicas when index and query')
gp = add_arg_group(parser, 'index arguments')
gp.add_argument('--index-yaml-path', type=str,
default=resource_filename('jina', '/'.join(('resources', 'helloworld.flow.index.yml'))),
help='the yaml path of the index flow')
gp.add_argument('--index-data-url', type=str,
default='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',
help='the url of index data (should be in idx3-ubyte.gz format)')
gp.add_argument('--index-batch-size', type=int,
default=1024,
help='the batch size in indexing')
gp = add_arg_group(parser, 'query arguments')
gp.add_argument('--query-yaml-path', type=str,
default=resource_filename('jina', '/'.join(('resources', 'helloworld.flow.query.yml'))),
help='the yaml path of the query flow')
gp.add_argument('--query-data-url', type=str,
default='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz',
help='the url of query data (should be in idx3-ubyte.gz format)')
gp.add_argument('--query-batch-size', type=int,
default=32,
help='the batch size in searching')
gp.add_argument('--num-query', type=int, default=128,
help='number of queries to visualize')
gp.add_argument('--top-k', type=int, default=50,
help='top-k results to retrieve and visualize')

return parser


Expand Down Expand Up @@ -394,33 +416,30 @@ def set_client_cli_parser(parser=None):
def get_main_parser():
# create the top-level parser
parser = set_base_parser()
import os
show_all = 'JINA_FULL_CLI' in os.environ

sp = parser.add_subparsers(dest='cli',
description='use "%(prog)-8s [sub-command] --help" '
'to get detailed information about each sub-command', required=True)

set_hw_parser(sp.add_parser('hello-world', help='👋 Hello World! Hello Jina!',
description='Start the hello-world demo, a simple end2end image search',
description='Start the hello-world demo, a simple end2end image index and search demo '
'without any extra dependencies.',
formatter_class=_chf))

# cli
set_pod_parser(sp.add_parser('pod', help='start a pod',
description='Start a Jina pod',
formatter_class=_chf))
set_pea_parser(sp.add_parser('pea',
description='Start a Jina pea. You should rarely use this directly unless you '
'are doing low-level orchestration',
help='start a pea', formatter_class=_chf))

set_flow_parser(sp.add_parser('flow',
description='Start a Jina flow that consists of multiple pods',
help='start a flow from a YAML file', formatter_class=_chf))
set_gateway_parser(sp.add_parser('gateway',
description='Start a Jina gateway that receives client remote requests via gRPC',
help='start a gateway', formatter_class=_chf))
set_client_cli_parser(
sp.add_parser('client', help='start a client',
description='Start a Python client that connects to a remote Jina gateway',
formatter_class=_chf))

# set_grpc_service_parser(sp.add_parser('grpc', help='start a general purpose grpc service', formatter_class=adf))

# # check
Expand All @@ -436,10 +455,22 @@ def get_main_parser():
sp.add_parser('check', help='check the import status all executors and drivers',
description='Check the import status all executors and drivers',
formatter_class=_chf)

set_pea_parser(sp.add_parser('pea',
description='Start a Jina pea. '
'You should rarely use this directly unless you '
'are doing low-level orchestration',
formatter_class=_chf, **(dict(help='start a pea')) if show_all else {}))

set_logger_parser(sp.add_parser('log',
help='receive piped log output and beautify the log',
description='Receive piped log output and beautify the log',
formatter_class=_chf))
description='Receive piped log output and beautify the log. '
'Depreciated, use Jina Dashboard instead',
formatter_class=_chf,
**(dict(help='beautify the log')) if show_all else {}))
set_client_cli_parser(
sp.add_parser('client',
description='Start a Python client that connects to a remote Jina gateway',
formatter_class=_chf, **(dict(help='start a client')) if show_all else {}))
return parser


Expand Down
3 changes: 2 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
class MyTestCase(JinaTestCase):

def test_cli(self):
for j in ('pod', 'pea', 'gateway', 'log', 'check', 'ping', 'client', 'flow'):
for j in ('pod', 'pea', 'gateway', 'log',
'check', 'ping', 'client', 'flow', 'hello-word'):
subprocess.check_call(['jina', j, '--help'])


Expand Down

0 comments on commit 59cbba2

Please sign in to comment.