Skip to content

Commit

Permalink
feat: add hello world yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Apr 17, 2020
1 parent e0e642e commit dffc5a3
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 50 deletions.
43 changes: 43 additions & 0 deletions jina/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,46 @@ def import_classes(namespace: str, targets=None,
import signal

signal.signal(signal.SIGINT, signal.default_int_handler)

# !/usr/bin/env python
try:
import resource as res
except ImportError: # Windows
res = None


def raise_nofile(nofile_atleast=4096):
    """Raise the soft limit on open file descriptors to at least ``nofile_atleast``.

    Useful when many files or sockets are open at once and the platform
    default is low (e.g. Ubuntu's default ``ulimit -n 1024`` or OS X
    El Capitan's 256). The new limit is obtained through ``setrlimit`` and
    therefore only lasts for the current Python session.

    :param nofile_atleast: minimum soft ``RLIMIT_NOFILE`` value wanted.
    :return: the ``(soft, hard)`` limits in effect after the call, or
        ``(None, None)`` when the ``resource`` module is unavailable
        (e.g. on Windows).
    """
    from .logging import default_logger
    if res is None:
        return (None,) * 2

    soft, ohard = res.getrlimit(res.RLIMIT_NOFILE)
    hard = ohard
    # RLIM_INFINITY may be represented as -1 on some platforms, so an
    # "unlimited" value must never be compared numerically
    unlimited = res.RLIM_INFINITY

    if soft != unlimited and soft < nofile_atleast:
        soft = nofile_atleast
        if hard != unlimited and hard < soft:
            hard = soft

        default_logger.info('setting soft & hard ulimit -n {} {}'.format(soft, hard))
        try:
            res.setrlimit(res.RLIMIT_NOFILE, (soft, hard))
        except (ValueError, OSError):  # resource.error is an alias of OSError on Python 3
            try:
                if ohard != unlimited and ohard < soft:
                    # raising the hard limit was refused (it usually needs
                    # privileges): settle for the highest value the existing
                    # hard limit allows instead of retrying the same request
                    soft = hard = ohard
                else:
                    hard = soft
                default_logger.warning('trouble with max limit, retrying with soft,hard {},{}'.format(soft, hard))
                res.setrlimit(res.RLIMIT_NOFILE, (soft, hard))
            except Exception:
                default_logger.warning('failed to set ulimit, giving up')
                # report what is really in effect, not what was requested
                soft, hard = res.getrlimit(res.RLIMIT_NOFILE)

    default_logger.info('ulimit -n soft,hard: {} {}'.format(soft, hard))
    return soft, hard


# Raise the open-file limit once, as a side effect of importing this module;
# the returned (soft, hard) pair is discarded.
raise_nofile()
56 changes: 6 additions & 50 deletions jina/helloworld/__init__.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,36 @@
import os
import urllib.request
import webbrowser
from pathlib import Path

import numpy as np
from pkg_resources import resource_filename

from .helper import write_png, input_fn, print_result, write_html
from ..clients.python import ProgressBar
from ..executors.crafters import BaseSegmenter, BaseDocCrafter
from ..executors.encoders import BaseImageEncoder
from .components import *
from .helper import write_png, input_fn, print_result, write_html, download_data
from ..flow import Flow
from ..helper import countdown, colored
from ..logging import default_logger


class MyDocCrafter(BaseDocCrafter):
    """Document-level crafter used by the hello-world demo."""

    def craft(self, raw_bytes, *args, **kwargs):
        """Turn the raw bytes of one document into its ``meta_info`` field.

        :param raw_bytes: buffer that is read as a flat ``uint8`` array.
        :return: a dict with the single key ``meta_info`` holding whatever
            ``write_png`` returns for that array.
        """
        pixels = np.frombuffer(raw_bytes, dtype=np.uint8)
        return {'meta_info': write_png(pixels)}


class MySegmenter(BaseSegmenter):
    """Segmenter used by the hello-world demo: one chunk per document."""

    def craft(self, raw_bytes, doc_id, *args, **kwargs):
        """Wrap the whole document into a single chunk.

        :param raw_bytes: buffer that is read as a flat ``uint8`` array.
        :param doc_id: accepted but not used here.
        :return: a one-element list containing a dict whose ``blob`` is that array.
        """
        blob = np.frombuffer(raw_bytes, dtype=np.uint8)
        return [{'blob': blob}]


class MyEncoder(BaseImageEncoder):
    """Encoder that projects flattened 784-value inputs onto 64 dimensions.

    The projection matrix is random and is generated once per instance.
    """

    def __init__(self, *args, **kwargs):
        """Build the random projection matrix; all arguments go to the base class."""
        super().__init__(*args, **kwargs)
        # generate a random 784x64 matrix with orthonormal columns: in the
        # reduced SVD ``H = u @ diag(s) @ vh`` the product ``u @ vh`` keeps the
        # singular vectors and drops the singular values
        H = np.random.rand(784, 64)
        u, _, vh = np.linalg.svd(H, full_matrices=False)
        self.oth_mat = u @ vh
        # NOTE(review): touch() is inherited; presumably it flags the executor
        # as modified so that it gets saved -- confirm against the base class
        self.touch()

    def encode(self, data: 'np.ndarray', *args, **kwargs):
        """Project ``data`` onto 64 dimensions.

        :param data: array that can be reshaped to ``(-1, 784)``.
        :return: array of shape ``(n, 64)``.
        """
        # divide by 255 (presumably maps uint8 pixel values into [0, 1]), then
        # reduce dimension to 64 by random orthogonal projection
        return (data.reshape([-1, 784]) / 255) @ self.oth_mat


def hello_world(args):
    """Run the hello-world demo: download data, index it, query it, render HTML.

    :param args: parsed arguments; the attributes read here are ``workdir``,
        ``index_data_url``, ``query_data_url``, ``shards``, ``replicas`` and
        ``top_k``.
    """
    Path(args.workdir).mkdir(parents=True, exist_ok=True)

    urls = [args.index_data_url, args.query_data_url]
    targets = [os.path.join(args.workdir, 'index-original'), os.path.join(args.workdir, 'query-original')]

    # download_data skips targets that already exist, so a second inline
    # download loop here would be redundant
    download_data(targets, urls)

    # NOTE(review): these variables are presumably consumed by the YAML flow
    # configs loaded below -- confirm against the resource files
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(args.shards)
    os.environ['REPLICAS'] = str(args.replicas)
    os.environ['HW_WORKDIR'] = args.workdir

    f = Flow().load_config(resource_filename('jina', '/'.join(('resources', 'helloworld.flow.index.yml'))))

    with f.build() as fl:
        fl.index(raw_bytes=input_fn(targets[0]), batch_size=1024)

    # a single pause between the two phases; the query flow is loaded once
    countdown(8, reason=colored('behold! im going to switch to query mode', color='yellow', attrs='bold'))

    f = Flow().load_config(resource_filename('jina', '/'.join(('resources', 'helloworld.flow.query.yml'))))
    with f.build() as fl:
        fl.search(raw_bytes=input_fn(targets[1], index=False, num_doc=128),
                  callback=print_result, top_k=args.top_k, batch_size=32)

    # write_html also opens the page in the browser and logs its location,
    # so that is not repeated here
    html_path = os.path.join(args.workdir, 'hello-world.html')
    write_html(html_path)
30 changes: 30 additions & 0 deletions jina/helloworld/components.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import numpy as np

from ..executors.crafters import BaseSegmenter, BaseDocCrafter
from ..executors.encoders import BaseImageEncoder


class MyDocCrafter(BaseDocCrafter):
    """Document-level crafter used by the hello-world demo."""

    def craft(self, raw_bytes, *args, **kwargs):
        """Turn the raw bytes of one document into its ``meta_info`` field.

        :param raw_bytes: buffer that is read as a flat ``uint8`` array.
        :return: a dict with the single key ``meta_info`` holding whatever
            ``write_png`` returns for that array.
        """
        # imported at call time, not at module level
        # NOTE(review): presumably to avoid a circular import -- confirm
        from .helper import write_png

        pixels = np.frombuffer(raw_bytes, dtype=np.uint8)
        return {'meta_info': write_png(pixels)}


class MySegmenter(BaseSegmenter):
    """Segmenter used by the hello-world demo: one chunk per document."""

    def craft(self, raw_bytes, doc_id, *args, **kwargs):
        """Wrap the whole document into a single chunk.

        :param raw_bytes: buffer that is read as a flat ``uint8`` array.
        :param doc_id: accepted but not used here.
        :return: a one-element list containing a dict whose ``blob`` is that array.
        """
        blob = np.frombuffer(raw_bytes, dtype=np.uint8)
        return [{'blob': blob}]


class MyEncoder(BaseImageEncoder):
    """Encoder that projects flattened 784-value inputs onto 64 dimensions.

    The projection matrix is random and is generated once per instance.
    """

    def __init__(self, *args, **kwargs):
        """Build the random projection matrix; all arguments go to the base class."""
        super().__init__(*args, **kwargs)
        # generate a random 784x64 matrix with orthonormal columns: in the
        # reduced SVD ``H = u @ diag(s) @ vh`` the product ``u @ vh`` keeps the
        # singular vectors and drops the singular values
        H = np.random.rand(784, 64)
        u, _, vh = np.linalg.svd(H, full_matrices=False)
        self.oth_mat = u @ vh
        # NOTE(review): touch() is inherited; presumably it flags the executor
        # as modified so that it gets saved -- confirm against the base class
        self.touch()

    def encode(self, data: 'np.ndarray', *args, **kwargs):
        """Project ``data`` onto 64 dimensions.

        :param data: array that can be reshaped to ``(-1, 784)``.
        :return: array of shape ``(n, 64)``.
        """
        # divide by 255 (presumably maps uint8 pixel values into [0, 1]), then
        # reduce dimension to 64 by random orthogonal projection
        return (data.reshape([-1, 784]) / 255) @ self.oth_mat
28 changes: 28 additions & 0 deletions jina/helloworld/helper.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
import base64
import gzip
import os
import struct
import urllib.request
import webbrowser
import zlib

import numpy as np
from pkg_resources import resource_filename

from ..clients.python import ProgressBar
from ..helper import colored
from ..logging import default_logger


def load_mnist(path):
with gzip.open(path, 'rb') as fp:
Expand Down Expand Up @@ -71,3 +78,24 @@ def write_html(html_path):
t = fp.read()
t = t.replace('{% RESULT %}', '\n'.join(result_html))
fw.write(t)

url_html_path = 'file://' + os.path.abspath(html_path)

try:
webbrowser.open(url_html_path, new=2)
except:
pass
finally:
default_logger.success(f'You should see a "hello-world.html" opened in your browser, '
f'if not you may open {url_html_path} manually')

colored_url = colored('https://github.com/jina-ai/jina', color='cyan', attrs='underline')
default_logger.success(
f'🤩 Intrigued? Play with "jina hello-world --help" and learn more about Jina at {colored_url}')


def download_data(targets, urls):
    """Download each URL to its paired target path, skipping existing targets.

    Each file is first written to ``<target>.part`` and only moved into place
    once the transfer has finished, so an interrupted download never leaves a
    truncated file that a later run would mistake for a complete one.

    :param targets: destination file paths.
    :param urls: source URLs, paired with ``targets`` by position.
    """
    with ProgressBar(task_name='download fashion-mnist') as t:
        for f, u in zip(targets, urls):
            if os.path.exists(f):
                continue
            partial = f + '.part'
            try:
                # the report hook advances the progress bar by one step per call
                urllib.request.urlretrieve(u, partial, reporthook=lambda *x: t.update(1))
                os.replace(partial, f)
            finally:
                # only still present when the download or the move failed
                if os.path.exists(partial):
                    os.remove(partial)

0 comments on commit dffc5a3

Please sign in to comment.