Skip to content

Commit

Permalink
fix: float32 matchidx overflow
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Apr 13, 2020
1 parent 813738b commit 4750555
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 6 deletions.
12 changes: 10 additions & 2 deletions jina/drivers/prune.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ class ChunkPruneDriver(BaseDriver):
Removed fields are ``embedding``, ``raw_bytes``, ``blob``, ``text``.
"""

def __init__(self, pruned=('embedding', 'raw_bytes', 'blob', 'text'), *args, **kwargs):
    """Create the driver and remember which chunk fields it should clear.

    :param pruned: name(s) of the chunk fields to clear. Either a sequence of
        field names or a single field name given as a plain string (which is
        what a YAML scalar such as ``pruned: raw_bytes`` produces).
    :param args: forwarded unchanged to the parent constructor
    :param kwargs: forwarded unchanged to the parent constructor
    """
    super().__init__(*args, **kwargs)
    # ``__call__`` iterates ``self.pruned``; a bare string would be walked
    # character by character, so wrap a lone field name into a 1-tuple.
    self.pruned = (pruned,) if isinstance(pruned, str) else pruned

# Clear the fields named in ``self.pruned`` on every chunk of every doc in ``self.req``.
# ``*args`` / ``**kwargs`` are accepted but not used in the lines shown here.
# NOTE(review): this span is diff output with the +/- markers lost; the file header
# reports 2 deletions for prune.py, and the hard-coded tuple loop below is one of them.
def __call__(self, *args, **kwargs):
for d in self.req.docs:
for c in d.chunks:
for k in ('embedding', 'raw_bytes', 'blob', 'text'):  # pre-commit line (removed by this diff): hard-coded field list
for k in self.pruned:  # post-commit replacement: field names come from the constructor argument
c.ClearField(k)  # reset field ``k`` on the chunk (presumably protobuf ``Message.ClearField`` -- confirm)


Expand All @@ -20,9 +24,13 @@ class DocPruneDriver(BaseDriver):
Removed fields are ``chunks``
"""

def __init__(self, pruned=('chunks',), *args, **kwargs):
    """Create the driver and remember which document fields it should clear.

    :param pruned: name(s) of the document fields to clear. Either a sequence
        of field names or a single field name given as a plain string (which
        is what a YAML scalar such as ``pruned: raw_bytes`` produces).
    :param args: forwarded unchanged to the parent constructor
    :param kwargs: forwarded unchanged to the parent constructor
    """
    super().__init__(*args, **kwargs)
    # ``__call__`` iterates ``self.pruned``; a bare string would be walked
    # character by character, so wrap a lone field name into a 1-tuple.
    self.pruned = (pruned,) if isinstance(pruned, str) else pruned

# Clear the fields named in ``self.pruned`` on every doc in ``self.req``.
# ``*args`` / ``**kwargs`` are accepted but not used in the lines shown here.
# NOTE(review): this span is diff output with the +/- markers lost; the file header
# reports 2 deletions for prune.py, and the hard-coded ``('chunks',)`` loop below is one of them.
def __call__(self, *args, **kwargs):
for d in self.req.docs:
for k in ('chunks',):  # pre-commit line (removed by this diff): hard-coded field list
for k in self.pruned:  # post-commit replacement: field names come from the constructor argument
d.ClearField(k)  # reset field ``k`` on the doc (presumably protobuf ``Message.ClearField`` -- confirm)


Expand Down
4 changes: 3 additions & 1 deletion jina/drivers/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ def __call__(self, *args, **kwargs):
query_chunk_meta[c.chunk_id] = pb_obj2dict(c, exec.required_keys)
match_chunk_meta[k.match_chunk.chunk_id] = pb_obj2dict(k.match_chunk, exec.required_keys)

match_idx = np.array(match_idx, dtype=np.float32)
# float32 has only a 24-bit significand, so integer ids above 2**24 are silently rounded
# to a neighbouring value; float64 is exact up to 2**53, which covers the whole
# np.uint32 range used by chunk_id
match_idx = np.array(match_idx, dtype=np.float64)

doc_idx = self.exec_fn(match_idx, query_chunk_meta, match_chunk_meta)

Expand Down
3 changes: 2 additions & 1 deletion jina/peapods/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ def prefetch_req(num_req, fetch_to):
return False

with TimeContext(f'prefetching {self.args.prefetch} requests', self.logger):
self.logger.info('if this takes too long, you may want to reduce "--prefetch"')
self.logger.warning('if this takes too long, you may want to take smaller "--prefetch" or '
'ask client to reduce "--batch-size"')
is_req_empty = prefetch_req(self.args.prefetch, prefetch_task)

while not (zmqlet.msg_sent == zmqlet.msg_recv != 0 and is_req_empty):
Expand Down
7 changes: 5 additions & 2 deletions jina/resources/executors.requests.DocPbIndexer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ on:
ControlRequest:
- !ControlReqDriver {}
SearchRequest:
- !DocPbIndexDriver
- !DocPbSearchDriver
with:
method: query
IndexRequest:
- !DocPbSearchDriver
- !DocPruneDriver
with:
# the driver loops over ``pruned`` item by item, so pass a list of field names;
# a bare scalar string would be iterated one character at a time
pruned: [raw_bytes]
- !DocPbIndexDriver
with:
method: add

0 comments on commit 4750555

Please sign in to comment.