diff --git a/jina/drivers/prune.py b/jina/drivers/prune.py index c2ce8d536f00e..d155ad600f257 100644 --- a/jina/drivers/prune.py +++ b/jina/drivers/prune.py @@ -7,10 +7,14 @@ class ChunkPruneDriver(BaseDriver): Removed fields are ``embedding``, ``raw_bytes``, ``blob``, ``text``. """ + def __init__(self, pruned=('embedding', 'raw_bytes', 'blob', 'text'), *args, **kwargs): + super().__init__(*args, **kwargs) + self.pruned = pruned + def __call__(self, *args, **kwargs): for d in self.req.docs: for c in d.chunks: - for k in ('embedding', 'raw_bytes', 'blob', 'text'): + for k in self.pruned: c.ClearField(k) @@ -20,9 +24,13 @@ class DocPruneDriver(BaseDriver): Removed fields are ``chunks`` """ + def __init__(self, pruned=('chunks',), *args, **kwargs): + super().__init__(*args, **kwargs) + self.pruned = pruned + def __call__(self, *args, **kwargs): for d in self.req.docs: - for k in ('chunks',): + for k in self.pruned: d.ClearField(k) diff --git a/jina/drivers/score.py b/jina/drivers/score.py index bab92ce14a6ba..bea89dcb846d1 100644 --- a/jina/drivers/score.py +++ b/jina/drivers/score.py @@ -23,7 +23,9 @@ def __call__(self, *args, **kwargs): query_chunk_meta[c.chunk_id] = pb_obj2dict(c, exec.required_keys) match_chunk_meta[k.match_chunk.chunk_id] = pb_obj2dict(k.match_chunk, exec.required_keys) - match_idx = np.array(match_idx, dtype=np.float32) + # float32 has a 24-bit significand, so chunk_id values above 2**24 would be rounded silently; + # float64 is exact for integers up to 2**53, which covers the whole np.uint32 range + match_idx = np.array(match_idx, dtype=np.float64) doc_idx = self.exec_fn(match_idx, query_chunk_meta, match_chunk_meta) diff --git a/jina/peapods/gateway.py b/jina/peapods/gateway.py index b6fb9b2fad50e..12c552e9ed1c6 100644 --- a/jina/peapods/gateway.py +++ b/jina/peapods/gateway.py @@ -112,7 +112,8 @@ def prefetch_req(num_req, fetch_to): return False with TimeContext(f'prefetching {self.args.prefetch} requests', self.logger): - self.logger.info('if this takes too long, you may want to reduce "--prefetch"') + 
self.logger.warning('if this takes too long, you may want to take smaller "--prefetch" or ' + 'ask client to reduce "--batch-size"') is_req_empty = prefetch_req(self.args.prefetch, prefetch_task) while not (zmqlet.msg_sent == zmqlet.msg_recv != 0 and is_req_empty): diff --git a/jina/resources/executors.requests.DocPbIndexer.yml b/jina/resources/executors.requests.DocPbIndexer.yml index d0fb71ae920ed..b134ea35443b0 100644 --- a/jina/resources/executors.requests.DocPbIndexer.yml +++ b/jina/resources/executors.requests.DocPbIndexer.yml @@ -2,10 +2,13 @@ on: ControlRequest: - !ControlReqDriver {} SearchRequest: - - !DocPbIndexDriver + - !DocPbSearchDriver with: method: query IndexRequest: - - !DocPbSearchDriver + - !DocPruneDriver + with: + pruned: [raw_bytes] + - !DocPbIndexDriver with: method: add