-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcontriever_config.py
49 lines (46 loc) · 2.37 KB
/
contriever_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import argparse
def parse_retriever_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
"--data",
# required=True,
type=str,
default=None,
help=".json file containing question and answers, similar format to reader data",
)
parser.add_argument("--passages", type=str, default=None, help="Path to passages (.tsv file)")
parser.add_argument("--passages_embeddings", type=str, default=None, help="Glob path to encoded passages")
parser.add_argument(
"--output_dir", type=str, default=None, help="Results are written to outputdir with data suffix"
)
parser.add_argument("--n_docs", type=int, default=100, help="Number of documents to retrieve per questions")
parser.add_argument(
"--validation_workers", type=int, default=32, help="Number of parallel processes to validate results"
)
parser.add_argument("--per_gpu_batch_size", type=int, default=64, help="Batch size for question encoding")
parser.add_argument(
"--save_or_load_index", action="store_true", help="If enabled, save index and load index if it exists"
)
parser.add_argument(
"--model_name_or_path", type=str, default="/root/autodl-tmp/contriever-msmarco", help="path to directory containing model weights and config file"
)
parser.add_argument("--no_fp16", action="store_true", help="inference in fp32")
parser.add_argument("--question_maxlength", type=int, default=512, help="Maximum number of tokens in a question")
parser.add_argument(
"--indexing_batch_size", type=int, default=1000000, help="Batch size of the number of passages indexed"
)
parser.add_argument("--projection_size", type=int, default=768)
parser.add_argument(
"--n_subquantizers",
type=int,
default=0,
help="Number of subquantizer used for vector quantization, if 0 flat index is used",
)
parser.add_argument("--n_bits", type=int, default=8, help="Number of bits per subquantizer")
parser.add_argument("--lang", nargs="+")
parser.add_argument("--dataset", type=str, default="none")
parser.add_argument("--lowercase", action="store_true", help="lowercase text before encoding")
parser.add_argument("--normalize_text", action="store_true", help="normalize text")
parsed_args = parser.parse_args()
return parsed_args
c_args = parse_retriever_arguments()