Skip to content

Commit

Permalink
add Qwen & Baichuan into CE, cleaned for PR
Browse files Browse the repository at this point in the history
  • Loading branch information
ziangqin-baidu committed Jan 23, 2024
1 parent 1e6bab4 commit d691518
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 31 deletions.
2 changes: 1 addition & 1 deletion tests/test_tipc/llm/fixtures/predictor-ptuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ baichuan:
qwen:
model_name: qwen/qwen-7b-chat
fused_model: true
dtype: float16
dtype: bfloat16
data_file: tests/fixtures/llm/zh_query.json
2 changes: 1 addition & 1 deletion tests/test_tipc/llm/fixtures/predictor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ baichuan:
qwen:
model_name: qwen/qwen-7b-chat
fused_model: true
dtype: float16
dtype: bfloat16
data_file: tests/fixtures/llm/zh_query.json
4 changes: 0 additions & 4 deletions tests/test_tipc/llm/inference/run_predictor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ export FLAGS_fraction_of_gpu_memory_to_use=0.92

model_name=${model_name:-"facebook/llama-7b"}
output_path=${output_path:-"./llm-inference-output"}
#output_path="/root/paddlejob/workspace/qinziang/workspace/PaddleNLP/tests/test_tmps"
fused_model=${fused_model:-false}
dtype=${dtype:-"float16"}
inference_model=${inference_model:-"true"}
Expand All @@ -36,15 +35,12 @@ echo "pwd -> "
cd ..

echo "==============================run-dynamic-predictor=============================="
echo "python ./llm/predictor.py --model_name_or_path ${model_name} --mode dynamic --output_file ${output_path}/dynamic.json ${common_arguments}"
python ./llm/predictor.py --model_name_or_path ${model_name} --mode dynamic --output_file ${output_path}/dynamic.json ${common_arguments}

echo "==============================run-export-predictor=============================="
echo "python ./llm/export_model.py --model_name_or_path ${model_name} --output_path ${output_path} ${common_arguments}"
python ./llm/export_model.py --model_name_or_path ${model_name} --output_path ${output_path} ${common_arguments}

echo "==============================run-static-predictor=============================="
echo "python ./llm/predictor.py --model_name_or_path ${output_path} --mode static --output_file ${output_path}/static.json ${common_arguments}"
python ./llm/predictor.py --model_name_or_path ${output_path} --mode static --output_file ${output_path}/static.json ${common_arguments}


Expand Down
37 changes: 12 additions & 25 deletions tests/test_tipc/llm/test_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class InferenceTest(unittest.TestCase):
def setUp(self) -> None:
paddle.set_default_dtype("float32")
self.output_path = tempfile.mkdtemp()
self.output_path = "/root/paddlejob/workspace/qinziang/workspace/PaddleNLP/tests/test_tmps"
sys.path.insert(0, "../llm")
self.model_name = os.getenv("MODEL_NAME")
self.run_predictor_shell_path = os.path.join(os.path.dirname(__file__), self.predictor_shell_name)
Expand Down Expand Up @@ -81,34 +80,27 @@ def test_predictor(self):
config = self._load_config(self.model_name)

# 0. download the ground-truth file for comparing
# import pdb; pdb.set_trace()
# print(f"> self.output_path: {self.output_path}")
get_path_from_url_with_filelock(
#os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", self.predict_file_name),
os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
root_dir=self.output_path,
)

config["output_path"] = self.output_path
command_prefix = " ".join([f"{key}={value}" for key, value in config.items()])

# import pdb; pdb.set_trace()
# 1.run dynamic model
print(f"\n\n> run dynamic model, CMD:\n{command_prefix + ' bash ' + self.run_predictor_shell_path}")
subprocess.run(
command_prefix + " bash " + self.run_predictor_shell_path, stdout=sys.stdout, stderr=sys.stderr, shell=True
)

full_match_acc, _ = self.compare_result("dynamic.json", "static.json")
self.assertGreater(full_match_acc, 0.8)

# full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertGreater(full_match_acc, 0.6)
#self.assertGreater(half_match_acc, 0.75)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)

# 2.run fused-mt model
# pdb.set_trace()
print(f"\n\n> run fused-mt model, CMD:\n{command_prefix} inference_model=true bash {self.run_predictor_shell_path}")
subprocess.run(
command_prefix + " inference_model=true bash " + self.run_predictor_shell_path,
stdout=sys.stdout,
Expand All @@ -122,14 +114,11 @@ def test_predictor(self):
print("precision:", full_match_acc)
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)
#full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertGreater(full_match_acc, 0.6)
#self.assertGreater(half_match_acc, 0.75)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)

# 3. run sample decoding & benchmark on fused-mt model
print(f"> self.log_file: {self.log_file}")
# pdb.set_trace()
print(f"\n\n> run sample decoding & benchmark on fused-mt model, CMD:\n{command_prefix} top_p=0.7 decode_strategy=sampling benchmark=1 inference_model=true bash {self.run_predictor_shell_path}")
subprocess.run(
command_prefix
+ " top_p=0.7 decode_strategy=sampling benchmark=1 inference_model=true bash "
Expand All @@ -144,9 +133,9 @@ def test_predictor(self):
self.assertLessEqual(full_match_acc, 0.55)
self.assertLessEqual(half_match_acc, 0.85)

#full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertLessEqual(full_match_acc, 0.55)
#self.assertLessEqual(half_match_acc, 0.85)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertLessEqual(full_match_acc, 0.55)
self.assertLessEqual(half_match_acc, 0.85)

# read ips value from log file
ips = self._read_ips_from_log_file()
Expand Down Expand Up @@ -182,8 +171,7 @@ def _load_config(self, key):

for file in ["pre_caches.npy", "prefix_config.json", "prefix_model_state.pdparams"]:
get_path_from_url_with_filelock(
# os.path.join(self.ce_testing_base_url, config["model_name"], file), root_dir=self.output_path
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", file), root_dir=self.output_path
os.path.join(self.ce_testing_base_url, config["model_name"], file), root_dir=self.output_path
)

config["prefix_path"] = self.output_path
Expand All @@ -197,8 +185,7 @@ def test_predictor(self):

# 0. download the ground-truth file for comparing
get_path_from_url_with_filelock(
# os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", self.predict_file_name),
os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
root_dir=self.output_path,
)

Expand Down

0 comments on commit d691518

Please sign in to comment.