Skip to content

Commit

Permalink
add Qwen & Baichuan into CE, cleaned for PR
Browse files Browse the repository at this point in the history
  • Loading branch information
ziangqin-baidu committed Jan 23, 2024
1 parent 1e6bab4 commit d691518
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 31 deletions.
2 changes: 1 addition & 1 deletion tests/test_tipc/llm/fixtures/predictor-ptuning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ baichuan:
qwen:
model_name: qwen/qwen-7b-chat
fused_model: true
dtype: float16
dtype: bfloat16
data_file: tests/fixtures/llm/zh_query.json
2 changes: 1 addition & 1 deletion tests/test_tipc/llm/fixtures/predictor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ baichuan:
qwen:
model_name: qwen/qwen-7b-chat
fused_model: true
dtype: float16
dtype: bfloat16
data_file: tests/fixtures/llm/zh_query.json
4 changes: 0 additions & 4 deletions tests/test_tipc/llm/inference/run_predictor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ export FLAGS_fraction_of_gpu_memory_to_use=0.92

model_name=${model_name:-"facebook/llama-7b"}
output_path=${output_path:-"./llm-inference-output"}
#output_path="/root/paddlejob/workspace/qinziang/workspace/PaddleNLP/tests/test_tmps"
fused_model=${fused_model:-false}
dtype=${dtype:-"float16"}
inference_model=${inference_model:-"true"}
Expand All @@ -36,15 +35,12 @@ echo "pwd -> "
cd ..

echo "==============================run-dynamic-predictor=============================="
echo "python ./llm/predictor.py --model_name_or_path ${model_name} --mode dynamic --output_file ${output_path}/dynamic.json ${common_arguments}"
python ./llm/predictor.py --model_name_or_path ${model_name} --mode dynamic --output_file ${output_path}/dynamic.json ${common_arguments}

echo "==============================run-export-predictor=============================="
echo "python ./llm/export_model.py --model_name_or_path ${model_name} --output_path ${output_path} ${common_arguments}"
python ./llm/export_model.py --model_name_or_path ${model_name} --output_path ${output_path} ${common_arguments}

echo "==============================run-static-predictor=============================="
echo "python ./llm/predictor.py --model_name_or_path ${output_path} --mode static --output_file ${output_path}/static.json ${common_arguments}"
python ./llm/predictor.py --model_name_or_path ${output_path} --mode static --output_file ${output_path}/static.json ${common_arguments}


Expand Down
37 changes: 12 additions & 25 deletions tests/test_tipc/llm/test_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class InferenceTest(unittest.TestCase):
def setUp(self) -> None:
paddle.set_default_dtype("float32")
self.output_path = tempfile.mkdtemp()
self.output_path = "/root/paddlejob/workspace/qinziang/workspace/PaddleNLP/tests/test_tmps"
sys.path.insert(0, "../llm")
self.model_name = os.getenv("MODEL_NAME")
self.run_predictor_shell_path = os.path.join(os.path.dirname(__file__), self.predictor_shell_name)
Expand Down Expand Up @@ -81,34 +80,27 @@ def test_predictor(self):
config = self._load_config(self.model_name)

# 0. download the ground-truth file for comparing
# import pdb; pdb.set_trace()
# print(f"> self.output_path: {self.output_path}")
get_path_from_url_with_filelock(
#os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", self.predict_file_name),
os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
root_dir=self.output_path,
)

config["output_path"] = self.output_path
command_prefix = " ".join([f"{key}={value}" for key, value in config.items()])

# import pdb; pdb.set_trace()
# 1.run dynamic model
print(f"\n\n> run dynamic model, CMD:\n{command_prefix + ' bash ' + self.run_predictor_shell_path}")
subprocess.run(
command_prefix + " bash " + self.run_predictor_shell_path, stdout=sys.stdout, stderr=sys.stderr, shell=True
)

full_match_acc, _ = self.compare_result("dynamic.json", "static.json")
self.assertGreater(full_match_acc, 0.8)

# full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertGreater(full_match_acc, 0.6)
#self.assertGreater(half_match_acc, 0.75)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)

# 2.run fused-mt model
# pdb.set_trace()
print(f"\n\n> run fused-mt model, CMD:\n{command_prefix} inference_model=true bash {self.run_predictor_shell_path}")
subprocess.run(
command_prefix + " inference_model=true bash " + self.run_predictor_shell_path,
stdout=sys.stdout,
Expand All @@ -122,14 +114,11 @@ def test_predictor(self):
print("precision:", full_match_acc)
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)
#full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertGreater(full_match_acc, 0.6)
#self.assertGreater(half_match_acc, 0.75)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertGreater(full_match_acc, 0.6)
self.assertGreater(half_match_acc, 0.75)

# 3. run sample decoding & benchmark on fused-mt model
print(f"> self.log_file: {self.log_file}")
# pdb.set_trace()
print(f"\n\n> run sample decoding & benchmark on fused-mt model, CMD:\n{command_prefix} top_p=0.7 decode_strategy=sampling benchmark=1 inference_model=true bash {self.run_predictor_shell_path}")
subprocess.run(
command_prefix
+ " top_p=0.7 decode_strategy=sampling benchmark=1 inference_model=true bash "
Expand All @@ -144,9 +133,9 @@ def test_predictor(self):
self.assertLessEqual(full_match_acc, 0.55)
self.assertLessEqual(half_match_acc, 0.85)

#full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
#self.assertLessEqual(full_match_acc, 0.55)
#self.assertLessEqual(half_match_acc, 0.85)
full_match_acc, half_match_acc = self.compare_result(self.predict_file_name, "static.json")
self.assertLessEqual(full_match_acc, 0.55)
self.assertLessEqual(half_match_acc, 0.85)

# read ips value from log file
ips = self._read_ips_from_log_file()
Expand Down Expand Up @@ -182,8 +171,7 @@ def _load_config(self, key):

for file in ["pre_caches.npy", "prefix_config.json", "prefix_model_state.pdparams"]:
get_path_from_url_with_filelock(
# os.path.join(self.ce_testing_base_url, config["model_name"], file), root_dir=self.output_path
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", file), root_dir=self.output_path
os.path.join(self.ce_testing_base_url, config["model_name"], file), root_dir=self.output_path
)

config["prefix_path"] = self.output_path
Expand All @@ -197,8 +185,7 @@ def test_predictor(self):

# 0. download the ground-truth file for comparing
get_path_from_url_with_filelock(
# os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
os.path.join(self.ce_testing_base_url, "linly-ai/chinese-llama-2-7b", self.predict_file_name),
os.path.join(self.ce_testing_base_url, config["model_name"], self.predict_file_name),
root_dir=self.output_path,
)

Expand Down

0 comments on commit d691518

Please sign in to comment.