Merge pull request #6 from loyal812/feat/test-data

feat: add test data with correct structure
loyal812 · Mar 26, 2024 · 67621a7
2 parents 1aec367 + bea0be8
commit 67621a7
Show file tree

Hide file tree

Showing 45 changed files with 793 additions and 344 deletions.
diff --git a/src/finetune/FineTuningClass.py b/src/finetune/FineTuningClass.py
@@ -98,7 +98,7 @@ def generate_and_save_questions(documents, output_file, num_questions):
 
                     print(f"Generated {len(questions)} questions")
 
-                    with open(output_file, "w") as f:
+                    with open(output_file, "w", encoding='utf-8') as f:
                         for question in questions:
                             f.write(question + "\n")
 
@@ -112,7 +112,7 @@ def generate_and_save_questions(documents, output_file, num_questions):
 
     def initial_eval(self):
         questions = []
-        with open(f'{self.data_path}/eval_questions.txt', "r") as f:
+        with open(f'{self.data_path}/eval_questions.txt', "r", encoding='utf-8') as f:
             for line in f:
                 questions.append(line.strip())
 
@@ -158,7 +158,7 @@ def jsonl_generation(self):
         )
 
         questions = []
-        with open(f'{self.data_path}/generated_data/train_questions.txt', "r") as f:
+        with open(f'{self.data_path}/generated_data/train_questions.txt', "r", encoding='utf-8') as f:
             for line in f:
                 questions.append(line.strip())
 
@@ -196,23 +196,24 @@ def finetune(self):
             while True:
                 print("Waiting for fine-tuning to complete...")
                 job_handle = openai.fine_tuning.jobs.retrieve(fine_tuning_job_id=job.id)
+                print(f"status: {job_handle.status}")
                 if job_handle.status == "succeeded":
                     print("Fine-tuning complete")
                     print("Fine-tuned model info", job_handle)
                     print("Model id", job_handle.fine_tuned_model)
 
-                    with open(f'{self.data_path}/generated_data/model.txt', "w") as f:
+                    with open(f'{self.data_path}/generated_data/model.txt', "w", encoding='utf-8') as f:
                         f.write(job_handle.fine_tuned_model + "\n")
 
                     # Load the JSON data from the file
-                    with open(f'{self.data_path}/payload/chatting_payload.json', 'r') as file:
+                    with open(f'{self.data_path}/payload/chatting_payload.json', 'r', encoding='utf-8') as file:
                         payload = json.load(file)
 
                     # Update the model_id with specific data
                     payload['model_id'] = job_handle.fine_tuned_model
 
                     # Write the updated JSON back to the file
-                    with open(f'{self.data_path}/payload/chatting_payload.json', 'w') as file:
+                    with open(f'{self.data_path}/payload/chatting_payload.json', 'w', encoding='utf-8') as file:
                         json.dump(payload, file, indent=4)
 
                     return job_handle.fine_tuned_model
@@ -243,18 +244,18 @@ def finetune(self):
         #             print("Fine-tuned model info", job_handle)
         #             print("Model id", job_handle.fine_tuned_model)
 
-        #             with open(f'{self.data_path}/generated_data/model.txt', "w") as f:
+        #             with open(f'{self.data_path}/generated_data/model.txt', "w", encoding='utf-8') as f:
         #                 f.write(job_handle.fine_tuned_model + "\n")
 
         #             # Load the JSON data from the file
-        #             with open(f'{self.data_path}/payload/chatting_payload.json', 'r') as file:
+        #             with open(f'{self.data_path}/payload/chatting_payload.json', 'r', encoding='utf-8') as file:
         #                 payload = json.load(file)
 
         #             # Update the model_id with specific data
         #             payload['model_id'] = job_handle.fine_tuned_model
 
         #             # Write the updated JSON back to the file
-        #             with open(f'{self.data_path}/payload/chatting_payload.json', 'w') as file:
+        #             with open(f'{self.data_path}/payload/chatting_payload.json', 'w', encoding='utf-8') as file:
         #                 json.dump(payload, file, indent=4)
 
         #             return job_handle.fine_tuned_model

diff --git a/test/regression/regression_test003/generated_data/eval_questions.txt b/test/regression/regression_test003/generated_data/eval_questions.txt
@@ -7,27 +7,27 @@ What factors might influence the synchronization quality of the TCC implementati
 Why is background subtraction necessary when building applications for real usage?
 How does the network's focus on other motion features impact the distance in the latent space?
 What is the main factor causing the enormous distance in the latent space during the golf swing analysis?
-How does the density and radius of red spheres indicate the degree of joint position difference between the user's pose and the expert's pose?
-What is the title of the research paper mentioned in the context information?
-Who are the authors of the research paper?
-What is the main focus of the research paper?
-What is the purpose of the AI Golf tool mentioned in the context information?
-What are some of the techniques used in the AI Golf tool for self-training?
-How does the AI Golf tool enhance complex motor task learning?
-What is the significance of sonification and haptic feedback in the AI Golf tool?
-How does the Subtletee system augment posture awareness for beginner golfers?
-What is the role of the decayed dynamic time warping algorithm in the AR-based self-sports learning system?
-How does the AI Golf tool utilize deep learning techniques for swing analysis?
+How does the skeleton version provide more compressed and precise information compared to the video version?
+What is the title of the paper mentioned in the context information?
+Who are the authors of the paper?
+What is the file type of the document mentioned in the context information?
+When was the document last accessed?
+What is the research interest of Chen-Chieh Liao?
+Where did Dong-Hyun Hwang receive his Ph.D. degree from?
+What are the research interests of Hideki Koike?
+What is the volume number of the publication mentioned in the context information?
+What is the file size of the document mentioned in the context information?
+What is the creation date of the document?
 What is the purpose of the motion decoder in the AI Golf system?
 How is the intermediate motion between two points in the latent space retrieved?
 What are the four different conditions under which statistical analysis was conducted in the experimental setup?
 How was the video dataset used in the study collected and processed?
 What is the significance of creating a video dataset without background information?
 How were 3D human pose datasets created for more precise analysis?
 What evaluation metric was used to measure the precision of alignment in the trained network?
-What is the key event and phase labeling used in the accuracy metric?
-How was the TCC loss used to train the network in the self-supervised learning method?
-What is the purpose of the SA-TCC model in the AI Golf system?
+What is the key event and phase labeling in the context of golf swing analysis?
+How many high-quality golf swing videos were included in the GolfDB dataset?
+What is the purpose of using Mask R-CNN in the video dataset processing?
 What is the main challenge addressed in the study of the AI Golf Swing Analysis Tool?
 How does the proposed system help users understand the differences between themselves and professional players?
 What is the significance of using an unsupervised learning method in the proposed approach?