-
Notifications
You must be signed in to change notification settings - Fork 0
/
codex_run.py
122 lines (115 loc) · 5.39 KB
/
codex_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import json
import time
import os
import openai
from utils import create_python_script
from evaluation import evaluate_script, execute_code, evaluate_script_tatqa
def few_shot_prompting(prompt,model='code-davinci-002',
                       max_tokens = 256,
                       temperature=0.0,
                       n=1,
                       stop=['\n\n','Read the context','Read the following','Modality'],
                       key="OPENAI_API_KEY",
                       sleep=5,
                       timeout=300):
    '''
    Generate code output given a prompt using the OpenAI Code Completion API (code-davinci-002).
    This API is now deprecated and only accessible through the Research Access Program.
    See https://platform.openai.com/docs/guides/code for the OpenAI guide on code completion.

    The request is retried until it succeeds. After each failed attempt the function
    waits `sleep` seconds and then gives up if `timeout` seconds have elapsed since the
    first attempt, so the total running time can exceed `timeout` by one attempt.

    Params:
        prompt : the few-shot prompt passed to the completion endpoint.
        model (str) : name of the completion model to query.
        max_tokens (int) : the maximum token length of the output.
        temperature (float) : the temperature of the LLM, higher value results in less deterministic behavior.
        n (int) : number of responses to generate.
        stop (list) : stop sequences forwarded to the API. The default list is never mutated here.
        key (str) : API key identifier. Refers to the name of an environment variable on the user's device.
        sleep (float) : waiting time after a failed API call before retrying.
        timeout (float) : number of seconds after which retrying is abandoned.

    Returns:
        The object returned by openai.Completion.create with a 'time_taken' entry added,
        or, when the deadline is reached, a dict holding 'correct' = 'Time out error.'
        and 'time_taken'.
    '''
    # Retrieve key from environment variables
    openai.api_key = os.getenv(key)
    start_time = time.time()
    response = None
    while not response:
        try:
            response = openai.Completion.create(model=model,prompt=prompt,
                                                max_tokens=max_tokens,temperature=temperature,n=n,
                                                stop=stop)
        except Exception:
            # Deliberate best-effort retry: any API failure is retried until the deadline.
            time.sleep(sleep)
            if time.time() - start_time >= timeout:
                # Placeholder result so callers still receive a dict-like object.
                response = {'correct': 'Time out error.'}
                break
    response['time_taken'] = time.time() - start_time
    return response
def few_shot_chat_prompting(messages,model='gpt-3.5-turbo',
                            max_tokens = 256,
                            temperature=0.0,
                            n=1,
                            key="OPENAI_API_KEY",
                            sleep=5,
                            timeout=180):
    '''
    Generate code output given a prompt using the OpenAI Chat API (gpt-3.5-turbo model).
    Following the deprecation of the CODEX API, we propose an alternative in the form of the CHAT API (gpt-3.5-turbo).
    See https://platform.openai.com/docs/guides/gpt/chat-completions-api for a complete guide on OpenAI API Chat endpoints.

    The request is retried until it succeeds. After each failed attempt the function
    waits `sleep` seconds and then gives up if `timeout` seconds have elapsed since the
    first attempt, so the total running time can exceed `timeout` by one attempt.

    Params:
        messages (list) : list of messages. Each message is a dictionary.
        model (str) : name of the chat model to query.
        max_tokens (int) : the maximum token length of the output.
        temperature (float) : the temperature of the LLM, higher value results in less deterministic behavior.
        n (int) : number of responses to generate.
        key (str) : API key identifier. Refers to the name of an environment variable on the user's device.
        sleep (float) : waiting time after a failed API call, also used as the cooldown before returning.
        timeout (float) : number of seconds after which retrying is abandoned.

    Returns:
        The object returned by openai.ChatCompletion.create with a 'time_taken' entry added,
        or, when the deadline is reached, a dict holding 'correct' = 'Time out error.'
        and 'time_taken'.
    '''
    # Retrieve key from environment variables
    openai.api_key = os.getenv(key)
    start_time = time.time()
    response = None
    while not response:
        try:
            response = openai.ChatCompletion.create(model=model,messages=messages,
                                                    max_tokens=max_tokens,temperature=temperature,n=n)
        except Exception:
            # Deliberate best-effort retry: any API failure is retried until the deadline.
            time.sleep(sleep)
            if time.time() - start_time >= timeout:
                # Placeholder result so callers still receive a dict-like object.
                response = {'correct': 'Time out error.'}
                break
    # Measured before the cooldown so the cooldown is not counted in 'time_taken'.
    response['time_taken'] = time.time() - start_time
    time.sleep(sleep) # Cooldown between each API call
    return response
def save_result_finqa(idx,dataset,dataset_path,result,output_path="output/finqa/predictions/run1/",selection=[],print_eval=False):
    '''
    Execute and evaluate the code generated for one FinQA instance and save the outcome as JSON.

    `result` is updated in place with the keys 'question', 'gold_answer', 'code',
    'prediction' (only when the generated script runs without raising), 'correct'
    and 'few_shot', then written to `output_path` + "sample<idx>.json".

    Params:
        idx : index of the instance in `dataset`; also used in the output file name.
        dataset : collection of instances; dataset[idx]['qa'] must provide 'question' and 'exe_ans'.
        dataset_path : forwarded to create_python_script together with the generated code and idx.
        result : completion response; the generated code is read from result['choices'][0]['text'].
        output_path (str) : prefix of the output file. It is concatenated as-is with the file name,
                            so a directory must end with a path separator.
        selection (list) : few-shot examples used for the prompt, stored under 'few_shot'. Never mutated here.
        print_eval (bool) : when True, print the evaluation outcome of the instance.
    '''
    entry = dataset[idx]
    result['question'] = entry['qa']['question']
    result['gold_answer'] = entry['qa']['exe_ans']
    result['code'] = result['choices'][0]['text']
    script = create_python_script(result['code'],idx,dataset_path)
    try:
        result['prediction'] = execute_code(script)
    except Exception:
        # Best effort: show the failing script and carry on to the evaluation step.
        # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
        print(script)
    result['correct'] = evaluate_script(entry,script)
    if print_eval:
        print('Instance %s is %s'%(idx,result['correct']))
    result['few_shot'] = selection
    out_file = output_path+"sample%s.json"%idx
    # Create the target directory when missing so the result is not lost on the first run.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Serialize and write the JSON output
    with open(out_file, "w") as outfile:
        outfile.write(json.dumps(result,indent = 4))
def save_result_tatqa(idx,dataset,dataframe,result,output_path="output/tatqa/predictions/run1/",selection=[],print_eval=False):
    '''
    Execute and evaluate the code generated for one TAT-QA instance and save the outcome as JSON.

    `result` is updated in place with the keys 'question', 'gold_answer', 'code',
    'prediction' (only when the generated script runs without raising), 'correct'
    and 'few_shot', then written to `output_path` + "sample<idx>.json".

    Params:
        idx : row label in `dataframe`; also used in the output file name.
        dataset : collection of contexts; each context holds its instances under 'questions'.
        dataframe : table whose 'context_index' and 'instance_index' columns locate the instance
                    in `dataset` for row `idx`.
        result : completion response; the generated code is read from result['choices'][0]['text'].
        output_path (str) : prefix of the output file. It is concatenated as-is with the file name,
                            so a directory must end with a path separator.
        selection (list) : few-shot examples used for the prompt, stored under 'few_shot'. Never mutated here.
        print_eval (bool) : when True, print the first two entries of the evaluation result as EM and F1.
    '''
    context = dataset[dataframe.loc[idx,'context_index']]
    instance = context['questions'][dataframe.loc[idx,'instance_index']]
    result['question'] = instance['question']
    result['gold_answer'] = instance['answer']
    result['code'] = result['choices'][0]['text']
    script = create_python_script(result['code'])
    try:
        result['prediction'] = execute_code(script)
    except Exception:
        # Best effort: show the failing script and carry on to the evaluation step.
        # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
        print(script)
    result['correct'] = evaluate_script_tatqa(instance,script)
    if print_eval:
        print('Instance %s : EM = %s , F1 = %s'%(idx,result['correct'][0],result['correct'][1]))
    result['few_shot'] = selection
    out_file = output_path+"sample%s.json"%idx
    # Create the target directory when missing so the result is not lost on the first run.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Serialize and write the JSON output
    with open(out_file, "w") as outfile:
        outfile.write(json.dumps(result,indent = 4))