forked from MarcinKorcz101/mk-ai-agents
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branches 'main' and 'main' of github.com:Farmerobot/mk-ai-agents
- Loading branch information
Showing
3 changed files
with
365 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,321 @@ | ||
,imp_model,crew_model,imp_out_tokens,crew_out_tokens,rounds,result,imp_pretend_count | ||
0,claude-3-5-haiku,claude-3-5-haiku,6440,11838,6,imp,2 | ||
1,claude-3-5-haiku,claude-3-5-haiku,146456,265404,37,imp,4 | ||
2,claude-3-5-haiku,claude-3-5-haiku,8400,14422,9,crew,1 | ||
3,claude-3-5-haiku,claude-3-5-haiku,205984,322017,40,lim,9 | ||
4,claude-3-5-haiku,claude-3-5-haiku,15211,34426,12,imp,2 | ||
5,claude-3-5-haiku,claude-3-5-sonnet,25569,33026,12,imp,6 | ||
6,claude-3-5-haiku,claude-3-5-sonnet,74847,127648,27,crew,9 | ||
7,claude-3-5-haiku,claude-3-5-sonnet,145025,188550,34,imp,8 | ||
8,claude-3-5-haiku,claude-3-5-sonnet,5367,11827,6,crew,1 | ||
9,claude-3-5-haiku,claude-3-5-sonnet,6808,17798,8,crew,2 | ||
10,claude-3-5-haiku,gemini-flash-1-5,7444,6813,6,imp,2 | ||
11,claude-3-5-haiku,gemini-flash-1-5,11012,13284,9,imp,2 | ||
12,claude-3-5-haiku,gemini-flash-1-5,7897,11996,9,crew,2 | ||
13,claude-3-5-haiku,gemini-flash-1-5,3942,4014,6,imp,2 | ||
14,claude-3-5-haiku,gemini-flash-1-5,7797,9743,10,imp,3 | ||
15,claude-3-5-haiku,gemini-pro-1-5,7103,5840,8,crew,1 | ||
16,claude-3-5-haiku,gemini-pro-1-5,7072,8723,8,crew,2 | ||
17,claude-3-5-haiku,gemini-pro-1-5,6063,6464,7,crew,2 | ||
18,claude-3-5-haiku,gemini-pro-1-5,8147,5817,8,crew,2 | ||
19,claude-3-5-haiku,gemini-pro-1-5,5808,7191,7,crew,2 | ||
20,claude-3-5-haiku,gpt-4o-mini,6602,10734,7,crew,2 | ||
21,claude-3-5-haiku,gpt-4o-mini,9128,8941,7,imp,3 | ||
22,claude-3-5-haiku,gpt-4o-mini,6868,6465,6,imp,2 | ||
23,claude-3-5-haiku,gpt-4o-mini,9574,12376,10,crew,2 | ||
24,claude-3-5-haiku,gpt-4o-mini,7045,8936,8,crew,1 | ||
25,claude-3-5-haiku,gpt-4o,8722,15799,9,crew,2 | ||
26,claude-3-5-haiku,gpt-4o,6809,10631,7,crew,2 | ||
27,claude-3-5-haiku,gpt-4o,16266,17684,10,imp,5 | ||
28,claude-3-5-haiku,gpt-4o,61709,80142,25,imp,16 | ||
29,claude-3-5-haiku,gpt-4o,207552,251503,40,lim,11 | ||
30,claude-3-5-haiku,llama-3-1-405b-instruct,6821,18236,8,crew,2 | ||
31,claude-3-5-haiku,llama-3-1-405b-instruct,10537,19824,8,imp,4 | ||
32,claude-3-5-haiku,llama-3-1-405b-instruct,6463,14288,7,crew,1 | ||
33,claude-3-5-haiku,llama-3-1-405b-instruct,13914,28284,11,crew,3 | ||
34,claude-3-5-haiku,llama-3-1-405b-instruct,6337,11189,6,crew,1 | ||
35,claude-3-5-haiku,llama-3-1-8b-instruct,13469,35181,11,imp,4 | ||
36,claude-3-5-haiku,llama-3-1-8b-instruct,13594,27742,9,imp,5 | ||
37,claude-3-5-haiku,llama-3-1-8b-instruct,208304,576267,40,lim,13 | ||
38,claude-3-5-haiku,llama-3-1-8b-instruct,17286,34801,10,imp,3 | ||
39,claude-3-5-haiku,llama-3-1-8b-instruct,6631,11525,6,imp,3 | ||
40,claude-3-5-sonnet,claude-3-5-haiku,5284,16738,8,crew,1 | ||
41,claude-3-5-sonnet,claude-3-5-haiku,4536,13523,7,crew,1 | ||
42,claude-3-5-sonnet,claude-3-5-haiku,9058,27958,11,crew,4 | ||
43,claude-3-5-sonnet,claude-3-5-haiku,5782,16110,8,crew,1 | ||
44,claude-3-5-sonnet,claude-3-5-haiku,5838,15522,8,crew,1 | ||
45,claude-3-5-sonnet,claude-3-5-sonnet,17492,51573,15,crew,5 | ||
46,claude-3-5-sonnet,claude-3-5-sonnet,6977,17372,9,crew,2 | ||
47,claude-3-5-sonnet,claude-3-5-sonnet,6760,16709,9,crew,2 | ||
48,claude-3-5-sonnet,claude-3-5-sonnet,5480,13981,8,crew,1 | ||
49,claude-3-5-sonnet,claude-3-5-sonnet,4757,14893,7,crew,1 | ||
50,claude-3-5-sonnet,gemini-flash-1-5,72498,128174,30,imp,12 | ||
51,claude-3-5-sonnet,gemini-flash-1-5,5707,11411,8,crew,1 | ||
52,claude-3-5-sonnet,gemini-flash-1-5,18671,28909,15,crew,1 | ||
53,claude-3-5-sonnet,gemini-flash-1-5,8936,12461,10,imp,2 | ||
54,claude-3-5-sonnet,gemini-flash-1-5,11980,16664,12,imp,3 | ||
55,claude-3-5-sonnet,gemini-pro-1-5,6430,7698,9,crew,3 | ||
56,claude-3-5-sonnet,gemini-pro-1-5,27133,17989,18,imp,7 | ||
57,claude-3-5-sonnet,gemini-pro-1-5,27786,14762,18,imp,4 | ||
58,claude-3-5-sonnet,gemini-pro-1-5,50659,42624,25,imp,11 | ||
59,claude-3-5-sonnet,gemini-pro-1-5,135185,127538,40,lim,20 | ||
60,claude-3-5-sonnet,gpt-4o-mini,93104,263014,33,crew,20 | ||
61,claude-3-5-sonnet,gpt-4o-mini,4793,9656,7,crew,1 | ||
62,claude-3-5-sonnet,gpt-4o-mini,9720,15256,10,imp,2 | ||
63,claude-3-5-sonnet,gpt-4o-mini,28327,68550,20,crew,9 | ||
64,claude-3-5-sonnet,gpt-4o-mini,31200,87894,21,imp,12 | ||
65,claude-3-5-sonnet,gpt-4o,8634,14483,10,imp,2 | ||
66,claude-3-5-sonnet,gpt-4o,54365,115286,25,imp,14 | ||
67,claude-3-5-sonnet,gpt-4o,28770,39806,19,imp,9 | ||
68,claude-3-5-sonnet,gpt-4o,27204,60020,19,crew,8 | ||
69,claude-3-5-sonnet,gpt-4o,49312,133824,22,crew,16 | ||
70,claude-3-5-sonnet,llama-3-1-405b-instruct,5660,15765,8,crew,1 | ||
71,claude-3-5-sonnet,llama-3-1-405b-instruct,16414,44249,15,crew,6 | ||
72,claude-3-5-sonnet,llama-3-1-405b-instruct,10505,30599,12,crew,2 | ||
73,claude-3-5-sonnet,llama-3-1-405b-instruct,8688,22572,10,crew,3 | ||
74,claude-3-5-sonnet,llama-3-1-405b-instruct,72324,289115,30,imp,16 | ||
75,claude-3-5-sonnet,llama-3-1-8b-instruct,5040,13953,7,crew,1 | ||
76,claude-3-5-sonnet,llama-3-1-8b-instruct,4864,16119,7,crew,1 | ||
77,claude-3-5-sonnet,llama-3-1-8b-instruct,73943,391492,31,crew,17 | ||
78,claude-3-5-sonnet,llama-3-1-8b-instruct,36883,103406,19,imp,4 | ||
79,claude-3-5-sonnet,llama-3-1-8b-instruct,5268,18093,8,crew,1 | ||
80,gemini-flash-1-5,claude-3-5-haiku,4952,13528,9,crew,1 | ||
81,gemini-flash-1-5,claude-3-5-haiku,3667,13730,7,crew,1 | ||
82,gemini-flash-1-5,claude-3-5-haiku,2387,9471,7,crew,1 | ||
83,gemini-flash-1-5,claude-3-5-haiku,4169,15201,8,crew,1 | ||
84,gemini-flash-1-5,claude-3-5-haiku,4762,17730,9,imp,1 | ||
85,gemini-flash-1-5,claude-3-5-sonnet,4818,14391,10,imp,1 | ||
86,gemini-flash-1-5,claude-3-5-sonnet,3221,12177,6,crew,1 | ||
87,gemini-flash-1-5,claude-3-5-sonnet,3445,14861,7,crew,1 | ||
88,gemini-flash-1-5,claude-3-5-sonnet,3289,12507,6,crew,1 | ||
89,gemini-flash-1-5,claude-3-5-sonnet,4518,11280,9,crew,1 | ||
90,gemini-flash-1-5,gemini-flash-1-5,3820,10624,7,crew,1 | ||
91,gemini-flash-1-5,gemini-flash-1-5,13865,24423,13,crew,1 | ||
92,gemini-flash-1-5,gemini-flash-1-5,4343,7141,9,crew,1 | ||
93,gemini-flash-1-5,gemini-flash-1-5,7287,14072,11,imp,1 | ||
94,gemini-flash-1-5,gemini-flash-1-5,30887,82140,20,imp,0 | ||
95,gemini-flash-1-5,gemini-pro-1-5,3120,5883,7,crew,1 | ||
96,gemini-flash-1-5,gemini-pro-1-5,2926,3745,7,crew,1 | ||
97,gemini-flash-1-5,gemini-pro-1-5,3470,6039,8,imp,1 | ||
98,gemini-flash-1-5,gemini-pro-1-5,5414,4808,8,crew,1 | ||
99,gemini-flash-1-5,gemini-pro-1-5,4526,5057,8,crew,1 | ||
100,gemini-flash-1-5,gpt-4o-mini,3357,9922,7,crew,1 | ||
101,gemini-flash-1-5,gpt-4o-mini,5501,10213,9,crew,0 | ||
102,gemini-flash-1-5,gpt-4o-mini,5221,10622,11,imp,1 | ||
103,gemini-flash-1-5,gpt-4o-mini,4352,8457,9,imp,1 | ||
104,gemini-flash-1-5,gpt-4o-mini,4339,7941,9,crew,1 | ||
105,gemini-flash-1-5,gpt-4o,30630,64853,22,crew,1 | ||
106,gemini-flash-1-5,gpt-4o,27696,90457,23,imp,1 | ||
107,gemini-flash-1-5,gpt-4o,3560,8699,8,crew,1 | ||
108,gemini-flash-1-5,gpt-4o,3658,8626,7,crew,1 | ||
109,gemini-flash-1-5,gpt-4o,3355,9039,7,imp,1 | ||
110,gemini-flash-1-5,llama-3-1-405b-instruct,4004,15864,8,crew,1 | ||
111,gemini-flash-1-5,llama-3-1-405b-instruct,3181,13209,6,crew,1 | ||
112,gemini-flash-1-5,llama-3-1-405b-instruct,3834,14993,7,imp,1 | ||
113,gemini-flash-1-5,llama-3-1-405b-instruct,4650,16581,8,crew,1 | ||
114,gemini-flash-1-5,llama-3-1-405b-instruct,3324,11775,6,crew,1 | ||
115,gemini-flash-1-5,llama-3-1-8b-instruct,5371,19503,9,imp,1 | ||
116,gemini-flash-1-5,llama-3-1-8b-instruct,5962,15471,9,imp,1 | ||
117,gemini-flash-1-5,llama-3-1-8b-instruct,5619,15066,9,crew,1 | ||
118,gemini-flash-1-5,llama-3-1-8b-instruct,4676,14866,9,crew,1 | ||
119,gemini-flash-1-5,llama-3-1-8b-instruct,2525,8100,5,imp,1 | ||
120,gemini-pro-1-5,claude-3-5-haiku,2245,13385,7,crew,1 | ||
121,gemini-pro-1-5,claude-3-5-haiku,2497,12834,7,crew,1 | ||
122,gemini-pro-1-5,claude-3-5-haiku,3103,12846,9,crew,1 | ||
123,gemini-pro-1-5,claude-3-5-haiku,3029,13072,9,crew,1 | ||
124,gemini-pro-1-5,claude-3-5-haiku,54804,508312,40,lim,3 | ||
125,gemini-pro-1-5,claude-3-5-sonnet,2010,12020,6,crew,1 | ||
126,gemini-pro-1-5,claude-3-5-sonnet,2239,11811,6,crew,1 | ||
127,gemini-pro-1-5,claude-3-5-sonnet,1445,12796,7,crew,1 | ||
128,gemini-pro-1-5,claude-3-5-sonnet,10639,61446,24,imp,1 | ||
129,gemini-pro-1-5,claude-3-5-sonnet,2414,15760,8,crew,1 | ||
130,gemini-pro-1-5,gemini-flash-1-5,2503,9129,7,crew,1 | ||
131,gemini-pro-1-5,gemini-flash-1-5,1881,8939,7,crew,1 | ||
132,gemini-pro-1-5,gemini-flash-1-5,44915,301109,40,lim,3 | ||
133,gemini-pro-1-5,gemini-flash-1-5,2276,9632,7,crew,1 | ||
134,gemini-pro-1-5,gemini-flash-1-5,11599,53806,22,crew,1 | ||
135,gemini-pro-1-5,gemini-pro-1-5,2674,5556,9,crew,1 | ||
136,gemini-pro-1-5,gemini-pro-1-5,2336,7049,7,crew,1 | ||
137,gemini-pro-1-5,gemini-pro-1-5,2463,5816,7,crew,1 | ||
138,gemini-pro-1-5,gemini-pro-1-5,3130,5263,10,crew,1 | ||
139,gemini-pro-1-5,gemini-pro-1-5,2366,6418,7,crew,1 | ||
140,gemini-pro-1-5,gpt-4o-mini,2397,8854,7,crew,1 | ||
141,gemini-pro-1-5,gpt-4o-mini,1849,8519,6,crew,1 | ||
142,gemini-pro-1-5,gpt-4o-mini,2028,9022,6,crew,1 | ||
143,gemini-pro-1-5,gpt-4o-mini,2300,9133,7,crew,0 | ||
144,gemini-pro-1-5,gpt-4o-mini,2530,8708,8,crew,1 | ||
145,gemini-pro-1-5,gpt-4o,8046,24794,13,imp,1 | ||
146,gemini-pro-1-5,gpt-4o,8484,26035,13,crew,1 | ||
147,gemini-pro-1-5,gpt-4o,20853,155118,24,crew,3 | ||
148,gemini-pro-1-5,gpt-4o,2113,8135,7,crew,1 | ||
149,gemini-pro-1-5,gpt-4o,2987,12687,9,imp,1 | ||
150,gemini-pro-1-5,llama-3-1-405b-instruct,2085,14016,7,imp,1 | ||
151,gemini-pro-1-5,llama-3-1-405b-instruct,1905,13600,7,crew,0 | ||
152,gemini-pro-1-5,llama-3-1-405b-instruct,12924,69542,21,imp,2 | ||
153,gemini-pro-1-5,llama-3-1-405b-instruct,1934,12825,7,crew,1 | ||
154,gemini-pro-1-5,llama-3-1-405b-instruct,2345,13838,7,imp,1 | ||
155,gemini-pro-1-5,llama-3-1-8b-instruct,3203,20703,9,imp,1 | ||
156,gemini-pro-1-5,llama-3-1-8b-instruct,2124,19341,7,crew,1 | ||
157,gemini-pro-1-5,llama-3-1-8b-instruct,2113,14334,7,crew,1 | ||
158,gemini-pro-1-5,llama-3-1-8b-instruct,3970,25320,11,imp,0 | ||
159,gemini-pro-1-5,llama-3-1-8b-instruct,6019,38085,14,imp,1 | ||
160,gpt-4o-mini,claude-3-5-haiku,17120,50879,16,crew,7 | ||
161,gpt-4o-mini,claude-3-5-haiku,3922,14516,7,crew,2 | ||
162,gpt-4o-mini,claude-3-5-haiku,3974,14021,7,crew,1 | ||
163,gpt-4o-mini,claude-3-5-haiku,6942,17008,10,crew,3 | ||
164,gpt-4o-mini,claude-3-5-haiku,3796,14452,7,crew,2 | ||
165,gpt-4o-mini,claude-3-5-sonnet,24155,59224,19,crew,10 | ||
166,gpt-4o-mini,claude-3-5-sonnet,3489,12772,6,crew,1 | ||
167,gpt-4o-mini,claude-3-5-sonnet,19542,68262,17,crew,10 | ||
168,gpt-4o-mini,claude-3-5-sonnet,6736,17331,9,imp,3 | ||
169,gpt-4o-mini,claude-3-5-sonnet,5043,12995,7,imp,2 | ||
170,gpt-4o-mini,gemini-flash-1-5,5714,14370,9,crew,2 | ||
171,gpt-4o-mini,gemini-flash-1-5,15408,26038,12,imp,8 | ||
172,gpt-4o-mini,gemini-flash-1-5,4654,10232,8,crew,2 | ||
173,gpt-4o-mini,gemini-flash-1-5,43266,65600,22,crew,18 | ||
174,gpt-4o-mini,gemini-flash-1-5,5925,14166,9,crew,3 | ||
175,gpt-4o-mini,gemini-pro-1-5,12627,9837,18,imp,9 | ||
176,gpt-4o-mini,gemini-pro-1-5,3388,6625,7,crew,2 | ||
177,gpt-4o-mini,gemini-pro-1-5,4063,3951,8,crew,1 | ||
178,gpt-4o-mini,gemini-pro-1-5,3826,6647,8,crew,1 | ||
179,gpt-4o-mini,gemini-pro-1-5,4440,6647,9,crew,3 | ||
180,gpt-4o-mini,gpt-4o-mini,6376,14124,9,crew,2 | ||
181,gpt-4o-mini,gpt-4o-mini,5519,10632,9,imp,2 | ||
182,gpt-4o-mini,gpt-4o-mini,6705,17621,10,crew,5 | ||
183,gpt-4o-mini,gpt-4o-mini,5065,6592,6,imp,1 | ||
184,gpt-4o-mini,gpt-4o-mini,10828,17554,12,imp,4 | ||
185,gpt-4o-mini,gpt-4o,5479,9908,7,imp,2 | ||
186,gpt-4o-mini,gpt-4o,3736,9450,7,crew,2 | ||
187,gpt-4o-mini,gpt-4o,23290,42010,18,imp,12 | ||
188,gpt-4o-mini,gpt-4o,26632,61574,19,imp,12 | ||
189,gpt-4o-mini,gpt-4o,4764,9146,7,imp,2 | ||
190,gpt-4o-mini,llama-3-1-405b-instruct,4912,16357,8,crew,2 | ||
191,gpt-4o-mini,llama-3-1-405b-instruct,9760,24455,11,imp,4 | ||
192,gpt-4o-mini,llama-3-1-405b-instruct,3807,13635,6,crew,1 | ||
193,gpt-4o-mini,llama-3-1-405b-instruct,30802,95038,22,crew,14 | ||
194,gpt-4o-mini,llama-3-1-405b-instruct,14152,42983,15,imp,3 | ||
195,gpt-4o-mini,llama-3-1-8b-instruct,8354,18971,9,imp,6 | ||
196,gpt-4o-mini,llama-3-1-8b-instruct,5489,14409,9,crew,2 | ||
197,gpt-4o-mini,llama-3-1-8b-instruct,28338,194190,20,imp,13 | ||
198,gpt-4o-mini,llama-3-1-8b-instruct,7675,21954,11,crew,4 | ||
199,gpt-4o-mini,llama-3-1-8b-instruct,6872,18426,10,imp,3 | ||
200,gpt-4o,claude-3-5-haiku,126011,417122,40,lim,29 | ||
201,gpt-4o,claude-3-5-haiku,130229,525256,40,lim,35 | ||
202,gpt-4o,claude-3-5-haiku,44069,218689,29,imp,19 | ||
203,gpt-4o,claude-3-5-haiku,147008,516427,40,lim,31 | ||
204,gpt-4o,claude-3-5-haiku,136164,504348,40,lim,33 | ||
205,gpt-4o,claude-3-5-sonnet,34766,142139,23,imp,17 | ||
206,gpt-4o,claude-3-5-sonnet,36596,134993,22,crew,16 | ||
207,gpt-4o,claude-3-5-sonnet,71790,192773,31,crew,23 | ||
208,gpt-4o,claude-3-5-sonnet,39203,116048,20,imp,15 | ||
209,gpt-4o,claude-3-5-sonnet,70976,174039,31,crew,21 | ||
210,gpt-4o,gemini-flash-1-5,31861,61216,20,imp,13 | ||
211,gpt-4o,gemini-flash-1-5,43423,78198,24,imp,13 | ||
212,gpt-4o,gemini-flash-1-5,4748,13502,9,crew,3 | ||
213,gpt-4o,gemini-flash-1-5,29526,48634,18,imp,8 | ||
214,gpt-4o,gemini-flash-1-5,142752,283354,40,lim,33 | ||
215,gpt-4o,gemini-pro-1-5,128551,98549,40,lim,33 | ||
216,gpt-4o,gemini-pro-1-5,63615,63575,40,lim,36 | ||
217,gpt-4o,gemini-pro-1-5,6717,11016,11,crew,3 | ||
218,gpt-4o,gemini-pro-1-5,63430,50189,29,imp,19 | ||
219,gpt-4o,gemini-pro-1-5,121048,67795,40,lim,34 | ||
220,gpt-4o,gpt-4o-mini,6031,15229,9,imp,2 | ||
221,gpt-4o,gpt-4o-mini,15090,36996,17,imp,8 | ||
222,gpt-4o,gpt-4o-mini,10720,25596,13,crew,6 | ||
223,gpt-4o,gpt-4o-mini,120725,374028,40,lim,30 | ||
224,gpt-4o,gpt-4o-mini,4384,10059,7,imp,3 | ||
225,gpt-4o,gpt-4o,60241,117895,28,crew,17 | ||
226,gpt-4o,gpt-4o,64290,196746,26,crew,25 | ||
227,gpt-4o,gpt-4o,26767,63197,23,imp,15 | ||
228,gpt-4o,gpt-4o,43580,127862,25,imp,17 | ||
229,gpt-4o,gpt-4o,82446,329038,40,lim,32 | ||
230,gpt-4o,llama-3-1-405b-instruct,123450,319191,39,imp,35 | ||
231,gpt-4o,llama-3-1-405b-instruct,39470,116261,21,imp,16 | ||
232,gpt-4o,llama-3-1-405b-instruct,76301,409743,40,lim,38 | ||
233,gpt-4o,llama-3-1-405b-instruct,10165,41598,14,imp,4 | ||
234,gpt-4o,llama-3-1-405b-instruct,131246,669360,40,lim,37 | ||
235,gpt-4o,llama-3-1-8b-instruct,3024,15918,6,crew,1 | ||
236,gpt-4o,llama-3-1-8b-instruct,19976,70805,17,crew,8 | ||
237,gpt-4o,llama-3-1-8b-instruct,10516,43960,13,crew,6 | ||
238,gpt-4o,llama-3-1-8b-instruct,11668,37704,14,crew,5 | ||
239,gpt-4o,llama-3-1-8b-instruct,166839,702078,40,lim,38 | ||
240,llama-3-1-405b-instruct,claude-3-5-haiku,6288,13812,9,imp,1 | ||
241,llama-3-1-405b-instruct,claude-3-5-haiku,8237,24600,10,crew,3 | ||
242,llama-3-1-405b-instruct,claude-3-5-haiku,6976,14492,9,imp,1 | ||
243,llama-3-1-405b-instruct,claude-3-5-haiku,9565,23036,12,crew,4 | ||
244,llama-3-1-405b-instruct,claude-3-5-haiku,8145,20404,10,crew,2 | ||
245,llama-3-1-405b-instruct,claude-3-5-sonnet,3907,12017,6,crew,1 | ||
246,llama-3-1-405b-instruct,claude-3-5-sonnet,3642,13519,6,crew,1 | ||
247,llama-3-1-405b-instruct,claude-3-5-sonnet,5459,11875,7,imp,1 | ||
248,llama-3-1-405b-instruct,claude-3-5-sonnet,71969,184043,33,crew,8 | ||
249,llama-3-1-405b-instruct,claude-3-5-sonnet,33388,82278,20,crew,8 | ||
250,llama-3-1-405b-instruct,gemini-flash-1-5,5028,6624,9,crew,1 | ||
251,llama-3-1-405b-instruct,gemini-flash-1-5,158177,164741,37,imp,27 | ||
252,llama-3-1-405b-instruct,gemini-flash-1-5,59210,61035,26,crew,6 | ||
253,llama-3-1-405b-instruct,gemini-flash-1-5,151270,262625,40,lim,27 | ||
254,llama-3-1-405b-instruct,gemini-flash-1-5,20075,30423,17,imp,4 | ||
255,llama-3-1-405b-instruct,gemini-pro-1-5,6224,4024,9,crew,1 | ||
256,llama-3-1-405b-instruct,gemini-pro-1-5,4778,4202,7,crew,1 | ||
257,llama-3-1-405b-instruct,gemini-pro-1-5,4615,4429,7,crew,1 | ||
258,llama-3-1-405b-instruct,gemini-pro-1-5,6518,5032,9,crew,1 | ||
259,llama-3-1-405b-instruct,gemini-pro-1-5,8881,3541,7,imp,3 | ||
260,llama-3-1-405b-instruct,gpt-4o-mini,10362,9165,8,imp,4 | ||
261,llama-3-1-405b-instruct,gpt-4o-mini,8664,13394,11,crew,1 | ||
262,llama-3-1-405b-instruct,gpt-4o-mini,3983,9775,7,crew,2 | ||
263,llama-3-1-405b-instruct,gpt-4o-mini,6747,10383,9,imp,1 | ||
264,llama-3-1-405b-instruct,gpt-4o-mini,3390,5361,5,imp,1 | ||
265,llama-3-1-405b-instruct,gpt-4o,6413,11987,8,imp,1 | ||
266,llama-3-1-405b-instruct,gpt-4o,6364,10255,7,imp,1 | ||
267,llama-3-1-405b-instruct,gpt-4o,6171,9627,7,imp,1 | ||
268,llama-3-1-405b-instruct,gpt-4o,6221,11005,8,imp,2 | ||
269,llama-3-1-405b-instruct,gpt-4o,7934,11469,10,crew,1 | ||
270,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5384,15722,8,crew,1 | ||
271,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5819,15056,9,crew,1 | ||
272,llama-3-1-405b-instruct,llama-3-1-405b-instruct,16692,39787,13,imp,1 | ||
273,llama-3-1-405b-instruct,llama-3-1-405b-instruct,4388,8618,6,crew,1 | ||
274,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5183,21680,7,imp,1 | ||
275,llama-3-1-405b-instruct,llama-3-1-8b-instruct,8924,21762,11,imp,2 | ||
276,llama-3-1-405b-instruct,llama-3-1-8b-instruct,9018,20276,11,imp,3 | ||
277,llama-3-1-405b-instruct,llama-3-1-8b-instruct,11416,28870,11,imp,3 | ||
278,llama-3-1-405b-instruct,llama-3-1-8b-instruct,27688,86189,19,imp,6 | ||
279,llama-3-1-405b-instruct,llama-3-1-8b-instruct,6532,26643,9,crew,2 | ||
280,llama-3-1-8b-instruct,claude-3-5-haiku,10245,26686,11,crew,1 | ||
281,llama-3-1-8b-instruct,claude-3-5-haiku,6169,18422,9,crew,2 | ||
282,llama-3-1-8b-instruct,claude-3-5-haiku,8448,25274,10,crew,3 | ||
283,llama-3-1-8b-instruct,claude-3-5-haiku,245620,286453,40,lim,19 | ||
284,llama-3-1-8b-instruct,claude-3-5-haiku,64079,679275,40,lim,30 | ||
285,llama-3-1-8b-instruct,claude-3-5-sonnet,5478,14790,8,imp,3 | ||
286,llama-3-1-8b-instruct,claude-3-5-sonnet,37947,98647,20,crew,8 | ||
287,llama-3-1-8b-instruct,claude-3-5-sonnet,80993,239551,26,crew,8 | ||
288,llama-3-1-8b-instruct,claude-3-5-sonnet,90131,179296,30,crew,18 | ||
289,llama-3-1-8b-instruct,claude-3-5-sonnet,101283,265258,31,crew,9 | ||
290,llama-3-1-8b-instruct,gemini-flash-1-5,182941,224191,40,lim,12 | ||
291,llama-3-1-8b-instruct,gemini-flash-1-5,169005,141582,40,lim,15 | ||
292,llama-3-1-8b-instruct,gemini-flash-1-5,155316,147533,40,lim,13 | ||
293,llama-3-1-8b-instruct,gemini-flash-1-5,181693,171269,40,lim,29 | ||
294,llama-3-1-8b-instruct,gemini-flash-1-5,35169,38841,16,imp,11 | ||
295,llama-3-1-8b-instruct,gemini-pro-1-5,232404,78998,36,crew,16 | ||
296,llama-3-1-8b-instruct,gemini-pro-1-5,6696,6401,9,crew,2 | ||
297,llama-3-1-8b-instruct,gemini-pro-1-5,18070,14167,15,crew,7 | ||
298,llama-3-1-8b-instruct,gemini-pro-1-5,12117,9431,11,crew,5 | ||
299,llama-3-1-8b-instruct,gemini-pro-1-5,162878,69384,40,lim,17 | ||
300,llama-3-1-8b-instruct,gpt-4o-mini,36188,61069,19,imp,10 | ||
301,llama-3-1-8b-instruct,gpt-4o-mini,5199,10911,7,crew,1 | ||
302,llama-3-1-8b-instruct,gpt-4o-mini,263069,268440,39,imp,20 | ||
303,llama-3-1-8b-instruct,gpt-4o-mini,161065,208146,40,lim,22 | ||
304,llama-3-1-8b-instruct,gpt-4o-mini,156071,301218,40,lim,24 | ||
305,llama-3-1-8b-instruct,gpt-4o,104447,143344,33,imp,21 | ||
306,llama-3-1-8b-instruct,gpt-4o,38613,61447,19,crew,9 | ||
307,llama-3-1-8b-instruct,gpt-4o,89130,203071,30,imp,9 | ||
308,llama-3-1-8b-instruct,gpt-4o,104978,135653,32,crew,25 | ||
309,llama-3-1-8b-instruct,gpt-4o,21723,24857,15,imp,4 | ||
310,llama-3-1-8b-instruct,llama-3-1-405b-instruct,23505,80795,17,crew,6 | ||
311,llama-3-1-8b-instruct,llama-3-1-405b-instruct,7377,18663,8,imp,2 | ||
312,llama-3-1-8b-instruct,llama-3-1-405b-instruct,162908,463018,40,lim,22 | ||
313,llama-3-1-8b-instruct,llama-3-1-405b-instruct,161602,443417,40,lim,23 | ||
314,llama-3-1-8b-instruct,llama-3-1-405b-instruct,81209,404306,40,lim,23 | ||
315,llama-3-1-8b-instruct,llama-3-1-8b-instruct,19031,41846,13,imp,6 | ||
316,llama-3-1-8b-instruct,llama-3-1-8b-instruct,67933,142896,27,crew,6 | ||
317,llama-3-1-8b-instruct,llama-3-1-8b-instruct,42732,51594,15,imp,8 | ||
318,llama-3-1-8b-instruct,llama-3-1-8b-instruct,166912,308687,39,imp,14 | ||
319,llama-3-1-8b-instruct,llama-3-1-8b-instruct,21883,59095,15,imp,4 |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import json | ||
import os | ||
import pandas as pd | ||
|
||
|
||
# Directory holding one JSON game log per tournament game.
DATA_DIR = 'data/tournament'
# Destination of the aggregated per-game table.
OUTPUT_CSV = "token_usage.csv"
# Column order of every row produced by ``summarize_game``.
COLUMNS = ["imp_model", "crew_model", "imp_out_tokens", "crew_out_tokens", "rounds", "result", "imp_pretend_count"]


def parse_models(filename):
    """Return ``(imp_model, crew_model)`` as encoded in a game-log file name.

    The part of the name before its first "." is split on "_"; field 0 is
    taken as the impostor model and field 2 as the crewmate model. Field 1
    is skipped (presumably a separator such as "vs" -- confirm against the
    actual files in the data directory).

    Raises:
        ValueError: if the name has fewer than three "_"-separated fields.
    """
    fields = filename.split(".")[0].split("_")
    if len(fields) < 3:
        raise ValueError(f"cannot parse model names from file name: {filename!r}")
    return fields[0], fields[2]


def summarize_game(filename, game):
    """Build one table row (ordered like ``COLUMNS``) from a parsed game log.

    Args:
        filename: name of the log file; supplies both model names and the
            "_round_limit" marker.
        game: the decoded JSON document. Read keys: ``players`` (each with
            ``role``, ``state.token_usage.output_tokens``,
            ``state.action_result`` and ``history.rounds[*].action_result``)
            and ``playthrough``.

    Returns:
        ``[imp_model, crew_model, imp_out_tokens, crew_out_tokens, rounds,
        result, imp_pretend_count]``.
    """
    imp_model, crew_model = parse_models(filename)
    players = game["players"]
    # The round count is read from the first player's history.
    rounds = len(players[0]["history"]["rounds"])

    # The file-name marker takes precedence over the last playthrough entry.
    if "_round_limit" in filename:
        result = "lim"
    elif "Crewmates win!" in game["playthrough"][-1]:
        result = "crew"
    else:
        result = "imp"

    # All counters start at zero for every game, so a log without an impostor
    # yields 0 instead of a NameError or a value left over from another file.
    imp_out_tokens = 0
    crew_out_tokens = 0
    imp_pretend = 0
    # Iterate over every player rather than assuming exactly five.
    for player in players:
        out_tokens = player["state"]["token_usage"]["output_tokens"]
        if player["role"] == "Impostor":
            imp_out_tokens += out_tokens
            # Count the current state's action plus every recorded round;
            # accumulated across impostors, like the token totals.
            imp_pretend += int("pretended" in player["state"]["action_result"])
            imp_pretend += sum(
                1
                for past_round in player["history"]["rounds"]
                if "pretended" in past_round["action_result"]
            )
        else:
            crew_out_tokens += out_tokens

    return [imp_model, crew_model, imp_out_tokens, crew_out_tokens, rounds, result, imp_pretend]


def collect_rows(directory=DATA_DIR):
    """Load every file directly inside *directory* and summarize each game.

    Sub-directories are not descended into. Files are processed in sorted
    name order so the resulting table is reproducible across filesystems.

    Raises:
        FileNotFoundError: if *directory* does not exist.
    """
    with os.scandir(directory) as entries:
        names = sorted(entry.name for entry in entries if not entry.is_dir())

    rows = []
    for name in names:
        with open(os.path.join(directory, name), encoding="utf-8") as handle:
            game = json.load(handle)
        rows.append(summarize_game(name, game))
    return rows


def main():
    """Aggregate all game logs into ``OUTPUT_CSV`` and print the table."""
    df = pd.DataFrame(collect_rows(), columns=COLUMNS)
    df.to_csv(OUTPUT_CSV)
    print(df)


if __name__ == "__main__":
    main()