diff --git a/data/token_usage.csv b/data/token_usage.csv new file mode 100644 index 0000000..af4dc84 --- /dev/null +++ b/data/token_usage.csv @@ -0,0 +1,321 @@ +,imp_model,crew_model,imp_out_tokens,crew_out_tokens,rounds,result,imp_pretend_count +0,claude-3-5-haiku,claude-3-5-haiku,6440,11838,6,imp,2 +1,claude-3-5-haiku,claude-3-5-haiku,146456,265404,37,imp,4 +2,claude-3-5-haiku,claude-3-5-haiku,8400,14422,9,crew,1 +3,claude-3-5-haiku,claude-3-5-haiku,205984,322017,40,lim,9 +4,claude-3-5-haiku,claude-3-5-haiku,15211,34426,12,imp,2 +5,claude-3-5-haiku,claude-3-5-sonnet,25569,33026,12,imp,6 +6,claude-3-5-haiku,claude-3-5-sonnet,74847,127648,27,crew,9 +7,claude-3-5-haiku,claude-3-5-sonnet,145025,188550,34,imp,8 +8,claude-3-5-haiku,claude-3-5-sonnet,5367,11827,6,crew,1 +9,claude-3-5-haiku,claude-3-5-sonnet,6808,17798,8,crew,2 +10,claude-3-5-haiku,gemini-flash-1-5,7444,6813,6,imp,2 +11,claude-3-5-haiku,gemini-flash-1-5,11012,13284,9,imp,2 +12,claude-3-5-haiku,gemini-flash-1-5,7897,11996,9,crew,2 +13,claude-3-5-haiku,gemini-flash-1-5,3942,4014,6,imp,2 +14,claude-3-5-haiku,gemini-flash-1-5,7797,9743,10,imp,3 +15,claude-3-5-haiku,gemini-pro-1-5,7103,5840,8,crew,1 +16,claude-3-5-haiku,gemini-pro-1-5,7072,8723,8,crew,2 +17,claude-3-5-haiku,gemini-pro-1-5,6063,6464,7,crew,2 +18,claude-3-5-haiku,gemini-pro-1-5,8147,5817,8,crew,2 +19,claude-3-5-haiku,gemini-pro-1-5,5808,7191,7,crew,2 +20,claude-3-5-haiku,gpt-4o-mini,6602,10734,7,crew,2 +21,claude-3-5-haiku,gpt-4o-mini,9128,8941,7,imp,3 +22,claude-3-5-haiku,gpt-4o-mini,6868,6465,6,imp,2 +23,claude-3-5-haiku,gpt-4o-mini,9574,12376,10,crew,2 +24,claude-3-5-haiku,gpt-4o-mini,7045,8936,8,crew,1 +25,claude-3-5-haiku,gpt-4o,8722,15799,9,crew,2 +26,claude-3-5-haiku,gpt-4o,6809,10631,7,crew,2 +27,claude-3-5-haiku,gpt-4o,16266,17684,10,imp,5 +28,claude-3-5-haiku,gpt-4o,61709,80142,25,imp,16 +29,claude-3-5-haiku,gpt-4o,207552,251503,40,lim,11 +30,claude-3-5-haiku,llama-3-1-405b-instruct,6821,18236,8,crew,2 
+31,claude-3-5-haiku,llama-3-1-405b-instruct,10537,19824,8,imp,4 +32,claude-3-5-haiku,llama-3-1-405b-instruct,6463,14288,7,crew,1 +33,claude-3-5-haiku,llama-3-1-405b-instruct,13914,28284,11,crew,3 +34,claude-3-5-haiku,llama-3-1-405b-instruct,6337,11189,6,crew,1 +35,claude-3-5-haiku,llama-3-1-8b-instruct,13469,35181,11,imp,4 +36,claude-3-5-haiku,llama-3-1-8b-instruct,13594,27742,9,imp,5 +37,claude-3-5-haiku,llama-3-1-8b-instruct,208304,576267,40,lim,13 +38,claude-3-5-haiku,llama-3-1-8b-instruct,17286,34801,10,imp,3 +39,claude-3-5-haiku,llama-3-1-8b-instruct,6631,11525,6,imp,3 +40,claude-3-5-sonnet,claude-3-5-haiku,5284,16738,8,crew,1 +41,claude-3-5-sonnet,claude-3-5-haiku,4536,13523,7,crew,1 +42,claude-3-5-sonnet,claude-3-5-haiku,9058,27958,11,crew,4 +43,claude-3-5-sonnet,claude-3-5-haiku,5782,16110,8,crew,1 +44,claude-3-5-sonnet,claude-3-5-haiku,5838,15522,8,crew,1 +45,claude-3-5-sonnet,claude-3-5-sonnet,17492,51573,15,crew,5 +46,claude-3-5-sonnet,claude-3-5-sonnet,6977,17372,9,crew,2 +47,claude-3-5-sonnet,claude-3-5-sonnet,6760,16709,9,crew,2 +48,claude-3-5-sonnet,claude-3-5-sonnet,5480,13981,8,crew,1 +49,claude-3-5-sonnet,claude-3-5-sonnet,4757,14893,7,crew,1 +50,claude-3-5-sonnet,gemini-flash-1-5,72498,128174,30,imp,12 +51,claude-3-5-sonnet,gemini-flash-1-5,5707,11411,8,crew,1 +52,claude-3-5-sonnet,gemini-flash-1-5,18671,28909,15,crew,1 +53,claude-3-5-sonnet,gemini-flash-1-5,8936,12461,10,imp,2 +54,claude-3-5-sonnet,gemini-flash-1-5,11980,16664,12,imp,3 +55,claude-3-5-sonnet,gemini-pro-1-5,6430,7698,9,crew,3 +56,claude-3-5-sonnet,gemini-pro-1-5,27133,17989,18,imp,7 +57,claude-3-5-sonnet,gemini-pro-1-5,27786,14762,18,imp,4 +58,claude-3-5-sonnet,gemini-pro-1-5,50659,42624,25,imp,11 +59,claude-3-5-sonnet,gemini-pro-1-5,135185,127538,40,lim,20 +60,claude-3-5-sonnet,gpt-4o-mini,93104,263014,33,crew,20 +61,claude-3-5-sonnet,gpt-4o-mini,4793,9656,7,crew,1 +62,claude-3-5-sonnet,gpt-4o-mini,9720,15256,10,imp,2 +63,claude-3-5-sonnet,gpt-4o-mini,28327,68550,20,crew,9 
+64,claude-3-5-sonnet,gpt-4o-mini,31200,87894,21,imp,12 +65,claude-3-5-sonnet,gpt-4o,8634,14483,10,imp,2 +66,claude-3-5-sonnet,gpt-4o,54365,115286,25,imp,14 +67,claude-3-5-sonnet,gpt-4o,28770,39806,19,imp,9 +68,claude-3-5-sonnet,gpt-4o,27204,60020,19,crew,8 +69,claude-3-5-sonnet,gpt-4o,49312,133824,22,crew,16 +70,claude-3-5-sonnet,llama-3-1-405b-instruct,5660,15765,8,crew,1 +71,claude-3-5-sonnet,llama-3-1-405b-instruct,16414,44249,15,crew,6 +72,claude-3-5-sonnet,llama-3-1-405b-instruct,10505,30599,12,crew,2 +73,claude-3-5-sonnet,llama-3-1-405b-instruct,8688,22572,10,crew,3 +74,claude-3-5-sonnet,llama-3-1-405b-instruct,72324,289115,30,imp,16 +75,claude-3-5-sonnet,llama-3-1-8b-instruct,5040,13953,7,crew,1 +76,claude-3-5-sonnet,llama-3-1-8b-instruct,4864,16119,7,crew,1 +77,claude-3-5-sonnet,llama-3-1-8b-instruct,73943,391492,31,crew,17 +78,claude-3-5-sonnet,llama-3-1-8b-instruct,36883,103406,19,imp,4 +79,claude-3-5-sonnet,llama-3-1-8b-instruct,5268,18093,8,crew,1 +80,gemini-flash-1-5,claude-3-5-haiku,4952,13528,9,crew,1 +81,gemini-flash-1-5,claude-3-5-haiku,3667,13730,7,crew,1 +82,gemini-flash-1-5,claude-3-5-haiku,2387,9471,7,crew,1 +83,gemini-flash-1-5,claude-3-5-haiku,4169,15201,8,crew,1 +84,gemini-flash-1-5,claude-3-5-haiku,4762,17730,9,imp,1 +85,gemini-flash-1-5,claude-3-5-sonnet,4818,14391,10,imp,1 +86,gemini-flash-1-5,claude-3-5-sonnet,3221,12177,6,crew,1 +87,gemini-flash-1-5,claude-3-5-sonnet,3445,14861,7,crew,1 +88,gemini-flash-1-5,claude-3-5-sonnet,3289,12507,6,crew,1 +89,gemini-flash-1-5,claude-3-5-sonnet,4518,11280,9,crew,1 +90,gemini-flash-1-5,gemini-flash-1-5,3820,10624,7,crew,1 +91,gemini-flash-1-5,gemini-flash-1-5,13865,24423,13,crew,1 +92,gemini-flash-1-5,gemini-flash-1-5,4343,7141,9,crew,1 +93,gemini-flash-1-5,gemini-flash-1-5,7287,14072,11,imp,1 +94,gemini-flash-1-5,gemini-flash-1-5,30887,82140,20,imp,0 +95,gemini-flash-1-5,gemini-pro-1-5,3120,5883,7,crew,1 +96,gemini-flash-1-5,gemini-pro-1-5,2926,3745,7,crew,1 
+97,gemini-flash-1-5,gemini-pro-1-5,3470,6039,8,imp,1 +98,gemini-flash-1-5,gemini-pro-1-5,5414,4808,8,crew,1 +99,gemini-flash-1-5,gemini-pro-1-5,4526,5057,8,crew,1 +100,gemini-flash-1-5,gpt-4o-mini,3357,9922,7,crew,1 +101,gemini-flash-1-5,gpt-4o-mini,5501,10213,9,crew,0 +102,gemini-flash-1-5,gpt-4o-mini,5221,10622,11,imp,1 +103,gemini-flash-1-5,gpt-4o-mini,4352,8457,9,imp,1 +104,gemini-flash-1-5,gpt-4o-mini,4339,7941,9,crew,1 +105,gemini-flash-1-5,gpt-4o,30630,64853,22,crew,1 +106,gemini-flash-1-5,gpt-4o,27696,90457,23,imp,1 +107,gemini-flash-1-5,gpt-4o,3560,8699,8,crew,1 +108,gemini-flash-1-5,gpt-4o,3658,8626,7,crew,1 +109,gemini-flash-1-5,gpt-4o,3355,9039,7,imp,1 +110,gemini-flash-1-5,llama-3-1-405b-instruct,4004,15864,8,crew,1 +111,gemini-flash-1-5,llama-3-1-405b-instruct,3181,13209,6,crew,1 +112,gemini-flash-1-5,llama-3-1-405b-instruct,3834,14993,7,imp,1 +113,gemini-flash-1-5,llama-3-1-405b-instruct,4650,16581,8,crew,1 +114,gemini-flash-1-5,llama-3-1-405b-instruct,3324,11775,6,crew,1 +115,gemini-flash-1-5,llama-3-1-8b-instruct,5371,19503,9,imp,1 +116,gemini-flash-1-5,llama-3-1-8b-instruct,5962,15471,9,imp,1 +117,gemini-flash-1-5,llama-3-1-8b-instruct,5619,15066,9,crew,1 +118,gemini-flash-1-5,llama-3-1-8b-instruct,4676,14866,9,crew,1 +119,gemini-flash-1-5,llama-3-1-8b-instruct,2525,8100,5,imp,1 +120,gemini-pro-1-5,claude-3-5-haiku,2245,13385,7,crew,1 +121,gemini-pro-1-5,claude-3-5-haiku,2497,12834,7,crew,1 +122,gemini-pro-1-5,claude-3-5-haiku,3103,12846,9,crew,1 +123,gemini-pro-1-5,claude-3-5-haiku,3029,13072,9,crew,1 +124,gemini-pro-1-5,claude-3-5-haiku,54804,508312,40,lim,3 +125,gemini-pro-1-5,claude-3-5-sonnet,2010,12020,6,crew,1 +126,gemini-pro-1-5,claude-3-5-sonnet,2239,11811,6,crew,1 +127,gemini-pro-1-5,claude-3-5-sonnet,1445,12796,7,crew,1 +128,gemini-pro-1-5,claude-3-5-sonnet,10639,61446,24,imp,1 +129,gemini-pro-1-5,claude-3-5-sonnet,2414,15760,8,crew,1 +130,gemini-pro-1-5,gemini-flash-1-5,2503,9129,7,crew,1 
+131,gemini-pro-1-5,gemini-flash-1-5,1881,8939,7,crew,1 +132,gemini-pro-1-5,gemini-flash-1-5,44915,301109,40,lim,3 +133,gemini-pro-1-5,gemini-flash-1-5,2276,9632,7,crew,1 +134,gemini-pro-1-5,gemini-flash-1-5,11599,53806,22,crew,1 +135,gemini-pro-1-5,gemini-pro-1-5,2674,5556,9,crew,1 +136,gemini-pro-1-5,gemini-pro-1-5,2336,7049,7,crew,1 +137,gemini-pro-1-5,gemini-pro-1-5,2463,5816,7,crew,1 +138,gemini-pro-1-5,gemini-pro-1-5,3130,5263,10,crew,1 +139,gemini-pro-1-5,gemini-pro-1-5,2366,6418,7,crew,1 +140,gemini-pro-1-5,gpt-4o-mini,2397,8854,7,crew,1 +141,gemini-pro-1-5,gpt-4o-mini,1849,8519,6,crew,1 +142,gemini-pro-1-5,gpt-4o-mini,2028,9022,6,crew,1 +143,gemini-pro-1-5,gpt-4o-mini,2300,9133,7,crew,0 +144,gemini-pro-1-5,gpt-4o-mini,2530,8708,8,crew,1 +145,gemini-pro-1-5,gpt-4o,8046,24794,13,imp,1 +146,gemini-pro-1-5,gpt-4o,8484,26035,13,crew,1 +147,gemini-pro-1-5,gpt-4o,20853,155118,24,crew,3 +148,gemini-pro-1-5,gpt-4o,2113,8135,7,crew,1 +149,gemini-pro-1-5,gpt-4o,2987,12687,9,imp,1 +150,gemini-pro-1-5,llama-3-1-405b-instruct,2085,14016,7,imp,1 +151,gemini-pro-1-5,llama-3-1-405b-instruct,1905,13600,7,crew,0 +152,gemini-pro-1-5,llama-3-1-405b-instruct,12924,69542,21,imp,2 +153,gemini-pro-1-5,llama-3-1-405b-instruct,1934,12825,7,crew,1 +154,gemini-pro-1-5,llama-3-1-405b-instruct,2345,13838,7,imp,1 +155,gemini-pro-1-5,llama-3-1-8b-instruct,3203,20703,9,imp,1 +156,gemini-pro-1-5,llama-3-1-8b-instruct,2124,19341,7,crew,1 +157,gemini-pro-1-5,llama-3-1-8b-instruct,2113,14334,7,crew,1 +158,gemini-pro-1-5,llama-3-1-8b-instruct,3970,25320,11,imp,0 +159,gemini-pro-1-5,llama-3-1-8b-instruct,6019,38085,14,imp,1 +160,gpt-4o-mini,claude-3-5-haiku,17120,50879,16,crew,7 +161,gpt-4o-mini,claude-3-5-haiku,3922,14516,7,crew,2 +162,gpt-4o-mini,claude-3-5-haiku,3974,14021,7,crew,1 +163,gpt-4o-mini,claude-3-5-haiku,6942,17008,10,crew,3 +164,gpt-4o-mini,claude-3-5-haiku,3796,14452,7,crew,2 +165,gpt-4o-mini,claude-3-5-sonnet,24155,59224,19,crew,10 
+166,gpt-4o-mini,claude-3-5-sonnet,3489,12772,6,crew,1 +167,gpt-4o-mini,claude-3-5-sonnet,19542,68262,17,crew,10 +168,gpt-4o-mini,claude-3-5-sonnet,6736,17331,9,imp,3 +169,gpt-4o-mini,claude-3-5-sonnet,5043,12995,7,imp,2 +170,gpt-4o-mini,gemini-flash-1-5,5714,14370,9,crew,2 +171,gpt-4o-mini,gemini-flash-1-5,15408,26038,12,imp,8 +172,gpt-4o-mini,gemini-flash-1-5,4654,10232,8,crew,2 +173,gpt-4o-mini,gemini-flash-1-5,43266,65600,22,crew,18 +174,gpt-4o-mini,gemini-flash-1-5,5925,14166,9,crew,3 +175,gpt-4o-mini,gemini-pro-1-5,12627,9837,18,imp,9 +176,gpt-4o-mini,gemini-pro-1-5,3388,6625,7,crew,2 +177,gpt-4o-mini,gemini-pro-1-5,4063,3951,8,crew,1 +178,gpt-4o-mini,gemini-pro-1-5,3826,6647,8,crew,1 +179,gpt-4o-mini,gemini-pro-1-5,4440,6647,9,crew,3 +180,gpt-4o-mini,gpt-4o-mini,6376,14124,9,crew,2 +181,gpt-4o-mini,gpt-4o-mini,5519,10632,9,imp,2 +182,gpt-4o-mini,gpt-4o-mini,6705,17621,10,crew,5 +183,gpt-4o-mini,gpt-4o-mini,5065,6592,6,imp,1 +184,gpt-4o-mini,gpt-4o-mini,10828,17554,12,imp,4 +185,gpt-4o-mini,gpt-4o,5479,9908,7,imp,2 +186,gpt-4o-mini,gpt-4o,3736,9450,7,crew,2 +187,gpt-4o-mini,gpt-4o,23290,42010,18,imp,12 +188,gpt-4o-mini,gpt-4o,26632,61574,19,imp,12 +189,gpt-4o-mini,gpt-4o,4764,9146,7,imp,2 +190,gpt-4o-mini,llama-3-1-405b-instruct,4912,16357,8,crew,2 +191,gpt-4o-mini,llama-3-1-405b-instruct,9760,24455,11,imp,4 +192,gpt-4o-mini,llama-3-1-405b-instruct,3807,13635,6,crew,1 +193,gpt-4o-mini,llama-3-1-405b-instruct,30802,95038,22,crew,14 +194,gpt-4o-mini,llama-3-1-405b-instruct,14152,42983,15,imp,3 +195,gpt-4o-mini,llama-3-1-8b-instruct,8354,18971,9,imp,6 +196,gpt-4o-mini,llama-3-1-8b-instruct,5489,14409,9,crew,2 +197,gpt-4o-mini,llama-3-1-8b-instruct,28338,194190,20,imp,13 +198,gpt-4o-mini,llama-3-1-8b-instruct,7675,21954,11,crew,4 +199,gpt-4o-mini,llama-3-1-8b-instruct,6872,18426,10,imp,3 +200,gpt-4o,claude-3-5-haiku,126011,417122,40,lim,29 +201,gpt-4o,claude-3-5-haiku,130229,525256,40,lim,35 +202,gpt-4o,claude-3-5-haiku,44069,218689,29,imp,19 
+203,gpt-4o,claude-3-5-haiku,147008,516427,40,lim,31 +204,gpt-4o,claude-3-5-haiku,136164,504348,40,lim,33 +205,gpt-4o,claude-3-5-sonnet,34766,142139,23,imp,17 +206,gpt-4o,claude-3-5-sonnet,36596,134993,22,crew,16 +207,gpt-4o,claude-3-5-sonnet,71790,192773,31,crew,23 +208,gpt-4o,claude-3-5-sonnet,39203,116048,20,imp,15 +209,gpt-4o,claude-3-5-sonnet,70976,174039,31,crew,21 +210,gpt-4o,gemini-flash-1-5,31861,61216,20,imp,13 +211,gpt-4o,gemini-flash-1-5,43423,78198,24,imp,13 +212,gpt-4o,gemini-flash-1-5,4748,13502,9,crew,3 +213,gpt-4o,gemini-flash-1-5,29526,48634,18,imp,8 +214,gpt-4o,gemini-flash-1-5,142752,283354,40,lim,33 +215,gpt-4o,gemini-pro-1-5,128551,98549,40,lim,33 +216,gpt-4o,gemini-pro-1-5,63615,63575,40,lim,36 +217,gpt-4o,gemini-pro-1-5,6717,11016,11,crew,3 +218,gpt-4o,gemini-pro-1-5,63430,50189,29,imp,19 +219,gpt-4o,gemini-pro-1-5,121048,67795,40,lim,34 +220,gpt-4o,gpt-4o-mini,6031,15229,9,imp,2 +221,gpt-4o,gpt-4o-mini,15090,36996,17,imp,8 +222,gpt-4o,gpt-4o-mini,10720,25596,13,crew,6 +223,gpt-4o,gpt-4o-mini,120725,374028,40,lim,30 +224,gpt-4o,gpt-4o-mini,4384,10059,7,imp,3 +225,gpt-4o,gpt-4o,60241,117895,28,crew,17 +226,gpt-4o,gpt-4o,64290,196746,26,crew,25 +227,gpt-4o,gpt-4o,26767,63197,23,imp,15 +228,gpt-4o,gpt-4o,43580,127862,25,imp,17 +229,gpt-4o,gpt-4o,82446,329038,40,lim,32 +230,gpt-4o,llama-3-1-405b-instruct,123450,319191,39,imp,35 +231,gpt-4o,llama-3-1-405b-instruct,39470,116261,21,imp,16 +232,gpt-4o,llama-3-1-405b-instruct,76301,409743,40,lim,38 +233,gpt-4o,llama-3-1-405b-instruct,10165,41598,14,imp,4 +234,gpt-4o,llama-3-1-405b-instruct,131246,669360,40,lim,37 +235,gpt-4o,llama-3-1-8b-instruct,3024,15918,6,crew,1 +236,gpt-4o,llama-3-1-8b-instruct,19976,70805,17,crew,8 +237,gpt-4o,llama-3-1-8b-instruct,10516,43960,13,crew,6 +238,gpt-4o,llama-3-1-8b-instruct,11668,37704,14,crew,5 +239,gpt-4o,llama-3-1-8b-instruct,166839,702078,40,lim,38 +240,llama-3-1-405b-instruct,claude-3-5-haiku,6288,13812,9,imp,1 
+241,llama-3-1-405b-instruct,claude-3-5-haiku,8237,24600,10,crew,3 +242,llama-3-1-405b-instruct,claude-3-5-haiku,6976,14492,9,imp,1 +243,llama-3-1-405b-instruct,claude-3-5-haiku,9565,23036,12,crew,4 +244,llama-3-1-405b-instruct,claude-3-5-haiku,8145,20404,10,crew,2 +245,llama-3-1-405b-instruct,claude-3-5-sonnet,3907,12017,6,crew,1 +246,llama-3-1-405b-instruct,claude-3-5-sonnet,3642,13519,6,crew,1 +247,llama-3-1-405b-instruct,claude-3-5-sonnet,5459,11875,7,imp,1 +248,llama-3-1-405b-instruct,claude-3-5-sonnet,71969,184043,33,crew,8 +249,llama-3-1-405b-instruct,claude-3-5-sonnet,33388,82278,20,crew,8 +250,llama-3-1-405b-instruct,gemini-flash-1-5,5028,6624,9,crew,1 +251,llama-3-1-405b-instruct,gemini-flash-1-5,158177,164741,37,imp,27 +252,llama-3-1-405b-instruct,gemini-flash-1-5,59210,61035,26,crew,6 +253,llama-3-1-405b-instruct,gemini-flash-1-5,151270,262625,40,lim,27 +254,llama-3-1-405b-instruct,gemini-flash-1-5,20075,30423,17,imp,4 +255,llama-3-1-405b-instruct,gemini-pro-1-5,6224,4024,9,crew,1 +256,llama-3-1-405b-instruct,gemini-pro-1-5,4778,4202,7,crew,1 +257,llama-3-1-405b-instruct,gemini-pro-1-5,4615,4429,7,crew,1 +258,llama-3-1-405b-instruct,gemini-pro-1-5,6518,5032,9,crew,1 +259,llama-3-1-405b-instruct,gemini-pro-1-5,8881,3541,7,imp,3 +260,llama-3-1-405b-instruct,gpt-4o-mini,10362,9165,8,imp,4 +261,llama-3-1-405b-instruct,gpt-4o-mini,8664,13394,11,crew,1 +262,llama-3-1-405b-instruct,gpt-4o-mini,3983,9775,7,crew,2 +263,llama-3-1-405b-instruct,gpt-4o-mini,6747,10383,9,imp,1 +264,llama-3-1-405b-instruct,gpt-4o-mini,3390,5361,5,imp,1 +265,llama-3-1-405b-instruct,gpt-4o,6413,11987,8,imp,1 +266,llama-3-1-405b-instruct,gpt-4o,6364,10255,7,imp,1 +267,llama-3-1-405b-instruct,gpt-4o,6171,9627,7,imp,1 +268,llama-3-1-405b-instruct,gpt-4o,6221,11005,8,imp,2 +269,llama-3-1-405b-instruct,gpt-4o,7934,11469,10,crew,1 +270,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5384,15722,8,crew,1 +271,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5819,15056,9,crew,1 
+272,llama-3-1-405b-instruct,llama-3-1-405b-instruct,16692,39787,13,imp,1 +273,llama-3-1-405b-instruct,llama-3-1-405b-instruct,4388,8618,6,crew,1 +274,llama-3-1-405b-instruct,llama-3-1-405b-instruct,5183,21680,7,imp,1 +275,llama-3-1-405b-instruct,llama-3-1-8b-instruct,8924,21762,11,imp,2 +276,llama-3-1-405b-instruct,llama-3-1-8b-instruct,9018,20276,11,imp,3 +277,llama-3-1-405b-instruct,llama-3-1-8b-instruct,11416,28870,11,imp,3 +278,llama-3-1-405b-instruct,llama-3-1-8b-instruct,27688,86189,19,imp,6 +279,llama-3-1-405b-instruct,llama-3-1-8b-instruct,6532,26643,9,crew,2 +280,llama-3-1-8b-instruct,claude-3-5-haiku,10245,26686,11,crew,1 +281,llama-3-1-8b-instruct,claude-3-5-haiku,6169,18422,9,crew,2 +282,llama-3-1-8b-instruct,claude-3-5-haiku,8448,25274,10,crew,3 +283,llama-3-1-8b-instruct,claude-3-5-haiku,245620,286453,40,lim,19 +284,llama-3-1-8b-instruct,claude-3-5-haiku,64079,679275,40,lim,30 +285,llama-3-1-8b-instruct,claude-3-5-sonnet,5478,14790,8,imp,3 +286,llama-3-1-8b-instruct,claude-3-5-sonnet,37947,98647,20,crew,8 +287,llama-3-1-8b-instruct,claude-3-5-sonnet,80993,239551,26,crew,8 +288,llama-3-1-8b-instruct,claude-3-5-sonnet,90131,179296,30,crew,18 +289,llama-3-1-8b-instruct,claude-3-5-sonnet,101283,265258,31,crew,9 +290,llama-3-1-8b-instruct,gemini-flash-1-5,182941,224191,40,lim,12 +291,llama-3-1-8b-instruct,gemini-flash-1-5,169005,141582,40,lim,15 +292,llama-3-1-8b-instruct,gemini-flash-1-5,155316,147533,40,lim,13 +293,llama-3-1-8b-instruct,gemini-flash-1-5,181693,171269,40,lim,29 +294,llama-3-1-8b-instruct,gemini-flash-1-5,35169,38841,16,imp,11 +295,llama-3-1-8b-instruct,gemini-pro-1-5,232404,78998,36,crew,16 +296,llama-3-1-8b-instruct,gemini-pro-1-5,6696,6401,9,crew,2 +297,llama-3-1-8b-instruct,gemini-pro-1-5,18070,14167,15,crew,7 +298,llama-3-1-8b-instruct,gemini-pro-1-5,12117,9431,11,crew,5 +299,llama-3-1-8b-instruct,gemini-pro-1-5,162878,69384,40,lim,17 +300,llama-3-1-8b-instruct,gpt-4o-mini,36188,61069,19,imp,10 
"""Aggregate output-token usage from saved tournament game logs.

Every file in ``TOURNAMENT_DIR`` is loaded as JSON and reduced to one row:
impostor/crewmate model names (parsed from the file name), total output
tokens per side, number of rounds, the game result and how often the
impostor's action results mention "pretended". The rows are written to
``OUTPUT_CSV`` and printed.
"""
import json
import os

import pandas as pd

TOURNAMENT_DIR = "data/tournament"
# NOTE(review): the CSV is written relative to the current working directory,
# while the committed copy lives under data/ - confirm the intended location.
OUTPUT_CSV = "token_usage.csv"

COLUMNS = [
    "imp_model",
    "crew_model",
    "imp_out_tokens",
    "crew_out_tokens",
    "rounds",
    "result",
    "imp_pretend_count",
]


def _game_result(file_name, game):
    """Return "lim", "crew" or "imp" for one game.

    A "_round_limit" marker in the file name takes precedence; otherwise the
    last playthrough entry decides whether the crewmates won.
    """
    if "_round_limit" in file_name:
        return "lim"
    if "Crewmates win!" in game["playthrough"][-1]:
        return "crew"
    return "imp"


def _count_pretended(player):
    """Count action results mentioning "pretended" for one player.

    Both the current state's action result and every recorded round's action
    result are inspected.
    """
    action_results = [player["state"]["action_result"]]
    action_results.extend(
        past_round["action_result"] for past_round in player["history"]["rounds"]
    )
    return sum("pretended" in result for result in action_results)


def summarize_game(file_name, game):
    """Build one output row (ordered like ``COLUMNS``) for a single game.

    Args:
        file_name: Base name of the log file. The part before the first "."
            is split on "_"; token 0 is used as the impostor model and
            token 2 as the crewmate model (token 1 is ignored - presumably a
            separator such as "vs"; verify against the log writer).
        game: Parsed JSON content of the log file.

    Returns:
        ``[imp_model, crew_model, imp_out_tokens, crew_out_tokens, rounds,
        result, imp_pretend_count]``.
    """
    name_parts = file_name.split(".")[0].split("_")
    imp_model, crew_model = name_parts[0], name_parts[2]

    players = game["players"]
    # The round count is taken from the first player's history.
    rounds = len(players[0]["history"]["rounds"])

    # All counters start at zero for every game so that nothing leaks over
    # from a previously processed file when a game has no impostor.
    imp_out_tokens = 0
    crew_out_tokens = 0
    imp_pretend = 0
    # Iterate over every player instead of assuming exactly five.
    for player in players:
        out_tokens = player["state"]["token_usage"]["output_tokens"]
        if player["role"] == "Impostor":
            imp_out_tokens += out_tokens
            imp_pretend += _count_pretended(player)
        else:
            crew_out_tokens += out_tokens

    return [
        imp_model,
        crew_model,
        imp_out_tokens,
        crew_out_tokens,
        rounds,
        _game_result(file_name, game),
        imp_pretend,
    ]


def collect_token_usage(tournament_dir=TOURNAMENT_DIR):
    """Summarize every game log directly inside ``tournament_dir``.

    Files are processed in sorted name order so the resulting row order (and
    therefore the CSV index) is reproducible; directory listings themselves
    come back in arbitrary order.

    Args:
        tournament_dir: Directory holding one JSON log file per game.
            Sub-directories are ignored.

    Returns:
        A ``pandas.DataFrame`` with the columns in ``COLUMNS``, one row per
        file (empty if the directory contains no files).

    Raises:
        FileNotFoundError: If ``tournament_dir`` does not exist.
    """
    with os.scandir(tournament_dir) as entries:
        file_names = sorted(entry.name for entry in entries if entry.is_file())

    rows = []
    for file_name in file_names:
        path = os.path.join(tournament_dir, file_name)
        with open(path, encoding="utf-8") as handle:
            game = json.load(handle)
        rows.append(summarize_game(file_name, game))

    return pd.DataFrame(rows, columns=COLUMNS)


def main():
    """Write the token-usage table to ``OUTPUT_CSV`` and print it."""
    df = collect_token_usage(TOURNAMENT_DIR)
    df.to_csv(OUTPUT_CSV)
    print(df)


if __name__ == "__main__":
    main()