-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
304 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
import seaborn as sns | ||
|
||
from datetime import datetime | ||
from matplotlib import pyplot as plt, ticker | ||
|
||
import benchmark as bm | ||
from benchmark.suite.plot.util import load | ||
|
||
|
||
def plot(args):
    '''
    Plot, per batch size, the throughput gain and the cost savings relative
    to the batch-size-1 baseline of a batch suite result, and save the
    figure as an SVG file in bm.figure_dir.

    kwargs:
        benchmark (str): benchmark to plot,
                         empty to plot the last batch suite result.

    raises:
        ValueError: no matching batch suite result file was found, or the
                    result has no row with batch_size == 1.
    '''
    filter_by_name = bool(args.kwargs) and 'benchmark' in args.kwargs

    # get the newest batch suite result
    # os.scandir yields entries in arbitrary order, so iterate the names in
    # sorted order to make "last match wins" deterministic.
    # NOTE(review): assumes result file names start with a sortable
    # '%Y-%m-%d-%H-%M-%S' timestamp (the slicing offsets below suggest a
    # 19-character prefix) - confirm against the code that writes them.
    with os.scandir(bm.result_dir) as entries:
        candidates = sorted(entries, key=lambda entry: entry.name)

    result_file = None
    for file in candidates:
        # extract suite and benchmark name from result file name
        suite, name = file.name[20:31], file.name[32:-4]
        if suite != 'suite-batch':
            continue
        if filter_by_name and name != args.kwargs['benchmark']:
            continue
        result_file = file

    if not result_file:
        raise ValueError("No batch suite results found.")
    results = pd.read_csv(result_file)

    # calculate difference from baseline (first row with batch_size == 1)
    baseline_rows = results[results['batch_size'] == 1]
    if baseline_rows.empty:
        raise ValueError("Batch suite result has no batch_size == 1 "
                         "baseline row.")
    baseline = baseline_rows.iloc[0]
    base_throughput, base_cost = baseline['throughput'], baseline['cost']

    # throughput: gain over the baseline; cost: savings versus the baseline
    results['throughput'] = results['throughput'] - base_throughput
    results['cost'] = base_cost - results['cost']

    # plot
    sns.set_theme(font_scale=1.4)
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(12, 5)

    # NOTE(review): barplot draws batch_size on a categorical axis, so the
    # int() tick formatters below may receive tick positions rather than
    # batch sizes - confirm the rendered x labels.
    sns.barplot(data=results, x='batch_size', y='throughput',
                color="#2ecc71", ax=ax1, width=0.8)
    ax1.axhline(0, color='#e74c3c')
    ax1.set(xlabel="Batch Size vs Throughput", ylabel="Throughput (pps)", title="")
    ax1.get_xaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: int(x)))

    sns.barplot(data=results, x='batch_size', y='cost',
                color="#2ecc71", ax=ax2, width=0.8)
    ax2.set(xlabel="Batch Size vs Cost Savings", ylabel="Cost Savings ($)", title="")
    ax2.get_xaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: int(x)))
    plt.tight_layout()

    output_file = os.path.join(
        bm.figure_dir,
        f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}-"
        "batch-size.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Batch size plot saved to: {output_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import os | ||
import seaborn as sns | ||
|
||
from datetime import datetime | ||
from matplotlib import pyplot as plt, ticker | ||
|
||
import benchmark as bm | ||
from benchmark.suite.plot.util import load | ||
|
||
|
||
def plot(args):
    '''
    Plot the mean number of pairs processed per dollar for each model as a
    line graph with value labels and a fixed "AWS ER" reference line, and
    save the figure as an SVG file in bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.

    raises:
        ValueError: no result row has a positive cost.
    '''
    results = load(args)

    # calculate pairs per $; rows without a positive cost cannot be rated
    priced = results[results['cost'] > 0].copy()
    priced['num_pairs'] = (priced['tp'] + priced['fp']
                           + priced['tn'] + priced['fn'])
    priced['ppd'] = priced['num_pairs'] / priced['cost']
    priced = priced[priced['ppd'] >= 0]
    if priced.empty:
        raise ValueError("No results with a positive cost to plot.")

    # order by ppd
    results = priced.groupby('model')['ppd'].mean().sort_values().reset_index()
    # dummy column needed for markers
    results['label'] = 1

    # calculate range and positioning numbers
    y_max = results['ppd'].max()
    y_range = y_max - results['ppd'].min()
    # with a single model the range is 0; fall back to the value itself so
    # label offsets and axis padding stay non-zero
    y_unit = (y_range if y_range > 0 else y_max) / 25
    x_unit = len(results) / 110

    # plot
    sns.set_theme(font_scale=1.4)
    fig, ax = plt.subplots()
    fig.set_size_inches(9, 5)

    # draw AWS baseline
    ax.axhline(4000, color='#3498db', linewidth=2, linestyle='--')
    ax.text(len(results) - 1.7, 4000 + y_unit, "AWS ER", color='#3498db', size=14)

    # plot line graph with value labels
    sns.lineplot(data=results, x='model', y='ppd', color='#2ecc71',
                 style='label', markers=True, ax=ax, linewidth=3,
                 legend=False, markersize=8)
    # models sit at x positions 0..n-1 in sorted order
    for position, value in enumerate(results['ppd']):
        text = f"{int(value):,}"
        # shift left by an amount proportional to the label length
        x_space = len(text) * x_unit
        ax.text(position - x_space, value + y_unit, text, color='#000', size=15)

    plt.xticks(rotation=90)
    plt.locator_params(axis='y', nbins=5)
    ax.get_yaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
    # the axis starts just below 0, so the top must be derived from the data
    # maximum (not the range) or high values are clipped when min is not ~0
    plt.ylim(-y_unit, y_max + y_unit * 3)
    plt.title('')
    plt.xlabel('')
    plt.ylabel("Pairs per Dollar")

    output_file = os.path.join(
        bm.figure_dir,
        f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}-"
        "model-cost-trend.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Model cost trend plot saved to: {output_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
import seaborn as sns | ||
|
||
from datetime import datetime | ||
from matplotlib import pyplot as plt | ||
|
||
import benchmark as bm | ||
from benchmark.suite.plot.util import load | ||
|
||
|
||
def plot(args):
    '''
    Draw a bar chart of the mean F1 score per model, bars sorted by
    ascending mean, and save it as an SVG file in bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.
    '''
    frame = load(args)

    # models ranked by their mean f1 score, lowest first
    mean_f1 = frame.groupby('model')[['f1']].mean()
    ranked_models = mean_f1.sort_values('f1').index

    # plot
    plt.figure(figsize=(9, 5))
    sns.set_theme(font_scale=1.4)
    sns.barplot(
        frame,
        x='model',
        y='f1',
        order=ranked_models,
        estimator=np.mean,
        capsize=.2,
        color='#2ecc71',
        width=0.7,
    )
    plt.xticks(rotation=90)
    plt.ylim(0, 100)
    plt.title('')
    plt.xlabel('')
    plt.ylabel("F1 Score (%)")

    # timestamped output path
    stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    output_file = os.path.join(bm.figure_dir, f"{stamp}-model-f1.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Model F1 plot saved to: {output_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
import seaborn as sns | ||
|
||
from datetime import datetime | ||
from matplotlib import pyplot as plt | ||
|
||
import benchmark as bm | ||
from benchmark.suite.plot.util import load | ||
|
||
|
||
def plot(args):
    '''
    Draw a bar chart of the mean throughput per model, bars sorted by
    ascending mean, and save it as an SVG file in bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.
    '''
    frame = load(args)

    # models ranked by their mean throughput, lowest first
    mean_throughput = frame.groupby('model')[['throughput']].mean()
    ranked_models = mean_throughput.sort_values('throughput').index

    # plot
    plt.figure(figsize=(9, 5))
    sns.set_theme(font_scale=1.4)
    sns.barplot(
        frame,
        x='model',
        y='throughput',
        order=ranked_models,
        estimator=np.mean,
        capsize=.2,
        color='#2ecc71',
        width=0.7,
    )
    plt.xticks(rotation=90)
    plt.title('')
    plt.xlabel('')
    plt.ylabel("Throughput (pps)")

    # timestamped output path
    stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    output_file = os.path.join(bm.figure_dir, f"{stamp}-model-throughput.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Model throughput plot saved to: {output_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import os | ||
import pandas as pd | ||
|
||
import benchmark as bm | ||
|
||
|
||
def load(args) -> pd.DataFrame:
    '''
    Load the newest suite result file of each requested model from
    bm.result_dir and concatenate them into one DataFrame, adding a
    'model' column that holds the model name taken from the file name.

    kwargs:
        models (list[str]): models to include in the plot,
                            leave empty to use the built-in model list.
        benchmarks (list[str]): benchmarks to include in the plot,
                                leave empty to include all benchmarks.

    returns:
        DataFrame with the rows of all loaded result files, restricted to
        the requested benchmarks when any are given.

    raises:
        ValueError: no suite result file exists for any requested model.
    '''
    kwargs = args.kwargs or {}

    models = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo',
              'gpt-4o', 'gpt-4o-mini', 'llama3', 'llama3.1']
    if 'models' in kwargs:
        models = kwargs['models']

    benchmarks = kwargs['benchmarks'] if 'benchmarks' in kwargs else []

    # load in files, get newest result for each model if available
    # os.scandir yields entries in arbitrary order, so iterate the names in
    # sorted order to let the latest file of each model overwrite older ones.
    # NOTE(review): assumes result file names start with a sortable
    # '%Y-%m-%d-%H-%M-%S' timestamp (the slicing offsets below suggest a
    # 19-character prefix) - confirm against the code that writes them.
    with os.scandir(bm.result_dir) as entries:
        candidates = sorted(entries, key=lambda entry: entry.name)

    files = {}
    for file in candidates:
        # extract suite and model name from result file name
        suite, name = file.name[20:25], file.name[26:-4]
        if suite == 'suite' and name in models:
            files[name] = file

    if not files:
        raise ValueError("No suite results found. Run through "
                         "at least one benchmark suite before plotting.")

    # load all files into DataFrame, tagging every row with its model
    dfs = []
    for model, file in files.items():
        df = pd.read_csv(file)
        df['model'] = model
        dfs.append(df)
    results = pd.concat(dfs)

    # filter benchmarks
    if benchmarks:
        results = results[results['benchmark'].isin(benchmarks)]

    return results