Skip to content

Commit

Permalink
feat: add suite plots (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
daiwaid authored Aug 31, 2024
1 parent 2045be3 commit 4a3831b
Show file tree
Hide file tree
Showing 9 changed files with 304 additions and 4 deletions.
2 changes: 1 addition & 1 deletion benchmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
from benchmark.classic import (
benchmarks, classic_benchmarks, challenges
)
from benchmark.suite import suites
from benchmark.suite import suites, suite_plots
27 changes: 26 additions & 1 deletion benchmark/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def run(args) -> dict:
# create a deep copy of args before making changes
args = copy.deepcopy(args)

if args.input_file:
if args.plot:
benchmark_func = run_plot
elif args.input_file:
args.name = args.input_file.split('/')[-1].split(".")[0]
benchmark_func = run_from_file
elif args.suite:
Expand All @@ -25,6 +27,19 @@ def run(args) -> dict:
return benchmark_func(args)


def run_plot(args):
    """
    Render one or more benchmark suite plots.

    args.plot is either 'all' or a comma-separated list of plot names;
    underscores are accepted in place of dashes. Every requested name
    is checked against bm.suite_plots before anything is drawn, so a
    typo in a later name cannot leave a half-finished run behind.

    Raises:
        ValueError: if a requested plot is unknown or no plot name
                    was given.
    """
    requested = args.plot.strip()
    if requested == 'all':
        names = list(bm.suite_plots)
    else:
        # drop empty entries left by stray commas, e.g. "model-f1,"
        names = [
            token.strip().replace('_', '-')
            for token in requested.split(',')
            if token.strip()
        ]
        if not names:
            raise ValueError("No plot name given.")
        for name in names:
            if name not in bm.suite_plots:
                raise ValueError(
                    f"Plot {name} not found. Available plots: "
                    f"{', '.join(bm.suite_plots)}.")

    for name in names:
        bm.suite_plots[name](args)


def run_from_file(args) -> dict:
"""
Entity pairs should follow the Libem result format:
Expand Down Expand Up @@ -82,6 +97,10 @@ def args() -> argparse.Namespace:
help="Number of pairs to run through. "
"Set as <= 0 to run through the entire dataset.",
type=int, default=5)
parser.add_argument("--plot", dest='plot', nargs='?',
help="The benchmark suite plot name(s), separated by comma, "
"or 'all' to plot all.",
type=str, default='')

# dataset configurations
parser.add_argument("--no-shuffle", dest='shuffle',
Expand Down Expand Up @@ -167,6 +186,12 @@ def validate(args):
if args.suite and args.input_file:
raise ValueError("Cannot specify both "
"suite and input file.")
if args.plot and args.suite:
raise ValueError("Cannot specify both "
"plot and suite.")
if args.plot and args.input_file:
raise ValueError("Cannot specify both "
"plot and input file.")

if args.batch_size <= 0:
raise ValueError("Batch size cannot be <= 0.")
Expand Down
16 changes: 15 additions & 1 deletion benchmark/suite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,18 @@
'gpt-4o': gpt_4o.run,
'gpt-4o-mini': gpt_4o_mini.run,
'llama3': llama3.run,
}
}

from benchmark.suite.plot import (
batch_size,
model_cost_trend,
model_f1,
model_throughput
)

# Registry of the available suite plots: maps a dash-separated plot
# name to the `plot` function of the module that draws it.
suite_plots = {
'batch-size': batch_size.plot,
'model-cost-trend': model_cost_trend.plot,
'model-f1': model_f1.plot,
'model-throughput': model_throughput.plot
}
2 changes: 1 addition & 1 deletion benchmark/suite/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def run(args):

batch_sizes = [1, 4, 16, 64, 128, 256, 512]

print(f"Benchmark: Varying the batch size on the {args.name} benchmarks:")
print(f"Benchmark: Varying the batch size on the {args.name} benchmark:")
start = time.time()

reports = {}
Expand Down
65 changes: 65 additions & 0 deletions benchmark/suite/plot/batch_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime
from matplotlib import pyplot as plt, ticker

import benchmark as bm
from benchmark.suite.plot.util import load


def plot(args):
    '''
    Plot the throughput gain and the cost savings of each batch size
    relative to the batch size 1 baseline, and save the figure as an
    SVG in bm.figure_dir.

    kwargs:
        benchmark (str): benchmark to plot,
                         omit to plot the last batch suite result.

    Raises:
        ValueError: if no matching batch suite result exists, or the
                    result has no batch size 1 row to use as baseline.
    '''
    # os.scandir yields entries in arbitrary order, so sort by name to
    # make "the last match" deterministic.
    # NOTE(review): assumes result file names start with a sortable
    # 19-character timestamp, as the slicing below implies, so that the
    # last name is also the newest result - confirm.
    with os.scandir(bm.result_dir) as entries:
        candidates = sorted(entries, key=lambda entry: entry.name)

    # get the newest batch suite result
    result_file = None
    for file in candidates:
        # extract suite and benchmark name from result file name
        suite, name = file.name[20:31], file.name[32:-4]
        if suite != 'suite-batch':
            continue
        if args.kwargs and 'benchmark' in args.kwargs:
            if name == args.kwargs['benchmark']:
                result_file = file
        else:
            result_file = file

    if not result_file:
        raise ValueError("No batch suite results found.")
    results = pd.read_csv(result_file.path)

    # calculate difference from baseline
    baseline_rows = results[results['batch_size'] == 1]
    if baseline_rows.empty:
        raise ValueError("Batch suite results have no "
                         "batch size 1 baseline.")
    baseline = baseline_rows.iloc[0]

    def delta(x):
        # throughput gained and money saved compared to the baseline
        x['throughput'] = x['throughput'] - baseline['throughput']
        x['cost'] = baseline['cost'] - x['cost']
        return x

    results = results.apply(delta, axis=1)

    # plot
    sns.set_theme(font_scale=1.4)
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(12, 5)
    sns.barplot(results, x='batch_size', y='throughput',
                color="#2ecc71", ax=ax1, width=0.8)
    # red zero line marks the baseline throughput
    ax1.axhline(0, color='#e74c3c')
    ax1.set(xlabel="Batch Size vs Throughput",
            ylabel="Throughput (pps)", title="")
    ax1.get_xaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: int(x)))
    sns.barplot(results, x='batch_size', y='cost',
                color="#2ecc71", ax=ax2, width=0.8)
    ax2.set(xlabel="Batch Size vs Cost Savings",
            ylabel="Cost Savings ($)", title="")
    ax2.get_xaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: int(x)))
    plt.tight_layout()

    output_file = os.path.join(
        bm.figure_dir,
        f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}-"
        "batch-size.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Batch size plot saved to: {output_file}")
71 changes: 71 additions & 0 deletions benchmark/suite/plot/model_cost_trend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import seaborn as sns

from datetime import datetime
from matplotlib import pyplot as plt, ticker

import benchmark as bm
from benchmark.suite.plot.util import load


def plot(args):
    '''
    Plot the average number of pairs processed per dollar for each
    model, ordered from the lowest to the highest value, and save the
    figure as an SVG in bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.

    Raises:
        ValueError: if no loaded result has a positive cost.
    '''
    results = load(args)

    # calculate pairs per $; results without a positive cost are
    # skipped because the ratio is undefined for them
    results = results[results['cost'] > 0].copy()
    results['num_pairs'] = (results['tp'] + results['fp']
                            + results['tn'] + results['fn'])
    results['ppd'] = results['num_pairs'] / results['cost']
    results = results[results['ppd'] >= 0]
    if results.empty:
        raise ValueError("No results with a positive cost to plot.")

    # order by ppd
    results = (results.groupby('model')[['ppd']].mean()
               .sort_values('ppd')['ppd'].reset_index())
    # dummy column needed for markers
    results['label'] = 1

    # calculate range and positioning numbers
    y_max = results['ppd'].max()
    y_range = y_max - results['ppd'].min()
    if y_range <= 0:
        # a single model has no spread; fall back to its value so the
        # label offsets and axis limits do not collapse to zero
        y_range = y_max if y_max > 0 else 1
    y_unit = y_range / 25
    x_unit = len(results) / 110

    # plot
    sns.set_theme(font_scale=1.4)
    fig, ax = plt.subplots()
    fig.set_size_inches(9, 5)

    # draw AWS baseline
    plt.axhline(4000, color='#3498db', linewidth=2, linestyle='--')
    ax.text(len(results) - 1.7, 4000 + y_unit, "AWS ER",
            color='#3498db', size=14)

    # plot line graph with value labels
    sns.lineplot(results, x='model', y='ppd', color='#2ecc71',
                 style='label', markers=True, ax=ax, linewidth=3,
                 legend=False, markersize=8)
    for position, row in results.iterrows():
        text = f"{int(row['ppd']):,}"
        # shift left in proportion to the label length to centre it
        x_space = len(text) * x_unit
        ax.text(position - x_space, row['ppd'] + y_unit, text,
                color='#000', size=15)

    plt.xticks(rotation=90)
    plt.locator_params(axis='y', nbins=5)
    ax.get_yaxis().set_major_formatter(
        ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
    # the axis starts just below zero, so the upper limit has to be
    # based on the largest value (not the range) to keep every point
    # and its label inside the axes
    plt.ylim(-y_unit, y_max + y_unit * 3)
    plt.title('')
    plt.xlabel('')
    plt.ylabel("Pairs per Dollar")

    output_file = os.path.join(
        bm.figure_dir,
        f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}-"
        "model-cost-trend.svg")
    plt.savefig(output_file, format='svg', bbox_inches="tight")
    print(f"Model cost trend plot saved to: {output_file}")
39 changes: 39 additions & 0 deletions benchmark/suite/plot/model_f1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime
from matplotlib import pyplot as plt

import benchmark as bm
from benchmark.suite.plot.util import load


def plot(args):
    '''
    Draw a bar chart of the mean F1 score of each model, with the
    models sorted by ascending mean score, and save it as an SVG in
    bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.
    '''
    results = load(args)

    # models sorted by their mean f1 score, lowest first
    mean_f1 = results.groupby('model')[['f1']].mean()
    order = mean_f1.sort_values('f1').index

    # plot
    plt.figure(figsize=(9, 5))
    sns.set_theme(font_scale=1.4)
    sns.barplot(
        results, x='model', y='f1', order=order,
        estimator=np.mean, capsize=.2,
        color='#2ecc71', width=0.7,
    )
    plt.xticks(rotation=90)
    plt.ylim(0, 100)
    plt.ylabel("F1 Score (%)")
    plt.xlabel('')
    plt.title('')

    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    output_file = os.path.join(
        bm.figure_dir,
        f"{timestamp}-"
        "model-f1.svg")
    plt.savefig(output_file, bbox_inches="tight", format='svg')
    print(f"Model F1 plot saved to: {output_file}")
38 changes: 38 additions & 0 deletions benchmark/suite/plot/model_throughput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime
from matplotlib import pyplot as plt

import benchmark as bm
from benchmark.suite.plot.util import load


def plot(args):
    '''
    Draw a bar chart of the mean throughput of each model, with the
    models sorted by ascending mean throughput, and save it as an SVG
    in bm.figure_dir.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot.
    '''
    results = load(args)

    # models sorted by their mean throughput, lowest first
    mean_throughput = results.groupby('model')[['throughput']].mean()
    order = mean_throughput.sort_values('throughput').index

    # plot
    plt.figure(figsize=(9, 5))
    sns.set_theme(font_scale=1.4)
    sns.barplot(
        results, x='model', y='throughput', order=order,
        estimator=np.mean, capsize=.2,
        color='#2ecc71', width=0.7,
    )
    plt.xticks(rotation=90)
    plt.ylabel("Throughput (pps)")
    plt.xlabel('')
    plt.title('')

    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    output_file = os.path.join(
        bm.figure_dir,
        f"{timestamp}-"
        "model-throughput.svg")
    plt.savefig(output_file, bbox_inches="tight", format='svg')
    print(f"Model throughput plot saved to: {output_file}")
48 changes: 48 additions & 0 deletions benchmark/suite/plot/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
import pandas as pd

import benchmark as bm


def load(args) -> pd.DataFrame:
    '''
    Load the newest suite result of each requested model from
    bm.result_dir into one DataFrame, tagging every row with its model.

    kwargs:
        models (list[str]): models to include in the plot.
        benchmarks (list[str]): benchmarks to include in the plot,
                                leave empty to include all benchmarks.

    Returns:
        The concatenated results with an added 'model' column.

    Raises:
        ValueError: if no suite result exists for any requested model.
    '''
    kwargs = args.kwargs if args.kwargs else {}

    models = kwargs.get('models', [
        'gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo',
        'gpt-4o', 'gpt-4o-mini', 'llama3', 'llama3.1'])
    benchmarks = kwargs.get('benchmarks', [])

    # accept a bare string as a single entry; otherwise `in` would do
    # substring matching on it and `isin` would reject it
    if isinstance(models, str):
        models = [models]
    if isinstance(benchmarks, str):
        benchmarks = [benchmarks]

    # os.scandir yields entries in arbitrary order, so sort by name to
    # make later files reliably overwrite earlier ones below.
    # NOTE(review): assumes result file names start with a sortable
    # 19-character timestamp, as the slicing below implies - confirm.
    with os.scandir(bm.result_dir) as entries:
        candidates = sorted(entries, key=lambda entry: entry.name)

    # get the newest result for each model if available
    files = {}
    for file in candidates:
        # extract suite and model name from result file name
        suite, name = file.name[20:25], file.name[26:-4]
        if suite == 'suite' and name in models:
            files[name] = file

    if not files:
        raise ValueError("No suite results found. Run through "
                         "at least one benchmark suite before plotting.")

    # load all files into DataFrame
    dfs = []
    for model, file in files.items():
        df = pd.read_csv(file.path)
        df['model'] = model
        dfs.append(df)
    results = pd.concat(dfs)

    # filter benchmarks
    if len(benchmarks) > 0:
        results = results[results['benchmark'].isin(benchmarks)]

    return results

0 comments on commit 4a3831b

Please sign in to comment.