forked from tinygrad/tinygrad
-
Notifications
You must be signed in to change notification settings - Fork 4
153 lines (149 loc) · 5.83 KB
/
benchmark.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Benchmark workflow. Triggered by pushes to the branches listed under
# `on.push.branches`; no other event starts it.
name: Benchmarks

on:
  push:
    branches:
      - master
      - update_benchmark
# Three independent benchmark jobs, one per self-hosted hardware target.
#
# Conventions used by every job below:
#   * `if: github.repository_owner == 'tinygrad'` skips the job when the
#     workflow runs in a repository owned by anyone else.
#   * Every step that pipes into `tee` sets `shell: bash` explicitly. With an
#     explicit `shell: bash` GitHub runs the script with `-eo pipefail`, so a
#     failure of the command on the left of the pipe fails the step. Without
#     it the step's status would be that of `tee`, hiding the failure.
#   * The `tee`'d text files are collected by the final upload-artifact step.
jobs:
  testmacbenchmark:
    name: Mac Benchmark
    runs-on: [self-hosted, macOS]
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
      # Link files kept under ~/tinygrad on the runner into the fresh checkout
      # (presumably pre-provisioned on the machine; not created by this workflow).
      - name: Symlink models and datasets
        run: |
          ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu
          ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt
          ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
          ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
          ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz
      - name: Run model inference benchmark
        run: python3 test/external/external_model_benchmark.py
      - name: Test speed vs torch
        run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
        shell: bash
      - name: Run Tensor Core GEMM
        run: DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
        shell: bash
      - name: Run Stable Diffusion
        run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt
        shell: bash
      # Same prompt is run twice, with JIT=0 and JIT=1, into separate logs.
      - name: Run LLaMA
        run: |
          JIT=0 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
          JIT=1 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
        shell: bash
      - name: Run GPT2
        run: |
          JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
          JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
        shell: bash
      - name: Run 10 CIFAR training steps
        run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
        shell: bash
      - name: Run 10 CIFAR training steps w winograd
        run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt
        shell: bash
      # onnx_inference_speed.csv is not written by any `tee` above; presumably
      # produced by external_model_benchmark.py -- confirm against that script.
      - uses: actions/upload-artifact@v3
        with:
          name: Speed (Mac)
          path: |
            onnx_inference_speed.csv
            torch_speed.txt
            train_cifar.txt
            train_cifar_wino.txt
            llama_unjitted.txt
            llama_jitted.txt
            gpt2_unjitted.txt
            gpt2_jitted.txt
            matmul.txt
            sd.txt

  testnvidiabenchmark:
    name: NVIDIA Benchmark
    runs-on: [self-hosted, Linux, CUDA]
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
      # Every benchmark command in this job is launched with CUDA=1.
      - name: Run model inference benchmark
        run: CUDA=1 python3 test/external/external_model_benchmark.py
      - name: Test speed vs torch
        run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
        shell: bash
      - name: Run GPT2
        run: |
          CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
          CUDA=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
        shell: bash
      - uses: actions/upload-artifact@v3
        with:
          name: Speed (NVIDIA)
          path: |
            onnx_inference_speed.csv
            torch_speed.txt
            gpt2_unjitted.txt
            gpt2_jitted.txt

  testamdbenchmark:
    name: AMD Benchmark
    runs-on: [self-hosted, Linux, ROCM]
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
      # Link files kept under ~/tinygrad on the runner into the fresh checkout
      # (presumably pre-provisioned on the machine; not created by this workflow).
      - name: Symlink models and datasets
        run: |
          ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt
          ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
          ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
          ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz
      - name: Run model inference benchmark
        run: python3 test/external/external_model_benchmark.py
      - name: Test speed vs torch
        run: BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
        shell: bash
      - name: Run Tensor Core GEMM
        run: HIP=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
        shell: bash
      - name: Run Stable Diffusion
        run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt
        shell: bash
      # Same prompt is run twice, with JIT=0 and JIT=1, into separate logs.
      - name: Run LLaMA
        run: |
          JIT=0 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
          JIT=1 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
        shell: bash
      - name: Run GPT2
        run: |
          JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
          JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
        shell: bash
      # `shell: bash` is required here: the command pipes into `tee`, and only
      # the explicit bash shell enables pipefail so a training crash fails the step.
      - name: Run 10 CIFAR training steps
        run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
        shell: bash
      - name: Run 10 CIFAR training steps w winograd
        run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt
        shell: bash
      # Same reason as above: piped into `tee`, so pipefail must be active.
      - name: Run 10 CIFAR training steps w WINO/HALF/HIP
        run: HALF=1 HIP=1 WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino_half_hip.txt
        shell: bash
      # onnx_inference_speed.csv is not written by any `tee` above; presumably
      # produced by external_model_benchmark.py -- confirm against that script.
      - uses: actions/upload-artifact@v3
        with:
          name: Speed (AMD)
          path: |
            onnx_inference_speed.csv
            torch_speed.txt
            train_cifar.txt
            train_cifar_wino.txt
            train_cifar_wino_half_hip.txt
            llama_unjitted.txt
            llama_jitted.txt
            gpt2_unjitted.txt
            gpt2_jitted.txt
            matmul.txt
            sd.txt