forked from tinygrad/tinygrad
-
Notifications
You must be signed in to change notification settings - Fork 4
/
f16_w_uint32.py
42 lines (34 loc) · 1.37 KB
/
f16_w_uint32.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.helpers import dtypes
from tinygrad.ops import Device
# TODO: will be better when tinygrad does math in the target dtype, can remove the floor and use a mul
def bit_extract(x, s, e) -> Tensor:
# extract the top bits we don't want
top_bits = (x / (1<<(s+1))).floor() * (1<<(s+1))
x = (x - top_bits) / (1<<e)
return x.contiguous()
def u16_to_f16(x):
sign = bit_extract(x, 15, 15).float()
exponent = bit_extract(x, 14, 10).float()
fraction = bit_extract(x, 9, 0).float()
return sign.where(-1, 1) * exponent.where((exponent - 15).exp2() * (1 + fraction / 0x400), 6.103515625e-5 * (fraction / 0x400))
def u32_to_f16(oo):
oo1 = (oo/0x10000).floor().contiguous()
# TODO: this is wrong and unextractable until we do this math in u32
oo2 = (oo-(oo1*0x10000)).floor().contiguous()
f1 = u16_to_f16(oo1)
f2 = u16_to_f16(oo2)
return Tensor.cat(f2.reshape(-1, 1), f1.reshape(-1, 1), dim=1).flatten()
if __name__ == "__main__":
# random float16
Tensor.manual_seed(2)
a = Tensor.randn(100, dtype=dtypes.float16)
# this converts it to u32 on disk
oo = a.to("disk:/tmp/f16").cast(dtypes.uint32)[:50].to(Device.DEFAULT).realize()
# convert to 2xf16 using tinygrad math ops
f16 = u32_to_f16(oo)
ref = a.numpy()
out = f16.numpy().astype(np.float16)
print(ref-out)
np.testing.assert_allclose(ref, out)