-
Notifications
You must be signed in to change notification settings - Fork 0
/
atom.c
128 lines (118 loc) · 4.9 KB
/
atom.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <string.h>
#include "atom.h"
#include <assert.h>
#include <limits.h>
#include "mem.h"
#define NELEMS(x) ((sizeof (x)) / (sizeof ((x)[0])))
static struct atom{
struct atom *link;
int len;
char *str;
}*buckets[2048];
/*
* scatter 是一个256项的数组,它将字节随机映射到随机数,这些随机数
* 时通过调用标准库函数rand生成,经验表明,这种简单的方法有助于使哈希值
* 分布更加均匀。
*/
static unsigned long scatter[] = {
1903799522, 1768883887, 986051269, 1172728237, 24384077, 981454462,
1470090825, 494599998, 1007653608, 506565587, 1293695912, 806599247,
929746598, 2021190070, 315762766, 734141360, 1279715304, 1948413392,
928701523, 2119118100, 185879298, 192540520, 1310945694, 1826972868,
1528421109, 939320574, 702980327, 1942922161, 1202165413, 428899539,
776253885, 958481287, 50299778, 1762305154, 2131209524, 74683856,
596275969, 1453816701, 569283854, 1603929577, 1960382289, 1862979766,
263045176, 742645239, 1736686189, 578807942, 1476786599, 868917845,
379737686, 258004474, 840552297, 565616984, 450544995, 4014343,
245106205, 1978966104, 943334918, 948086532, 1774404617, 2145500331,
1376986071, 403174855, 956497970, 1427285850, 17996361, 940223846,
1501969706, 614272330, 246556900, 2071253560, 70718259, 59455541,
1786749678, 333763435, 802100780, 1375952219, 912571377, 131403731,
97386416, 1292309064, 389408205, 937938713, 1857926048, 839953200,
941953057, 2103032253, 671435657, 1885287975, 903635137, 298356626,
1883304658, 133137561, 701531481, 692318980, 1560423411, 719527843,
1632542826, 914909469, 1333800173, 1879099726, 838679381, 1404518433,
1938555267, 477945411, 1738281868, 593172399, 1853897631, 503369598,
724576130, 1951284047, 1795678662, 1113984336, 741739113, 1506121062,
1953937536, 1683692170, 1461669668, 477889545, 1421496497, 217821157,
776246172, 1157317507, 350958718, 1477777653, 1849636487, 1911382129,
49821848, 1334695665, 678807950, 1383622022, 1066311744, 1517487331,
640656807, 857383363, 1995432743, 231455027, 1450555763, 1701846726,
734824625, 27648245, 1505647125, 383019639, 1141632581, 99902590,
1889140702, 948086470, 1783594760, 1203326722, 1425976015, 1057607609,
1421147879, 54738539, 67441468, 1772106598, 1532516193, 1917077955,
1536005079, 1582338041, 1104289973, 67329382, 818476415, 23118069,
1584816713, 1459133222, 880501432, 1432765808, 1690588250, 183573547,
987128886, 277929227, 211221793, 345292364, 660948867, 1352854374,
445194954, 402605921, 153457196, 81306067, 1605932643, 1579433212,
1138913676, 879596874, 1634171751, 1206355145, 504219824, 1019204296,
975949452, 2040224904, 454058690, 2080239425, 2107554286, 1272535105,
2103357494, 1544887351, 584184680, 836375279, 830169512, 127289282,
1019948826, 1817298398, 405218509, 1231170619, 15107114, 1066167376,
436541346, 460302069, 1468773297, 589998542, 541608136, 927222292,
21948106, 1680521812, 1806819167, 1656119858, 739393309, 163555343,
527840506, 1715342762, 56296599, 981899196, 1648098539, 16367237,
106950654, 1603972386, 1561254589, 691135334, 292864017, 243940453,
818424616, 1312812843, 2061238851, 1223643125, 396499815, 2076345966,
142326854, 833041161, 389164387, 1611100151, 1423039703, 930772523,
390838796, 1444987810, 463810687, 50174315, 953624020, 1203203997,
213729658, 1481464526, 771063111, 270026258, 315880075, 271678002
};
int Atom_length(const char *str){
struct atom *p;
int i;
assert(str);
for(i=0;i<NELEMS(buckets);i++)
for(p=buckets[i];p;p=p->link)
if(p->str == str)
return p->len;
assert(0);//用于指明一些假定不会发生的情况
return 0;
}
const char *Atom_new(const char *str, int len){
unsigned long h;
int i;
struct atom *p;
assert(str);
assert(len>=0);
for(h=0,i=0;i<len;i++)
h=(h<<1) + scatter[(unsigned char)str[i]]; //将字符强制转换为无符号字符,可以避免在带符号机器上出现数组溢出的错误
h %= NELEMS(buckets);
for(p=buckets[h];p;p=p->link){//在buckets中查找str,如果找到直接返回
if(len==p->len){
for(i=0;i<len && p->str[i] == str[i];)
i++;
if(i == len)
return p->str;
}
}
p = ALLOC(sizeof(*p) + len + 1);//如果没有找到,为之开辟一块内存
p->len = len;
p->str = (char*)(p+1);
if(len>0)
memcpy(p->str,str,len);
p->str[len] = '\0';
p->link = buckets[h]; //并将它添加到链表的头部
buckets[h] = p;
return p->str;
}
const char *Atom_string(const char *str){
assert(str);
return Atom_new(str,strlen(str));
}
const char *Atom_int(long n){
char str[43];//43个字符可以容纳任何整数的十进制表示
char *s = str + sizeof str;
unsigned long m;
if(n == LONG_MIN) //如果n 是最小长整数,无符号的m无法表示其绝对值
m = LONG_MAX + 1UL; //最大值加一就是其最小值
else if(n<0)
m=-n;
else m=n;
do{
*--s = m%10 + '0';
}while((m/=10) > 0);
if(n<0)
*--s='-';
return Atom_new(s,(str+sizeof str)-s);
}