-
Notifications
You must be signed in to change notification settings - Fork 30
/
config.go
77 lines (67 loc) · 2.39 KB
/
config.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package tokenizer
import (
"encoding/json"
"os"
)
// Config holds the configuration used to create a Tokenizer.
//
// Each field is bound to a JSON key through its struct tag. Version is a
// plain string and AddedTokens is a list of TokenConfig entries; every other
// section (truncation, padding, normalizer, pre_tokenizer, post_processor,
// decoder, model) is kept as a generic map[string]interface{}, so its
// contents are not interpreted or validated by this type.
type Config struct {
	Version       string                 `json:"version"`
	Truncation    map[string]interface{} `json:"truncation"`
	Padding       map[string]interface{} `json:"padding"`
	AddedTokens   []TokenConfig          `json:"added_tokens"`
	Normalizer    map[string]interface{} `json:"normalizer"`
	PreTokenizer  map[string]interface{} `json:"pre_tokenizer"`
	PostProcessor map[string]interface{} `json:"post_processor"`
	Decoder       map[string]interface{} `json:"decoder"`
	Model         map[string]interface{} `json:"model"`
}
// TokenConfig is one entry of the "added_tokens" list in a tokenizer
// configuration.
//
// It carries the token's numeric id, its content string, and five boolean
// flags, each bound to the JSON key named in its struct tag ("single_word",
// "lstrip", "rstrip", "normalized", "special").
type TokenConfig struct {
	Id         int64  `json:"id"`
	Content    string `json:"content"`
	SingleWord bool   `json:"single_word"`
	Lstrip     bool   `json:"lstrip"`
	Rstrip     bool   `json:"rstrip"`
	Normalized bool   `json:"normalized"`
	Special    bool   `json:"special"`
}
// NormalizerConfig is a typed view of a normalizer section.
//
// Type is read from the "type" key. Normalizers is read from the
// "normalizers" key as a list of generic maps whose contents are left
// uninterpreted.
type NormalizerConfig struct {
	Type        string                   `json:"type"`
	Normalizers []map[string]interface{} `json:"normalizers"`
}
// PreTokenizerConfig is an empty placeholder type: it declares no fields, so
// decoding JSON into it retains nothing.
type PreTokenizerConfig struct{}
// PostProcessorConfig is a typed view of a post-processor section.
//
// Type is read from the "type" key. Single and Pair are read from the
// "single" and "pair" keys as lists of generic maps, and SpecialTokens is
// read from the "special_tokens" key as a generic map; the contents of all
// three are left uninterpreted.
type PostProcessorConfig struct {
	Type          string                   `json:"type"`
	Single        []map[string]interface{} `json:"single"`
	Pair          []map[string]interface{} `json:"pair"`
	SpecialTokens map[string]interface{}   `json:"special_tokens"`
}
// DecoderConfig is a typed view of a decoder section.
//
// Type is read from the "type" key. Decoders is read from the "decoders" key
// as a list of generic maps whose contents are left uninterpreted.
type DecoderConfig struct {
	Type     string                   `json:"type"`
	Decoders []map[string]interface{} `json:"decoders"`
}
// ModelConfig is a typed view of a model section.
//
// Each field is bound to the JSON key named in its struct tag. Dropout,
// ContinuingSubwordPrefix, EndOfWordSuffix and MaxInputCharsPerWord are
// declared as interface{}, so they accept any JSON value (including null)
// and are stored exactly as encoding/json decodes them. Vocab maps a string
// to an int, and Merges is a list of strings.
type ModelConfig struct {
	Type                    string         `json:"type"`
	Dropout                 interface{}    `json:"dropout"`
	UnkToken                string         `json:"unk_token"`
	ContinuingSubwordPrefix interface{}    `json:"continuing_subword_prefix"`
	EndOfWordSuffix         interface{}    `json:"end_of_word_suffix"`
	FuseUnk                 bool           `json:"fuse_unk"`
	ByteFallback            bool           `json:"byte_fallback"`
	Vocab                   map[string]int `json:"vocab"`
	Merges                  []string       `json:"merges"`
	MaxInputCharsPerWord    interface{}    `json:"max_input_chars_per_word"`
}
// ConfigFromFile loads a Config from the JSON file at the given path.
//
// It opens the file, decodes the first JSON value it contains into a Config,
// and closes the file before returning. The error from os.Open or from the
// JSON decoder is returned unchanged, and the returned *Config is nil in
// that case. If the decoded value is the JSON literal null, the returned
// *Config is nil with a nil error.
func ConfigFromFile(file string) (*Config, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// Release the descriptor on every return path. The file is opened
	// read-only, so the error from Close is deliberately not reported.
	defer f.Close()

	var config *Config
	if err := json.NewDecoder(f).Decode(&config); err != nil {
		return nil, err
	}

	return config, nil
}