-
Notifications
You must be signed in to change notification settings - Fork 1
/
dbf.go
552 lines (461 loc) · 14.5 KB
/
dbf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
package dbf
import (
"bytes"
"errors"
"fmt"
"log"
"os"
"strconv"
"strings"
"time"
)
// DbfTable keeps a complete dBase table in memory: the values decoded from
// the file header, the column definitions, and the raw bytes of the table and
// of its memo data.
type DbfTable struct {
	// --- dBase file header information ---

	// Valid dBASE III PLUS table file (03h without a memo .DBT file; 83h with a memo).
	fileSignature uint8
	// Date of last update, one byte each for year, month and day (YYMMDD).
	updateYear, updateMonth, updateDay uint8
	// Number of records in the table.
	numberOfRecords uint32
	// Number of bytes in the header, then number of bytes in one record.
	headerSize, recordLength uint16
	// Reserved bytes.
	reservedBytes [20]byte
	// Field descriptor array.
	fieldDescriptor [32]byte
	// 0Dh stored as the field terminator.
	fieldTerminator int8
	// Number of fields/columns in the dBase file.
	numberOfFields int

	// --- table state ---

	// Columns of the dBase file, in file order.
	fields []DbfField
	// Maps a field name to its index in fields.
	fieldMap map[string]int
	// Rows currently marked as deleted; InsertRecord reuses them.
	delRows []int
	// frozenStruct is set once the table structure may no longer change
	// (data has been written, or the table was loaded from a file);
	// loading is set while/after a table is built by LoadFile and makes
	// addField leave the header bytes alone.
	frozenStruct, loading bool
	// dataStore is the dBase table as a byte array; memoStore is the
	// dBase memo data as a byte array.
	dataStore, memoStore []byte
}
// DbfField describes one column of a dBase table.
type DbfField struct {
	// Name is the column name; Type is the one-letter dBase type code kept
	// as a string (this package creates "C", "N", "L", "D" and "M" columns).
	Name, Type string
	// Length is the width of the column in bytes; Precision is the number
	// of digits kept after the decimal point when a numeric value is stored.
	Length, Precision uint8
	// fieldStore is the raw 32-byte field descriptor that is copied into
	// the file header.
	fieldStore [32]byte
}
// New creates an empty dBase table (no fields, no records) from scratch.
//
// The returned table holds a 32-byte header image carrying the file signature
// 03h and today's date. Columns are added afterwards with the Add*Field
// methods, which rebuild the header as they go.
func New() *DbfTable {
	// Read the clock once so year, month and day always belong to the same instant.
	now := time.Now()

	dt := new(DbfTable)

	// dBase table header information.
	dt.fileSignature = 0x03
	dt.updateYear = byte(now.Year() % 100)
	dt.updateMonth = byte(now.Month())
	dt.updateDay = byte(now.Day()) // day of the month; YearDay() can exceed 255
	dt.numberOfRecords = 0
	dt.headerSize = 32
	dt.recordLength = 0

	// fieldMap translates a field name to its index.
	dt.fieldMap = make(map[string]int)

	// A fresh table has no columns. Deriving the count from headerSize, as is
	// done for loaded files, would underflow uint16 for a bare 32-byte header.
	dt.numberOfFields = 0

	// dataStore keeps the complete file in memory; it starts as the bare header.
	dt.dataStore = make([]byte, dt.headerSize)
	dt.dataStore[0] = dt.fileSignature
	dt.dataStore[1] = dt.updateYear
	dt.dataStore[2] = dt.updateMonth
	dt.dataStore[3] = dt.updateDay

	// Byte 28: no MDX file (index upon demand).
	dt.dataStore[28] = 0x00
	// Byte 29: language driver ID. Default to UTF-8 encoding, use 0x57 for ANSI.
	dt.dataStore[29] = 0xf0

	return dt
}
// SetFieldName assigns fieldName to the Name member of the field.
// Only Name is changed; the raw descriptor bytes kept in fieldStore are left
// as they are.
func (df *DbfField) SetFieldName(fieldName string) {
df.Name = fieldName
}
// LoadFile loads a dBase III+ table, together with whatever memo data
// readFile returns for it, from fileName into memory.
//
// The header is decoded, one field is registered per field descriptor, and
// every row carrying the deletion marker (2Ah) is remembered so InsertRecord
// can reuse it. The returned table has its structure frozen.
//
// An error is returned when the file cannot be read, when it is too short or
// its header is inconsistent with its size, or when a field cannot be added
// (for example because of a duplicate name).
func LoadFile(fileName string) (table *DbfTable, err error) {
	data, memo, err := readFile(fileName)
	if err != nil {
		return nil, err
	}
	// The fixed part of the header is 32 bytes; nothing can be decoded without it.
	if len(data) < 32 {
		return nil, fmt.Errorf("dbf: %s: file is too short (%d bytes) to hold a header", fileName, len(data))
	}

	dt := new(DbfTable)
	dt.loading = true // tells addField to leave the header bytes untouched

	// dataStore keeps the complete file in memory.
	dt.dataStore = data
	dt.memoStore = memo

	// Header values; multi-byte numbers are little-endian.
	dt.fileSignature = data[0]
	dt.updateYear = data[1]
	dt.updateMonth = data[2]
	dt.updateDay = data[3]
	dt.numberOfRecords = uint32(data[4]) | uint32(data[5])<<8 | uint32(data[6])<<16 | uint32(data[7])<<24
	dt.headerSize = uint16(data[8]) | uint16(data[9])<<8
	dt.recordLength = uint16(data[10]) | uint16(data[11])<<8

	headerSize := int(dt.headerSize)
	if headerSize < 32 {
		return nil, fmt.Errorf("dbf: %s: invalid header size %d", fileName, headerSize)
	}

	// fieldMap translates a field name to its index.
	dt.fieldMap = make(map[string]int)

	// Upper bound for the number of 32-byte descriptors that fit between the
	// fixed header and the terminator byte. Computed with int so that a small
	// header size cannot wrap around.
	maxFields := (headerSize - 1 - 32) / 32

	for i := 0; i < maxFields; i++ {
		offset := i*32 + 32
		// Bytes 0..17 of each descriptor are read below.
		if offset+18 > len(data) {
			return nil, fmt.Errorf("dbf: %s: field descriptor %d lies beyond the end of the file", fileName, i)
		}
		// 0Dh terminates the descriptor array; some files carry extra header
		// bytes after it that must not be parsed as fields.
		if data[offset] == 0x0D {
			break
		}

		fieldName := strings.Trim(string(data[offset:offset+10]), "\x00")
		// The name ends at its first NUL; ignore anything stored after it.
		if nul := strings.IndexByte(fieldName, 0); nul >= 0 {
			fieldName = fieldName[:nul]
		}
		if fieldName == "" {
			fieldName = fmt.Sprintf("MISSING%d", i)
		}

		var addErr error
		switch data[offset+11] {
		case 'C':
			addErr = dt.AddTextField(fieldName, data[offset+16])
		case 'N':
			addErr = dt.AddNumberField(fieldName, data[offset+16], data[offset+17])
		case 'L':
			addErr = dt.AddBoolField(fieldName)
		case 'D':
			addErr = dt.AddDateField(fieldName)
		case 'M':
			addErr = dt.AddMemoField(fieldName)
		default:
			log.Printf("Unknown field type `%v` for field `%v`, defaulting to text", string(data[offset+11]), fieldName)
			addErr = dt.AddTextField(fieldName, data[offset+16])
		}
		if addErr != nil {
			return nil, addErr
		}

		// Key the map by the name as it was stored (normalised by addField),
		// which is the form the *ByName lookups use.
		dt.fieldMap[dt.fields[i].Name] = i
	}
	dt.numberOfFields = len(dt.fields)

	// Memorize deleted rows.
	recordLength := int(dt.recordLength)
	for i := 0; i < int(dt.numberOfRecords); i++ {
		pos := headerSize + i*recordLength
		if pos >= len(data) {
			return nil, fmt.Errorf("dbf: %s: record %d of %d lies beyond the end of the file", fileName, i, dt.numberOfRecords)
		}
		if data[pos] == 0x2A {
			dt.delRows = append(dt.delRows, i)
		}
	}

	dt.frozenStruct = true
	return dt, nil
}
// SaveFile writes the table to filename, creating the file or truncating an
// existing one.
//
// The in-memory image is written followed by the dBase end-of-file marker
// (1Ah). The marker goes to the file only and is never appended to the
// in-memory image, so the table can be saved repeatedly and records can still
// be added after a save. When the image already ends with the marker right
// after the last record, no second marker is written.
//
// The first error from creating, writing or closing the file is returned.
func (dt *DbfTable) SaveFile(filename string) (err error) {
	const eofMarker = 0x1A

	f, err := os.Create(filename)
	if err != nil {
		return err
	}
	// Surface a failed Close unless an earlier error is already being returned.
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = f.Write(dt.dataStore); err != nil {
		return err
	}

	// Position just past the last record according to the header values.
	end := int(dt.headerSize) + int(dt.numberOfRecords)*int(dt.recordLength)
	if len(dt.dataStore) == end+1 && dt.dataStore[end] == eofMarker {
		return nil
	}

	// Don't forget the dBase end of file marker.
	_, err = f.Write([]byte{eofMarker})
	return err
}
// SetFieldValueByName stores value in the column called fieldName of the
// given row. The name is upper-cased before it is looked up in fieldMap.
// It panics when no such column is known.
func (dt *DbfTable) SetFieldValueByName(row int, fieldName string, value string) {
	key := strings.ToUpper(fieldName)
	if idx, found := dt.fieldMap[key]; found {
		dt.SetFieldValue(row, idx, value)
		return
	}
	panic("Field name '" + key + "' does not exist")
}
// getRowOffset returns the position in dataStore at which the given
// zero-based row starts: the header size plus row times the record length.
func (dt *DbfTable) getRowOffset(row int) int {
	return int(dt.headerSize) + row*int(dt.recordLength)
}
// findSpot removes the last entry of delRows and returns it, so that a
// previously deleted row can be reused. It returns -1 when delRows is empty.
func (dt *DbfTable) findSpot() int {
	n := len(dt.delRows)
	if n == 0 {
		return -1
	}
	row := dt.delRows[n-1]
	dt.delRows = dt.delRows[:n-1]
	return row
}
// Delete marks row as deleted by writing the deleted-record marker (2Ah) into
// the first byte of the row, and remembers the row in delRows so InsertRecord
// can reuse it.
//
// Deleting a row that already carries the marker does nothing. Queuing it a
// second time would make InsertRecord hand out the same row twice.
func (dt *DbfTable) Delete(row int) {
	const deletedMarker = 0x2A

	flag := &dt.dataStore[dt.getRowOffset(row)]
	if *flag == deletedMarker {
		return
	}
	*flag = deletedMarker
	dt.delRows = append(dt.delRows, row)
}
// IsDeleted reports whether the first byte of the given zero-based row holds
// the deleted-record marker 2Ah.
func (dt *DbfTable) IsDeleted(row int) bool {
return dt.dataStore[dt.getRowOffset(row)] == 0x2A
}
// SetFieldValue stores value in the column with index fieldIndex of the given
// zero-based row and freezes the table structure.
//
// The cell is first filled with spaces. For "C", "L" and "D" columns the value
// is then written left-aligned and cut to the column width. For "N" columns
// the value is cut to the column's precision after the decimal point, cut to
// the column width, and written right-aligned; a message is logged when the
// cut removes integer digits. For any other column type the cell is left
// blank.
func (dt *DbfTable) SetFieldValue(row int, fieldIndex int, value string) {
	dt.frozenStruct = true // table structure can not be changed from this point

	b := []byte(value)
	fieldLength := int(dt.fields[fieldIndex].Length)

	// Start of the cell: start of the row, one byte for the deletion marker,
	// plus the widths of all preceding columns.
	start := dt.getRowOffset(row) + 1
	for i := 0; i < fieldIndex; i++ {
		start += int(dt.fields[i].Length)
	}

	// First fill the field with space values. Indexing (rather than slicing)
	// makes an out-of-range row fail here instead of writing past the data.
	for i := 0; i < fieldLength; i++ {
		dt.dataStore[start+i] = 0x20
	}

	switch dt.fields[fieldIndex].Type {
	case "C", "L", "D":
		if len(b) > fieldLength {
			b = b[:fieldLength]
		}
		if len(b) > 0 {
			copy(dt.dataStore[start:], b)
		}
	case "N":
		// Value from fmt.Sprintf("%f") has 6 digits of precision by default,
		// truncate as appropriate.
		if precision := dt.fields[fieldIndex].Precision; precision > 0 {
			if dot := bytes.IndexByte(b, '.'); dot > -1 {
				cutoff := dot + int(precision) + 1
				if cutoff > len(b) {
					cutoff = len(b)
				}
				b = b[:cutoff]
			}
		}
		// If the number is too big to fit in the field, truncate from the right.
		if len(b) > fieldLength {
			// Integer digits are lost when the value has no decimal point at
			// all, or when the point lies beyond the column width.
			if dot := bytes.IndexByte(b, '.'); dot == -1 || dot > fieldLength {
				log.Printf("ERROR: trying to store %s in field of size %d", string(b), fieldLength)
			}
			b = b[:fieldLength]
		}
		// Right-align the digits inside the cell.
		if len(b) > 0 {
			copy(dt.dataStore[start+fieldLength-len(b):], b)
		}
	}
}
// RawFieldValue returns the untrimmed content of the column with index
// fieldIndex in the given zero-based row.
//
// The cell content is cut at its first 00h byte, if any. For memo ("M")
// columns the trimmed cell content is passed to readMemoBlock and the memo
// text is returned instead; a blank memo cell yields "".
func (dt *DbfTable) RawFieldValue(row int, fieldIndex int) string {
	field := dt.fields[fieldIndex]

	// Skip the header, the preceding rows, the deletion marker of this row
	// and the preceding columns.
	start := int(dt.headerSize) + row*int(dt.recordLength) + 1
	for i := 0; i < fieldIndex; i++ {
		start += int(dt.fields[i].Length)
	}

	cell := dt.dataStore[start : start+int(field.Length)]
	if nul := bytes.IndexByte(cell, 0x00); nul >= 0 {
		cell = cell[:nul]
	}
	value := string(cell)

	if field.Type != "M" {
		return value
	}
	ref := strings.TrimSpace(value)
	if ref == "" {
		return ""
	}
	return dt.readMemoBlock(ref)
}
// FieldValue returns the result of RawFieldValue for the given row and field
// index with leading and trailing white space removed by strings.TrimSpace.
func (dt *DbfTable) FieldValue(row int, fieldIndex int) string {
return strings.TrimSpace(dt.RawFieldValue(row, fieldIndex))
}
// readMemoBlock returns the memo text that starts at block number indexStr
// (decimal) of memoStore, using blocks of 512 bytes, and runs up to but not
// including the first 1Ah byte.
//
// An empty string is returned when indexStr is not a valid non-negative
// number (this is logged), when the block lies beyond the memo data, or when
// no 1Ah terminator follows the block start.
func (dt *DbfTable) readMemoBlock(indexStr string) string {
	const blockSize = 512

	blockIndex, err := strconv.Atoi(indexStr)
	if err != nil {
		log.Println("Invalid memo block index", indexStr, err)
		return ""
	}
	// The index comes straight from file content; a negative one must not be
	// used to index memoStore.
	if blockIndex < 0 {
		log.Println("Invalid memo block index", indexStr)
		return ""
	}
	// Compare before multiplying so that a huge index cannot overflow.
	if blockIndex > len(dt.memoStore)/blockSize {
		return ""
	}

	block := dt.memoStore[blockIndex*blockSize:]
	if end := bytes.IndexByte(block, 0x1A); end >= 0 {
		return string(block[:end])
	}
	return ""
}
// FieldValueByName returns the trimmed value of the column called fieldName
// in the given row. The name is upper-cased before it is looked up in
// fieldMap. It panics when no such column is known.
func (dt *DbfTable) FieldValueByName(row int, fieldName string) string {
	key := strings.ToUpper(fieldName)
	idx, found := dt.fieldMap[key]
	if !found {
		panic("Field name '" + key + "' does not exist")
	}
	return dt.FieldValue(row, idx)
}
// RawFieldValueByName returns the untrimmed value of the column called
// fieldName in the given row. The name is upper-cased before it is looked up
// in fieldMap. It panics when no such column is known.
func (dt *DbfTable) RawFieldValueByName(row int, fieldName string) string {
	key := strings.ToUpper(fieldName)
	idx, found := dt.fieldMap[key]
	if !found {
		panic("Field name '" + key + "' does not exist")
	}
	return dt.RawFieldValue(row, idx)
}
// InsertRecord tries to reuse a deleted record, and only adds a new record to
// the end of the file if no deleted slot exists. It returns the zero-based
// number of the record to fill.
//
// A reused slot is undeleted (marker 20h) and its field bytes are zeroed, the
// same state a record fresh from AddRecord is in, so values of the deleted
// record do not show through in fields the caller leaves unset.
//
// If you are looping over rows it is better to use AddRecord.
func (dt *DbfTable) InsertRecord() int {
	row := dt.findSpot()
	if row < 0 {
		return dt.AddRecord()
	}

	start := dt.getRowOffset(row)
	// Never reach past the stored data, even if the last record is incomplete.
	end := start + int(dt.recordLength)
	if end > len(dt.dataStore) {
		end = len(dt.dataStore)
	}
	for i := start + 1; i < end; i++ {
		dt.dataStore[i] = 0x00
	}
	// Undelete the selected row.
	dt.dataStore[start] = 0x20
	return row
}
// AddRecord always adds a new row to the end of the table and returns its
// zero-based number.
//
// The new record starts with the "not deleted" marker 20h; its field bytes
// are zero. The record count is updated both in the table and in bytes 4-7 of
// the header image. From the first record on, the table structure is frozen.
func (dt *DbfTable) AddRecord() int {
	// Adding columns once records exist would invalidate the stored rows.
	dt.frozenStruct = true

	// Since row numbers are "0" based, the current count is the new row number.
	newRecordNumber := int(dt.numberOfRecords)

	// The new record must start exactly where row offsets are computed to be.
	// Drop whatever follows the last record (typically the 1Ah end-of-file
	// marker of a loaded or saved image) before appending.
	end := int(dt.headerSize) + newRecordNumber*int(dt.recordLength)
	if end >= 32 && len(dt.dataStore) > end {
		dt.dataStore = dt.dataStore[:end]
	}

	newRecord := make([]byte, dt.recordLength)
	if len(newRecord) > 0 {
		newRecord[0] = 0x20 // record is not deleted
	}
	dt.dataStore = appendSlice(dt.dataStore, newRecord)

	// Increment the number of records and mirror it into the header.
	dt.numberOfRecords++
	s := uint32ToBytes(dt.numberOfRecords)
	dt.dataStore[4] = s[0]
	dt.dataStore[5] = s[1]
	dt.dataStore[6] = s[2]
	dt.dataStore[7] = s[3]

	return newRecordNumber
}
// AddTextField adds a character ('C') column named fieldName that is length
// bytes wide, with precision 0. The work is done by addField, whose error is
// returned unchanged.
// NOTE(review): an earlier comment gave a maximum size of 254 bytes; nothing
// in this method enforces a limit beyond the uint8 type of length.
func (dt *DbfTable) AddTextField(fieldName string, length uint8) error {
return dt.addField(fieldName, 'C', length, 0)
}
// AddNumberField adds a numeric ('N') column named fieldName that is length
// bytes wide and keeps prec digits after the decimal point. It can be used
// for both int and float columns. The error of addField is returned unchanged.
func (dt *DbfTable) AddNumberField(fieldName string, length, prec uint8) error {
return dt.addField(fieldName, 'N', length, prec)
}
// AddIntField adds a numeric ('N') column named fieldName that is length
// bytes wide with precision 0. The error of addField is returned unchanged.
func (dt *DbfTable) AddIntField(fieldName string, length uint8) error {
return dt.addField(fieldName, 'N', length, 0)
}
// AddFloatField adds a numeric ('N') column named fieldName that is length
// bytes wide and keeps prec digits after the decimal point. It passes exactly
// the same arguments to addField as AddNumberField does.
func (dt *DbfTable) AddFloatField(fieldName string, length, prec uint8) error {
return dt.addField(fieldName, 'N', length, prec)
}
// AddBoolField adds a logical ('L') column named fieldName that is one byte
// wide. The error of addField is returned unchanged.
// NOTE(review): an earlier comment said the cell stores 't' or 'f'; this
// method itself does not restrict which byte is written to the cell.
func (dt *DbfTable) AddBoolField(fieldName string) error {
return dt.addField(fieldName, 'L', 1, 0)
}
// AddDateField adds a date ('D') column named fieldName that is eight bytes
// wide. The error of addField is returned unchanged.
func (dt *DbfTable) AddDateField(fieldName string) error {
return dt.addField(fieldName, 'D', 8, 0)
}
// AddMemoField adds a memo ('M') column named fieldName that is ten bytes
// wide. The error of addField is returned unchanged.
func (dt *DbfTable) AddMemoField(fieldName string) error {
return dt.addField(fieldName, 'M', 10, 0)
}
// NumRecords returns the number of rows in the dBase table as recorded in
// numberOfRecords. Rows marked as deleted are not subtracted.
func (dt *DbfTable) NumRecords() int {
return int(dt.numberOfRecords)
}
// Fields returns the slice of DbfField describing the table's columns.
// The table's own slice is returned, not a copy.
func (dt *DbfTable) Fields() []DbfField {
return dt.fields
}
// addField appends a column to the table.
//
// fieldName is normalised with getNormalizedFieldName; fieldType is the
// one-letter dBase type code; length and prec are the column width in bytes
// and the number of decimals. Unless the table is being loaded from a file,
// the header image is rebuilt afterwards.
//
// An error is returned when the table structure is frozen, when the table
// already holds records, or when a column with the same normalised name
// exists.
func (dt *DbfTable) addField(fieldName string, fieldType byte, length, prec uint8) error {
	// Rebuilding the header discards everything stored after it, so the
	// schema must not change once records exist. While loading, the record
	// count comes from the file header before the columns are registered and
	// is therefore not a reason to refuse.
	if dt.frozenStruct || (!dt.loading && dt.numberOfRecords > 0) {
		return errors.New("once you start entering data into the dBase table altering dBase table schema is not allowed")
	}

	name := dt.getNormalizedFieldName(fieldName)
	if dt.isFieldExist(name) {
		return errors.New("Field with name '" + name + "' already exist!")
	}

	df := DbfField{
		Name:      name,
		Type:      string(fieldType),
		Length:    length,
		Precision: prec,
	}

	// Field name in ASCII (max 10 characters).
	copy(df.fieldStore[:10], name)
	// Field names are terminated by 00h.
	df.fieldStore[10] = 0x00

	// Set field's data type
	// C (Character)  All OEM code page characters.
	// D (Date)       Numbers and a character to separate month, day, and year (stored internally as 8 digits in YYYYMMDD format).
	// N (Numeric)    - . 0 1 2 3 4 5 6 7 8 9
	// L (Logical)    ? Y y N n T t F f (? when not initialized).
	// M (Memo)       Pointer to a block in the corresponding memo file
	df.fieldStore[11] = fieldType

	// Length and precision of the field.
	df.fieldStore[16] = length
	df.fieldStore[17] = prec

	dt.fields = append(dt.fields, df)

	if !dt.loading {
		dt.updateHeader()
	}
	return nil
}
// updateHeader rebuilds the dBase file header after a field was added.
//
// The new image consists of the first 32 bytes of dataStore, the 32-byte
// descriptor of every field and the terminator 0Dh; it replaces dataStore.
// headerSize, recordLength and numberOfFields are recomputed, the two sizes
// are written into bytes 8-11 of the image, and fieldMap is refreshed.
func (dt *DbfTable) updateHeader() {
	// The initial 32 bytes of dataStore are the foundation of the new image.
	header := dt.dataStore[0:32]

	// Set dBase file signature.
	header[0] = 0x03

	var recordLength uint16
	for i := range dt.fields {
		field := &dt.fields[i]
		recordLength += uint16(field.Length)
		header = appendSlice(header, field.fieldStore[:])

		// Keep fieldMap current; it is needed to find the index of a field name.
		dt.fieldMap[field.Name] = i
	}

	// End of file header terminator (0Dh).
	header = appendSlice(header, []byte{0x0D})

	// Now reset dataStore with the updated image.
	dt.dataStore = header

	// Keep the column count in step with the rebuilt header.
	dt.numberOfFields = len(dt.fields)

	// Update the number of bytes in the dBase file header.
	dt.headerSize = uint16(len(header))
	s := uint32ToBytes(uint32(dt.headerSize))
	dt.dataStore[8] = s[0]
	dt.dataStore[9] = s[1]

	// Update the length of each record; add "1" for the deletion marker.
	dt.recordLength = recordLength + 1
	s = uint32ToBytes(uint32(dt.recordLength))
	dt.dataStore[10] = s[0]
	dt.dataStore[11] = s[1]
}
// Row returns the trimmed values of all columns of the given zero-based row,
// in column order, as produced by FieldValue.
func (dt *DbfTable) Row(row int) []string {
	columns := dt.Fields()
	values := make([]string, 0, len(columns))
	for i := range columns {
		values = append(values, dt.FieldValue(row, i))
	}
	return values
}
// isFieldExist reports whether a column whose Name equals name (exact,
// case-sensitive comparison) is already part of the table.
func (dt *DbfTable) isFieldExist(name string) bool {
	for i := range dt.fields {
		if dt.fields[i].Name == name {
			return true
		}
	}
	return false
}
// convertToByteSlice returns the bytes of value, cut to at most
// numberOfBytes bytes.
func (dt *DbfTable) convertToByteSlice(value string, numberOfBytes int) []byte {
	raw := []byte(value)
	if len(raw) > numberOfBytes {
		raw = raw[0:numberOfBytes]
	}
	return raw
}
// getNormalizedFieldName returns name in upper case, cut to at most 10 bytes.
//
// The name is upper-cased first and cut afterwards, and the cut is made at a
// character boundary. Cutting the raw bytes first could split a multi-byte
// character, and upper-casing the damaged text could then produce a result
// longer than 10 bytes. For plain ASCII names the result is simply the first
// 10 characters in upper case.
func (dt *DbfTable) getNormalizedFieldName(name string) string {
	const maxLen = 10

	upper := strings.ToUpper(name)
	if len(upper) <= maxLen {
		return upper
	}

	// Ranging over a string yields the byte offset at which each character
	// starts; the largest such offset not beyond maxLen is a safe cut point.
	cut := 0
	for i := range upper {
		if i > maxLen {
			break
		}
		cut = i
	}
	return upper[:cut]
}