-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbatch_writer.go
154 lines (129 loc) · 3.78 KB
/
batch_writer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
package internal
import (
"bytes"
"encoding/json"
"fmt"
"github.com/hashicorp/go-retryablehttp"
"io/ioutil"
"time"
)
// MaxObjectsInBatch is the largest number of messages Flush allows in a
// single batch request; it is passed to getBatchMessages as the count limit.
const MaxObjectsInBatch int = 19000
// MaxBatchRequestSize is the largest serialized batch size, in bytes (20 MiB),
// that Flush allows for a single request; it is passed to getBatchMessages as
// the size limit.
const MaxBatchRequestSize int = 20 * 1024 * 1024
// BatchWriter accepts records for a stream and uploads them in batches.
// NewBatchWriter returns the HTTP-backed implementation defined in this file.
type BatchWriter interface {
// Flush uploads whatever is currently pending for stream. In the HTTP
// implementation it is a no-op when nothing is buffered.
Flush(stream *Stream) error
// Send queues record for upload. In the HTTP implementation the record is
// buffered and Flush is triggered once the configured batch size is reached.
Send(record *Record, stream *Stream) error
}
// NewBatchWriter builds a BatchWriter that buffers up to batchSize messages
// and uploads them over HTTP to apiURL, authenticating with apiToken.
// Progress is reported through logger.
func NewBatchWriter(batchSize int, logger Logger, apiURL, apiToken string) BatchWriter {
	httpClient := retryablehttp.NewClient()
	// The retrying client's own logger is cleared; this writer reports
	// through the supplied logger instead.
	httpClient.Logger = nil
	// Retries wait at least 3 seconds.
	httpClient.RetryWaitMin = 3 * time.Second

	w := new(httpBatchWriter)
	w.batchSize = batchSize
	w.apiURL = apiURL
	w.apiToken = apiToken
	w.logger = logger
	w.client = httpClient
	// Reserve room for one full batch up front so Send does not regrow the buffer.
	w.messages = make([]ImportMessage, 0, batchSize)
	return w
}
// httpBatchWriter is the BatchWriter implementation returned by
// NewBatchWriter. It buffers messages in memory and POSTs them to the import
// API when flushed.
type httpBatchWriter struct {
// batchSize is the buffered-message count at which Send triggers a Flush.
batchSize int
// apiURL is the base URL; Flush appends "/v2/import/batch" to it.
apiURL string
// apiToken is sent as a Bearer token in the Authorization header.
apiToken string
// logger receives informational messages about flushes and server responses.
logger Logger
// client is the retrying HTTP client used for every batch request.
client *retryablehttp.Client
// messages holds the records buffered since the last successful Flush.
messages []ImportMessage
}
// BatchResponse is the JSON body decoded from a batch import response whose
// status code is not treated as a failure. Flush logs both fields.
type BatchResponse struct {
// Status is the "status" field of the response body.
Status string `json:"status"`
// Message is the "message" field of the response body.
Message string `json:"message"`
}
// Flush uploads every buffered message for stream to the import API and, on
// success, empties the buffer.
//
// The buffered messages are partitioned by getBatchMessages so that each
// request respects MaxObjectsInBatch and MaxBatchRequestSize. Batches are sent
// one after another; the first failure aborts the flush and is returned. On
// failure the buffer is left untouched, so a later Flush sends every buffered
// message again, including any batch that had already been accepted.
//
// Flush returns nil without doing anything when no messages are buffered.
func (h *httpBatchWriter) Flush(stream *Stream) error {
	if len(h.messages) == 0 {
		return nil
	}

	batches := getBatchMessages(h.messages, stream, MaxObjectsInBatch, MaxBatchRequestSize)
	h.logger.Info(fmt.Sprintf("flushing [%v] messages for stream %q in [%v] batches", len(h.messages), stream.Name, len(batches)))

	for _, batch := range batches {
		if err := h.postBatch(batch); err != nil {
			return err
		}
	}

	// Keep the backing array so the next round of Send calls reuses it.
	h.messages = h.messages[:0]
	return nil
}

// postBatch serializes one batch, POSTs it to the /v2/import/batch endpoint
// and logs the status and message from the decoded response.
//
// It is a separate method so that the deferred Body.Close runs as soon as
// each request is finished, instead of every response body staying open until
// Flush returns.
func (h *httpBatchWriter) postBatch(batch ImportBatch) error {
	payload, err := json.Marshal(batch)
	if err != nil {
		return fmt.Errorf("encoding import batch: %w", err)
	}

	req, err := retryablehttp.NewRequest("POST", h.apiURL+"/v2/import/batch", bytes.NewBuffer(payload))
	if err != nil {
		return fmt.Errorf("building import batch request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+h.apiToken)

	res, err := h.client.Do(req)
	if err != nil {
		return fmt.Errorf("sending import batch: %w", err)
	}
	defer res.Body.Close()

	// Any status code above 203 is treated as a rejection; the response body
	// is surfaced verbatim in the returned error.
	if res.StatusCode > 203 {
		body, err := ioutil.ReadAll(res.Body)
		if err != nil {
			return fmt.Errorf("reading import batch error response: %w", err)
		}
		return fmt.Errorf("server request failed with %s", body)
	}

	var resp BatchResponse
	if err := json.NewDecoder(res.Body).Decode(&resp); err != nil {
		return fmt.Errorf("decoding import batch response: %w", err)
	}

	h.logger.Info(fmt.Sprintf("Server response status : %q, message : %q", resp.Status, resp.Message))
	return nil
}
// Send converts record into an import message and appends it to the buffer.
// Once the buffer holds at least batchSize messages it is flushed for stream
// and the result of that Flush is returned; otherwise Send returns nil.
func (h *httpBatchWriter) Send(record *Record, stream *Stream) error {
	msg := createImportMessage(record)
	h.messages = append(h.messages, msg)

	// Below the threshold: keep buffering.
	if len(h.messages) < h.batchSize {
		return nil
	}
	return h.Flush(stream)
}
// getBatchMessages partitions messages, in order, into ImportBatch values for
// stream that can be uploaded one request at a time.
//
// Each batch is shrunk by repeated halving until both limits hold:
//  1. it contains at most maxObjectsInBatch messages, and
//  2. its serialized JSON (as measured by SizeOf) is at most
//     maxBatchSerializedSize bytes.
//
// A batch is never shrunk below one message. If a single message is larger
// than maxBatchSerializedSize on its own (or maxObjectsInBatch is less than
// one), it is still emitted as a one-message batch so the caller makes
// progress and the oversized request can be rejected downstream, rather than
// looping forever on empty batches.
//
// The returned batches alias the messages slice; no messages are copied. A
// nil slice is returned when messages is empty.
func getBatchMessages(messages []ImportMessage, stream *Stream, maxObjectsInBatch int, maxBatchSerializedSize int) []ImportBatch {
	var batches []ImportBatch

	for offset := 0; offset < len(messages); {
		// Start optimistic: everything not yet allocated goes in one batch.
		batch := ImportBatch{
			Table:       stream.Name,
			Schema:      stream.Schema,
			Messages:    messages[offset:],
			PrimaryKeys: stream.KeyProperties,
		}

		// Keep halving until the batch fits. The count check comes first
		// because SizeOf marshals the whole batch. Stopping at one message
		// guarantees offset advances on every iteration.
		for len(batch.Messages) > 1 &&
			(len(batch.Messages) > maxObjectsInBatch || batch.SizeOf() > maxBatchSerializedSize) {
			batch.Messages = batch.Messages[:len(batch.Messages)/2]
		}

		offset += len(batch.Messages)
		batches = append(batches, batch)
	}

	return batches
}
// SizeOf reports the length in bytes of the batch's JSON encoding.
// It returns 0 when the batch cannot be marshaled, so callers comparing the
// result against a size limit will treat an unencodable batch as fitting.
func (imb *ImportBatch) SizeOf() int {
	encoded, err := json.Marshal(imb)
	if err == nil {
		return len(encoded)
	}
	return 0
}
// createImportMessage wraps record's data in an "upsert" ImportMessage
// stamped with the current wall-clock time in Unix milliseconds.
func createImportMessage(record *Record) ImportMessage {
	var msg ImportMessage
	msg.Action = "upsert"
	msg.EmittedAt = time.Now().UnixMilli()
	msg.Data = record.Data
	return msg
}