Skip to content

Commit

Permalink
Merge #38208
Browse files Browse the repository at this point in the history
38208: workload/tpcc: pre-compute random data for initial data load strings r=tbg a=danhhz

This is explicitly allowed by the spec, but only for the initial data load
(section 4.3.2.1):

For the purpose of populating the initial database only, random numbers
can be generated by selecting entries in sequence from a set of at least
10,000 pregenerated random numbers. This technique cannot be used for
the field O_OL_CNT.

    name                             old time/op    new time/op    delta
    InitialData/tpcc/warehouses=1-8     368ms ± 9%     229ms ± 1%  -37.77%  (p=0.000 n=15+15)

    name                             old speed      new speed      delta
    InitialData/tpcc/warehouses=1-8   299MB/s ± 9%   481MB/s ± 1%  +60.50%  (p=0.000 n=15+15)

    name                             old alloc/op   new alloc/op   delta
    InitialData/tpcc/warehouses=1-8     193kB ± 0%     126kB ± 0%  -34.80%  (p=0.000 n=14+14)

    name                             old allocs/op  new allocs/op  delta
    InitialData/tpcc/warehouses=1-8       592 ± 0%       458 ± 0%  -22.59%  (p=0.000 n=15+15)



Release note: None

Co-authored-by: Daniel Harrison <daniel.harrison@gmail.com>
  • Loading branch information
craig[bot] and danhhz committed Jul 8, 2019
2 parents efcf66c + 18b4d44 commit 6299bd4
Show file tree
Hide file tree
Showing 9 changed files with 358 additions and 172 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/workloadccl/allccl/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func TestDeterministicInitialData(t *testing.T) {
`roachmart`: 0xda5e73423dbdb2d9,
`sqlsmith`: 0xcbf29ce484222325,
`startrek`: 0xa0249fbdf612734c,
`tpcc`: 0x15c89d37aef774ba,
`tpcc`: 0xab32e4f5e899eb2f,
`ycsb`: 0x85dd34d8c07fd808,
}

Expand Down
118 changes: 67 additions & 51 deletions pkg/workload/tpcc/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ const (
)

type generateLocals struct {
rng *rand.Rand
rng tpccRand
uuidAlloc uuid.UUID
}

Expand All @@ -77,15 +77,16 @@ func (w *tpcc) tpccItemInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufalloc
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))

iID := rowIdx + 1

cb.Reset(itemColTypes, 1)
cb.ColVec(0).Int64()[0] = int64(iID)
cb.ColVec(1).Int64()[0] = randInt(l.rng, 1, 10000) // im_id: "Image ID associated to Item"
cb.ColVec(2).Bytes()[0] = randAString(l.rng, a, 14, 24) // name
cb.ColVec(3).Float64()[0] = float64(randInt(l.rng, 100, 10000)) / float64(100) // price
cb.ColVec(4).Bytes()[0] = randOriginalString(l.rng, a)
cb.ColVec(1).Int64()[0] = randInt(l.rng.Rand, 1, 10000) // im_id: "Image ID associated to Item"
cb.ColVec(2).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 14, 24) // name
cb.ColVec(3).Float64()[0] = float64(randInt(l.rng.Rand, 100, 10000)) / float64(100) // price
cb.ColVec(4).Bytes()[0] = randOriginalStringInitialDataOnly(&l.rng, &ao, a)
}

func (w *tpcc) tpccItemStats() []workload.JSONStatistic {
Expand Down Expand Up @@ -119,18 +120,20 @@ func (w *tpcc) tpccWarehouseInitialRowBatch(
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
no := numbersOffset(l.rng.Intn(len(numbersAlphabet)))
lo := lettersOffset(l.rng.Intn(len(lettersAlphabet)))

wID := rowIdx // warehouse ids are 0-indexed. every other table is 1-indexed

cb.Reset(warehouseColTypes, 1)
cb.ColVec(0).Int64()[0] = int64(wID)
cb.ColVec(1).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng, 6, 10), 10)) // name
cb.ColVec(2).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng, 10, 20), 10)) // street_1
cb.ColVec(3).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng, 10, 20), 10)) // street_2
cb.ColVec(4).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng, 10, 20), 10)) // city
cb.ColVec(5).Bytes()[0] = randState(l.rng, a)
cb.ColVec(6).Bytes()[0] = randZip(l.rng, a)
cb.ColVec(7).Float64()[0] = randTax(l.rng)
cb.ColVec(1).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng.Rand, 6, 10), 10)) // name
cb.ColVec(2).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng.Rand, 10, 20), 10)) // street_1
cb.ColVec(3).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng.Rand, 10, 20), 10)) // street_2
cb.ColVec(4).Bytes()[0] = []byte(strconv.FormatInt(randInt(l.rng.Rand, 10, 20), 10)) // city
cb.ColVec(5).Bytes()[0] = randStateInitialDataOnly(&l.rng, &lo, a)
cb.ColVec(6).Bytes()[0] = randZipInitialDataOnly(&l.rng, &no, a)
cb.ColVec(7).Float64()[0] = randTax(l.rng.Rand)
cb.ColVec(8).Float64()[0] = wYtd
}

Expand Down Expand Up @@ -175,28 +178,29 @@ func (w *tpcc) tpccStockInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))

sID := (rowIdx % numStockPerWarehouse) + 1
wID := (rowIdx / numStockPerWarehouse)

cb.Reset(stockColTypes, 1)
cb.ColVec(0).Int64()[0] = int64(sID)
cb.ColVec(1).Int64()[0] = int64(wID)
cb.ColVec(2).Int64()[0] = randInt(l.rng, 10, 100) // quantity
cb.ColVec(3).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_01
cb.ColVec(4).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_02
cb.ColVec(5).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_03
cb.ColVec(6).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_04
cb.ColVec(7).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_05
cb.ColVec(8).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_06
cb.ColVec(9).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_07
cb.ColVec(10).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_08
cb.ColVec(11).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_09
cb.ColVec(12).Bytes()[0] = randAString(l.rng, a, 24, 24) // dist_10
cb.ColVec(13).Int64()[0] = 0 // ytd
cb.ColVec(14).Int64()[0] = 0 // order_cnt
cb.ColVec(15).Int64()[0] = 0 // remote_cnt
cb.ColVec(16).Bytes()[0] = randOriginalString(l.rng, a) // data
cb.ColVec(2).Int64()[0] = randInt(l.rng.Rand, 10, 100) // quantity
cb.ColVec(3).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_01
cb.ColVec(4).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_02
cb.ColVec(5).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_03
cb.ColVec(6).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_04
cb.ColVec(7).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_05
cb.ColVec(8).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_06
cb.ColVec(9).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_07
cb.ColVec(10).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_08
cb.ColVec(11).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_09
cb.ColVec(12).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24) // dist_10
cb.ColVec(13).Int64()[0] = 0 // ytd
cb.ColVec(14).Int64()[0] = 0 // order_cnt
cb.ColVec(15).Int64()[0] = 0 // remote_cnt
cb.ColVec(16).Bytes()[0] = randOriginalStringInitialDataOnly(&l.rng, &ao, a) // data
}

func (w *tpcc) tpccStockStats() []workload.JSONStatistic {
Expand Down Expand Up @@ -246,20 +250,23 @@ func (w *tpcc) tpccDistrictInitialRowBatch(
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))
no := numbersOffset(l.rng.Intn(len(numbersAlphabet)))
lo := lettersOffset(l.rng.Intn(len(lettersAlphabet)))

dID := (rowIdx % numDistrictsPerWarehouse) + 1
wID := (rowIdx / numDistrictsPerWarehouse)

cb.Reset(districtColTypes, 1)
cb.ColVec(0).Int64()[0] = int64(dID)
cb.ColVec(1).Int64()[0] = int64(wID)
cb.ColVec(2).Bytes()[0] = randAString(l.rng, a, 6, 10) // name
cb.ColVec(3).Bytes()[0] = randAString(l.rng, a, 10, 20) // street 1
cb.ColVec(4).Bytes()[0] = randAString(l.rng, a, 10, 20) // street 2
cb.ColVec(5).Bytes()[0] = randAString(l.rng, a, 10, 20) // city
cb.ColVec(6).Bytes()[0] = randState(l.rng, a)
cb.ColVec(7).Bytes()[0] = randZip(l.rng, a)
cb.ColVec(8).Float64()[0] = randTax(l.rng)
cb.ColVec(2).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 6, 10) // name
cb.ColVec(3).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // street 1
cb.ColVec(4).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // street 2
cb.ColVec(5).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // city
cb.ColVec(6).Bytes()[0] = randStateInitialDataOnly(&l.rng, &lo, a)
cb.ColVec(7).Bytes()[0] = randZipInitialDataOnly(&l.rng, &no, a)
cb.ColVec(8).Float64()[0] = randTax(l.rng.Rand)
cb.ColVec(9).Float64()[0] = ytd
cb.ColVec(10).Int64()[0] = nextOrderID
}
Expand Down Expand Up @@ -316,6 +323,9 @@ func (w *tpcc) tpccCustomerInitialRowBatch(
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))
no := numbersOffset(l.rng.Intn(len(numbersAlphabet)))
lo := lettersOffset(l.rng.Intn(len(lettersAlphabet)))

cID := (rowIdx % numCustomersPerDistrict) + 1
dID := ((rowIdx / numCustomersPerDistrict) % numDistrictsPerWarehouse) + 1
Expand All @@ -334,31 +344,31 @@ func (w *tpcc) tpccCustomerInitialRowBatch(
if cID <= 1000 {
lastName = randCLastSyllables(cID-1, a)
} else {
lastName = w.randCLast(l.rng, a)
lastName = w.randCLast(l.rng.Rand, a)
}

cb.Reset(customerColTypes, 1)
cb.ColVec(0).Int64()[0] = int64(cID)
cb.ColVec(1).Int64()[0] = int64(dID)
cb.ColVec(2).Int64()[0] = int64(wID)
cb.ColVec(3).Bytes()[0] = randAString(l.rng, a, 8, 16) // first name
cb.ColVec(3).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 8, 16) // first name
cb.ColVec(4).Bytes()[0] = middleName
cb.ColVec(5).Bytes()[0] = lastName
cb.ColVec(6).Bytes()[0] = randAString(l.rng, a, 10, 20) // street 1
cb.ColVec(7).Bytes()[0] = randAString(l.rng, a, 10, 20) // street 2
cb.ColVec(8).Bytes()[0] = randAString(l.rng, a, 10, 20) // city name
cb.ColVec(9).Bytes()[0] = randState(l.rng, a)
cb.ColVec(10).Bytes()[0] = randZip(l.rng, a)
cb.ColVec(11).Bytes()[0] = randNString(l.rng, a, 16, 16) // phone number
cb.ColVec(6).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // street 1
cb.ColVec(7).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // street 2
cb.ColVec(8).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20) // city name
cb.ColVec(9).Bytes()[0] = randStateInitialDataOnly(&l.rng, &lo, a)
cb.ColVec(10).Bytes()[0] = randZipInitialDataOnly(&l.rng, &no, a)
cb.ColVec(11).Bytes()[0] = randNStringInitialDataOnly(&l.rng, &no, a, 16, 16) // phone number
cb.ColVec(12).Bytes()[0] = w.nowString
cb.ColVec(13).Bytes()[0] = credit
cb.ColVec(14).Float64()[0] = creditLimit
cb.ColVec(15).Float64()[0] = float64(randInt(l.rng, 0, 5000)) / float64(10000.0) // discount
cb.ColVec(15).Float64()[0] = float64(randInt(l.rng.Rand, 0, 5000)) / float64(10000.0) // discount
cb.ColVec(16).Float64()[0] = balance
cb.ColVec(17).Float64()[0] = ytdPayment
cb.ColVec(18).Int64()[0] = paymentCount
cb.ColVec(19).Int64()[0] = deliveryCount
cb.ColVec(20).Bytes()[0] = randAString(l.rng, a, 300, 500) // data
cb.ColVec(20).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 300, 500) // data
}

func (w *tpcc) tpccCustomerStats() []workload.JSONStatistic {
Expand Down Expand Up @@ -411,6 +421,7 @@ func (w *tpcc) tpccHistoryInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufal
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)
l.rng.Seed(w.seed + uint64(rowIdx))
ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))

// This used to be a V4 uuid made through the normal `uuid.MakeV4`
// constructor, but we 1) want them to be deterministic and 2) want these rows
Expand All @@ -437,7 +448,7 @@ func (w *tpcc) tpccHistoryInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufal
cb.ColVec(5).Int64()[0] = int64(wID)
cb.ColVec(6).Bytes()[0] = w.nowString
cb.ColVec(7).Float64()[0] = 10.00
cb.ColVec(8).Bytes()[0] = randAString(l.rng, a, 12, 24)
cb.ColVec(8).Bytes()[0] = randAStringInitialDataOnly(&l.rng, &ao, a, 12, 24)
}

func (w *tpcc) tpccHistoryStats() []workload.JSONStatistic {
Expand Down Expand Up @@ -472,8 +483,10 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)

// NB: numOrderLines is not allowed to use precomputed random data, make sure
// it stays that way. See 4.3.2.1.
l.rng.Seed(w.seed + uint64(rowIdx))
numOrderLines := randInt(l.rng, minOrderLinesPerOrder, maxOrderLinesPerOrder)
numOrderLines := randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder)

oID := (rowIdx % numOrdersPerDistrict) + 1
dID := ((rowIdx / numOrdersPerDistrict) % numDistrictsPerWarehouse) + 1
Expand Down Expand Up @@ -504,7 +517,7 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
var carrierID int64
if oID < 2101 {
carrierSet = true
carrierID = randInt(l.rng, 1, 10)
carrierID = randInt(l.rng.Rand, 1, 10)
}

cb.Reset(orderColTypes, 1)
Expand Down Expand Up @@ -591,14 +604,17 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
l := w.localsPool.Get().(*generateLocals)
defer w.localsPool.Put(l)

// NB: numOrderLines is not allowed to use precomputed random data, make sure
// it stays that way. See 4.3.2.1.
l.rng.Seed(w.seed + uint64(orderRowIdx))
numOrderLines := int(randInt(l.rng, minOrderLinesPerOrder, maxOrderLinesPerOrder))
numOrderLines := int(randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder))

// NB: There is one batch of order_line rows per order
oID := (orderRowIdx % numOrdersPerDistrict) + 1
dID := ((orderRowIdx / numOrdersPerDistrict) % numDistrictsPerWarehouse) + 1
wID := (orderRowIdx / numOrdersPerWarehouse)

ao := aCharsOffset(l.rng.Intn(len(aCharsAlphabet)))
cb.Reset(orderLineColTypes, numOrderLines)
olOIDCol := cb.ColVec(0).Int64()
olDIDCol := cb.ColVec(1).Int64()
Expand All @@ -624,14 +640,14 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
deliveryDSet = true
deliveryD = w.nowString
} else {
amount = float64(randInt(l.rng, 1, 999999)) / 100.0
amount = float64(randInt(l.rng.Rand, 1, 999999)) / 100.0
}

olOIDCol[rowIdx] = int64(oID)
olDIDCol[rowIdx] = int64(dID)
olWIDCol[rowIdx] = int64(wID)
olNumberCol[rowIdx] = int64(olNumber)
olIIDCol[rowIdx] = randInt(l.rng, 1, 100000)
olIIDCol[rowIdx] = randInt(l.rng.Rand, 1, 100000)
olSupplyWIDCol[rowIdx] = int64(wID)
if deliveryDSet {
olDeliveryDCol[rowIdx] = deliveryD
Expand All @@ -641,7 +657,7 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
}
olQuantityCol[rowIdx] = 5
olAmountCol[rowIdx] = amount
olDistInfoCol[rowIdx] = randAString(l.rng, a, 24, 24)
olDistInfoCol[rowIdx] = randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24)
}
}

Expand Down
Loading

0 comments on commit 6299bd4

Please sign in to comment.