Skip to content

Commit

Permalink
Merge pull request #213 from llelf/hurry2
Browse files Browse the repository at this point in the history
Speedups improving the loadgen benchmark score.
  • Loading branch information
lukego committed Jun 24, 2014
2 parents 3f04426 + a9f532c commit 4f050e0
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 60 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ COBJ = $(CSRC:.c=.o)

LUAJIT_O := deps/luajit/src/libluajit.a

LUAJIT_CFLAGS := -DLUAJIT_USE_PERFTOOLS -DLUAJIT_USE_GDBJIT
LUAJIT_CFLAGS := -DLUAJIT_USE_PERFTOOLS -DLUAJIT_USE_GDBJIT -DLUAJIT_NUMMODE=3

all: $(LUAJIT_O)
cd src && $(MAKE)
Expand Down
65 changes: 34 additions & 31 deletions src/apps/intel/intel10g.lua
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ local index_set = require("lib.index_set")
local macaddress = require("lib.macaddress")

local bits, bitset = lib.bits, lib.bitset
local band, bor, lshift = bit.band, bit.bor, bit.lshift
local packet_ref = packet.ref

num_descriptors = 32 * 1024
--num_descriptors = 32
Expand Down Expand Up @@ -123,7 +125,7 @@ function M_sf:init_receive ()
TXCRCEN=0, RXCRCSTRP=1, JUMBOEN=2, rsv2=3, TXPADEN=10,
rsvd3=11, rsvd4=13, MDCSPD=16, RXLNGTHERREN=27,
})
self.r.MAXFRS(bit.lshift(9000+18, 16))
self.r.MAXFRS(lshift(9000+18, 16))
self:set_receive_descriptors()
self.r.RXCTRL:set(bits{RXEN=0})
return self
Expand Down Expand Up @@ -168,16 +170,17 @@ end

--- See datasheet section 7.1 "Inline Functions -- Transmit Functionality."

txdesc_flags = bits{ifcs=25, dext=29, dtyp0=20, dtyp1=21}
txdesc_flags_last = bits({eop=24}, txdesc_flags)
local txdesc_flags = bits{ifcs=25, dext=29, dtyp0=20, dtyp1=21}
local txdesc_flags_last = bits({eop=24}, txdesc_flags)

function M_sf:transmit (p)
for i = 0, p.niovecs - 1 do
local iov = p.iovecs[i]
local flags = (i + 1 < p.niovecs) and txdesc_flags or txdesc_flags_last
self.txdesc[self.tdt].address = iov.buffer.physical + iov.offset
self.txdesc[self.tdt].options = bit.bor(iov.length, flags, bit.lshift(p.length+0ULL, 46))
self.txpackets[self.tdt] = packet.ref(p)
self.tdt = (self.tdt + 1) % num_descriptors
self.txdesc[self.tdt].options = bor(iov.length, flags, lshift(p.length+0ULL, 46))
self.txpackets[self.tdt] = packet_ref(p)
self.tdt = band(self.tdt + 1, num_descriptors - 1)
end
end

Expand All @@ -189,13 +192,13 @@ function M_sf:sync_transmit ()
while old_tdh ~= self.tdh do
packet.deref(self.txpackets[old_tdh])
self.txpackets[old_tdh] = nil
old_tdh = (old_tdh + 1) % num_descriptors
old_tdh = band(old_tdh + 1, num_descriptors - 1)
end
self.r.TDT(self.tdt)
end

function M_sf:can_transmit ()
return (self.tdt + 1) % num_descriptors ~= self.tdh
return band(self.tdt + 1, num_descriptors - 1) ~= self.tdh
end

--- ### Receive
Expand All @@ -207,29 +210,29 @@ function M_sf:receive ()
local p = packet.allocate()
repeat
local wb = self.rxdesc[self.rxnext].wb
if bit.band(wb.xstatus_xerror, 1) == 1 then -- Descriptor Done
if band(wb.xstatus_xerror, 1) == 1 then -- Descriptor Done
local b = self.rxbuffers[self.rxnext]
packet.add_iovec(p, b, wb.pkt_len)
self.rxnext = (self.rxnext + 1) % num_descriptors
self.rxnext = band(self.rxnext + 1, num_descriptors - 1)
end
until bit.band(wb.xstatus_xerror, 2) == 2 -- End Of Packet
until band(wb.xstatus_xerror, 2) == 2 -- End Of Packet
return p
end

function M_sf:can_receive ()
return self.rxnext ~= self.rdh and bit.band(self.rxdesc[self.rxnext].wb.xstatus_xerror, 1) == 1
return self.rxnext ~= self.rdh and band(self.rxdesc[self.rxnext].wb.xstatus_xerror, 1) == 1
end

function M_sf:can_add_receive_buffer ()
return (self.rdt + 1) % num_descriptors ~= self.rxnext
return band(self.rdt + 1, num_descriptors - 1) ~= self.rxnext
end

function M_sf:add_receive_buffer (b)
assert(self:can_add_receive_buffer())
local desc = self.rxdesc[self.rdt].data
desc.address, desc.dd = b.physical, 0
self.rxbuffers[self.rdt] = b
self.rdt = (self.rdt + 1) % num_descriptors
self.rdt = band(self.rdt + 1, num_descriptors - 1)
end

function M_sf:sync_receive ()
Expand All @@ -254,7 +257,7 @@ function negotiated_autoc (dev, f)
dev.r.SWSM:wait(bits{SMBI=0}) -- TODO: expire at 10ms
dev.r.SWSM:set(bits{SWESMBI=1})
dev.r.SWSM:wait(bits{SWESMBI=1}) -- TODO: expire at 3s
accessible = bit.band(dev.r.SW_FW_SYNC(), 0x8) == 0
accessible = band(dev.r.SW_FW_SYNC(), 0x8) == 0
if accessible then
dev.r.SW_FW_SYNC:set(0x8)
end
Expand All @@ -273,9 +276,9 @@ end

function set_SFI (dev)
local autoc = dev.r.AUTOC()
autoc = bit.bor(
bit.band(autoc, 0xFFFF0C7E), -- clears FLU, 10g_pma, 1g_pma, restart_AN, LMS
bit.lshift(0x3, 13) -- LMS(15:13) = 011b
autoc = bor(
band(autoc, 0xFFFF0C7E), -- clears FLU, 10g_pma, 1g_pma, restart_AN, LMS
lshift(0x3, 13) -- LMS(15:13) = 011b
)
dev.r.AUTOC(autoc) -- TODO: firmware synchronization
return dev
Expand Down Expand Up @@ -372,7 +375,7 @@ function M_pf:set_vmdq_mode ()
end
-- clear PFQDE.QDE (queue drop enable) for each queue
for i = 0, 127 do
self.r.PFQDE(bit.bor(bit.lshift(1,16), bit.lshift(i,8)))
self.r.PFQDE(bor(lshift(1,16), lshift(i,8)))
self.r.FTQF[i](0x00) -- disable L3/4 filter
self.r.RAH[i](0)
self.r.RAL[i](0)
Expand Down Expand Up @@ -534,7 +537,7 @@ function M_vf:set_mirror (want_mirror)

-- mirror some or all pools
if want_mirror.pool then
mirror_rule = bit.bor(bits{VPME=0}, mirror_rule)
mirror_rule = bor(bits{VPME=0}, mirror_rule)
if want_mirror.pool == true then -- mirror all pools
self.pf.r.PFMRVM[mirror_ndx](0xFFFFFFFF)
self.pf.r.PFMRVM[mirror_ndx+4](0xFFFFFFFF)
Expand All @@ -543,9 +546,9 @@ function M_vf:set_mirror (want_mirror)
local bm1 = self.pf.r.PFMRVM[mirror_ndx+4]
for _, pool in ipairs(want_mirror.pool) do
if pool <= 32 then
bm0 = bit.bor(bit.lshift(1, pool), bm0)
bm0 = bor(lshift(1, pool), bm0)
else
bm1 = bit.bor(bit.lshift(1, pool-32), bm1)
bm1 = bor(lshift(1, pool-32), bm1)
end
end
self.pf.r.PFMRVM[mirror_ndx](bm0)
Expand All @@ -556,20 +559,20 @@ function M_vf:set_mirror (want_mirror)
-- mirror hardware port
if want_mirror.port then
if want_mirror.port == true or want_mirror.port == 'in' or want_mirror.port == 'inout' then
mirror_rule = bit.bor(bits{UPME=1}, mirror_rule)
mirror_rule = bor(bits{UPME=1}, mirror_rule)
end
if want_mirror.port == true or want_mirror.port == 'out' or want_mirror.port == 'inout' then
mirror_rule = bit.bor(bits{DPME=2}, mirror_rule)
mirror_rule = bor(bits{DPME=2}, mirror_rule)
end
end

-- mirror some or all vlans
if want_mirror.vlan then
mirror_rule = bit.bor(bits{VLME=3}, mirror_rule)
mirror_rule = bor(bits{VLME=3}, mirror_rule)
-- TODO: set which VLANs to mirror
end
if mirror_rule ~= 0 then
mirror_rule = bit.bor(mirror_rule, bit.lshift(self.poolnum, 8))
mirror_rule = bor(mirror_rule, lshift(self.poolnum, 8))
self.pf.r.PFMRCTL[mirror_ndx]:set(mirror_rule)
end
end
Expand Down Expand Up @@ -609,15 +612,15 @@ function M_vf:set_rx_stats (counter)
if not counter then return self end
assert(counter>=0 and counter<16, "bad Rx counter")
self.rxstats = counter
self.pf.qs.RQSMR[math.floor(self.rxqn/4)]:set(bit.lshift(counter,8*(self.rxqn%4)))
self.pf.qs.RQSMR[math.floor(self.rxqn/4)]:set(lshift(counter,8*(self.rxqn%4)))
return self
end

function M_vf:set_tx_stats (counter)
if not counter then return self end
assert(counter>=0 and counter<16, "bad Tx counter")
self.txstats = counter
self.pf.qs.TQSM[math.floor(self.txqn/4)]:set(bit.lshift(counter,8*(self.txqn%4)))
self.pf.qs.TQSM[math.floor(self.txqn/4)]:set(lshift(counter,8*(self.txqn%4)))
return self
end

Expand All @@ -627,7 +630,7 @@ function M_vf:get_rxstats ()
counter_id = self.rxstats,
packets = tonumber(self.pf.qs.QPRC[self.rxstats]()),
dropped = tonumber(self.pf.qs.QPRDC[self.rxstats]()),
bytes = tonumber(bit.lshift(self.pf.qs.QBRC_H[self.rxstats]()+0LL, 32)
bytes = tonumber(lshift(self.pf.qs.QBRC_H[self.rxstats]()+0LL, 32)
+ self.pf.qs.QBRC_L[self.rxstats]())
}
end
Expand All @@ -637,15 +640,15 @@ function M_vf:get_txstats ()
return {
counter_id = self.txstats,
packets = tonumber(self.pf.qs.QPTC[self.txstats]()),
bytes = tonumber(bit.lshift(self.pf.qs.QBTC_H[self.txstats]()+0LL, 32)
bytes = tonumber(lshift(self.pf.qs.QBTC_H[self.txstats]()+0LL, 32)
+ self.pf.qs.QBTC_L[self.txstats]())
}
end

function M_vf:set_tx_rate (limit)
if not limit then return self end
local factor = 10000 / tonumber(limit) -- line rate = 10,000 Mb/s
factor = bit.band(math.floor(factor*2^14+0.5), 2^24-1) -- 10.14 bits
factor = band(math.floor(factor*2^14+0.5), 2^24-1) -- 10.14 bits
self.pf.r.RTTDQSEL(self.poolnum)
self.pf.r.RTTBCNRC(bits({RS_ENA=31}, factor))
return self
Expand Down
15 changes: 8 additions & 7 deletions src/apps/intel/intel_app.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ local lib = require("core.lib")
local register = require("lib.hardware.register")
local intel10g = require("apps.intel.intel10g")
local freelist = require("core.freelist")

local receive, transmit, full, empty = link.receive, link.transmit, link.full, link.empty
Intel82599 = {}
Intel82599.__index = Intel82599

Expand Down Expand Up @@ -74,8 +74,8 @@ function Intel82599:pull ()
local l = self.output.tx
if l == nil then return end
self.dev:sync_receive()
while not link.full(l) and self.dev:can_receive() do
link.transmit(l, self.dev:receive())
while not full(l) and self.dev:can_receive() do
transmit(l, self.dev:receive())
end
self:add_receive_buffers()
end
Expand All @@ -99,10 +99,11 @@ end
function Intel82599:push ()
local l = self.input.rx
if l == nil then return end
while not link.empty(l) and self.dev:can_transmit() do
local p = link.receive(l)
self.dev:transmit(p)
packet.deref(p)
while not empty(l) and self.dev:can_transmit() do
do local p = receive(l)
self.dev:transmit(p)
packet.deref(p)
end
end
self.dev:sync_transmit()
end
Expand Down
10 changes: 7 additions & 3 deletions src/apps/intel/loadgen.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ local buffer = require("core.buffer")
local intel10g = require("apps.intel.intel10g")
local memory = require("core.memory")
local register = require("lib.hardware.register")
local receive, empty = link.receive, link.empty
local can_transmit, transmit

LoadGen = {}

Expand All @@ -20,6 +22,7 @@ function LoadGen:new (pciaddress)
o.dev:wait_linkup()
disable_tx_descriptor_writeback(o.dev)
zero_descriptors(o.dev)
can_transmit, transmit = o.dev.can_transmit, o.dev.transmit
return setmetatable(o, {__index = LoadGen})
end

Expand All @@ -45,9 +48,10 @@ end

function LoadGen:push ()
if self.input.input then
while not link.empty(self.input.input) and self.dev:can_transmit() do
local p = link.receive(self.input.input)
self.dev:transmit(p)
while not link.empty(self.input.input) and can_transmit(self.dev) do
do local p = receive(self.input.input)
transmit(self.dev, p)
end
end
end
end
Expand Down
11 changes: 5 additions & 6 deletions src/core/link.lua
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ local packet = require("core.packet")
require("core.packet_h")
require("core.link_h")

local band = require("bit").band

local size = C.LINK_RING_SIZE -- NB: Huge slow-down if this is not local
max = C.LINK_MAX_PACKETS

Expand All @@ -19,10 +21,7 @@ end
function receive (r)
-- if debug then assert(not empty(r), "receive on empty link") end
local p = r.packets[r.read]
do local n = r.read + 1
if n >= size then n = 0 end
r.read = n
end
r.read = band(r.read + 1, size - 1)

r.stats.rxpackets = r.stats.rxpackets + 1
r.stats.rxbytes = r.stats.rxbytes + p.length
Expand All @@ -35,7 +34,7 @@ function transmit (r, p)
r.stats.txdrop = r.stats.txdrop + 1
else
r.packets[r.write] = p
r.write = (r.write + 1) % size
r.write = band(r.write + 1, size - 1)
r.stats.txpackets = r.stats.txpackets + 1
r.stats.txbytes = r.stats.txbytes + p.length
r.has_new_data = true
Expand All @@ -49,7 +48,7 @@ end

-- Return true if the ring is full.
function full (r)
return (r.write + 1) % size == r.read
return band(r.write + 1, size - 1) == r.read
end

-- Return the number of packets that are ready for read.
Expand Down
5 changes: 1 addition & 4 deletions src/core/packet.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,9 @@ enum {

struct packet {
int32_t refcount;
// How much "fuel" does this packet have left before it's dropped?
// This is like the Time-To-Live (TTL) IP header field.
int32_t fuel;
int32_t color;
struct packet_info info;
int niovecs;
int length;
struct packet_iovec iovecs[PACKET_IOVEC_MAX];
};
} __attribute__ ((aligned(64)));
13 changes: 6 additions & 7 deletions src/core/packet.lua
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ local buffer = require("core.buffer")
local freelist = require("core.freelist")
local lib = require("core.lib")
local memory = require("core.memory")
local freelist_add, freelist_remove = freelist.add, freelist.remove

require("core.packet_h")

initial_fuel = 1000
max_packets = 1e6
packets_fl = freelist.new("struct packet *", max_packets)
packets = ffi.new("struct packet[?]", max_packets)
Expand All @@ -26,7 +26,7 @@ end

-- Return a packet, or nil if none is available.
function allocate ()
return freelist.remove(packets_fl) or error("out of packets")
return freelist_remove(packets_fl) or error("out of packets")
end

-- Append data to a packet.
Expand Down Expand Up @@ -203,10 +203,10 @@ function free (p)
for i = 0, p.niovecs-1 do
buffer.free(p.iovecs[i].buffer)
end
ffi.fill(p, packet_size, 0)
p.length = 0
p.niovecs = 0
p.refcount = 1
p.fuel = initial_fuel
freelist.add(packets_fl, p)
freelist_add(packets_fl, p)
end

function iovec_dump (iovec)
Expand All @@ -229,7 +229,6 @@ end
function report (p)
local result = string.format([[
refcount: %d
fuel: %d
info.flags: %X
info.gso_flags: %X
info.hdr_len: %d
Expand All @@ -239,7 +238,7 @@ function report (p)
niovecs: %d
length: %d
]],
p.refcount, p.fuel, p.info.flags, p.info.gso_flags,
p.refcount, p.info.flags, p.info.gso_flags,
p.info.hdr_len, p.info.gso_size, p.info.csum_start,
p.info.csum_offset, p.niovecs, p.length
)
Expand Down
Loading

0 comments on commit 4f050e0

Please sign in to comment.