From 4516947a9701028f263d6c13bf3591031c1d348d Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 13 Oct 2016 15:56:57 +0000 Subject: [PATCH] Implement headroom by moving packet around Instead of mutating a pointer inside "struct packet" to do cheap shiftleft/shiftright, this patch instead moves around the packet pointer itself. The only performance testing I did was "snabb lwaftr bench", where it appears to be neutral-to-slight-win. I am hoping that the Hydra/R scientists can help me out on this one. This change to the packet structure does have a knock-on effect, in two significant ways. One is that the shiftleft, shiftright, and prepend functions now return a new packet pointer and silently corrupt the old one (because probably they shifted around the data but still had to update the new "length" pointer). That's somewhat OK though. The problem comes in the datagram library, which sometimes wants to work on a user-supplied packet. The sequence was: local dgram = datagram:new(pkt) dgram:push(....) dgram:free() ... foo(pkt) This pattern is no longer valid :( The reason is that the datagram methods that mutate a packet often do so by packet.prepend(). The datagram library updates its internal packet pointer, but the external one that was initially passed in is no longer valid. So much of this patch is visiting (hopefully) all of the uses/users and updating them. --- src/apps/ipv6/nd_light.lua | 5 +- src/apps/keyed_ipv6_tunnel/tunnel.lua | 4 +- src/apps/lwaftr/arp.lua | 2 + src/apps/lwaftr/fragmentv4_hardened.lua | 11 +--- src/apps/lwaftr/fragmentv6_hardened.lua | 11 +--- src/apps/lwaftr/generator.lua | 2 +- src/apps/lwaftr/icmp.lua | 5 +- src/apps/lwaftr/lwaftr.lua | 4 +- src/apps/lwaftr/ndp.lua | 19 +++--- src/apps/vpn/vpws.lua | 2 + src/core/packet.h | 5 +- src/core/packet.lua | 77 +++++++++++++++---------- src/lib/protocol/datagram.lua | 7 ++- src/lib/virtio/virtq_driver.lua | 4 +- 14 files changed, 84 insertions(+), 74 deletions(-) diff --git a/src/apps/ipv6/nd_light.lua b/src/apps/ipv6/nd_light.lua index e146e3a0ac..8bee094d2a 100644 --- a/src/apps/ipv6/nd_light.lua +++ b/src/apps/ipv6/nd_light.lua @@ -109,7 +109,6 @@ function nd_light:new (arg) -- Prepare packet for solicitation of next hop local nh = { nsent = 0 } local dgram = datagram:new() - nh.packet = dgram:packet() local sol_node_mcast = ipv6:solicited_node_mcast(conf.next_hop) local ipv6 = ipv6:new({ next_header = 58, -- ICMP6 hop_limit = 255, @@ -134,6 +133,7 @@ function nd_light:new (arg) dgram:push(ethernet:new({ src = conf.local_mac, dst = ethernet:ipv6_mcast(sol_node_mcast), type = 0x86dd })) + nh.packet = dgram:packet() dgram:free() -- Timer for retransmits of neighbor solicitations @@ -162,7 +162,6 @@ function nd_light:new (arg) -- Prepare packet for solicited neighbor advertisement local sna = {} dgram = datagram:new() - sna.packet = dgram:packet() -- Leave dst address unspecified. It will be set to the source of -- the incoming solicitation ipv6 = ipv6:new({ next_header = 58, -- ICMP6 @@ -183,6 +182,8 @@ function nd_light:new (arg) -- Leave dst address unspecified. dgram:push(ethernet:new({ src = conf.local_mac, type = 0x86dd })) + sna.packet = dgram:packet() + -- Parse the headers we want to modify later on from our template -- packet. dgram = dgram:new(sna.packet, ethernet) diff --git a/src/apps/keyed_ipv6_tunnel/tunnel.lua b/src/apps/keyed_ipv6_tunnel/tunnel.lua index 9702a67ecc..af5e4aa9a0 100644 --- a/src/apps/keyed_ipv6_tunnel/tunnel.lua +++ b/src/apps/keyed_ipv6_tunnel/tunnel.lua @@ -189,7 +189,7 @@ function SimpleKeyedTunnel:push() while not link.empty(l_in) do local p = link.receive(l_in) - packet.prepend(p, self.header, HEADER_SIZE) + p = packet.prepend(p, self.header, HEADER_SIZE) local plength = ffi.cast(plength_ctype, p.data + LENGTH_OFFSET) plength[0] = lib.htons(SESSION_COOKIE_SIZE + p.length - HEADER_SIZE) link.transmit(l_out, p) @@ -244,7 +244,7 @@ function SimpleKeyedTunnel:push() -- discard packet packet.free(p) else - packet.shiftleft(p, HEADER_SIZE) + p = packet.shiftleft(p, HEADER_SIZE) link.transmit(l_out, p) end end diff --git a/src/apps/lwaftr/arp.lua b/src/apps/lwaftr/arp.lua index 3b9bddd338..e96aa291b6 100644 --- a/src/apps/lwaftr/arp.lua +++ b/src/apps/lwaftr/arp.lua @@ -94,6 +94,7 @@ function form_request(src_eth, src_ipv4, dst_ipv4) local dgram = datagram:new(req_pkt) dgram:push(ethernet:new({ src = src_eth, dst = ethernet_broadcast, type = ethertype_arp })) + req_pkt = dgram:packet() dgram:free() return req_pkt end @@ -107,6 +108,7 @@ function form_reply(local_eth, local_ipv4, arp_request_pkt) local dgram = datagram:new(reply_pkt) dgram:push(ethernet:new({ src = local_eth, dst = dst_eth, type = ethertype_arp })) + reply_pkt = dgram:packet() dgram:free() return reply_pkt end diff --git a/src/apps/lwaftr/fragmentv4_hardened.lua b/src/apps/lwaftr/fragmentv4_hardened.lua index fc6a28e370..87a299b567 100644 --- a/src/apps/lwaftr/fragmentv4_hardened.lua +++ b/src/apps/lwaftr/fragmentv4_hardened.lua @@ -149,13 +149,6 @@ local function fix_pkt_checksum(pkt) htons(ipsum(pkt.data + ehs, ihl, 0))) end -local function pseudo_clone(data, len) - local p = packet.allocate() - p.headroom = 0 - packet.append(p, data, len) - return p -end - local function attempt_reassembly(frags_table, reassembly_buf, fragment) local ihl = get_ihl_from_offset(fragment, ehs) local frag_id = get_frag_id(fragment) @@ -213,8 +206,8 @@ local function attempt_reassembly(frags_table, reassembly_buf, fragment) local pkt_len = htons(reassembly_buf.reassembly_length - ehs) local o_len = ehs + o_ipv4_total_length wr16(reassembly_data + o_len, pkt_len) - local reassembled_packet = pseudo_clone(reassembly_buf.reassembly_data, - reassembly_buf.reassembly_length) + local reassembled_packet = packet.from_pointer( + reassembly_buf.reassembly_data, reassembly_buf.reassembly_length) fix_pkt_checksum(reassembled_packet) free_reassembly_buf_and_pkt(fragment, frags_table) return REASSEMBLY_OK, reassembled_packet diff --git a/src/apps/lwaftr/fragmentv6_hardened.lua b/src/apps/lwaftr/fragmentv6_hardened.lua index b8452a9f79..7c1336499c 100644 --- a/src/apps/lwaftr/fragmentv6_hardened.lua +++ b/src/apps/lwaftr/fragmentv6_hardened.lua @@ -145,13 +145,6 @@ local function reassembly_status(reassembly_buf) return REASSEMBLY_OK end -local function pseudo_clone(data, len) - local p = packet.allocate() - p.headroom = 0 - packet.append(p, data, len) - return p -end - local function attempt_reassembly(frags_table, reassembly_buf, fragment) local frag_id = get_frag_id(fragment) if frag_id ~= reassembly_buf.fragment_id then -- unreachable @@ -204,8 +197,8 @@ local function attempt_reassembly(frags_table, reassembly_buf, fragment) local restatus = reassembly_status(reassembly_buf) if restatus == REASSEMBLY_OK then - local reassembled_packet = pseudo_clone(reassembly_buf.reassembly_data, - reassembly_buf.reassembly_length) + local reassembled_packet = packet.from_pointer( + reassembly_buf.reassembly_data, reassembly_buf.reassembly_length) free_reassembly_buf_and_pkt(fragment, frags_table) return REASSEMBLY_OK, reassembled_packet else diff --git a/src/apps/lwaftr/generator.lua b/src/apps/lwaftr/generator.lua index 402a490382..f275a3f33a 100644 --- a/src/apps/lwaftr/generator.lua +++ b/src/apps/lwaftr/generator.lua @@ -277,7 +277,7 @@ function ipv6_encapsulate(ipv4_pkt, params) local payload_length = p.length - ethernet_header_size local dscp_and_ecn = p.data[ethernet_header_size + IPV4_DSCP_AND_ECN_OFFSET] - packet.shiftright(p, ipv6_header_size) + p = packet.shiftright(p, ipv6_header_size) -- IPv6 packet is tagged local eth_hdr diff --git a/src/apps/lwaftr/icmp.lua b/src/apps/lwaftr/icmp.lua index f4533a1528..0e56bca8d1 100644 --- a/src/apps/lwaftr/icmp.lua +++ b/src/apps/lwaftr/icmp.lua @@ -85,8 +85,9 @@ function new_icmpv4_packet(from_eth, to_eth, from_ip, to_ip, initial_pkt, config protocol = constants.proto_icmp, src = from_ip, dst = to_ip}) dgram:push(ipv4_header) + new_pkt = dgram:packet() ipv4_header:free() - packet.shiftright(new_pkt, ehs) + new_pkt = packet.shiftright(new_pkt, ehs) write_eth_header(new_pkt.data, from_eth, to_eth, constants.n_ethertype_ipv4, config.vlan_tag) -- Generate RFC 1812 ICMPv4 packets, which carry as much payload as they can, @@ -126,7 +127,7 @@ function new_icmpv6_packet(from_eth, to_eth, from_ip, to_ip, initial_pkt, config next_header = constants.proto_icmpv6, src = from_ip, dst = to_ip}) dgram:push(ipv6_header) - packet.shiftright(new_pkt, ehs) + new_pkt = packet.shiftright(dgram:packet(), ehs) write_eth_header(new_pkt.data, from_eth, to_eth, constants.n_ethertype_ipv6) local max_size = constants.max_icmpv6_packet_size diff --git a/src/apps/lwaftr/lwaftr.lua b/src/apps/lwaftr/lwaftr.lua index e86682360a..0c97708efb 100644 --- a/src/apps/lwaftr/lwaftr.lua +++ b/src/apps/lwaftr/lwaftr.lua @@ -490,7 +490,7 @@ local function encapsulate_and_transmit(lwstate, pkt, ipv6_dst, ipv6_src, pkt_sr local l3_header = get_ethernet_payload(pkt) local dscp_and_ecn = get_ipv4_dscp_and_ecn(l3_header) -- Note that this may invalidate any pointer into pkt.data. Be warned! - packet.shiftright(pkt, ipv6_fixed_header_size) + pkt = packet.shiftright(pkt, ipv6_fixed_header_size) -- Fetch possibly-moved L3 header location. l3_header = get_ethernet_payload(pkt) write_eth_header(pkt.data, ether_src, ether_dst, n_ethertype_ipv6) @@ -732,7 +732,7 @@ local function flush_decapsulation(lwstate) and ipv6_equals(get_ipv6_dst_address(ipv6_header), br_addr)) then -- Source softwire is valid; decapsulate and forward. -- Note that this may invalidate any pointer into pkt.data. Be warned! - packet.shiftleft(pkt, ipv6_fixed_header_size) + pkt = packet.shiftleft(pkt, ipv6_fixed_header_size) write_eth_header(pkt.data, lwstate.aftr_mac_inet_side, lwstate.inet_mac, n_ethertype_ipv4) transmit_ipv4(lwstate, pkt) diff --git a/src/apps/lwaftr/ndp.lua b/src/apps/lwaftr/ndp.lua index 82eeeb5153..469cc0bc8a 100644 --- a/src/apps/lwaftr/ndp.lua +++ b/src/apps/lwaftr/ndp.lua @@ -222,17 +222,18 @@ function form_ns(local_eth, local_ipv6, dst_ipv6) local i = ipv6:new({ hop_limit = hop_limit, next_header = proto_icmpv6, src = local_ipv6, dst = dst_ipv6 }) - i:payload_length(ns_pkt.length) + i:payload_length(dgram:packet().length) - local ph = i:pseudo_header(ns_pkt.length, proto_icmpv6) + local ph = i:pseudo_header(dgram:packet().length, proto_icmpv6) local ph_len = ipv6_pseudoheader_size local base_checksum = checksum.ipsum(ffi.cast("uint8_t*", ph), ph_len, 0) - local csum = checksum.ipsum(ns_pkt.data, ns_pkt.length, bit.bnot(base_checksum)) - wr16(ns_pkt.data + 2, C.htons(csum)) + local csum = checksum.ipsum(dgram:packet().data, dgram:packet().length, bit.bnot(base_checksum)) + wr16(dgram:packet().data + 2, C.htons(csum)) dgram:push(i) dgram:push(ethernet:new({ src = local_eth, dst = ethernet_broadcast, type = ethertype_ipv6 })) + ns_pkt = dgram:packet() dgram:free() return ns_pkt end @@ -265,17 +266,19 @@ local function form_sna(local_eth, local_ipv6, is_router, soliciting_pkt) local i = ipv6:new({ hop_limit = hop_limit, next_header = proto_icmpv6, src = local_ipv6, dst = dst_ipv6 }) - i:payload_length(na_pkt.length) + i:payload_length(dgram:packet().length) - local ph = i:pseudo_header(na_pkt.length, proto_icmpv6) + local ph = i:pseudo_header(dgram:packet().length, proto_icmpv6) local ph_len = ipv6_pseudoheader_size local base_checksum = checksum.ipsum(ffi.cast("uint8_t*", ph), ph_len, 0) - local csum = checksum.ipsum(na_pkt.data, na_pkt.length, bit.bnot(base_checksum)) - wr16(na_pkt.data + 2, C.htons(csum)) + local csum = checksum.ipsum(dgram:packet().data, dgram:packet().length, + bit.bnot(base_checksum)) + wr16(dgram:packet().data + 2, C.htons(csum)) dgram:push(i) dgram:push(ethernet:new({ src = local_eth, dst = dst_eth, type = ethertype_ipv6 })) + na_pkt = dgram:packet() dgram:free() return na_pkt end diff --git a/src/apps/vpn/vpws.lua b/src/apps/vpn/vpws.lua index f65d37a7b1..3dcb136597 100644 --- a/src/apps/vpn/vpws.lua +++ b/src/apps/vpn/vpws.lua @@ -88,6 +88,7 @@ function vpws:push() datagram:push(encap.gre) datagram:push(encap.ipv6) datagram:push(encap.ether) + p = datagram:packet() else -- Check for encapsulated frame coming in on uplink if self._filter:match(datagram:payload()) then @@ -115,6 +116,7 @@ function vpws:push() -- Unsupported GRE options or flags valid = false end + p = datagram:packet() if not valid then packet.free(p) p = nil diff --git a/src/core/packet.h b/src/core/packet.h index 7de818b43f..a5dcac9a5f 100644 --- a/src/core/packet.h +++ b/src/core/packet.h @@ -5,10 +5,7 @@ enum { PACKET_PAYLOAD_SIZE = 10*1024 }; // Packet of network data, with associated metadata. struct packet { - unsigned char *data; uint16_t length; // data payload length - uint16_t headroom; // payload starts this many bytes into data_ - uint32_t padding; - unsigned char data_[PACKET_PAYLOAD_SIZE]; + unsigned char data[PACKET_PAYLOAD_SIZE]; }; diff --git a/src/core/packet.lua b/src/core/packet.lua index a3dd8b5dad..652c747118 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -5,6 +5,7 @@ module(...,package.seeall) local debug = _G.developer_debug local ffi = require("ffi") +local bit = require("bit") local C = ffi.C local lib = require("core.lib") @@ -16,12 +17,14 @@ require("core.packet_h") local packet_t = ffi.typeof("struct packet") local packet_ptr_t = ffi.typeof("struct packet *") local packet_size = ffi.sizeof(packet_t) -local header_size = 8 --- By default, enough headroom for an inserted IPv6 header and a --- virtio header. -local default_headroom = 64 max_payload = tonumber(C.PACKET_PAYLOAD_SIZE) +-- For operations that add or remove headers from the beginning of a +-- packet, instead of copying around the payload we just move the +-- packet structure as a whole around. +local packet_alignment = 512 +local default_headroom = 256 + -- Freelist containing empty packets ready for use. ffi.cdef[[ @@ -69,26 +72,21 @@ end -- Create a new empty packet. function new_packet () - local p = ffi.cast(packet_ptr_t, memory.dma_alloc(packet_size)) - p.headroom = default_headroom - p.data = p.data_ + p.headroom + local base = memory.dma_alloc(packet_size + packet_alignment, + packet_alignment) + local p = ffi.cast(packet_ptr_t, base + default_headroom) p.length = 0 return p end -- Create an exact copy of a packet. function clone (p) - local p2 = allocate() - p2.length = p.length - p2.headroom = p.headroom - p2.data = p2.data_ + p2.headroom - ffi.copy(p2.data, p.data, p.length) - return p2 + return from_pointer(p.data, p.length) end -- Append data to the end of a packet. function append (p, ptr, len) - assert(p.length + len + p.headroom <= max_payload, "packet payload overflow") + assert(p.length + len <= max_payload, "packet payload overflow") ffi.copy(p.data + p.length, ptr, len) p.length = p.length + len return p @@ -96,7 +94,7 @@ end -- Prepend data to the start of a packet. function prepend (p, ptr, len) - shiftright(p, len) + p = shiftright(p, len) ffi.copy(p.data, ptr, len) -- Fill the gap return p end @@ -105,28 +103,46 @@ end -- the header bytes at the front. function shiftleft (p, bytes) assert(bytes >= 0 and bytes <= p.length) - p.data = p.data + bytes - p.headroom = p.headroom + bytes - p.length = p.length - bytes + local ptr = ffi.cast("char*", p) + local len = p.length + local headroom = bit.band(ffi.cast("uint64_t", ptr), packet_alignment - 1) + -- We only have a certain amount of headroom, otherwise the end of + -- p.data will point out of our allocation. If we're withing the + -- alignment wiggle room, just move the packet around. Otherwise + -- copy the payload, but also reset the headroom at the same time. + if bytes + headroom < packet_alignment then + p = ffi.cast(packet_ptr_t, ptr + bytes) + p.length = len - bytes + return p + else + local delta_headroom = default_headroom - headroom + C.memmove(p.data + delta_headroom, p.data + bytes, len - bytes) + p = ffi.cast(packet_ptr_t, ptr + delta_headroom) + p.length = len - bytes + return p + end end -- Move packet data to the right. This leaves length bytes of data -- at the beginning of the packet. function shiftright (p, bytes) - if bytes <= p.headroom then + local ptr = ffi.cast("char*", p) + local len = p.length + local headroom = bit.band(ffi.cast("uint64_t", ptr), packet_alignment - 1) + if bytes <= headroom then -- Take from the headroom. - assert(bytes >= 0) - p.headroom = p.headroom - bytes + p = ffi.cast(packet_ptr_t, ptr - bytes) + p.length = len + bytes + return p else -- No headroom for the shift; re-set the headroom to the default. - assert(bytes <= max_payload - p.length) - p.headroom = default_headroom - -- Could be we fit in the packet, but not with headroom. - if p.length + bytes >= max_payload - p.headroom then p.headroom = 0 end - C.memmove(p.data_ + p.headroom + bytes, p.data, p.length) + assert(bytes <= max_payload - len) + local delta_headroom = default_headroom - headroom + C.memmove(p.data + bytes + delta_headroom, p.data, len) + p = ffi.cast(packet_ptr_t, ptr + delta_headroom) + p.length = len + bytes + return p end - p.data = p.data_ + p.headroom - p.length = p.length + bytes end -- Conveniently create a packet by copying some existing data. @@ -135,9 +151,10 @@ function from_string (d) return from_pointer(d, #d) end -- Free a packet that is no longer in use. local function free_internal (p) + local ptr = ffi.cast("char*", p) + local headroom = bit.band(ffi.cast("uint64_t", ptr), packet_alignment - 1) + p = ffi.cast(packet_ptr_t, ptr - headroom + default_headroom) p.length = 0 - p.headroom = default_headroom - p.data = p.data_ + p.headroom freelist_add(packets_fl, p) end diff --git a/src/lib/protocol/datagram.lua b/src/lib/protocol/datagram.lua index f2b180e64c..a8860b27aa 100644 --- a/src/lib/protocol/datagram.lua +++ b/src/lib/protocol/datagram.lua @@ -147,7 +147,7 @@ function datagram:push_raw (data, length) -- The memmove() would invalidate the data pointer of headers -- that have already been parsed. assert(self._parse.index == 0, "parse stack not empty") - packet.prepend(self._packet[0], data, length) + self._packet[0] = packet.prepend(self._packet[0], data, length) self._parse.offset = self._parse.offset + length end end @@ -277,7 +277,7 @@ function datagram:pop_raw (length, ulp) -- The memmove() would invalidate the data pointer of headers -- that have already been parsed. assert(self._parse.index == 0, "parse stack not empty") - packet.shiftleft(self._packet[0], length) + self._packet[0] = packet.shiftleft(self._packet[0], length) end if ulp then self._parse.ulp = ulp end end @@ -347,6 +347,7 @@ function selftest () dgram:push(l2tp) dgram:push(ip) dgram:push(ether) + p = dgram:packet() local _, p_size = dgram:payload(data, data_size) assert(p_size == data_size) local _, d_size = dgram:data() @@ -379,7 +380,7 @@ function selftest () dgram:commit() _, d_size = dgram:data() assert(d_size == ether2:sizeof() + ip2:sizeof() + l2tp:sizeof() + data_size) - dgram:new(p, ethernet, { delayed_commit = true }) + dgram:new(dgram:packet(), ethernet, { delayed_commit = true }) assert(ether2:eq(dgram:parse())) assert(ip2:eq(dgram:parse())) assert(l2tp:eq(dgram:parse())) diff --git a/src/lib/virtio/virtq_driver.lua b/src/lib/virtio/virtq_driver.lua index badc70a84c..517a3763b7 100644 --- a/src/lib/virtio/virtq_driver.lua +++ b/src/lib/virtio/virtq_driver.lua @@ -96,7 +96,7 @@ function VirtioVirtq:add(p, len, flags, csum_start, csum_offset) self.num_free = self.num_free -1 desc.next = -1 - packet.shiftright(p, pk_header_size) + p = packet.shiftright(p, pk_header_size) local header = ffi.cast("struct virtio_net_hdr *", p.data) header.flags = flags header.gso_type = 0 @@ -147,7 +147,7 @@ function VirtioVirtq:get() if debug then assert(p ~= nil) end if debug then assert(physical(p.data) == desc.addr) end p.length = used.len - packet.shiftleft(p, pk_header_size) + p = packet.shiftleft(p, pk_header_size) self.last_used_idx = self.last_used_idx + 1 desc.next = self.free_head