Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

http: allow raw header capture (#347) #349

Merged
merged 1 commit into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions lib/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ type Response struct {
// Keys in the map are canonicalized (see CanonicalHeaderKey).
Header Header `json:"headers,omitempty"`

// The raw bytes of the MIME headers, as read from the underlying
// reader. This allows for post-processing to be done on an exact
// copy of the headers. The headers are not canonicalized,
// re-ordered, or converted to a map.
HeadersRaw []byte `json:"headers_raw,omitempty"`

// Body represents the response body.
//
// The http Client and Transport guarantee that Body is always
Expand Down Expand Up @@ -158,11 +164,23 @@ func (r *Response) Location() (*url.URL, error) {
// After that call, clients can inspect resp.Trailer to find key/value
// pairs included in the response trailer.
func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) {
	// Wrap the caller's reader in a TeeConn so the shared parser can be
	// used; the enabled flag is left at its zero value (false).
	tc := &TeeConn{br: r}
	return readResponse(tc, req)
}
// ReadResponseTee reads and returns an HTTP response using the buffered
// reader held by tc. It is the counterpart of ReadResponse for callers that
// already own a TeeConn: the parser uses tc's read positions to fill in the
// response's HeadersRaw field.
func ReadResponseTee(tc *TeeConn, req *Request) (*Response, error) {
	resp, err := readResponse(tc, req)
	return resp, err
}
func readResponse(tc *TeeConn, req *Request) (*Response, error) {
r := tc.BufioReader()
tp := textproto.NewReader(r)
resp := &Response{
Request: req,
}

// To extract the raw response through the headers, we record our offset
// in the tee buffer (adjusted for what the bufio.Reader has read ahead)
// both at the start of the response parsing and at the end.
hdrStart := tc.ReadPos()

// Parse the first line of the response.
line, err := tp.ReadLine()
if err != nil {
Expand Down Expand Up @@ -202,6 +220,11 @@ func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) {
}
return resp, err
}
// No need to continue tee reads into the tee buffer, go ahead and
// disable it
tc.Disable()
hdrEnd := tc.ReadPos()
resp.HeadersRaw = tc.Bytes(hdrStart, hdrEnd)
resp.Header = Header(mimeHeader)

fixPragmaCacheControl(resp.Header)
Expand Down
68 changes: 59 additions & 9 deletions lib/http/transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package http

import (
"bufio"
"bytes"
"compress/gzip"
"container/list"
"context"
Expand Down Expand Up @@ -198,6 +199,10 @@ type Transport struct {
h2transport *http2Transport // non-nil if http2 wired up

// TODO: tunable on max per-host TCP dials in flight (Issue 13957)

// Enable raw read buffering and raw header extraction
// zgrab2-specific
RawHeaderBuffer bool
}

// onceSetNextProtoDefaults initializes TLSNextProto.
Expand Down Expand Up @@ -1027,6 +1032,8 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
pconn.conn = conn
}

pconn.tee = &TeeConn{}

// Proxy setup.
switch {
case cm.proxyURL == nil:
Expand Down Expand Up @@ -1058,8 +1065,10 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
// Read response.
// Okay to use and discard buffered reader here, because
// TLS server will not speak until spoken to.
br := bufio.NewReader(conn)
resp, err := ReadResponse(br, connectReq)
tee := TeeConn{
br: bufio.NewReader(conn),
}
resp, err := ReadResponseTee(&tee, connectReq)
if err != nil {
conn.Close()
return nil, err
Expand Down Expand Up @@ -1123,13 +1132,49 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon
}
}

pconn.br = bufio.NewReader(pconn)
pconn.tee.br = bufio.NewReader(pconn)
pconn.tee.enabled = t.RawHeaderBuffer
pconn.bw = bufio.NewWriter(persistConnWriter{pconn})
go pconn.readLoop()
go pconn.writeLoop()
return pconn, nil
}

// TeeConn pairs the buffered reader used to parse a connection with a
// capture buffer holding the raw bytes read from that connection.
//
// br is a bufio.Reader, so it performs read-ahead: it may hold bytes that
// the parser has not consumed yet. tb is a bytes.Buffer that acts as a tee,
// receiving the raw bytes of reads against the io.Reader backing br while
// enabled is true.
type TeeConn struct {
	enabled bool          // tee writes to tb are enabled
	tb      bytes.Buffer  // buffer that raw reads are teed into
	br      *bufio.Reader // from conn
}

// ReadPos returns the current position in tb as seen by the consumer of br.
//
// tb holds everything read from the connection, including bytes that br has
// buffered but not yet handed out, so the buffered portion is subtracted.
// An empty tb always yields 0. The result can be negative when br holds
// more unread bytes than were ever teed (for example after Disable); Bytes
// treats such an offset as "nothing captured".
func (t *TeeConn) ReadPos() int {
	l := t.tb.Len()
	if l == 0 {
		return 0
	}
	return l - t.br.Buffered()
}

// Bytes returns a copy of the captured bytes in the half-open range [s, e).
//
// It returns nil when the range does not describe captured data: s is
// negative, s is at or past the end of the buffer, or the range is empty or
// inverted. An end offset past the buffer is clamped to the buffer length.
// The result is a copy, so it stays valid after later writes to the buffer.
func (t *TeeConn) Bytes(s, e int) []byte {
	n := t.tb.Len()
	if s < 0 || s >= n {
		return nil
	}
	if e > n {
		e = n
	}
	if e <= s {
		return nil
	}
	// bytes.Buffer.Bytes aliases internal storage that is only valid until
	// the next buffer modification, so detach the returned slice.
	return append([]byte(nil), t.tb.Bytes()[s:e]...)
}

// BufioReader returns the buffered reader that parsing should read from.
func (t *TeeConn) BufioReader() *bufio.Reader {
	return t.br
}

// Disable stops the tee writes to t.tb. Bytes already captured are kept.
func (t *TeeConn) Disable() {
	t.enabled = false
}

// persistConnWriter is the io.Writer written to by pc.bw.
// It accumulates the number of bytes written to the underlying conn,
// so the retry logic can determine whether any bytes made it across
Expand Down Expand Up @@ -1277,7 +1322,7 @@ type persistConn struct {
cacheKey connectMethodKey
conn net.Conn
tlsState *tls.ConnectionState
br *bufio.Reader // from conn
tee *TeeConn // from conn, includes a raw buffer and tee
bw *bufio.Writer // to conn
nwrite int64 // bytes written
reqch chan requestAndChan // written by roundTrip; read by readLoop
Expand Down Expand Up @@ -1329,6 +1374,11 @@ func (pc *persistConn) Read(p []byte) (n int, err error) {
pc.sawEOF = true
}
pc.readLimit -= int64(n)
if pc.tee.enabled && n > 0 {
if n, err := pc.tee.tb.Write(p[:n]); err != nil {
return n, err
}
}
return
}

Expand Down Expand Up @@ -1482,7 +1532,7 @@ func (pc *persistConn) readLoop() {
alive := true
for alive {
pc.readLimit = pc.maxHeaderResponseSize()
_, err := pc.br.Peek(1)
_, err := pc.tee.br.Peek(1)

pc.mu.Lock()
if pc.numExpectedResponses == 0 {
Expand Down Expand Up @@ -1636,7 +1686,7 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) {
if pc.closed != nil {
return
}
if n := pc.br.Buffered(); n > 0 {
if n := pc.tee.br.Buffered(); n > 0 {
}
if peekErr == io.EOF {
// common case.
Expand All @@ -1651,11 +1701,11 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) {
// trace is optional.
func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTrace) (resp *Response, err error) {
if trace != nil && trace.GotFirstResponseByte != nil {
if peek, err := pc.br.Peek(1); err == nil && len(peek) == 1 {
if peek, err := pc.tee.br.Peek(1); err == nil && len(peek) == 1 {
trace.GotFirstResponseByte()
}
}
resp, err = ReadResponse(pc.br, rc.req)
resp, err = ReadResponseTee(pc.tee, rc.req)
if err != nil {
return
}
Expand All @@ -1671,7 +1721,7 @@ func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTr
}
if resp.StatusCode == 100 {
pc.readLimit = pc.maxHeaderResponseSize() // reset the limit
resp, err = ReadResponse(pc.br, rc.req)
resp, err = ReadResponseTee(pc.tee, rc.req)
if err != nil {
return
}
Expand Down
4 changes: 4 additions & 0 deletions modules/http/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ type Flags struct {

// WithBodyLength enables adding the body_size field to the Response
WithBodyLength bool `long:"with-body-size" description:"Enable the body_size attribute, for how many bytes actually read"`

// Extract the raw header as it is on the wire
RawHeaders bool `long:"raw-headers" description:"Extract raw response up through headers"`
}

// A Results object is returned by the HTTP module's Scanner.Scan()
Expand Down Expand Up @@ -449,6 +452,7 @@ func (scanner *Scanner) newHTTPScan(t *zgrab2.ScanTarget, useHTTPS bool) *scan {
DisableKeepAlives: false,
DisableCompression: false,
MaxIdleConnsPerHost: scanner.config.MaxRedirects,
RawHeaderBuffer: scanner.config.RawHeaders,
},
client: http.MakeNewClient(),
globalDeadline: time.Now().Add(scanner.config.Timeout),
Expand Down