diff --git a/lib/http/response.go b/lib/http/response.go index a90a5751..076ca189 100644 --- a/lib/http/response.go +++ b/lib/http/response.go @@ -45,6 +45,12 @@ type Response struct { // Keys in the map are canonicalized (see CanonicalHeaderKey). Header Header `json:"headers,omitempty"` + // The raw bytes of the MIME headers, as read from the underlying + // reader. This allows for post-processing to be done on an exact + // copy of the headers. The headers will not be canonicalized nor + // re-ordered or converted to a map. + HeadersRaw []byte `json:"headers_raw,omitempty"` + // Body represents the response body. // // The http Client and Transport guarantee that Body is always @@ -158,11 +164,23 @@ func (r *Response) Location() (*url.URL, error) { // After that call, clients can inspect resp.Trailer to find key/value // pairs included in the response trailer. func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) { + return readResponse(&TeeConn{br: r}, req) +} +func ReadResponseTee(tc *TeeConn, req *Request) (*Response, error) { + return readResponse(tc, req) +} +func readResponse(tc *TeeConn, req *Request) (*Response, error) { + r := tc.BufioReader() tp := textproto.NewReader(r) resp := &Response{ Request: req, } + // To extract the raw response through headers, we want to find the offsets + // for where we are at in the io.TeeReader compared to the bufio.Reader + // both at the start of the response parsing, and at the end. + hdrStart := tc.ReadPos() + // Parse the first line of the response. 
line, err := tp.ReadLine() if err != nil { @@ -202,6 +220,11 @@ func ReadResponse(r *bufio.Reader, req *Request) (*Response, error) { } return resp, err } + // No need to continue tee reads into the tee buffer, go ahead and + // disable it + tc.Disable() + hdrEnd := tc.ReadPos() + resp.HeadersRaw = tc.Bytes(hdrStart, hdrEnd) resp.Header = Header(mimeHeader) fixPragmaCacheControl(resp.Header) diff --git a/lib/http/transport.go b/lib/http/transport.go index 019f86dd..39154afd 100644 --- a/lib/http/transport.go +++ b/lib/http/transport.go @@ -11,6 +11,7 @@ package http import ( "bufio" + "bytes" "compress/gzip" "container/list" "context" @@ -198,6 +199,10 @@ type Transport struct { h2transport *http2Transport // non-nil if http2 wired up // TODO: tunable on max per-host TCP dials in flight (Issue 13957) + + // Enable raw read buffering and raw header extraction + // zgrab2-specific + RawHeaderBuffer bool } // onceSetNextProtoDefaults initializes TLSNextProto. @@ -1027,6 +1032,8 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon pconn.conn = conn } + pconn.tee = &TeeConn{} + // Proxy setup. switch { case cm.proxyURL == nil: @@ -1058,8 +1065,10 @@ func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistCon // Read response. // Okay to use and discard buffered reader here, because // TLS server will not speak until spoken to. 
// TeeConn pairs the buffered reader used to parse a connection with a
// side buffer that keeps the raw bytes read from that connection.
//
// The underlying br Reader is bufio, so it will perform read-ahead.
// The underlying tb is a bytes buffer that acts as a tee, receiving
// the raw bytes for reads against the io.Reader backing br.
type TeeConn struct {
	enabled bool          // tee writes to tb are enabled
	tb      bytes.Buffer  // buffer that raw reads are teed into
	br      *bufio.Reader // from conn
}

// ReadPos reports the current position in tb as seen by the consumer of
// the buffered reader: the number of bytes teed so far minus the bytes
// that br has read ahead but not yet handed out.
//
// It returns 0 while nothing has been teed. If no buffered reader is
// attached there is no read-ahead to subtract, so the tee length is
// returned as is.
//
// The result is only meaningful while every read that fills br is also
// teed into tb. Once teeing is disabled tb stops growing while br keeps
// reading, so the value may be negative or otherwise unrelated to the
// contents of tb; Bytes rejects such offsets.
func (t *TeeConn) ReadPos() int {
	l := t.tb.Len()
	if l == 0 || t.br == nil {
		return l
	}
	return l - t.br.Buffered()
}

// Bytes returns a copy of the teed bytes in the half-open range [s, e).
//
// It returns nil when the range does not lie inside the tee buffer
// (s < 0, s >= length, e < s or e > length) instead of panicking, and
// also for an empty range. The result is a copy, so it stays valid and
// does not pin or alias the tee buffer's internal storage.
//
// NOTE(review): offsets taken after Disable can still fall inside the
// buffer and select stale data; callers should only extract a range
// whose start offset was taken while teeing was enabled.
func (t *TeeConn) Bytes(s, e int) []byte {
	n := t.tb.Len()
	if s < 0 || s >= n || e < s || e > n {
		return nil
	}
	return append([]byte(nil), t.tb.Bytes()[s:e]...)
}

// BufioReader returns the buffered reader wrapped by t.
func (t *TeeConn) BufioReader() *bufio.Reader {
	return t.br
}

// Disable stops the tee writes to t.tb. Bytes already captured are kept.
func (t *TeeConn) Disable() {
	t.enabled = false
}
// It accumulates the number of bytes written to the underlying conn, // so the retry logic can determine whether any bytes made it across @@ -1277,7 +1322,7 @@ type persistConn struct { cacheKey connectMethodKey conn net.Conn tlsState *tls.ConnectionState - br *bufio.Reader // from conn + tee *TeeConn // from conn, includes a raw buffer and tee bw *bufio.Writer // to conn nwrite int64 // bytes written reqch chan requestAndChan // written by roundTrip; read by readLoop @@ -1329,6 +1374,11 @@ func (pc *persistConn) Read(p []byte) (n int, err error) { pc.sawEOF = true } pc.readLimit -= int64(n) + if pc.tee.enabled && n > 0 { + if n, err := pc.tee.tb.Write(p[:n]); err != nil { + return n, err + } + } return } @@ -1482,7 +1532,7 @@ func (pc *persistConn) readLoop() { alive := true for alive { pc.readLimit = pc.maxHeaderResponseSize() - _, err := pc.br.Peek(1) + _, err := pc.tee.br.Peek(1) pc.mu.Lock() if pc.numExpectedResponses == 0 { @@ -1636,7 +1686,7 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) { if pc.closed != nil { return } - if n := pc.br.Buffered(); n > 0 { + if n := pc.tee.br.Buffered(); n > 0 { } if peekErr == io.EOF { // common case. @@ -1651,11 +1701,11 @@ func (pc *persistConn) readLoopPeekFailLocked(peekErr error) { // trace is optional. 
func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTrace) (resp *Response, err error) { if trace != nil && trace.GotFirstResponseByte != nil { - if peek, err := pc.br.Peek(1); err == nil && len(peek) == 1 { + if peek, err := pc.tee.br.Peek(1); err == nil && len(peek) == 1 { trace.GotFirstResponseByte() } } - resp, err = ReadResponse(pc.br, rc.req) + resp, err = ReadResponseTee(pc.tee, rc.req) if err != nil { return } @@ -1671,7 +1721,7 @@ func (pc *persistConn) readResponse(rc requestAndChan, trace *httptrace.ClientTr } if resp.StatusCode == 100 { pc.readLimit = pc.maxHeaderResponseSize() // reset the limit - resp, err = ReadResponse(pc.br, rc.req) + resp, err = ReadResponseTee(pc.tee, rc.req) if err != nil { return } diff --git a/modules/http/scanner.go b/modules/http/scanner.go index 6992b7ee..14141f55 100644 --- a/modules/http/scanner.go +++ b/modules/http/scanner.go @@ -78,6 +78,9 @@ type Flags struct { // WithBodyLength enables adding the body_size field to the Response WithBodyLength bool `long:"with-body-size" description:"Enable the body_size attribute, for how many bytes actually read"` + + // Extract the raw header as it is on the wire + RawHeaders bool `long:"raw-headers" description:"Extract raw response up through headers"` } // A Results object is returned by the HTTP module's Scanner.Scan() @@ -449,6 +452,7 @@ func (scanner *Scanner) newHTTPScan(t *zgrab2.ScanTarget, useHTTPS bool) *scan { DisableKeepAlives: false, DisableCompression: false, MaxIdleConnsPerHost: scanner.config.MaxRedirects, + RawHeaderBuffer: scanner.config.RawHeaders, }, client: http.MakeNewClient(), globalDeadline: time.Now().Add(scanner.config.Timeout),