Skip to content

Commit

Permalink
build: import manually minified Chrome URL lib. (#3)
Browse files Browse the repository at this point in the history
This is a manually minified variant of
https://chromium.googlesource.com/chromium/src.git/+archive/74.0.3729.15/url.tar.gz,
providing just the parts needed for url::CanonicalizePath(). This is intended
to support a security release fix for CVE-2019-9901. Long term we need this to
be moved to absl or QUICHE for upgrades and long-term support.

Some specific transforms of interest:

* url_parse.h is minified to just Component and flattened back into the URL directory. It does not contain any non-Chromium authored code any longer and so does not have a separate LICENSE.
* envoy_shim.h adapts various macros to the Envoy context.
* Anything not reachable from url::CanonicalizePath() has been dropped.
* Header include paths have changed as needed.
* BUILD was manually written.
* Various clang-tidy and format fixes.

Risk level: Low
Testing: Validated with WiP PR for CVE-2019-9901.

Signed-off-by: Harvey Tuch <htuch@google.com>
  • Loading branch information
htuch committed Apr 5, 2019
1 parent b155af7 commit c22cfd2
Show file tree
Hide file tree
Showing 15 changed files with 1,428 additions and 2 deletions.
14 changes: 12 additions & 2 deletions ci/run_clang_tidy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,25 @@ function exclude_testdata() {
grep -v tools/testdata/check_format/
}

# Do not run clang-tidy against Chromium URL import, this needs to largely
# reflect the upstream structure.
function exclude_chromium_url() {
# Filters stdin, dropping every line that matches the chromium_url source path.
grep -v source/common/chromium_url/
}

function filter_excludes() {
# Chains both path filters so callers apply every exclusion with one pipeline stage.
exclude_testdata | exclude_chromium_url
}

# Pick the clang-tidy invocation: a full run when RUN_FULL_CLANG_TIDY is 1, a diff against
# the previous commit on the master branch, otherwise a diff against upstream master.
if [[ "${RUN_FULL_CLANG_TIDY}" == 1 ]]; then
echo "Running full clang-tidy..."
run-clang-tidy-7
elif [[ -z "${CIRCLE_PR_NUMBER}" && "$CIRCLE_BRANCH" == "master" ]]; then
echo "On master branch, running clang-tidy-diff against previous commit..."
# NOTE(review): the next two lines look like the removed and added sides of one diff hunk
# rendered together (exclude_testdata -> filter_excludes) -- confirm against the real file.
git diff HEAD^ | exclude_testdata | clang-tidy-diff-7.py -p 1
git diff HEAD^ | filter_excludes | clang-tidy-diff-7.py -p 1
else
echo "Running clang-tidy-diff against master branch..."
git fetch https://github.com/envoyproxy/envoy.git master
# NOTE(review): as above, the two `git diff` lines below appear to be the old and new
# versions of a single command -- confirm against the real file.
git diff $(git merge-base HEAD FETCH_HEAD)..HEAD | exclude_testdata | \
git diff $(git merge-base HEAD FETCH_HEAD)..HEAD | filter_excludes | \
clang-tidy-diff-7.py -p 1
fi
28 changes: 28 additions & 0 deletions source/common/chromium_url/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
licenses(["notice"]) # Apache 2

load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

envoy_package()

# Library target bundling the minified Chromium URL sources and headers listed below.
envoy_cc_library(
name = "chromium_url",
srcs = [
"url_canon.cc",
"url_canon_internal.cc",
"url_canon_path.cc",
"url_canon_stdstring.cc",
],
hdrs = [
"envoy_shim.h",
"url_canon.h",
"url_canon_internal.h",
"url_canon_stdstring.h",
"url_parse.h",
"url_parse_internal.h",
],
# Presumably supplies common/common/assert.h, which envoy_shim.h includes -- confirm.
deps = ["//source/common/common:assert_lib"],
)
27 changes: 27 additions & 0 deletions source/common/chromium_url/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15 changes: 15 additions & 0 deletions source/common/chromium_url/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
This is a manually minified variant of
https://chromium.googlesource.com/chromium/src.git/+archive/74.0.3729.15/url.tar.gz,
providing just the parts needed for `url::CanonicalizePath()`. This is intended
to support a security release fix for CVE-2019-9901. Long term we need this to
be moved to absl or QUICHE for upgrades and long-term support.

Some specific transforms of interest:
* `url_parse.h` is minified to just `Component` and flattened back into the URL
directory. It does not contain any non-Chromium authored code any longer and
so does not have a separate LICENSE.
* `envoy_shim.h` adapts various macros to the Envoy context.
* Anything not reachable from `url::CanonicalizePath()` has been dropped.
* Header include paths have changed as needed.
* BUILD was manually written.
* Various clang-tidy and format fixes.
17 changes: 17 additions & 0 deletions source/common/chromium_url/envoy_shim.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "common/common/assert.h"

// This is a minimal Envoy adaptation layer for the Chromium URL library.
// NOLINT(namespace-envoy)

// Declares the copy constructor and copy assignment operator of TypeName as deleted.
// The expansion has no trailing semicolon, so the use site must supply one.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
TypeName& operator=(const TypeName&) = delete

// Export-annotation macros referenced by the imported sources; here each expands to nothing.
#define EXPORT_TEMPLATE_DECLARE(x)
#define EXPORT_TEMPLATE_DEFINE(x)
#define COMPONENT_EXPORT(x)

// Map the assertion macro names used by the imported sources onto ASSERT and
// NOT_REACHED_GCOVR_EXCL_LINE (presumably provided by common/common/assert.h -- confirm).
#define DCHECK(x) ASSERT(x)
#define NOTREACHED() NOT_REACHED_GCOVR_EXCL_LINE
16 changes: 16 additions & 0 deletions source/common/chromium_url/url_canon.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Envoy snapshot of Chromium URL path normalization, see README.md.
// NOLINT(namespace-envoy)

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "common/chromium_url/url_canon.h"

#include "common/chromium_url/envoy_shim.h"

namespace url {

// Explicit instantiation definition of CanonOutputT<char>. Both wrapper macros expand to
// nothing under envoy_shim.h, so this reads as `template class CanonOutputT<char>;`.
template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;

} // namespace url
186 changes: 186 additions & 0 deletions source/common/chromium_url/url_canon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Envoy snapshot of Chromium URL path normalization, see README.md.
// NOLINT(namespace-envoy)

// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef URL_URL_CANON_H_
#define URL_URL_CANON_H_

#include <stdlib.h>
#include <string.h>

#include "common/chromium_url/envoy_shim.h"
#include "common/chromium_url/url_parse.h"

namespace url {

// Canonicalizer output -------------------------------------------------------

// Base class for the canonicalizer output, this maintains a buffer and
// supports simple resizing and append operations on it.
//
// It is VERY IMPORTANT that no virtual function calls be made on the common
// code path. We only have two virtual function calls, the destructor and a
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
template <typename T> class CanonOutputT {
public:
  CanonOutputT() : buffer_(nullptr), buffer_len_(0), cur_len_(0) {}
  virtual ~CanonOutputT() = default;

  // Implemented by the derived class to resize the buffer. The implementation
  // must point |buffer_| at the new storage, update |buffer_len_|, and copy
  // over any old data up to |cur_len_|.
  //
  // The new size |sz| must be larger than buffer_len_.
  virtual void Resize(int sz) = 0;

  // Returns the character at the given position. The offset is not checked
  // and must be in the valid range.
  inline T at(int offset) const { return buffer_[offset]; }

  // Sets the character at the given position. The given position MUST be less
  // than the length(); it is not checked.
  inline void set(int offset, T ch) { buffer_[offset] = ch; }

  // Returns the number of characters currently in the buffer.
  inline int length() const { return cur_len_; }

  // Returns the current capacity of the buffer. The length() is the number of
  // characters that have been declared to be written, but the capacity() is
  // the number that can be written without reallocation. If the caller must
  // write many characters at once, it can make sure there is enough capacity,
  // write the data, then use set_length() to declare the new length().
  int capacity() const { return buffer_len_; }

  // Called by the user of this class to get the output. The output will NOT
  // be NULL-terminated. Call length() to get the length.
  const T* data() const { return buffer_; }
  T* data() { return buffer_; }

  // Shortens the URL to the new length. Used for "backing up" when processing
  // relative paths. This can also be used if an external function writes a lot
  // of data to the buffer (via data()) beyond the end, to declare the new
  // length.
  //
  // This MUST NOT be used to expand the size of the buffer beyond capacity().
  void set_length(int new_len) { cur_len_ = new_len; }

  // Appends a single character, growing the buffer if it is full. If the
  // buffer cannot grow, the character is dropped.
  //
  // This is the most performance critical function, since it is called for
  // every character.
  void push_back(T ch) {
    // In VC2005, putting this common case first speeds up execution
    // dramatically because this branch is predicted as taken.
    if (cur_len_ < buffer_len_) {
      buffer_[cur_len_] = ch;
      cur_len_++;
      return;
    }

    // Grow the buffer to hold at least one more item. Hopefully we won't have
    // to do this very often.
    if (!Grow(1))
      return;

    // Actually do the insertion.
    buffer_[cur_len_] = ch;
    cur_len_++;
  }

  // Appends |str_len| characters starting at |str| to the output. If the
  // buffer cannot grow enough to hold them, nothing is appended.
  void Append(const T* str, int str_len) {
    // A non-positive length appends nothing. Returning early also keeps a
    // negative |str_len| from silently shrinking the recorded length.
    if (str_len <= 0)
      return;

    // Compare against the free space instead of computing
    // |cur_len_ + str_len|, which can overflow int for very large inputs.
    const int available = buffer_len_ - cur_len_;
    if (str_len > available) {
      if (!Grow(str_len - available))
        return;
    }
    for (int i = 0; i < str_len; i++)
      buffer_[cur_len_ + i] = str[i];
    cur_len_ += str_len;
  }

  // Resizes the buffer when |estimated_size| exceeds the current capacity.
  // NOTE(review): |estimated_size + 8| can overflow for values within 8 of the
  // int maximum; callers are assumed to pass realistic sizes.
  void ReserveSizeIfNeeded(int estimated_size) {
    // Reserve a bit extra to account for escaped chars.
    if (estimated_size > buffer_len_)
      Resize(estimated_size + 8);
  }

protected:
  // Grows the buffer so that it can fit at least |min_additional| more
  // characters than its current capacity. The capacity is doubled until it is
  // large enough. Returns true if Resize() was called, false if the doubling
  // would have to start from a length of 2^30 or more.
  bool Grow(int min_additional) {
    static const int kMinBufferLen = 16;
    // Computed in a wider type so that the sum cannot overflow int.
    const long long required = static_cast<long long>(buffer_len_) + min_additional;
    int new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
    do {
      if (new_len >= (1 << 30)) // Prevent overflow below.
        return false;
      new_len *= 2;
    } while (new_len < required);
    Resize(new_len);
    return true;
  }

  // Storage set up by the derived class and its capacity in characters.
  T* buffer_;
  int buffer_len_;

  // Used characters in the buffer.
  int cur_len_;
};

// Simple implementation of the CanonOutput using new[]. This class
// also supports a static buffer so if it is allocated on the stack, most
// URLs can be canonicalized with no heap allocations.
template <typename T, int fixed_capacity = 1024> class RawCanonOutputT : public CanonOutputT<T> {
public:
  // Starts out backed by the inline array, so no heap allocation happens until Resize() runs.
  RawCanonOutputT() : CanonOutputT<T>() {
    this->buffer_len_ = fixed_capacity;
    this->buffer_ = fixed_buffer_;
  }

  // Frees the current buffer only if it is not the inline array.
  ~RawCanonOutputT() override { FreeIfHeapAllocated(); }

  // Allocates a new array of |sz| elements with new[], copies over the used elements (at most
  // |sz| of them), frees the previous heap array if there was one, and adopts the new array.
  void Resize(int sz) override {
    T* replacement = new T[sz];
    const int keep = (sz > this->cur_len_) ? this->cur_len_ : sz;
    memcpy(replacement, this->buffer_, sizeof(T) * keep);
    FreeIfHeapAllocated();
    this->buffer_ = replacement;
    this->buffer_len_ = sz;
  }

protected:
  // Inline storage used until the first Resize().
  T fixed_buffer_[fixed_capacity];

private:
  // Deletes the active buffer unless it is the inline array.
  void FreeIfHeapAllocated() {
    if (this->buffer_ != fixed_buffer_)
      delete[] this->buffer_;
  }
};

// Explicit instantiation declaration for the commonly used char instantiation. Because of
// `extern`, this header does not instantiate it; the matching definition is in url_canon.cc.
extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;

// Canonicalization output is normally written as narrow characters, so this
// alias names the char instantiation. The class stays a template so that a
// wide buffer can still be used internally where one is required.
using CanonOutput = CanonOutputT<char>;

// Subclass that fixes the character type of RawCanonOutputT to char and adds no members.
// Unlike RawCanonOutputT, |fixed_capacity| has no default here and must be given explicitly.
template <int fixed_capacity>
class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};

// Path. If the input does not begin in a slash (including if the input is
// empty), we'll prepend a slash to the path to make it canonical.
//
// The 8-bit version assumes UTF-8 encoding, but does not verify the validity
// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
// an issue. Somebody giving us an 8-bit path is responsible for generating
// the path that the server expects (we'll escape high-bit characters), so
// if something is invalid, it's their problem.
//
// NOTE(review): only the declaration is visible in this header. Presumably |path| selects the
// path's span within |spec|, the canonical form is written to |output|, and |out_path|
// receives its span there; the meaning of the bool result is not shown here -- confirm
// against the definition.
COMPONENT_EXPORT(URL)
bool CanonicalizePath(const char* spec, const Component& path, CanonOutput* output,
Component* out_path);

} // namespace url

#endif // URL_URL_CANON_H_
Loading

0 comments on commit c22cfd2

Please sign in to comment.