Skip to content

Commit

Permalink
[Backport 2.x] Use own copy of PercentCodec for URI path encoding (op…
Browse files Browse the repository at this point in the history
…ensearch-project#1119)

* Use own copy of PercentCodec for URI path encoding (opensearch-project#1109)

* Use own copy of PercentCodec for URI path encoding

Adapted from Apache HttpComponents HttpCore v5's https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Refactor PercentCodec a bit

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Add change log

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Switch to system property

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* spotless

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* Add UPGRADING note

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

---------

Signed-off-by: Thomas Farr <tsfarr@amazon.com>
(cherry picked from commit 3d8061e)

* Tweak default encoding behavior to match prior behavior

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

* spotless

Signed-off-by: Thomas Farr <tsfarr@amazon.com>

---------

Signed-off-by: Thomas Farr <tsfarr@amazon.com>
  • Loading branch information
Xtansia authored Aug 1, 2024
1 parent 3324f59 commit 74668e1
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 83 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
### Dependencies

### Changed
- Changed URL path encoding to use the client's own implementation, adapted from Apache HttpComponents HttpCore 5's `PercentCodec` ([#1109](https://github.com/opensearch-project/opensearch-java/pull/1109))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ public static RuntimeException noPathTemplateFound(String what) {
}

public static void pathEncode(String src, StringBuilder dest) {
dest.append(PathEncoder.encode(src));
PathEncoder.encode(dest, src);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,54 @@

package org.opensearch.client.util;

/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Optional;

public class PathEncoder {
private final static String HTTP_CLIENT4_UTILS_CLASS = "org.apache.http.client.utils.URLEncodedUtils";
private final static String HTTP_CLIENT5_UTILS_CLASS = "org.apache.hc.core5.net.URLEncodedUtils";
private final static MethodHandle FORMAT_SEGMENTS_MH;
private enum Encoding {
RFC3986_PATH(PercentCodec.RFC3986_PATH),
HTTP_CLIENT_V4_EQUIV(PercentCodec.RFC3986_PATH),

static {
Class<?> clazz = null;
try {
// Try Apache HttpClient4 first since this is a default one
clazz = Class.forName(HTTP_CLIENT4_UTILS_CLASS);
} catch (final ClassNotFoundException ex) {
try {
// Fallback to Apache HttpClient5
clazz = Class.forName(HTTP_CLIENT5_UTILS_CLASS);
} catch (final ClassNotFoundException ex1) {
clazz = null;
}
}
RFC3986_UNRESERVED(PercentCodec.RFC3986_UNRESERVED),
HTTP_CLIENT_V5_EQUIV(PercentCodec.RFC3986_UNRESERVED);

private final PercentCodec percentCodec;

if (clazz == null) {
throw new IllegalStateException(
"Either '" + HTTP_CLIENT5_UTILS_CLASS + "' or '" + HTTP_CLIENT4_UTILS_CLASS + "' is required by not found on classpath"
);
Encoding(PercentCodec percentCodec) {
this.percentCodec = percentCodec;
}

try {
FORMAT_SEGMENTS_MH = MethodHandles.lookup()
.findStatic(clazz, "formatSegments", MethodType.methodType(String.class, Iterable.class, Charset.class));
} catch (final NoSuchMethodException | IllegalAccessException ex) {
throw new IllegalStateException("Unable to find 'formatSegments' method in " + clazz + " class");
/**
 * Looks up an encoding constant by name, ignoring case.
 *
 * @param name the encoding name to resolve; may be {@code null}
 * @return the matching constant, or an empty {@code Optional} when {@code name} is {@code null}
 *         or does not match any constant
 */
static Optional<Encoding> get(String name) {
    if (name == null) {
        return Optional.empty();
    }
    try {
        // Upper-case with Locale.ROOT so the result does not depend on the JVM's default locale:
        // under a Turkish/Azeri locale "i" becomes a dotted capital I and names such as
        // HTTP_CLIENT_V4_EQUIV would never match.
        return Optional.of(Encoding.valueOf(name.toUpperCase(java.util.Locale.ROOT)));
    } catch (IllegalArgumentException ignored) {
        // Unknown name: report "no match" rather than failing.
        return Optional.empty();
    }
}
}

public static String encode(String uri) {
private static boolean isHttpClientV4InClasspath() {
try {
return ((String) FORMAT_SEGMENTS_MH.invoke(Collections.singletonList(uri), StandardCharsets.UTF_8)).substring(1);
} catch (final Throwable ex) {
throw new RuntimeException("Unable to encode URI: " + uri, ex);
Class.forName("org.apache.http.client.utils.URLEncodedUtils");
return true;
} catch (Exception ignored) {
return false;
}
}

// Name of the JVM system property consulted to select the path encoding.
private static final String ENCODING_PROPERTY = "org.opensearch.path.encoding";
// Encoding used when the property does not select one: chosen by whether the
// HttpClient v4 utility class can be loaded from the classpath.
private static final Encoding ENCODING_DEFAULT = isHttpClientV4InClasspath()
? Encoding.HTTP_CLIENT_V4_EQUIV
: Encoding.HTTP_CLIENT_V5_EQUIV;

// Effective encoding, resolved once during class initialization. The property value is
// mapped through Encoding.get; an unset property or a value that Encoding.get does not
// resolve falls back to ENCODING_DEFAULT.
private static final Encoding ENCODING = Optional.ofNullable(System.getProperty(ENCODING_PROPERTY))
.flatMap(Encoding::get)
.orElse(ENCODING_DEFAULT);

/**
 * Percent-encodes a single path segment with the codec of the configured encoding.
 *
 * @param pathSegment the raw (unencoded) path segment
 * @return the encoded segment, as produced by the configured codec
 */
public static String encode(String pathSegment) {
    final PercentCodec codec = ENCODING.percentCodec;
    return codec.encode(pathSegment);
}

/**
 * Percent-encodes a single path segment with the codec of the configured encoding,
 * appending the result to an existing builder.
 *
 * @param dest the builder that receives the encoded segment
 * @param pathSegment the raw (unencoded) path segment
 */
public static void encode(StringBuilder dest, CharSequence pathSegment) {
    final PercentCodec codec = ENCODING.percentCodec;
    codec.encode(dest, pathSegment);
}
}
180 changes: 180 additions & 0 deletions java-client/src/main/java/org/opensearch/client/util/PercentCodec.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.client.util;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;

/**
 * Percent-encoding and decoding of URI components.
 * <p>
 * Adapted from Apache HttpComponents HttpCore v5's <a href="https://github.com/apache/httpcomponents-core/blob/e009a923eefe79cf3593efbb0c18a3525ae63669/httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java">PercentCodec.java</a>
 * </p>
 * <p>
 * A codec instance is defined by the set of characters it leaves untouched when encoding. Every other
 * byte of the UTF-8 representation of the input is written as {@code %XX} using upper-case hex digits.
 * </p>
 */
class PercentCodec {
    /** A set of characters, backed by a {@link BitSet} indexed by character code. */
    private static final class Chars {
        private final BitSet set = new BitSet(256);

        /** Adds each of the given characters and returns this set for chaining. */
        Chars add(char... chars) {
            for (char c : chars) {
                set.set(c);
            }
            return this;
        }

        /** Adds every character from {@code start} to {@code end}, both inclusive. */
        Chars addRange(char start, char end) {
            // BitSet.set(from, to) treats 'to' as exclusive, hence the + 1.
            set.set(start, end + 1);
            return this;
        }

        /** Adds all members of another set and returns this set for chaining. */
        Chars add(Chars other) {
            this.set.or(other.set);
            return this;
        }

        /** Returns whether the character (or byte value) {@code c} is a member of this set. */
        boolean contains(int c) {
            return set.get(c);
        }
    }

    private static final Chars RFC3986_GEN_DELIMS_CHARS = new Chars().add(':', '/', '?', '#', '[', ']', '@');
    private static final Chars RFC3986_SUB_DELIMS_CHARS = new Chars().add('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=');
    private static final Chars RFC3986_UNRESERVED_CHARS = new Chars().addRange('a', 'z')
        .addRange('A', 'Z')
        .addRange('0', '9')
        .add('-', '.', '_', '~');
    private static final Chars RFC3986_PATH_NO_COLON_CHARS = new Chars().add(RFC3986_UNRESERVED_CHARS)
        .add(RFC3986_SUB_DELIMS_CHARS)
        .add('@');
    private static final Chars RFC3986_PATH_CHARS = new Chars().add(RFC3986_PATH_NO_COLON_CHARS).add(':');
    private static final Chars RFC3986_URIC_CHARS = new Chars().add(RFC3986_SUB_DELIMS_CHARS).add(RFC3986_UNRESERVED_CHARS);

    private static final Chars RFC5987_UNRESERVED_CHARS = new Chars().addRange('a', 'z')
        .addRange('A', 'Z')
        .addRange('0', '9')
        // Additional characters as per RFC 5987 attr-char
        .add('!', '#', '$', '&', '+', '-', '.', '^', '_', '`', '|', '~');

    private static final int RADIX = 16;

    /**
     * Appends the percent-encoded form of {@code content} to {@code buf}.
     *
     * @param buf         destination builder
     * @param content     text to encode; {@code null} appends nothing
     * @param charset     charset used to turn the text into bytes; {@code null} means UTF-8
     * @param safeChars   byte values that are copied through unescaped
     * @param blankAsPlus whether a space that is not in {@code safeChars} is written as {@code +}
     */
    private static void encode(
        final StringBuilder buf,
        final CharSequence content,
        final Charset charset,
        final Chars safeChars,
        final boolean blankAsPlus
    ) {
        if (content == null) {
            return;
        }
        final Charset cs = charset != null ? charset : StandardCharsets.UTF_8;
        final ByteBuffer bytes = cs.encode(CharBuffer.wrap(content));
        while (bytes.hasRemaining()) {
            final int b = bytes.get() & 0xff;
            if (safeChars.contains(b)) {
                buf.append((char) b);
            } else if (blankAsPlus && b == ' ') {
                buf.append('+');
            } else {
                buf.append('%');
                buf.append(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)));
                buf.append(Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)));
            }
        }
    }

    /**
     * Decodes percent-escapes in {@code content}.
     * <p>
     * Runs of {@code %XX} escapes are collected as bytes and decoded with the charset. All other
     * characters are copied through unchanged, so literal non-ASCII characters (including surrogate
     * pairs) survive instead of being truncated to a single byte. A {@code %} that is not followed by
     * two hex digits is kept literally.
     * </p>
     *
     * @param content     text to decode; {@code null} yields {@code null}
     * @param charset     charset used to interpret the escaped bytes; {@code null} means UTF-8
     * @param plusAsBlank whether a literal {@code +} is decoded as a space
     * @return the decoded text, or {@code null} when {@code content} is {@code null}
     */
    private static String decode(final CharSequence content, final Charset charset, final boolean plusAsBlank) {
        if (content == null) {
            return null;
        }
        final Charset cs = charset != null ? charset : StandardCharsets.UTF_8;
        final int length = content.length();
        final StringBuilder out = new StringBuilder(length);
        // Each escape consumes three input characters, so at most length / 3 bytes can be pending.
        final ByteBuffer pending = ByteBuffer.allocate(length / 3);
        int i = 0;
        while (i < length) {
            final char c = content.charAt(i);
            if (c == '%' && length - i >= 3) {
                final char uc = content.charAt(i + 1);
                final char lc = content.charAt(i + 2);
                final int u = Character.digit(uc, RADIX);
                final int l = Character.digit(lc, RADIX);
                i += 3;
                if (u != -1 && l != -1) {
                    pending.put((byte) ((u << 4) + l));
                } else {
                    // Not a valid escape: keep all three characters as they were.
                    flushPending(pending, cs, out);
                    out.append('%').append(uc).append(lc);
                }
                continue;
            }
            flushPending(pending, cs, out);
            out.append(plusAsBlank && c == '+' ? ' ' : c);
            i++;
        }
        flushPending(pending, cs, out);
        return out.toString();
    }

    /** Decodes any bytes collected from escapes, appends the result to {@code out} and resets the buffer. */
    private static void flushPending(final ByteBuffer pending, final Charset charset, final StringBuilder out) {
        if (pending.position() > 0) {
            pending.flip();
            out.append(charset.decode(pending));
            pending.clear();
        }
    }

    public static final PercentCodec RFC3986_UNRESERVED = new PercentCodec(RFC3986_UNRESERVED_CHARS);
    public static final PercentCodec RFC3986_PATH = new PercentCodec(RFC3986_PATH_CHARS);
    public static final PercentCodec RFC5987_UNRESERVED = new PercentCodec(RFC5987_UNRESERVED_CHARS);

    private final Chars unreserved;

    private PercentCodec(final Chars unreserved) {
        this.unreserved = unreserved;
    }

    /**
     * Appends the percent-encoded UTF-8 form of {@code content} to {@code buf}.
     *
     * @param buf     destination builder
     * @param content text to encode; {@code null} appends nothing
     */
    public void encode(final StringBuilder buf, final CharSequence content) {
        encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
    }

    /**
     * Returns the percent-encoded UTF-8 form of {@code content}.
     *
     * @param content text to encode
     * @return the encoded text, or {@code null} when {@code content} is {@code null}
     */
    public String encode(final CharSequence content) {
        if (content == null) {
            return null;
        }
        final StringBuilder buf = new StringBuilder();
        encode(buf, content, StandardCharsets.UTF_8, unreserved, false);
        return buf.toString();
    }

    /**
     * Decodes percent-escapes in {@code content}, interpreting escaped bytes as UTF-8.
     *
     * @param content text to decode
     * @return the decoded text, or {@code null} when {@code content} is {@code null}
     */
    public String decode(final CharSequence content) {
        return decode(content, StandardCharsets.UTF_8, false);
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.client.util;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Collection;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * Parameterized round-trip checks for the {@code RFC3986_UNRESERVED} and {@code RFC3986_PATH} codecs.
 * Each row supplies a raw value and the encoded form expected from each codec.
 */
@RunWith(Parameterized.class)
public class PercentCodecTest {
    /** Builds one parameter row: {@code <unencoded>, <encoded_unreserved>, <encoded_path>}. */
    private static Object[] row(String raw, String unreservedForm, String pathForm) {
        return new Object[] { raw, unreservedForm, pathForm };
    }

    @Parameterized.Parameters
    public static Collection<Object[]> testData() {
        return Arrays.asList(
            row("test", "test", "test"),
            row("abc123", "abc123", "abc123"),
            row("a/b", "a%2Fb", "a%2Fb"),
            row("a/b/c/_refresh", "a%2Fb%2Fc%2F_refresh", "a%2Fb%2Fc%2F_refresh"),
            row("a:b:c:d:e::1.0", "a%3Ab%3Ac%3Ad%3Ae%3A%3A1.0", "a:b:c:d:e::1.0"),
            row("a,b,c", "a%2Cb%2Cc", "a,b,c")
        );
    }

    private final String raw;
    private final String unreservedForm;
    private final String pathForm;

    public PercentCodecTest(String decoded, String encodedRFC3986Unreserved, String encodedRFC3986Path) {
        this.raw = decoded;
        this.unreservedForm = encodedRFC3986Unreserved;
        this.pathForm = encodedRFC3986Path;
    }

    /** Encoding with the unreserved-only codec yields the expected escaped form. */
    @Test
    public void test_RFC3986_UNRESERVED_encoding() {
        final String actual = PercentCodec.RFC3986_UNRESERVED.encode(raw);
        assertEquals(unreservedForm, actual);
    }

    /** Decoding the unreserved-only form restores the raw value. */
    @Test
    public void test_RFC3986_UNRESERVED_decoding() {
        final String actual = PercentCodec.RFC3986_UNRESERVED.decode(unreservedForm);
        assertEquals(raw, actual);
    }

    /** Encoding with the path codec yields the expected escaped form. */
    @Test
    public void test_RFC3986_PATH_encoding() {
        final String actual = PercentCodec.RFC3986_PATH.encode(raw);
        assertEquals(pathForm, actual);
    }

    /** Decoding the path form restores the raw value. */
    @Test
    public void test_RFC3986_PATH_decoding() {
        final String actual = PercentCodec.RFC3986_PATH.decode(pathForm);
        assertEquals(raw, actual);
    }
}

0 comments on commit 74668e1

Please sign in to comment.