Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embed the public suffix database list directly inside a class #8589

Merged
merged 12 commits into from
Nov 27, 2024
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ apply(plugin = "com.diffplug.spotless")
// Spotless formatting configuration for Kotlin sources.
configure<SpotlessExtension> {
kotlin {
// Run ktlint over every Kotlin file matched by this glob...
target("**/*.kt")
// ...except files under a kotlinTemplates directory.
// NOTE(review): presumably excluded because templates carry Gradle `expand` placeholders
// (e.g. `$publicSuffixListBytes`) and are not valid Kotlin until expanded — confirm.
targetExclude("**/kotlinTemplates/**/*.kt")
ktlint()
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,8 @@ package okhttp3

import assertk.assertThat
import assertk.assertions.isEqualTo
import assertk.assertions.isGreaterThan
import okhttp3.HttpUrl.Companion.toHttpUrl
import okhttp3.internal.publicsuffix.PublicSuffixDatabase
import okhttp3.testing.PlatformRule
import okio.FileSystem
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.RegisterExtension

Expand All @@ -36,12 +33,4 @@ class PublicSuffixDatabaseTest {

assertThat(url.topPrivateDomain()).isEqualTo("twitter.com")
}

/** Checks that the public suffix database resource is reachable and larger than 30000 bytes. */
@Test
fun testPublicSuffixes() {
  platform.assumeNotGraalVMImage()

  // Look the resource up through okio's resource file system and read its recorded size.
  val resourceSize = FileSystem.RESOURCES.metadata(PublicSuffixDatabase.PUBLIC_SUFFIX_RESOURCE).size!!

  assertThat(resourceSize).isGreaterThan(30000)
}
}
23 changes: 21 additions & 2 deletions okhttp/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import com.vanniktech.maven.publish.JavadocJar
import com.vanniktech.maven.publish.KotlinJvm
import java.io.DataInputStream
import java.io.FileInputStream
import java.util.Base64
import java.util.zip.GZIPInputStream

plugins {
kotlin("jvm")
Expand All @@ -9,12 +13,27 @@ plugins {
id("binary-compatibility-validator")
}

// Build & use okhttp3/internal/-InternalVersion.kt
/**
 * Renders this byte array as a Kotlin source expression: a Base64 string literal followed by
 * `.decodeBase64()!!`. The resulting text is substituted into a Kotlin template, where
 * `decodeBase64` is expected to be in scope (the template imports it from okio's `ByteString`).
 *
 * @return the expression text, e.g. `"AQID".decodeBase64()!!` for the bytes 1, 2, 3.
 */
fun ByteArray.toByteStringExpression(): String {
  val base64 = Base64.getEncoder().encodeToString(this)
  return "\"$base64\".decodeBase64()!!"
}

/**
 * Copies the Kotlin templates from `src/main/kotlinTemplates` into the generated-sources
 * directory, filling in the placeholders the templates contain.
 */
val copyKotlinTemplates = tasks.register<Copy>("copyKotlinTemplates") {
  from("src/main/kotlinTemplates")
  into("$buildDir/generated/sources/kotlinTemplates")

  filteringCharset = Charsets.UTF_8.toString()

  // TODO replace with KotlinPoet?
  val databaseGz = project.file("src/test/resources/okhttp3/internal/publicsuffix/PublicSuffixDatabase.gz")

  // The database content is baked into the generated source, so declare the file as a task
  // input: a changed .gz must re-run the copy even when the template files are untouched.
  inputs.file(databaseGz)

  val listBytes = databaseGz.readBytes().toByteStringExpression()

  // One expand() call supplies every placeholder. Expanding in two passes would run the first
  // pass without `publicSuffixListBytes`, which the public suffix template references.
  expand(
    // Build & use okhttp3/internal/-InternalVersion.kt
    "projectVersion" to project.version,

    // Build okhttp3/internal/publicsuffix/EmbeddedPublicSuffixList.kt
    "publicSuffixListBytes" to listBytes,
  )
}

// Build & use okhttp3/internal/idn/IdnaMappingTableInstance.kt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,19 @@
*/
package okhttp3.internal.publicsuffix

import java.io.IOException
import java.io.InterruptedIOException
import java.net.IDN
import java.util.concurrent.CountDownLatch
import java.util.concurrent.atomic.AtomicBoolean
import okhttp3.internal.and
import okhttp3.internal.platform.Platform
import okio.FileSystem
import okio.GzipSource
import okio.Path
import okio.Path.Companion.toPath
import okio.buffer
import okio.ByteString
import okio.ByteString.Companion.encodeUtf8

/**
* A database of public suffixes provided by [publicsuffix.org][publicsuffix_org].
*
* [publicsuffix_org]: https://publicsuffix.org/
*/
class PublicSuffixDatabase internal constructor(
val path: Path = PUBLIC_SUFFIX_RESOURCE,
val fileSystem: FileSystem = FileSystem.RESOURCES,
private val publicSuffixList: PublicSuffixList,
) {
/** True after we've attempted to read the list for the first time. */
private val listRead = AtomicBoolean(false)

/** Used for concurrent threads reading the list for the first time. */
private val readCompleteLatch = CountDownLatch(1)

// The lists are held as a large array of UTF-8 bytes. This is to avoid allocating lots of strings
// that will likely never be used. Each rule is separated by '\n'. Please see the
// PublicSuffixListGenerator class for how these lists are generated.
// Guarded by this.
private lateinit var publicSuffixListBytes: ByteArray
private lateinit var publicSuffixExceptionListBytes: ByteArray

/**
* Returns the effective top-level domain plus one (eTLD+1) by referencing the public suffix list.
* Returns null if the domain is a public suffix or a private address.
Expand Down Expand Up @@ -101,29 +79,16 @@ class PublicSuffixDatabase internal constructor(
}

private fun findMatchingRule(domainLabels: List<String>): List<String> {
if (!listRead.get() && listRead.compareAndSet(false, true)) {
readTheListUninterruptibly()
} else {
try {
readCompleteLatch.await()
} catch (_: InterruptedException) {
Thread.currentThread().interrupt() // Retain interrupted status.
}
}

check(::publicSuffixListBytes.isInitialized) {
// May have failed with an IOException
"Unable to load $PUBLIC_SUFFIX_RESOURCE resource from the classpath."
}
publicSuffixList.ensureLoaded()

// Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com].
val domainLabelsUtf8Bytes = Array(domainLabels.size) { i -> domainLabels[i].toByteArray() }
val domainLabelsUtf8Bytes = Array(domainLabels.size) { i -> domainLabels[i].encodeUtf8() }

// Start by looking for exact matches. We start at the leftmost label. For example, foo.bar.com
// will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins.
var exactMatch: String? = null
for (i in domainLabelsUtf8Bytes.indices) {
val rule = publicSuffixListBytes.binarySearch(domainLabelsUtf8Bytes, i)
val rule = publicSuffixList.bytes.binarySearch(domainLabelsUtf8Bytes, i)
if (rule != null) {
exactMatch = rule
break
Expand All @@ -140,7 +105,7 @@ class PublicSuffixDatabase internal constructor(
val labelsWithWildcard = domainLabelsUtf8Bytes.clone()
for (labelIndex in 0 until labelsWithWildcard.size - 1) {
labelsWithWildcard[labelIndex] = WILDCARD_LABEL
val rule = publicSuffixListBytes.binarySearch(labelsWithWildcard, labelIndex)
val rule = publicSuffixList.bytes.binarySearch(labelsWithWildcard, labelIndex)
if (rule != null) {
wildcardMatch = rule
break
Expand All @@ -153,7 +118,7 @@ class PublicSuffixDatabase internal constructor(
if (wildcardMatch != null) {
for (labelIndex in 0 until domainLabelsUtf8Bytes.size - 1) {
val rule =
publicSuffixExceptionListBytes.binarySearch(
publicSuffixList.exceptionBytes.binarySearch(
domainLabelsUtf8Bytes,
labelIndex,
)
Expand Down Expand Up @@ -182,84 +147,20 @@ class PublicSuffixDatabase internal constructor(
}
}

/**
 * Reads the public suffix list treating the operation as uninterruptible. We always want to read
 * the list otherwise we'll be left in a bad state. If the thread was interrupted prior to this
 * operation, it will be re-interrupted after the list is read.
 *
 * A non-interrupt [IOException] is logged as a warning and not rethrown.
 */
private fun readTheListUninterruptibly() {
// Records that an interrupt was swallowed so it can be restored before returning.
var interrupted = false
try {
// Keep retrying until the read succeeds or fails with a non-interrupt IOException.
while (true) {
try {
readTheList()
return
} catch (_: InterruptedIOException) {
Thread.interrupted() // Temporarily clear the interrupted state.
interrupted = true
} catch (e: IOException) {
// Give up after logging; the list properties are left unassigned in this case.
Platform.get().log("Failed to read public suffix list", Platform.WARN, e)
return
}
}
} finally {
if (interrupted) {
Thread.currentThread().interrupt() // Retain interrupted status.
}
}
}

/**
 * Reads both rule lists from the gzipped resource at [path] on [fileSystem] and stores them.
 *
 * The stream holds a 4-byte length followed by that many bytes of rules, then a second length
 * followed by the exception rules. The completion latch is counted down whether or not the read
 * succeeds.
 *
 * @throws IOException if the resource cannot be opened or read.
 */
@Throws(IOException::class)
private fun readTheList() {
  try {
    val (rules, exceptions) =
      GzipSource(fileSystem.source(path)).buffer().use { source ->
        val rulesLength = source.readInt().toLong()
        val ruleBytes = source.readByteArray(rulesLength)

        val exceptionsLength = source.readInt().toLong()
        ruleBytes to source.readByteArray(exceptionsLength)
      }

    // Store both lists together under the instance lock.
    synchronized(this) {
      publicSuffixListBytes = rules
      publicSuffixExceptionListBytes = exceptions
    }
  } finally {
    readCompleteLatch.countDown()
  }
}

/**
 * Visible for testing.
 *
 * Installs the given rule lists directly, then marks the list as read and counts down the
 * completion latch, so later lookups neither load the resource nor block on the latch.
 *
 * @param publicSuffixListBytes the rule list to search for exact and wildcard matches.
 * @param publicSuffixExceptionListBytes the rule list to search for exception rules.
 */
fun setListBytes(
publicSuffixListBytes: ByteArray,
publicSuffixExceptionListBytes: ByteArray,
) {
this.publicSuffixListBytes = publicSuffixListBytes
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes
// Set the flag before releasing the latch so lookups take the "already read" path.
listRead.set(true)
readCompleteLatch.countDown()
}

companion object {
@JvmField
val PUBLIC_SUFFIX_RESOURCE = "/okhttp3/internal/publicsuffix/${PublicSuffixDatabase::class.java.simpleName}.gz".toPath()

private val WILDCARD_LABEL = byteArrayOf('*'.code.toByte())
private val WILDCARD_LABEL = ByteString.of('*'.code.toByte())
private val PREVAILING_RULE = listOf("*")

private const val EXCEPTION_MARKER = '!'

private val instance = PublicSuffixDatabase()
private val instance = PublicSuffixDatabase(EmbeddedPublicSuffixList)

/** Returns the shared [PublicSuffixDatabase] instance held by this companion. */
fun get(): PublicSuffixDatabase = instance

private fun ByteArray.binarySearch(
labels: Array<ByteArray>,
private fun ByteString.binarySearch(
labels: Array<ByteString>,
labelIndex: Int,
): String? {
var low = 0
Expand Down Expand Up @@ -338,7 +239,7 @@ class PublicSuffixDatabase internal constructor(
low = mid + end + 1
} else {
// Found a match.
match = String(this, mid, publicSuffixLength)
match = this.substring(mid, mid + publicSuffixLength).string(Charsets.UTF_8)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like string should have an overload which takes start/end indices to avoid this double copy. Or we should have added ByteString.slice!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will raise a PR.

break
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright (C) 2024 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.publicsuffix

import okio.ByteString

/**
 * Basic I/O for the PublicSuffixDatabase.gz.
 *
 * Supplies the two rule lists that [PublicSuffixDatabase] binary-searches when matching a domain.
 * NOTE(review): the lists are presumably UTF-8 rules separated by '\n', as in the previous
 * byte-array representation — confirm against the generator.
 */
internal interface PublicSuffixList {
/**
 * Makes [bytes] and [exceptionBytes] safe to read. [PublicSuffixDatabase] calls this at the start
 * of each rule lookup, before touching either property.
 */
fun ensureLoaded()

/** The rule list searched for exact and wildcard matches. */
val bytes: ByteString
/** The rule list searched for exception rules once a wildcard rule has matched. */
val exceptionBytes: ByteString
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (C) 2024 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.publicsuffix

// Note that PublicSuffixDatabase.gz is compiled from The Public Suffix List:
// https://publicsuffix.org/list/public_suffix_list.dat
//
// It is subject to the terms of the Mozilla Public License, v. 2.0:
// https://mozilla.org/MPL/2.0/

import okio.Buffer
import okio.ByteString
import okio.ByteString.Companion.decodeBase64
import okio.GzipSource
import okio.buffer

/**
* An implementation of I/O for PublicSuffixDatabase.gz by directly encoding
* the relevant byte arrays in a class file.
*/
internal object EmbeddedPublicSuffixList: PublicSuffixList {
override fun ensureLoaded() {
}

override val bytes: ByteString

override val exceptionBytes: ByteString

init {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not done in ensureLoaded?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I figured the JVM classloader would be a nice simple mutex. There is so much locking code in the other impl.

Buffer().use { buffer ->
buffer.write($publicSuffixListBytes)
GzipSource(buffer).buffer().use { source ->
val totalBytes = source.readInt()
bytes = source.readByteString(totalBytes.toLong())

val totalExceptionBytes = source.readInt()
exceptionBytes = source.readByteString(totalExceptionBytes.toLong())
}
}
}
}
4 changes: 0 additions & 4 deletions okhttp/src/main/resources/META-INF/proguard/okhttp3.pro
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# JSR 305 annotations are for embedding nullability information.
-dontwarn javax.annotation.**

# A resource is loaded with a relative path so the package of this class must be preserved.
-keeppackagenames okhttp3.internal.publicsuffix.*
-adaptresourcefilenames okhttp3/internal/publicsuffix/PublicSuffixDatabase.gz

# Animal Sniffer compileOnly dependency to ensure APIs are compatible with older versions of Java.
-dontwarn org.codehaus.mojo.animal_sniffer.*

Expand Down
Loading