Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embed the public suffix database list directly inside a class #8589

Merged
merged 12 commits into from
Nov 27, 2024
43 changes: 41 additions & 2 deletions okhttp/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import com.vanniktech.maven.publish.JavadocJar
import com.vanniktech.maven.publish.KotlinJvm
import java.io.DataInputStream
import java.io.FileInputStream
import java.util.zip.GZIPInputStream

plugins {
kotlin("jvm")
Expand All @@ -9,12 +12,48 @@ plugins {
id("binary-compatibility-validator")
}

// Helpers for copyKotlinTemplates, which builds okhttp3/internal/-InternalVersion.kt and
// okhttp3/internal/publicsuffix/EmbeddedPublicSuffixList.kt
/**
 * Renders this array as Kotlin source text: `byteArrayOf(`, then each byte's decimal value
 * followed by `, `, then `)`. Every element, including the last one, is followed by the
 * separator, so a non-empty array ends with a trailing comma.
 */
fun ByteArray.toByteArrayExpression(): String =
  joinToString(separator = "", prefix = "byteArrayOf(", postfix = ")") { byte -> "$byte, " }

// Copies the Kotlin templates into the generated sources directory, substituting the project
// version and the public suffix list byte arrays into them.
val copyKotlinTemplates = tasks.register<Copy>("copyKotlinTemplates") {
  from("src/main/kotlinTemplates")
  into("$buildDir/generated/sources/kotlinTemplates")

  filteringCharset = Charsets.UTF_8.toString()

  // TODO replace with KotlinPoet?
  val databaseGz = project.file("src/test/resources/okhttp3/internal/publicsuffix/PublicSuffixDatabase.gz")
  logger.info("loading $databaseGz")
  val (publicSuffixListBytes, publicSuffixListExceptionBytes) =
    DataInputStream(GZIPInputStream(FileInputStream(databaseGz))).use { input ->
      // The gzipped file holds two sections, each an Int length followed by that many bytes:
      // first the public suffix rules, then the exception rules.
      val totalBytes = input.readInt()
      val rules = input.readNBytes(totalBytes)
      logger.info("read $totalBytes")

      val totalExceptionBytes = input.readInt()
      val exceptionRules = input.readNBytes(totalExceptionBytes)
      logger.info("read $totalExceptionBytes")

      Pair(rules.toByteArrayExpression(), exceptionRules.toByteArrayExpression())
    }

  // Register a single expansion that carries every template property. A separate earlier
  // expand() with only projectVersion would process the templates without the byte array
  // properties that EmbeddedPublicSuffixList.kt references.
  expand(
    // Build & use okhttp3/internal/-InternalVersion.kt
    "projectVersion" to project.version,

    // Build okhttp3/internal/publicsuffix/EmbeddedPublicSuffixList.kt
    "publicSuffixListBytes" to publicSuffixListBytes,
    "publicSuffixListExceptionBytes" to publicSuffixListExceptionBytes,
  )
}

// Build & use okhttp3/internal/idn/IdnaMappingTableInstance.kt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,17 @@
*/
package okhttp3.internal.publicsuffix

import java.io.IOException
import java.io.InterruptedIOException
import java.net.IDN
import java.util.concurrent.CountDownLatch
import java.util.concurrent.atomic.AtomicBoolean
import okhttp3.internal.and
import okhttp3.internal.platform.Platform
import okio.FileSystem
import okio.GzipSource
import okio.Path
import okio.Path.Companion.toPath
import okio.buffer

/**
* A database of public suffixes provided by [publicsuffix.org][publicsuffix_org].
*
* [publicsuffix_org]: https://publicsuffix.org/
*/
class PublicSuffixDatabase internal constructor(
val path: Path = PUBLIC_SUFFIX_RESOURCE,
val fileSystem: FileSystem = FileSystem.RESOURCES,
private val publicSuffixList: PublicSuffixList
) {
/** True after we've attempted to read the list for the first time. */
private val listRead = AtomicBoolean(false)

/** Used for concurrent threads reading the list for the first time. */
private val readCompleteLatch = CountDownLatch(1)

// The lists are held as a large array of UTF-8 bytes. This is to avoid allocating lots of strings
// that will likely never be used. Each rule is separated by '\n'. Please see the
// PublicSuffixListGenerator class for how these lists are generated.
// Guarded by this.
private lateinit var publicSuffixListBytes: ByteArray
private lateinit var publicSuffixExceptionListBytes: ByteArray

/**
* Returns the effective top-level domain plus one (eTLD+1) by referencing the public suffix list.
Expand Down Expand Up @@ -101,20 +78,7 @@ class PublicSuffixDatabase internal constructor(
}

private fun findMatchingRule(domainLabels: List<String>): List<String> {
if (!listRead.get() && listRead.compareAndSet(false, true)) {
readTheListUninterruptibly()
} else {
try {
readCompleteLatch.await()
} catch (_: InterruptedException) {
Thread.currentThread().interrupt() // Retain interrupted status.
}
}

check(::publicSuffixListBytes.isInitialized) {
// May have failed with an IOException
"Unable to load $PUBLIC_SUFFIX_RESOURCE resource from the classpath."
}
publicSuffixList.ensureLoaded()

// Break apart the domain into UTF-8 labels, i.e. foo.bar.com turns into [foo, bar, com].
val domainLabelsUtf8Bytes = Array(domainLabels.size) { i -> domainLabels[i].toByteArray() }
Expand All @@ -123,7 +87,7 @@ class PublicSuffixDatabase internal constructor(
// will look like: [foo, bar, com], [bar, com], [com]. The longest matching rule wins.
var exactMatch: String? = null
for (i in domainLabelsUtf8Bytes.indices) {
val rule = publicSuffixListBytes.binarySearch(domainLabelsUtf8Bytes, i)
val rule = publicSuffixList.bytes.binarySearch(domainLabelsUtf8Bytes, i)
if (rule != null) {
exactMatch = rule
break
Expand All @@ -140,7 +104,7 @@ class PublicSuffixDatabase internal constructor(
val labelsWithWildcard = domainLabelsUtf8Bytes.clone()
for (labelIndex in 0 until labelsWithWildcard.size - 1) {
labelsWithWildcard[labelIndex] = WILDCARD_LABEL
val rule = publicSuffixListBytes.binarySearch(labelsWithWildcard, labelIndex)
val rule = publicSuffixList.bytes.binarySearch(labelsWithWildcard, labelIndex)
if (rule != null) {
wildcardMatch = rule
break
Expand All @@ -153,7 +117,7 @@ class PublicSuffixDatabase internal constructor(
if (wildcardMatch != null) {
for (labelIndex in 0 until domainLabelsUtf8Bytes.size - 1) {
val rule =
publicSuffixExceptionListBytes.binarySearch(
publicSuffixList.exceptionBytes.binarySearch(
domainLabelsUtf8Bytes,
labelIndex,
)
Expand Down Expand Up @@ -182,77 +146,13 @@ class PublicSuffixDatabase internal constructor(
}
}

/**
* Reads the public suffix list treating the operation as uninterruptible. We always want to read
* the list otherwise we'll be left in a bad state. If the thread was interrupted prior to this
* operation, it will be re-interrupted after the list is read.
*/
private fun readTheListUninterruptibly() {
  // Remembers whether an interrupt was swallowed so it can be restored on the way out.
  var wasInterrupted = false
  try {
    while (true) {
      try {
        readTheList()
        break // Read succeeded.
      } catch (_: InterruptedIOException) {
        // Note the interrupt, clear the thread's flag, and go around again.
        wasInterrupted = true
        Thread.interrupted()
      } catch (e: IOException) {
        // Any other I/O failure is logged and not retried.
        Platform.get().log("Failed to read public suffix list", Platform.WARN, e)
        break
      }
    }
  } finally {
    if (wasInterrupted) Thread.currentThread().interrupt() // Retain interrupted status.
  }
}

/**
 * Reads both lists from the gzipped file at [path] on [fileSystem] and stores them in this
 * instance. The file is read as an Int length followed by that many bytes of rules, then an Int
 * length followed by that many bytes of exception rules.
 *
 * [readCompleteLatch] is counted down whether or not the read succeeds.
 */
@Throws(IOException::class)
private fun readTheList() {
  try {
    val (rules, exceptionRules) =
      GzipSource(fileSystem.source(path)).buffer().use { source ->
        val rulesLength = source.readInt()
        val rulesBytes = source.readByteArray(rulesLength.toLong())

        val exceptionsLength = source.readInt()
        val exceptionsBytes = source.readByteArray(exceptionsLength.toLong())

        rulesBytes to exceptionsBytes
      }

    // Publish both arrays together under the instance lock.
    synchronized(this) {
      publicSuffixListBytes = rules
      publicSuffixExceptionListBytes = exceptionRules
    }
  } finally {
    readCompleteLatch.countDown()
  }
}

/** Visible for testing. */
fun setListBytes(
publicSuffixListBytes: ByteArray,
publicSuffixExceptionListBytes: ByteArray,
) {
// Install the supplied arrays directly instead of reading them via readTheList().
// NOTE(review): unlike readTheList(), these writes are not wrapped in synchronized(this).
this.publicSuffixListBytes = publicSuffixListBytes
this.publicSuffixExceptionListBytes = publicSuffixExceptionListBytes
// Mark the list as read so findMatchingRule() does not start readTheListUninterruptibly().
listRead.set(true)
// Release any threads waiting on the latch in findMatchingRule().
readCompleteLatch.countDown()
}

companion object {
@JvmField
val PUBLIC_SUFFIX_RESOURCE = "/okhttp3/internal/publicsuffix/${PublicSuffixDatabase::class.java.simpleName}.gz".toPath()

private val WILDCARD_LABEL = byteArrayOf('*'.code.toByte())
private val PREVAILING_RULE = listOf("*")

private const val EXCEPTION_MARKER = '!'

private val instance = PublicSuffixDatabase()
private val instance = PublicSuffixDatabase(EmbeddedPublicSuffixList)

fun get(): PublicSuffixDatabase {
return instance
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (C) 2024 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.publicsuffix

/**
* Basic I/O for the PublicSuffixDatabase.gz.
*/
internal interface PublicSuffixList {
  /** The public suffix rules that [PublicSuffixDatabase] binary searches for a match. */
  val bytes: ByteArray

  /** The exception rules, searched by [PublicSuffixDatabase] after a wildcard rule matches. */
  val exceptionBytes: ByteArray

  /** Called by [PublicSuffixDatabase] before it reads [bytes] or [exceptionBytes]. */
  fun ensureLoaded()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (C) 2024 Block, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package okhttp3.internal.publicsuffix

//Note that PublicSuffixDatabase.gz is compiled from The Public Suffix List:
//https://publicsuffix.org/list/public_suffix_list.dat
//
//It is subject to the terms of the Mozilla Public License, v. 2.0:
//https://mozilla.org/MPL/2.0/

/**
* An implementation of I/O for PublicSuffixDatabase.gz by directly encoding
* the relevant byte arrays in a class file.
*/
internal object EmbeddedPublicSuffixList : PublicSuffixList {
  // Replaced with the publicSuffixListBytes template property when the build expands this file.
  override val bytes: ByteArray = $publicSuffixListBytes

  // Replaced with the publicSuffixListExceptionBytes template property in the same way.
  override val exceptionBytes: ByteArray = $publicSuffixListExceptionBytes

  // Nothing to do here: both arrays are assigned by the property initializers above.
  override fun ensureLoaded() = Unit
}
4 changes: 0 additions & 4 deletions okhttp/src/main/resources/META-INF/proguard/okhttp3.pro
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# JSR 305 annotations are for embedding nullability information.
-dontwarn javax.annotation.**

# A resource is loaded with a relative path so the package of this class must be preserved.
-keeppackagenames okhttp3.internal.publicsuffix.*
-adaptresourcefilenames okhttp3/internal/publicsuffix/PublicSuffixDatabase.gz

# Animal Sniffer compileOnly dependency to ensure APIs are compatible with older versions of Java.
-dontwarn org.codehaus.mojo.animal_sniffer.*

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@ import okio.use
import org.junit.jupiter.api.Test

class PublicSuffixDatabaseTest {
private val publicSuffixDatabase = PublicSuffixDatabase()
private val list = ResourcePublicSuffixList()
private val publicSuffixDatabase = PublicSuffixDatabase(list)

@Test fun longestMatchWins() {
val buffer =
Buffer()
.writeUtf8("com\n")
.writeUtf8("my.square.com\n")
.writeUtf8("square.com\n")
publicSuffixDatabase.setListBytes(buffer.readByteArray(), byteArrayOf())
list.setListBytes(buffer.readByteArray(), byteArrayOf())
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("example.com"))
.isEqualTo("example.com")
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("foo.example.com"))
Expand All @@ -56,7 +57,7 @@ class PublicSuffixDatabaseTest {
.writeUtf8("*.square.com\n")
.writeUtf8("com\n")
.writeUtf8("example.com\n")
publicSuffixDatabase.setListBytes(buffer.readByteArray(), byteArrayOf())
list.setListBytes(buffer.readByteArray(), byteArrayOf())
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("my.square.com")).isNull()
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("foo.my.square.com"))
.isEqualTo("foo.my.square.com")
Expand All @@ -70,7 +71,7 @@ class PublicSuffixDatabaseTest {
.writeUtf8("bbb\n")
.writeUtf8("ddd\n")
.writeUtf8("fff\n")
publicSuffixDatabase.setListBytes(buffer.readByteArray(), byteArrayOf())
list.setListBytes(buffer.readByteArray(), byteArrayOf())
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("aaa")).isNull()
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("ggg")).isNull()
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("ccc")).isNull()
Expand All @@ -87,7 +88,7 @@ class PublicSuffixDatabaseTest {
.writeUtf8("*.square.jp\n")
.writeUtf8("example.com\n")
.writeUtf8("square.com\n")
publicSuffixDatabase.setListBytes(buffer.readByteArray(), exception.readByteArray())
list.setListBytes(buffer.readByteArray(), exception.readByteArray())
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("my.square.jp"))
.isEqualTo("my.square.jp")
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("foo.my.square.jp"))
Expand All @@ -105,14 +106,14 @@ class PublicSuffixDatabaseTest {
.writeUtf8("*.square.jp\n")
.writeUtf8("example.com\n")
.writeUtf8("square.com\n")
publicSuffixDatabase.setListBytes(buffer.readByteArray(), exception.readByteArray())
list.setListBytes(buffer.readByteArray(), exception.readByteArray())
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("example.com")).isNull()
assertThat(publicSuffixDatabase.getEffectiveTldPlusOne("foo.square.jp")).isNull()
}

@Test fun allPublicSuffixes() {
val buffer = Buffer()
FileSystem.RESOURCES.source(PublicSuffixDatabase.PUBLIC_SUFFIX_RESOURCE).use { resource ->
FileSystem.RESOURCES.source(ResourcePublicSuffixList.PUBLIC_SUFFIX_RESOURCE).use { resource ->
GzipSource(resource).buffer().use { source ->
val length = source.readInt()
buffer.write(source, length.toLong())
Expand All @@ -132,7 +133,7 @@ class PublicSuffixDatabaseTest {

@Test fun publicSuffixExceptions() {
val buffer = Buffer()
FileSystem.RESOURCES.source(PublicSuffixDatabase.PUBLIC_SUFFIX_RESOURCE).use { resource ->
FileSystem.RESOURCES.source(ResourcePublicSuffixList.PUBLIC_SUFFIX_RESOURCE).use { resource ->
GzipSource(resource).buffer().use { source ->
var length = source.readInt()
source.skip(length.toLong())
Expand Down Expand Up @@ -163,7 +164,9 @@ class PublicSuffixDatabaseTest {
@Test fun secondReadFailsSameAsFirst() {
val badPublicSuffixDatabase =
PublicSuffixDatabase(
path = "/xxx.gz".toPath(),
ResourcePublicSuffixList(
path = "/xxx.gz".toPath(),
)
)
lateinit var firstFailure: Exception
assertFailsWith<Exception> {
Expand Down
Loading
Loading