Skip to content

Commit

Permalink
Merge pull request #5 from seomoz/myron/public_suffix_fun
Browse files Browse the repository at this point in the history
Implement `public_suffix` function.
  • Loading branch information
bkirz committed Apr 19, 2016
2 parents 0ff84c8 + ed23857 commit b40f4f8
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 24 deletions.
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ the public suffix rules provided by https://publicsuffix.org/:
> suffixes are `.com`, `.co.uk` and `pvt.k12.ma.us`. The Public Suffix List is
> a list of all known public suffixes.
This Elixir library provides a means to get the registrable domain part
from any domain:
This Elixir library provides a means to get the public suffix and the
registrable domain from any domain:

``` iex
iex(1)> PublicSuffix.registrable_domain("mysite.foo.bar.com")
"bar.com"
iex(2)> PublicSuffix.registrable_domain("mysite.foo.bar.co.uk")
"bar.co.uk"
iex(3)> PublicSuffix.public_suffix("mysite.foo.bar.com")
"com"
iex(4)> PublicSuffix.public_suffix("mysite.foo.bar.co.uk")
"co.uk"
```

The publicsuffix.org data file contains both official ICANN records
Expand All @@ -31,8 +35,12 @@ tell it to ignore them:
``` iex
iex(1)> PublicSuffix.registrable_domain("foo.github.io")
"foo.github.io"
iex(2)> PublicSuffix.registrable_domain("foo.github.io", ignore_private: true)
iex(2)> PublicSuffix.public_suffix("foo.github.io")
"github.io"
iex(3)> PublicSuffix.registrable_domain("foo.github.io", ignore_private: true)
"github.io"
iex(4)> PublicSuffix.public_suffix("foo.github.io", ignore_private: true)
"io"
```

## Installation
Expand Down
49 changes: 40 additions & 9 deletions lib/public_suffix.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,42 @@ defmodule PublicSuffix do
showing how individual lines of code relate to the specification.
"""

@type options :: [ignore_private: boolean]

@doc """
Returns the public suffix of `domain`, determined using the publicsuffix.org rules.

The result is a string, or `nil` when no public suffix could be extracted.

## Examples

    iex> public_suffix("foo.bar.com")
    "com"

Pass the `ignore_private` option to exclude private (non-ICANN) domains.
Private domains are included when the option is omitted.

    iex> public_suffix("foo.github.io", ignore_private: false)
    "github.io"
    iex> public_suffix("foo.github.io", ignore_private: true)
    "io"
    iex> public_suffix("foo.github.io")
    "github.io"

"""
@spec public_suffix(String.t) :: nil | String.t
@spec public_suffix(String.t, options) :: nil | String.t
def public_suffix(domain, options \\ []) when is_binary(domain) do
  # The public suffix is exactly the labels covered by the prevailing rule,
  # so no additional labels are requested beyond it.
  extra_label_parts = 0
  parse_domain(domain, options, extra_label_parts)
end

@doc """
Extracts the _registrable_ part of the provided domain. The registrable
part is the public suffix plus one additional domain part. For example,
given a public suffix of `co.uk`, `example.co.uk` would be the registrable
domain part.
domain part. If the domain does not contain a registrable part (for example,
if the domain is itself a public suffix), this function will return `nil`.
## Examples
iex> registrable_domain("foo.bar.com")
"bar.com"
iex> registrable_domain("com")
nil
You can use the `ignore_private` keyword to exclude private (non-ICANN) domains.
Expand All @@ -25,23 +52,28 @@ defmodule PublicSuffix do
"foo.github.io"
"""
@spec registrable_domain(String.t) :: nil | String.t
@spec registrable_domain(String.t, ignore_private: boolean) :: nil | String.t
def registrable_domain(domain, options \\ [ignore_private: false]) when is_binary(domain) do
@spec registrable_domain(String.t, options) :: nil | String.t
def registrable_domain(domain, options \\ []) when is_binary(domain) do
# "The registered or registrable domain is the public suffix plus one additional label."
parse_domain(domain, options, 1)
end

defp parse_domain(domain, options, extra_label_parts) do
domain
# "The domain...must be canonicalized in the normal way for hostnames - lower-case"
|> String.downcase
# "Empty labels are not permitted, meaning that leading and trailing dots are ignored."
|> String.strip(?.)
# "A domain or rule can be split into a list of labels using the separator "." (dot)."
|> String.split(".")
|> find_registrable_domain_labels(options)
|> extract_labels_using_rules(extra_label_parts, options)
|> case do
nil -> nil
labels -> Enum.join(labels, ".")
end
end

defp find_registrable_domain_labels(labels, options) do
defp extract_labels_using_rules(labels, extra_label_parts, options) do
allowed_rule_types = allowed_rule_types_for(options)

prevailing_rule =
Expand All @@ -51,13 +83,12 @@ defmodule PublicSuffix do
# "If no rules match, the prevailing rule is "*"."
["*"]

rule_size = length(prevailing_rule)
num_labels = length(prevailing_rule) + extra_label_parts

if length(labels) > rule_size do
if length(labels) >= num_labels do
labels
|> Enum.reverse
# "The registered or registrable domain is the public suffix plus one additional label."
|> Enum.take(rule_size + 1)
|> Enum.take(num_labels)
|> Enum.reverse
else
nil
Expand Down
53 changes: 41 additions & 12 deletions test/generated_cases_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ defmodule PublicSuffix.TestCaseGenerator do
end

defp generate_test_case({{test_case_line, line_index}, group_case_index}, group_description) do
[input, output] =
[input, registrable_domain_output] =
test_case_line
|> String.replace_prefix("checkPublicSuffix(", "")
|> String.replace_suffix(");", "")
Expand All @@ -37,28 +37,50 @@ defmodule PublicSuffix.TestCaseGenerator do
line_number: line_index + 1,
group_case_number: group_case_index + 1,
input: input,
output: output,
registrable_domain_output: registrable_domain_output,
public_suffix_output: public_suffix_output(registrable_domain_output, input),
}
end

# Converts a raw argument token into a test value: the literal `null`
# becomes `nil`; any other token has its surrounding single quotes stripped.
defp parse_arg(raw_arg) do
  case raw_arg do
    "null" -> nil
    quoted -> String.strip(quoted, ?')
  end
end

# Derives the expected `public_suffix` result for a test case from its
# expected registrable domain and its raw input.

# No input and no registrable domain: nothing to expect.
defp public_suffix_output(nil, nil), do: nil

# A `nil` registrable domain generally means the input is itself a public
# suffix. The raw inputs are not sanitized, so lower-case the input and drop
# leading dots to obtain the expected public suffix.
defp public_suffix_output(nil, raw_input) do
  String.lstrip(String.downcase(raw_input), ?.)
end

# Otherwise the public suffix is the registrable domain minus its first label.
defp public_suffix_output(registrable, _raw_input) do
  # `String.split/2` always yields at least one element, so this match is total.
  [_leading_label | suffix_labels] = String.split(registrable, ".")
  Enum.join(suffix_labels, ".")
end
end

defmodule PublicSuffixGeneratedCasesTest do
use ExUnit.Case
import PublicSuffix

for test_case <- PublicSuffix.TestCaseGenerator.test_cases, test_case.input do
@test_case test_case
expression = "registrable_domain(#{inspect test_case.input}) == #{inspect test_case.output}"
description = "#{test_case.group_description} ##{test_case.group_case_number} -- line #{test_case.line_number}"
test_name = case test_case.group_description do
"IDN labels" -> "#{description} (can't embed expression in test name due to chinese characters)"
_otherwise -> "#{expression} (#{description})"
test_name = fn test_case, fun_name, output_field ->
output = Map.fetch!(test_case, output_field)

expression = "#{fun_name}(#{inspect test_case.input}) == #{inspect output}"
description = "#{test_case.group_description} ##{test_case.group_case_number} -- line #{test_case.line_number}"
case test_case.group_description do
"IDN labels" -> "#{description} (can't embed expression in test name due to chinese characters)"
_otherwise -> "#{expression} (#{description})"
end
end

@tag skip: (
for test_case <- PublicSuffix.TestCaseGenerator.test_cases, test_case.input do
@test_case test_case
should_skip? = (
# the test file has some commented out tests.
String.starts_with?(test_case.input || "", "//") ||
# These two test cases are inconsistent with our reading
Expand All @@ -68,8 +90,15 @@ defmodule PublicSuffixGeneratedCasesTest do
test_case.input == ".example.example" ||
test_case.input == ".example.com"
)
test test_name do
assert registrable_domain(@test_case.input) == @test_case.output

@tag skip: should_skip?
test test_name.(test_case, "registrable_domain", :registrable_domain_output) do
assert registrable_domain(@test_case.input) == @test_case.registrable_domain_output
end

@tag skip: should_skip?
test test_name.(test_case, "public_suffix", :public_suffix_output) do
assert public_suffix(@test_case.input) == @test_case.public_suffix_output
end
end
end

0 comments on commit b40f4f8

Please sign in to comment.