Skip to content

Commit

Permalink
Ignore unknown countries (#2556)
Browse files Browse the repository at this point in the history
* Ignore XX and T1 countries

* Add fallback if country_code=nil

* Lookup city overrides directly in CityOverrides module

* Changelog

* Add empty moduledoc

* Remove redundant comment
  • Loading branch information
ukutaht authored and aerosol committed Jan 4, 2023
1 parent ff2b0c8 commit f15efe4
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
### Changed
- Reject events with long URIs and data URIs plausible/analytics#2536
- Always show direct traffic in sources reports plausible/analytics#2531
- Stop recording XX and T1 country codes plausible/analytics#2556

## Fixed
- Cascade delete sent_renewal_notifications table when user is deleted plausible/analytics#2549
Expand Down
4 changes: 3 additions & 1 deletion config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ config :geolix,
{1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{2, 2, 2, 2} => geolix_sample_lookup,
{1, 1, 1, 1, 1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}}
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}, city: %{geoname_id: 123_123}},
{0, 0, 0, 1} => %{country: %{iso_code: "XX"}, subdivisions: [%{iso_code: "IDF"}]},
{0, 0, 0, 2} => %{country: %{iso_code: "T1"}, subdivisions: [%{}, %{iso_code: "IDF"}]}
}
}
]
Expand Down
2 changes: 1 addition & 1 deletion lib/plausible/ingestion/city_overrides.ex
Original file line number Diff line number Diff line change
Expand Up @@ -198,5 +198,5 @@ defmodule Plausible.Ingestion.CityOverrides do
# Hackney -> London
2_647_694 => 2_643_743
}
def get, do: @overrides
def get(key, default), do: Map.get(@overrides, key, default)
end
39 changes: 3 additions & 36 deletions lib/plausible/ingestion/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule Plausible.Ingestion.Event do
are uniformly either buffered in batches (to Clickhouse) or dropped
(e.g. due to spam blocklist) from the processing pipeline.
"""
alias Plausible.Ingestion.{Request, CityOverrides}
alias Plausible.Ingestion.Request
alias Plausible.ClickhouseEvent
alias Plausible.Site.GateKeeper

Expand Down Expand Up @@ -168,39 +168,9 @@ defmodule Plausible.Ingestion.Event do
end

defp put_geolocation(%__MODULE__{} = event) do
result = Geolix.lookup(event.request.remote_ip, where: :geolocation)
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip)

country_code =
get_in(result, [:country, :iso_code])
|> ignore_unknown_country()

city_geoname_id = get_in(result, [:city, :geoname_id])
city_geoname_id = Map.get(CityOverrides.get(), city_geoname_id, city_geoname_id)

subdivision1_code =
case result do
%{subdivisions: [%{iso_code: iso_code} | _rest]} ->
country_code <> "-" <> iso_code

_ ->
""
end

subdivision2_code =
case result do
%{subdivisions: [_first, %{iso_code: iso_code} | _rest]} ->
country_code <> "-" <> iso_code

_ ->
""
end

update_attrs(event, %{
country_code: country_code,
subdivision1_code: subdivision1_code,
subdivision2_code: subdivision2_code,
city_geoname_id: city_geoname_id
})
update_attrs(event, result)
end

defp put_screen_size(%__MODULE__{} = event) do
Expand Down Expand Up @@ -372,9 +342,6 @@ defmodule Plausible.Ingestion.Event do
end
end

defp ignore_unknown_country("ZZ"), do: nil
defp ignore_unknown_country(country), do: country

defp generate_user_id(request, domain, hostname, salt) do
cond do
is_nil(salt) ->
Expand Down
47 changes: 47 additions & 0 deletions lib/plausible/ingestion/geolocation.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
defmodule Plausible.Ingestion.Geolocation do
@moduledoc false
alias Plausible.Ingestion.CityOverrides

def lookup(remote_ip) do
result = Geolix.lookup(remote_ip, where: :geolocation)

country_code =
get_in(result, [:country, :iso_code])
|> ignore_unknown_country()

city_geoname_id = country_code && get_in(result, [:city, :geoname_id])
city_geoname_id = CityOverrides.get(city_geoname_id, city_geoname_id)

%{
country_code: country_code,
subdivision1_code: subdivision1_code(country_code, result),
subdivision2_code: subdivision2_code(country_code, result),
city_geoname_id: city_geoname_id
}
end

defp subdivision1_code(country_code, %{subdivisions: [%{iso_code: iso_code} | _rest]})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end

defp subdivision1_code(_, _), do: nil

defp subdivision2_code(country_code, %{subdivisions: [_first, %{iso_code: iso_code} | _rest]})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end

defp subdivision2_code(_, _), do: nil

@ignored_countries [
# Worldwide
"ZZ",
# Disputed territory
"XX",
# Tor exit node
"T1"
]
defp ignore_unknown_country(code) when code in @ignored_countries, do: nil
defp ignore_unknown_country(country), do: country
end
41 changes: 41 additions & 0 deletions test/plausible_web/controllers/api/external_controller_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,47 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageview = get_event(domain)

assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end

test "ignores disputed territory code XX", %{conn: conn, domain: domain} do
params = %{
name: "pageview",
domain: domain,
url: "http://gigride.live/"
}

conn
|> put_req_header("x-forwarded-for", "0.0.0.1")
|> post("/api/event", params)

pageview = get_event(domain)

assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end

test "ignores TOR exit node country code T1", %{conn: conn, domain: domain} do
params = %{
name: "pageview",
domain: domain,
url: "http://gigride.live/"
}

conn
|> put_req_header("x-forwarded-for", "0.0.0.2")
|> post("/api/event", params)

pageview = get_event(domain)

assert pageview.country_code == <<0, 0>>
assert pageview.subdivision1_code == ""
assert pageview.subdivision2_code == ""
assert pageview.city_geoname_id == 0
end

test "scrubs port from x-forwarded-for", %{conn: conn, domain: domain} do
Expand Down

0 comments on commit f15efe4

Please sign in to comment.