Skip to content

Commit

Permalink
Merge pull request #162 from nulib/getty-relevancy-ordering
Browse files Browse the repository at this point in the history
Fix Getty search relevancy issue
  • Loading branch information
mbklein committed May 2, 2024
2 parents c1335f6 + 5721b95 commit 9da7c6b
Show file tree
Hide file tree
Showing 129 changed files with 2,977 additions and 2,025 deletions.
6 changes: 5 additions & 1 deletion lib/authoritex/getty/aat.ex
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ defmodule Authoritex.Getty.AAT do
skos:inScheme <http://vocab.getty.edu/aat/> ;
gvp:prefLabelGVP [skosxl:literalForm ?name] .
FILTER (#{sparql_search_filter(q)}) .
} LIMIT #{max_results}
} ORDER BY #{sparql_order_clause(q)} LIMIT #{max_results}
"""
end
end

# Builds the SPARQL `ORDER BY` expression that ranks search hits by relevance:
# a name that equals `q` scores 2, a name that starts with `q` scores 1, and
# any other hit scores 0 (comparisons use the case-insensitive "i" flag).
# The expression is wrapped in DESC(...) so the closest matches sort first.
#
# `q` is matched literally: regex metacharacters are backslash-escaped, and the
# resulting pattern is then escaped for a double-quoted SPARQL string literal,
# so terms such as `c++` or `say "hi"` cannot break or alter the query.
defp sparql_order_clause(q) do
  pattern =
    q
    # Neutralise regex metacharacters so the term is compared as plain text.
    |> String.replace(~r/[\\.^$*+?()\[\]{}|-]/, &("\\" <> &1))
    # Escape backslashes and double quotes for the SPARQL string literal.
    |> String.replace(~r/[\\"]/, &("\\" <> &1))

  ~s{DESC(IF(REGEX(?name, "^#{pattern}$", "i"), 2, IF(REGEX(?name, "^#{pattern}", "i"), 1, 0)))}
end

defp sparql_search_filter(q) do
String.split(q)
|> Enum.map_join(" && ", &~s{regex(?name, "#{&1}", "i")})
Expand Down
2 changes: 1 addition & 1 deletion lib/authoritex/getty/base.ex
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ defmodule Authoritex.Getty.Base do
{:error, 404}

{:ok, [%{replaced_by: replaced_by}] = result} when replaced_by != "" ->
Logger.warn("#{id} is obsolete. Fetching replacement term #{replaced_by}.")
Logger.warning("#{id} is obsolete. Fetching replacement term #{replaced_by}.")
fetch(replaced_by)

{:ok, [result]} ->
Expand Down
6 changes: 5 additions & 1 deletion lib/authoritex/getty/tgn.ex
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ defmodule Authoritex.Getty.TGN do
gvp:prefLabelGVP [skosxl:literalForm ?name] ;
gvp:parentString ?hint .
FILTER (#{sparql_search_filter(q)}) .
} LIMIT #{max_results}
} ORDER BY #{sparql_order_clause(q)} LIMIT #{max_results}
"""
end
end

# Builds the SPARQL `ORDER BY` expression that ranks search hits by relevance:
# a name that equals `q` scores 2, a name that starts with `q` scores 1, and
# any other hit scores 0 (comparisons use the case-insensitive "i" flag).
# The expression is wrapped in DESC(...) so the closest matches sort first.
#
# `q` is matched literally: regex metacharacters are backslash-escaped, and the
# resulting pattern is then escaped for a double-quoted SPARQL string literal,
# so terms such as `St. Louis (city)` or `say "hi"` cannot break the query.
defp sparql_order_clause(q) do
  pattern =
    q
    # Neutralise regex metacharacters so the term is compared as plain text.
    |> String.replace(~r/[\\.^$*+?()\[\]{}|-]/, &("\\" <> &1))
    # Escape backslashes and double quotes for the SPARQL string literal.
    |> String.replace(~r/[\\"]/, &("\\" <> &1))

  ~s{DESC(IF(REGEX(?name, "^#{pattern}$", "i"), 2, IF(REGEX(?name, "^#{pattern}", "i"), 1, 0)))}
end

defp sparql_search_filter(q) do
String.split(q)
|> Enum.map_join(" && ", &~s{regex(?name, "#{&1}", "i")})
Expand Down
6 changes: 5 additions & 1 deletion lib/authoritex/getty/ulan.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ defmodule Authoritex.Getty.ULAN do
foaf:focus/gvp:biographyPreferred [schema:description ?hint] ;
skos:altLabel ?alt .
FILTER (#{sparql_search_filter(q)}) .
} LIMIT #{max_results}
} ORDER BY #{sparql_order_clause(q)} LIMIT #{max_results}
"""
end
end

# Builds the SPARQL `ORDER BY` expression that ranks search hits by relevance:
# a name that equals `q` scores 2, a name that starts with `q` scores 1, and
# any other hit scores 0 (comparisons use the case-insensitive "i" flag).
# The expression is wrapped in DESC(...) so the closest matches sort first.
#
# `q` is matched literally: regex metacharacters are backslash-escaped, and the
# resulting pattern is then escaped for a double-quoted SPARQL string literal,
# so terms such as `Smith, J. (John)` or `say "hi"` cannot break the query.
defp sparql_order_clause(q) do
  pattern =
    q
    # Neutralise regex metacharacters so the term is compared as plain text.
    |> String.replace(~r/[\\.^$*+?()\[\]{}|-]/, &("\\" <> &1))
    # Escape backslashes and double quotes for the SPARQL string literal.
    |> String.replace(~r/[\\"]/, &("\\" <> &1))

  ~s{DESC(IF(REGEX(?name, "^#{pattern}$", "i"), 2, IF(REGEX(?name, "^#{pattern}", "i"), 1, 0)))}
end

defp sparql_search_filter(q) do
if String.contains?(q, " ") do
String.split(q)
Expand Down
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule Authoritex.MixProject do
use Mix.Project

@version "1.0.3"
@version "1.1.0"
@url "https://github.com/nulib/authoritex"

def project do
Expand Down
16 changes: 8 additions & 8 deletions test/authoritex/geonames_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ defmodule Authoritex.GeoNamesTest do
code: "geonames",
description: "GeoNames geographical database",
test_uris: [
"https://sws.geonames.org/4302561/"
"https://sws.geonames.org/4299276/"
],
bad_uri: "https://sws.geonames.org/43025619/",
bad_uri: "https://sws.geonames.org/42992769/",
expected: [
hint: "Kentucky, United States",
id: "https://sws.geonames.org/4302561/",
label: "Nicholasville",
qualified_label: "Nicholasville, Kentucky, United States",
id: "https://sws.geonames.org/4299276/",
label: "Louisville",
qualified_label: "Louisville, Kentucky, United States",
variants: []
],
search_result_term: "Kentucky",
Expand All @@ -28,9 +28,9 @@ defmodule Authoritex.GeoNamesTest do
end

test "proper URI form" do
assert Authoritex.fetch("https://sws.geonames.org/4302561") == {:error, 404}
assert Authoritex.fetch("http://sws.geonames.org/4302561") == {:error, :unknown_authority}
assert Authoritex.fetch("http://sws.geonames.org/4302561/") == {:error, :unknown_authority}
assert Authoritex.fetch("https://sws.geonames.org/4299276") == {:error, 404}
assert Authoritex.fetch("http://sws.geonames.org/4299276") == {:error, :unknown_authority}
assert Authoritex.fetch("http://sws.geonames.org/4299276/") == {:error, :unknown_authority}
end

test "search" do
Expand Down
2 changes: 1 addition & 1 deletion test/authoritex/getty/aat_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ defmodule Authoritex.Getty.AATTest do
label: "dollars (paper money)",
qualified_label: "dollars (paper money)",
hint: nil,
variants: ["dollar (paper money)", "dollar bills", "dollar bill"]
variants: ["dollar (paper money)", "dollar bills", "dollar bill", "Dollars"]
],
search_result_term: "dollars",
search_count_term: "paint"
Expand Down
2 changes: 1 addition & 1 deletion test/authoritex/getty_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ defmodule Authoritex.GettyTest do
label: "dollars (paper money)",
qualified_label: "dollars (paper money)",
hint: nil,
variants: ["dollar (paper money)", "dollar bills", "dollar bill"]
variants: ["dollar (paper money)", "dollar bills", "dollar bill", "Dollars"]
}}

assert Getty.fetch("http://vocab.getty.edu/tgn/2236134") ==
Expand Down
4 changes: 2 additions & 2 deletions test/authoritex/homosaurus_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ defmodule Authoritex.HomosaurusTest do
],
search_result_term: "adop",
search_count_term: "adop",
default_results: 10,
explicit_results: 10
default_results: 50,
explicit_results: 50
end
8 changes: 6 additions & 2 deletions test/authoritex_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,20 @@ defmodule AuthoritexTest do
end

describe "search/2" do
test "results" do
use_cassette "authoritex_search_results", match_requests_on: [:query] do
test "result count" do
use_cassette "authoritex_search_count", match_requests_on: [:query] do
with {:ok, results} <- Authoritex.search("lcnaf", "smith") do
assert length(results) == 30
end

with {:ok, results} <- Authoritex.search("lcnaf", "smith", 50) do
assert length(results) == 50
end
end
end

test "expected result" do
use_cassette "authoritex_search_results", match_requests_on: [:query] do
with {:ok, results} <- Authoritex.search("lcnaf", "valim") do
assert Enum.member?(results, %{
id: "http://id.loc.gov/authorities/names/no2011087251",
Expand Down
12 changes: 6 additions & 6 deletions test/fixtures/vcr_cassettes/aat_fetch_failure.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
[
{
"request": {
"options": [],
"body": "",
"url": "http://vocab.getty.edu/sparql.xml?query=SELECT+DISTINCT+%3Fs+%3Fname+%3FreplacedBy+%28group_concat%28%3Falt%3B+separator%3D%22%7C%22%29+AS+%3Fvariants%29+%7B+BIND%28%3Chttp%3A%2F%2Fvocab.getty.edu%2Faat%2Fwrong-id%3E+as+%3Fs%29+OPTIONAL+%7B%3Fs+gvp%3AprefLabelGVP%2Fxl%3AliteralForm+%3Fname%7D+OPTIONAL+%7B%3Fs+dcterms%3AisReplacedBy+%3FreplacedBy%7D+OPTIONAL+%7B%3Fs+xl%3AaltLabel%2Fxl%3AliteralForm+%3Falt%7D+%7D+GROUP+BY+%3Fs+%3Fname+%3FreplacedBy+LIMIT+1",
"headers": {
"Accept": "application/sparql-results+xml;charset=UTF-8",
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://vocab.getty.edu/sparql.xml?query=SELECT+DISTINCT+%3Fs+%3Fname+%3FreplacedBy+%28group_concat%28%3Falt%3B+separator%3D%22%7C%22%29+AS+%3Fvariants%29+%7B+BIND%28%3Chttp%3A%2F%2Fvocab.getty.edu%2Faat%2Fwrong-id%3E+as+%3Fs%29+OPTIONAL+%7B%3Fs+gvp%3AprefLabelGVP%2Fxl%3AliteralForm+%3Fname%7D+OPTIONAL+%7B%3Fs+dcterms%3AisReplacedBy+%3FreplacedBy%7D+OPTIONAL+%7B%3Fs+xl%3AaltLabel%2Fxl%3AliteralForm+%3Falt%7D+%7D+GROUP+BY+%3Fs+%3Fname+%3FreplacedBy+LIMIT+1"
"request_body": ""
},
"response": {
"binary": false,
"type": "ok",
"body": "<?xml version='1.0' encoding='UTF-8'?>\n<sparql xmlns='http://www.w3.org/2005/sparql-results#'>\n\t<head>\n\t\t<variable name='s'/>\n\t\t<variable name='name'/>\n\t\t<variable name='replacedBy'/>\n\t\t<variable name='variants'/>\n\t</head>\n\t<results>\n\t\t<result>\n\t\t\t<binding name='s'>\n\t\t\t\t<uri>http://vocab.getty.edu/aat/wrong-id</uri>\n\t\t\t</binding>\n\t\t\t<binding name='variants'>\n\t\t\t\t<literal></literal>\n\t\t\t</binding>\n\t\t</result>\n\t</results>\n</sparql>\n",
"headers": {
"Date": "Tue, 18 Apr 2023 19:54:04 GMT",
"Date": "Thu, 02 May 2024 16:28:43 GMT",
"Content-Type": "application/sparql-results+xml;charset=utf-8",
"Transfer-Encoding": "chunked",
"Connection": "keep-alive",
Expand All @@ -24,8 +25,7 @@
"Content-Disposition": "attachment; filename=\"sparql.xml\"",
"Content-Language": "en"
},
"status_code": 200,
"type": "ok"
"status_code": 200
}
}
]
14 changes: 7 additions & 7 deletions test/fixtures/vcr_cassettes/aat_fetch_success.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
[
{
"request": {
"options": [],
"body": "",
"url": "http://vocab.getty.edu/sparql.xml?query=SELECT+DISTINCT+%3Fs+%3Fname+%3FreplacedBy+%28group_concat%28%3Falt%3B+separator%3D%22%7C%22%29+AS+%3Fvariants%29+%7B+BIND%28%3Chttp%3A%2F%2Fvocab.getty.edu%2Faat%2F300265149%3E+as+%3Fs%29+OPTIONAL+%7B%3Fs+gvp%3AprefLabelGVP%2Fxl%3AliteralForm+%3Fname%7D+OPTIONAL+%7B%3Fs+dcterms%3AisReplacedBy+%3FreplacedBy%7D+OPTIONAL+%7B%3Fs+xl%3AaltLabel%2Fxl%3AliteralForm+%3Falt%7D+%7D+GROUP+BY+%3Fs+%3Fname+%3FreplacedBy+LIMIT+1",
"headers": {
"Accept": "application/sparql-results+xml;charset=UTF-8",
"User-Agent": "Authoritex"
},
"method": "get",
"options": [],
"request_body": "",
"url": "http://vocab.getty.edu/sparql.xml?query=SELECT+DISTINCT+%3Fs+%3Fname+%3FreplacedBy+%28group_concat%28%3Falt%3B+separator%3D%22%7C%22%29+AS+%3Fvariants%29+%7B+BIND%28%3Chttp%3A%2F%2Fvocab.getty.edu%2Faat%2F300265149%3E+as+%3Fs%29+OPTIONAL+%7B%3Fs+gvp%3AprefLabelGVP%2Fxl%3AliteralForm+%3Fname%7D+OPTIONAL+%7B%3Fs+dcterms%3AisReplacedBy+%3FreplacedBy%7D+OPTIONAL+%7B%3Fs+xl%3AaltLabel%2Fxl%3AliteralForm+%3Falt%7D+%7D+GROUP+BY+%3Fs+%3Fname+%3FreplacedBy+LIMIT+1"
"request_body": ""
},
"response": {
"binary": false,
"body": "<?xml version='1.0' encoding='UTF-8'?>\n<sparql xmlns='http://www.w3.org/2005/sparql-results#'>\n\t<head>\n\t\t<variable name='s'/>\n\t\t<variable name='name'/>\n\t\t<variable name='replacedBy'/>\n\t\t<variable name='variants'/>\n\t</head>\n\t<results>\n\t\t<result>\n\t\t\t<binding name='s'>\n\t\t\t\t<uri>http://vocab.getty.edu/aat/300265149</uri>\n\t\t\t</binding>\n\t\t\t<binding name='name'>\n\t\t\t\t<literal xml:lang='en'>dollars (paper money)</literal>\n\t\t\t</binding>\n\t\t\t<binding name='variants'>\n\t\t\t\t<literal>dollar (paper money)|dollar bills|dollar bill</literal>\n\t\t\t</binding>\n\t\t</result>\n\t</results>\n</sparql>\n",
"type": "ok",
"body": "<?xml version='1.0' encoding='UTF-8'?>\n<sparql xmlns='http://www.w3.org/2005/sparql-results#'>\n\t<head>\n\t\t<variable name='s'/>\n\t\t<variable name='name'/>\n\t\t<variable name='replacedBy'/>\n\t\t<variable name='variants'/>\n\t</head>\n\t<results>\n\t\t<result>\n\t\t\t<binding name='s'>\n\t\t\t\t<uri>http://vocab.getty.edu/aat/300265149</uri>\n\t\t\t</binding>\n\t\t\t<binding name='name'>\n\t\t\t\t<literal xml:lang='en'>dollars (paper money)</literal>\n\t\t\t</binding>\n\t\t\t<binding name='variants'>\n\t\t\t\t<literal>dollar (paper money)|dollar bills|dollar bill|Dollars</literal>\n\t\t\t</binding>\n\t\t</result>\n\t</results>\n</sparql>\n",
"headers": {
"Date": "Tue, 18 Apr 2023 19:54:05 GMT",
"Date": "Thu, 02 May 2024 16:28:44 GMT",
"Content-Type": "application/sparql-results+xml;charset=utf-8",
"Transfer-Encoding": "chunked",
"Connection": "keep-alive",
Expand All @@ -24,8 +25,7 @@
"Content-Disposition": "attachment; filename=\"sparql.xml\"",
"Content-Language": "en"
},
"status_code": 200,
"type": "ok"
"status_code": 200
}
}
]
Loading

0 comments on commit 9da7c6b

Please sign in to comment.