Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't write 9999 dates from MARC #8448

Merged
merged 5 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openlibrary/catalog/marc/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def subfields(line):
class html_record:
def __init__(self, data):
assert len(data) == int(data[:5])
self.data = data
self.record = Record(data)
self.leader = self.record.leader

Expand Down
24 changes: 16 additions & 8 deletions openlibrary/catalog/marc/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,13 @@ def read_languages(rec: MarcBase, lang_008: str | None = None) -> list[str]:


def read_pub_date(rec: MarcBase) -> str | None:
"""
Read publish date from 260$c.
"""

def publish_date(s: str) -> str:
date = s.strip('[]')
if date.lower() == 'n.d.': # No date
if date.lower() in ('n.d.', 's.d.'): # No date
date = '[n.d.]'
return remove_trailing_number_dot(date)

Expand Down Expand Up @@ -667,15 +671,19 @@ def read_edition(rec: MarcBase) -> dict[str, Any]:
raise BadMARC("'008' field must not be blank")
publish_date = f[7:11]

if re_date.match(publish_date) and publish_date != '0000':
edition["publish_date"] = publish_date
if f[6] == 't':
edition["copyright_date"] = f[11:15]
if re_date.match(publish_date) and publish_date not in ('0000', '9999'):
edition['publish_date'] = publish_date
if f[6] == 'r' and f[11:15] > publish_date:
# Incorrect reprint date order
update_edition(rec, edition, read_pub_date, 'publish_date')
elif f[6] == 't': # Copyright date
edition['copyright_date'] = f[11:15]
if 'publish_date' not in edition: # Publication date fallback to 260$c
update_edition(rec, edition, read_pub_date, 'publish_date')
publish_country = f[15:18]
if publish_country not in ('|||', ' ', '\x01\x01\x01', '???'):
edition["publish_country"] = publish_country.strip()
languages = read_languages(rec, lang_008=f[35:38].lower())
if languages:
edition['publish_country'] = publish_country.strip()
if languages := read_languages(rec, lang_008=f[35:38].lower()):
edition['languages'] = languages
elif handle_missing_008:
update_edition(rec, edition, read_languages, 'languages')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"publishers": [
"[s.n.]"
],
"publish_date": "[n.d.]",
"publish_places": [
"[s.l.]"
]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
00419nam a22001695a 4500001001200000005001700012008004100029035001400070100003800084245002600122260004200148300001100190546001300201899001500214988001300229906000700242012717654-320110315150330.0110301s9999 pk 000 0 urd d a(PkLaDAR)0 aʻAbd-ul-Qayyum Tahir Malihabadi.10aIfadat-e homiyopethi. aLahore :bMaktabah-e Daniyal,c[s.d.] a150 p. aIn Urdu. a415_565960 a20110315 0MH
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
00436nam a22001695a 4500001001200000005001700012008004100029035001400070100003700084245003600121260004800157300001100205546001500216899001500231988001300246906000700259012716825-720110315150330.0110301s9999 pk 000 0 urd d a(PkLaDAR)0 aShah Moʻin-al-Din Ahmad Nadavi.10aʻAlimi aman jo Islami manshur. aHyderabad :bSindh National Academy,c2003. a236 p. aIn Sindhi. a415_565960 a20110315 0MH
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
01299cam 2200289Ka 4500001001300000003000600013005001700019008004100036040001800077020002500095020002200120035002100142100003000163245005200193246003200245250001600277260004700293300002500340440003000365500023100395520023800626650003900864655003100903700002100934994001200955948004200967ocn656308391OCoLC20100924175141.0100816r18732010tnu 000 1 eng d aLODcLODdLOD a9781403793966 (pbk.) a1403793964 (pbk.) a(OCoLC)6563083911 aVerne, Jules,d1828-1905.10aAround the world in eighty days /cJules Verne.3 aAround the world in 80 days aUnabridged. aFranklin, Tenn. :bDalmatian Press,c2010. axi, 210 p. ;c21 cm. 0aDalmatian Press Classics. a"Dalmatian Press Classics offer enjoyable editions of greats of literature. The unabridged texts are enhanced with helpful footnotes. Punctuation and spelling have been updated to conform to modern American usage."-t.p. verso. a"Phileas Fogg's bet that he can span the world in eighty days takes him by train, ship, sled, and elephant across ninettenth-century India, China, and America in one of the wittiest, most enjoyable classics of all time."--Back cover. 0aVoyages around the worldvFiction. 0aAdventure stories, French.1 aKnight, Kathryn. aZ0bPMR hNO HOLDINGS IN PMR - 1 OTHER HOLDINGS
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"entity_type": "org"
}
],
"publish_date": "[n.d.]",
"publish_places": [
"Ames"
],
Expand Down
13 changes: 13 additions & 0 deletions openlibrary/catalog/marc/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@
'test-publish-sn-sl-nd.mrc',
]

# Parametrized cases for test_dates; each entry is
# (MARC fixture file name inside bin_input, expected edition publish_date).
date_tests = [
    # Going by the fixture name: 9999 in the 008 date and "s.d." in 260$c,
    # so no usable date is expected and the result is the '[n.d.]' marker.
    ('9999_sd_dates.mrc', '[n.d.]'),
    # Going by the fixture name: reprint dates in 008 are in the wrong order,
    # so the expected year is presumably the one recorded in 260$c.
    ('reprint_date_wrong_order.mrc', '2010'),
    # Going by the fixture name: 9999 in the 008 date, but 260$c carries a
    # real year that should be used instead.
    ('9999_with_correct_date_in_260.mrc', '2003'),
]

TEST_DATA = Path(__file__).with_name('test_data')


Expand Down Expand Up @@ -151,6 +157,13 @@ def test_raises_no_title(self):
with pytest.raises(NoTitle):
read_edition(rec)

@pytest.mark.parametrize('marcfile,expect', date_tests)
def test_dates(self, marcfile, expect):
    """Check the publish_date that read_edition produces for one fixture.

    marcfile is the name of a binary MARC file inside the 'bin_input'
    directory of TEST_DATA; expect is the publish_date string the parsed
    edition must contain.
    """
    marc_path = TEST_DATA / 'bin_input' / marcfile
    # Load the raw record bytes and parse them into an edition dict.
    parsed = read_edition(MarcBinary(marc_path.read_bytes()))
    assert parsed['publish_date'] == expect


class TestParse:
def test_read_author_person(self):
Expand Down