Skip to content

Commit

Permalink
adjust BNF conf + banner remover (#372)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Benjamin Ooghe committed Jun 25, 2021
1 parent 0ed9914 commit 04713d6
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions hyphe_backend/lib/webarchives.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,8 +13,8 @@
"bnf.fr": {
"label": "ArchivesInternet.BNF.fr",
"description": "crawl France's official web archives maintained by BNF",
"url_prefix": "http://pcfarchivesinternet.bnf.fr",
"proxy": "pcfarchivesinternet.bnf.fr:8888"
"url_prefix": "http://pfcarchivesinternet.bnf.fr",
"proxy": "pfcarchivesinternet.bnf.fr:9115"
}
}

Expand Down Expand Up @@ -48,5 +48,5 @@ def validateArchiveDate(dt):

# Uncompiled pattern template for an archive "redirect" page. It holds one
# "%s" placeholder that must be substituted before use (presumably with the
# archive's URL prefix -- confirm against the caller). Group 1 captures the
# URL assigned to document.location.href inside the page's go() function,
# group 2 the numeric HTTP status quoted in the "Got an HTTP ... response at
# crawl time" paragraph.
RE_ARCHIVE_REDIRECT = r'function go\(\) \{.*document.location.href = "(%s/[^"]*)".*<p class="code shift red">Got an HTTP (\d+) response at crawl time</p>.*<p class="code">Redirecting to...</p>'
# Captures (group 1) the value attribute of the <input id="permalink"> element
# carrying the BANNER_PERMALIEN_LINK_CUSTOMED class.
RE_BNF_ARCHIVES_PERMALINK = re.compile(r'<input id="permalink" class="BANNER_PERMALIEN_LINK_CUSTOMED" value="([^"]+)"')
# NOTE(review): RE_BNF_ARCHIVES_BANNER is assigned twice in a row below. This
# looks like the removed and added sides of a single diff line rendered
# without +/- markers -- confirm which one belongs in the real file. If both
# statements run, only the second assignment remains in effect.
# First form: matches from the opening of the <div id="MAIN_BANNER_BNF_CUSTOM">
# tag through the end of the text (re.DOTALL lets "." span newlines).
RE_BNF_ARCHIVES_BANNER = re.compile(r'<div id="MAIN_BANNER_BNF_CUSTOM".*$', re.DOTALL)
# Second form: matches from an HTML comment opener followed by a newline,
# whitespace and "FILE ARCHIVED ON", greedily up to the last comment opener
# followed by a newline, whitespace and "END", then lazily to the first "-->"
# after it (re.DOTALL lets "." span newlines).
RE_BNF_ARCHIVES_BANNER = re.compile(r'<!--\n\s+FILE ARCHIVED ON.*<!--\n\s+END.*?-->', re.DOTALL)

0 comments on commit 04713d6

Please sign in to comment.