# Recovered from the patch "add code for partielles 2020" (commit 4e53650,
# file bin/scrap.py), whose line breaks had been collapsed.
# Besides the function below, the same patch also adds:
#   - the module-level constant   DATAURL3 = HOSTURL + 'ajax/'
#   - a branch in the `__main__` dispatch:
#         elif election == "LG20":
#             scrape_partielles()
# NOTE(review): indentation was rebuilt from the code's semantics; the
# per-tour placement of the metadata dump and of the summary print is the
# most plausible reading -- confirm against the upstream commit.


def scrape_partielles(elcode="LG20"):
    """Download the candidates' PDFs of an election, for tours 1 and 2.

    For each tour, the function walks departements -> circonscriptions ->
    candidats through the ``DATAURL3`` endpoints (via ``request_data``),
    mirrors that hierarchy as directories below ``documents/<elcode>/``,
    and calls ``downloadPDF`` for every candidate whose ``"pdf"`` field is
    not the string ``"0"``.  The collected listing is then written to
    ``documents/<elcode>/<elcode>-tour<tour>-metadata.json`` and a summary
    line is printed when the new-documents counter is non-zero.

    ``elcode`` is the election code used in directory and file names.
    Nothing is returned.

    NOTE(review): ``downloadPDF``'s return value is summed into the
    "new documents" counter, so it is presumably 1 for a freshly fetched
    file and 0 otherwise -- confirm against ``downloadPDF``.
    """

    def ensure_dir(path):
        # Create the directory (with parents) only if it does not exist yet.
        if not os.path.exists(path):
            os.makedirs(path)
        return path

    eldir = ensure_dir(os.path.join("documents", elcode))

    for tour in (1, 2):
        # Per-tour statistics, reported in the summary line.
        n_depts = n_circos = n_listed = n_published = n_new = 0
        metadata = {}

        depts_url = DATAURL3 + "%s_departements" % tour
        for dept in request_data(depts_url, "departements"):
            n_depts += 1
            depcode = dept["id"]
            depname = dept["name"]
            depurl = DATAURL3 + "%s_circonscriptions_dpt_%s" % (tour, depcode)

            # `circos` is the very dict stored in the metadata, so filling
            # it below fills the metadata entry as well.
            circos = {}
            metadata[depcode] = {
                "name": depname,
                "url": depurl,
                "circonscriptions": circos
            }
            deptdir = ensure_dir(os.path.join(eldir, depcode))

            for circo in request_data(depurl, "data"):
                n_circos += 1
                circocode = circo["codeCirconscription"]
                circoname = circo["circonscription"]
                circourl = DATAURL3 + "%s_candidats_circonscription_%s" % (tour, circocode)

                # The candidates are fetched before the metadata entry and
                # the directory are created.
                candidats = request_data(circourl, "data")
                circos[circocode] = {
                    "name": circoname,
                    "url": circourl,
                    "candidats": candidats
                }
                circodir = ensure_dir(os.path.join(deptdir, circocode))

                for candidat in candidats:
                    n_listed += 1
                    # File-name prefix: text before the first comma of the
                    # "candidat" field, spaces turned into underscores.
                    name = candidat["candidat"].split(",")[0].replace(" ", "_")
                    codeId = "%s-%s-%s-%s-%s-tour%s-" % (
                        elcode, depcode, circocode, name,
                        candidat["numPanneau"], tour
                    )
                    if candidat["pdf"] == "0":
                        # "0" marks a candidate with no published document.
                        continue
                    n_published += 1
                    n_new += downloadPDF(
                        circodir,
                        codeId + "profession_foi",
                        PDFSURL + "%s.pdf" % candidat["pdf"]
                    )

        meta_path = os.path.join(eldir, "%s-tour%s-metadata.json" % (elcode, tour))
        with open(meta_path, "w") as f:
            json.dump(metadata, f, indent=2)

        if n_new:
            # Single parenthesised argument: same output as the Python 2
            # print statement used elsewhere in the file.
            print("%s tour %s: %s new documents collected (%s total candidates are published out of %s listed in %s departments and %s circonscriptions)." % (elcode, tour, n_new, n_published, n_listed, n_depts, n_circos))