Skip to content

Commit

Permalink
Ajout librairies dépendantes - cf #150
Browse files: browse the repository at this point in the history
  • Loading branch information
amandine-sahl committed Dec 6, 2017
1 parent 4ce58a5 commit d5ff62f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
5 changes: 2 additions & 3 deletions data/scripts/import_wikimedia_commons/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

import requests
import psycopg2
from lxml import etree
import xmltodict

from SPARQLWrapper import SPARQLWrapper, JSON

Expand All @@ -24,7 +26,6 @@ def main(dbconnexion, cd_refs, refreshAtlas=True, simulate=True):
?item wdt:P3186 '%s'
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr" }
} LIMIT 200"""

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

sqlI = """INSERT INTO taxonomie.t_medias
Expand All @@ -42,11 +43,9 @@ def main(dbconnexion, cd_refs, refreshAtlas=True, simulate=True):
for result in results["results"]["bindings"]:
if (result['image']['value']):
print(' -- INSERT IMAGE')
from lxml import etree
# Recuperation des donnees sur commons
url = "https://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image=%s" % result['image']['value'].split('Special:FilePath/', 1 )[1]
r = requests.get(url)
import xmltodict
a = xmltodict.parse(r.content)
try:
aut = 'Commons'
Expand Down
20 changes: 14 additions & 6 deletions data/scripts/import_wikimedia_commons/run_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
ALTER TABLE taxonomie.t_medias ADD COLUMN source varchar(25);
ALTER TABLE taxonomie.t_medias ADD COLUMN licence varchar(100);
ALTER TABLE taxonomie.t_medias ALTER COLUMN auteur TYPE character varying(1000);
Librairies requises (à installer via pip, de préférence dans un virtualenv) :
lxml
psycopg2
requests
SPARQLWrapper
xmltodict
'''
try:
conn = psycopg2.connect(SQLALCHEMY_DATABASE_URI)
Expand All @@ -28,12 +36,12 @@

try:
cur = conn.cursor()
sql = """SELECT DISTINCT cd_ref
FROM taxonomie.bib_noms
LEFT OUTER JOIN taxonomie.t_medias USING(cd_ref)
WHERE id_media IS NULL
"""
# sql = """SELECT cd_ref from taxonomie.bib_noms LIMIT 10"""
# sql = """SELECT DISTINCT cd_ref
# FROM taxonomie.bib_noms
# LEFT OUTER JOIN taxonomie.t_medias USING(cd_ref)
# WHERE id_media IS NULL
# """
sql = """SELECT cd_ref from taxonomie.bib_noms LIMIT 10"""
# sql = """SELECT cd_ref from atlas.vm_taxons_plus_observes LIMIT 100"""
cur.execute(sql)
rows = cur.fetchall()
Expand Down

0 comments on commit d5ff62f

Please sign in to comment.