Skip to content

Commit

Permalink
Backport of the fix for #50
Browse files Browse the repository at this point in the history
  • Loading branch information
Pieter De Praetere committed Mar 17, 2016
1 parent 4b59a83 commit c7c9088
Showing 1 changed file with 54 additions and 27 deletions.
81 changes: 54 additions & 27 deletions resolver/controllers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,21 @@ def file_allowed(filename):


def import_file(file):
"""
Import a CSV file
CSV layout: PID, entity_type, title, document_type, URL, enabled, notes, format, reference, order
Column indices (same order as the fields above): 0 1 2 3 4 5 6 7 8 9
:param file:
:return:
"""
reader = UnicodeReader(file)
# NOTE: we always assume the first row is a header
# As this feature is mainly used for imports/edits from Excel, it is
# possible that Excel uses `;' as a separator instead of `,' ...
if len(reader.next()) != 10:
file.seek(0)
reader = UnicodeReader(file, delimiter=';')
reader.next() # Skip header again
reader.next() # Skip header again

# Create id for the import logging function (id = unique identifier of this import action)
import_id = str(time.time())
Expand All @@ -42,54 +49,64 @@ def import_file(file):
failures = []
bad_records = []
for record in reader:
id = record[0]
record_id = record[0]
# Skip wrong types now
# TODO: do we actually fail on importing a wrong type?
if not record[1] in entity_types:
failures.append((id, "Wrong entity type `%s'" % record[1]))
failures.append((record_id, "Wrong entity type `%s'" % record[1]))
bad_records.append(record)
continue
if not record[3] in document_types:
failures.append((id, "Wrong document type `%s'" % record[3]))
failures.append((record_id, "Wrong document type `%s'" % record[3]))
bad_records.append(record)
continue

rows += 1
if records.get(id, False):
records[id].append(record)
import_log(import_id, "Appended document to PID %s" % id)
# Check whether the record_id is already in records: if it is, we append this record. Else, we add it (as a list)
if records.get(record_id, False):
records[record_id].append(record)
import_log(import_id, "Appended document to PID %s" % record_id)
else:
records[id] = [record]
records[record_id] = [record]
count_pids += 1
import_log(import_id, "Added new PID %s" % id)
import_log(import_id, "Added new PID %s" % record_id)

for id, record_list in records.iteritems():
clean_id = cleanID(id)
for record_id, record_list in records.iteritems():
clean_id = cleanID(record_id)
ent = Entity.query. \
filter(Entity.id == clean_id).first()
if ent:
if not ent.original_id == id:
failures.append((id, "PID collision with `%s'" % ent.original_id))
if not ent.original_id == record_id:
failures.append((clean_id, "PID collision with `%s'" % ent.original_id))
bad_records += record_list
continue
else:
ent = Entity(id)
ent = Entity(clean_id)
db.session.add(ent)
db.session.flush()
log(ent.id, "Added entity `%s'" % ent)

# All records in the list have the same title and type
ent.title = record_list[0][2]
ent.type = record_list[0][1]

for record in record_list:
url = record[4] if record[4] != 'None' else ''
enabled = record[5] == '1'
if record[4] == 'None':
url = ''
else:
url = record[4]
if record[5] == '1':
enabled = '1'
else:
enabled = '0'

# record[3] = document_type
if record[3] == 'data':
if not(record[7] and record[7] in data_formats):
failures.append((id, "Format missing or invalid for PID `%s'" % ent.id))
if not (record[7] and record[7] in data_formats):
failures.append((clean_id, "Format missing or invalid for PID `%s'" % ent.id))
bad_records.append(record)
continue
# record[7] = format
doc = Data.query.filter(Data.format == record[7],
Document.entity_id == ent.id).first()
if doc:
Expand All @@ -102,9 +119,19 @@ def import_file(file):
db.session.add(doc)
log(id, "Added data document `%s'" % doc)
elif record[3] == 'representation':
doc = Representation.query. \
filter(Document.entity_id == ent.id,
Representation.order == record[9]).first()
# This function expects order in the database to be "" (empty) when not set,
# but in reality it is set to count(documents) + 1 when not provided.
# When we search for it with an unset order, we will never find it and thus
# create a new representation which is not needed.
# See https://github.com/PACKED-vzw/resolver/issues/50
if record[4] == "" or not record[4]:
# r_url is None (NULL) in the DB, but "" in the CSV
r_url = None
else:
r_url = record[4]
doc = Representation.query.filter(Document.entity_id == ent.id,
Document.url == r_url,
Document.type == record[3]).first()
if doc:
doc.url = url
doc.enabled = enabled
Expand All @@ -113,13 +140,14 @@ def import_file(file):
if record[9] and record[9] != "":
order = int(record[9])
else:
# We set order to the total number of representation documents for this entity + 1
order = Representation.query \
.filter(Document.entity_id == ent.id).count() + 1

doc = Representation(ent.id, order, url=url,
enabled=enabled, notes=record[6])
db.session.add(doc)
log(id, "Added representation document `%s'" % doc)
log(clean_id, "Added representation document `%s'" % doc)

reference = record[8] == '1'
if reference:
Expand All @@ -133,9 +161,9 @@ def import_file(file):

db.session.flush()

for id in records:
for record_id in records:
reps = Representation.query. \
filter(Document.entity_id == id). \
filter(Document.entity_id == record_id). \
order_by(Representation.order.asc()).all()
i = 1
has_reference = False
Expand All @@ -144,15 +172,14 @@ def import_file(file):
i += 1
has_reference = rep.reference

if (not has_reference) and i>1:
if (not has_reference) and i > 1:
reps[0].reference = True

db.session.commit()

return (import_id, rows, count_pids, failures, bad_records)



# TODO: script for imports
@app.route('/resolver/csv/import', methods=["POST"])
@check_privilege
Expand Down Expand Up @@ -257,4 +284,4 @@ def purge_database():
Entity.query.delete()
db.session.commit()
flash("Database has been purged", "success")
return redirect('/resolver/csv')
return redirect('/resolver/csv')

0 comments on commit c7c9088

Please sign in to comment.