Skip to content

Commit

Permalink
refactor: split off JSON input to document corpus conversion in rest …
Browse files Browse the repository at this point in the history
…module
  • Loading branch information
osma committed Feb 27, 2019
1 parent 3157783 commit 7624d21
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions annif/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ def analyze(project_id, text, limit, threshold):
return {'results': [hit._asdict() for hit in hits]}


def _documents_to_corpus(documents):
    """Convert an iterable of document dicts into a DocumentList.

    Each entry that has both a 'text' key and a 'subjects' key becomes one
    Document whose uris and labels are read from the 'uri' and 'label' keys
    of every item in entry['subjects']. Entries missing either key are
    silently skipped.
    """
    converted = []
    for entry in documents:
        # Skip incomplete entries instead of failing the whole conversion.
        if 'text' not in entry or 'subjects' not in entry:
            continue
        text = entry['text']
        subjects = entry['subjects']
        uris = [subject['uri'] for subject in subjects]
        labels = [subject['label'] for subject in subjects]
        converted.append(Document(text=text, uris=uris, labels=labels))
    return DocumentList(converted)


def learn(project_id, documents):
"""learn from documents and return an empty 204 response if successful"""

Expand All @@ -76,13 +85,10 @@ def learn(project_id, documents):
except ValueError:
return project_not_found_error(project_id)

corpus = [Document(text=d['text'],
uris=[subj['uri'] for subj in d['subjects']],
labels=[subj['label'] for subj in d['subjects']])
for d in documents
if 'text' in d and 'subjects' in d]
corpus = _documents_to_corpus(documents)

try:
project.learn(DocumentList(corpus))
project.learn(corpus)
except AnnifException as err:
return server_error(err)

Expand Down

0 comments on commit 7624d21

Please sign in to comment.