-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_sample.py
52 lines (43 loc) · 1.64 KB
/
run_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from rejected_article_tracker import ScholarOneRejectedArticlesMatch
import os
import pandas as pd
def run():
    """Run the rejected-article matcher against the bundled sample spreadsheet.

    Reads ``rejected_article_tracker/data/fake_rejected_articles.xlsx``
    (resolved against the current working directory), hands its rows to
    ``ScholarOneRejectedArticlesMatch`` together with the match settings,
    prints whatever ended up in the ``results`` list, and saves that table
    to ``output.xlsx`` in the current working directory.

    The contact email is taken from the ``MY_EMAIL`` environment variable and
    falls back to an empty string when the variable is not set.
    """
    filepath = os.path.abspath(r'rejected_article_tracker/data/fake_rejected_articles.xlsx')

    # read in the sample file
    data = pd.read_excel(filepath)

    # set parameters
    config = {
        # from 0.0 - 1.0
        # Set higher for better precision, lower for better recall
        "threshold": 0.5,

        # any number from 1 - 10. If there are multiple versions of the
        # article out there, it's worth picking number >1
        "max_results_per_article": 10,

        # limit results to these types see: https://api.crossref.org/types
        "article_types": [
            'journal-article',
            'posted-content',
            'book-chapter',
            'proceedings-article',
        ],
    }

    # Convert the dataframe to a real list of dicts, one per spreadsheet row.
    # 'records' avoids the transpose (which can upcast column dtypes and
    # collapses rows that share an index label) and yields a list rather
    # than a dict view.
    articles = data.to_dict('records')

    # The CrossRef API requires an email address for lookups.
    # add MY_EMAIL as an environment variable,
    # or simply set `email = YOUR EMAIL ADDRESS`
    email = os.getenv('MY_EMAIL', '')

    # Define a 'results' list. It is passed to the matcher and read back
    # below once match() has returned.
    results = []

    # Run match
    ScholarOneRejectedArticlesMatch(
        articles=articles,
        config=config,
        email=email,
        results=results,
    ).match()

    result_df = pd.DataFrame(results)
    print('FOUND RESULTS:')
    print(result_df)

    output_path = os.path.abspath('output.xlsx')
    # Write first, report second: the success message must not appear if the
    # export raises.
    result_df.to_excel(output_path)
    print(f'output written to {output_path}')
# Script entry point: execute the sample run only when this file is launched
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()