From af8a5be4b7b5ccb24487eaa7cda516c56355e1f2 Mon Sep 17 00:00:00 2001 From: Jueri Date: Fri, 4 Mar 2022 08:58:03 +0100 Subject: [PATCH 01/15] initial commit --- .devcontainer/Dockerfile | 24 - .devcontainer/devcontainer.json | 70 -- .devcontainer/requirements.txt | 3 - .gitignore | 3 +- {clean_bibtex => BibTexTools}/__init__.py | 0 BibTexTools/bibliography.py | 312 ++++++++ BibTexTools/cleaner.py | 85 +++ BibTexTools/parser.py | 128 ++++ BibTexTools/tests/test_bibliography.py | 91 +++ BibTexTools/tests/test_cleaner.py | 90 +++ BibTexTools/tests/test_entry.py | 114 +++ BibTexTools/tests/test_parser.py | 91 +++ Pipfile | 18 + Pipfile.lock | 573 ++++++++++++++ app.py | 95 --- clean_bibtex/clean_bibtex.py | 118 --- setup.py | 16 +- test.ipynb | 387 ++++++++++ test.json | 870 ++++++++++++++++++++++ 19 files changed, 2769 insertions(+), 319 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .devcontainer/requirements.txt rename {clean_bibtex => BibTexTools}/__init__.py (100%) create mode 100644 BibTexTools/bibliography.py create mode 100644 BibTexTools/cleaner.py create mode 100644 BibTexTools/parser.py create mode 100644 BibTexTools/tests/test_bibliography.py create mode 100644 BibTexTools/tests/test_cleaner.py create mode 100644 BibTexTools/tests/test_entry.py create mode 100644 BibTexTools/tests/test_parser.py create mode 100644 Pipfile create mode 100644 Pipfile.lock delete mode 100644 app.py delete mode 100644 clean_bibtex/clean_bibtex.py create mode 100644 test.ipynb create mode 100644 test.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index d17194d..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.187.0/containers/python-3/.devcontainer/base.Dockerfile - -# [Choice] Python version: 3, 3.9, 3.8, 3.7, 3.6 -ARG VARIANT="3.9" -FROM 
mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} - -# [Option] Install Node.js -# ARG INSTALL_NODE="true" -# ARG NODE_VERSION="lts/*" -# RUN if [ "${INSTALL_NODE}" = "true" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi - -# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. -COPY .devcontainer/requirements.txt /tmp/pip-tmp/ - -RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ - && rm -rf /tmp/pip-tmp - - -# [Optional] Uncomment this section to install additional OS packages. -# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ -# && apt-get -y install --no-install-recommends - -# [Optional] Uncomment this line to install global node packages. -# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 528b8cf..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,70 +0,0 @@ -// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: -// https://github.com/microsoft/vscode-dev-containers/tree/v0.187.0/containers/python-3 -{ - "name": "Python 3", - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - // Update 'VARIANT' to pick a Python version: 3, 3.6, 3.7, 3.8, 3.9 - "VARIANT": "3.9", - // Options - "INSTALL_NODE": "false", - "NODE_VERSION": "lts/*" - } - }, - - // Set *default* container specific settings.json values on container create. 
- "settings": { - "python.pythonPath": "/usr/local/bin/python", - "python.languageServer": "Pylance", - "python.linting.enabled": true, - "python.linting.pylintEnabled": false, - "python.linting.mypyEnabled": true, - "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", - "python.formatting.blackPath": "/usr/local/py-utils/bin/black", - "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", - "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", - "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", - "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", - "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", - "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", - "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint", - "python.formatting.blackArgs": ["--line-length", "120"], - "python.formatting.provider": "black", - "files.exclude": { - "**/*.pyc": { - "when": "$(basename).py" - }, - "**/__pycache__": true, - ".pytest_cache": true, - ".mypy_cache": true, - ".vscode": true - } - }, - - - // Add the IDs of extensions you want installed when the container is created. - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance", - "njpwerner.autodocstring" - ], - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "pip3 install --user -r .devcontainer/requirements.txt", - - // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
- //"remoteUser": "vscode" - - // "name" property scheint nicht benutzt zu werden - // siehe: https://github.com/microsoft/vscode-remote-release/issues/2590 - "runArgs": [ - "--name", - "bibtex-to-dblp", - // "--env-file", - // "${localWorkspaceFolder}/.devcontainer/.env" - ],} \ No newline at end of file diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt deleted file mode 100644 index f9c759e..0000000 --- a/.devcontainer/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -black -requests -ipykernel diff --git a/.gitignore b/.gitignore index 9e9ffbe..6d5feb7 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,5 @@ cython_debug/ # My Files data/ .devcontainer/.env -.vscode \ No newline at end of file +.vscode +build/ \ No newline at end of file diff --git a/clean_bibtex/__init__.py b/BibTexTools/__init__.py similarity index 100% rename from clean_bibtex/__init__.py rename to BibTexTools/__init__.py diff --git a/BibTexTools/bibliography.py b/BibTexTools/bibliography.py new file mode 100644 index 0000000..5243510 --- /dev/null +++ b/BibTexTools/bibliography.py @@ -0,0 +1,312 @@ +import json +import warnings +from dataclasses import dataclass, field +from typing import Dict, List, Union, Any + +STANDARD_FIELDS = [ + "address", + "author", + "booktitle", + "chapter", + "crossref", + "edition", + "editor", + "howpublished", + "institution", + "journal", + "month", + "note", + "number", + "organization", + "pages", + "publisher", + "school", + "series", + "title", + "volume", + "year", + "type", + "key", +] + + +def extract_content_of_field(field_value: str) -> str: + """Extract the value of a field by stripping all trailing whitespaces and "{}". + + Args: + field_value (str): Input field string to be cleaned. + + Returns: + str: Value of the field. 
+ """ + assert isinstance(field_value, str) + field_value = field_value.strip() + return field_value.replace("{", "").replace("}", "") + + +class Field: + """BibTex field object containing a field name and a field value.""" + + def __init__(self, name, value): + self.name: str = name + self.value: str = value + + def to_bibtex(self) -> str: + """Serialize the field object into a BibTex string. + + Returns: + str: Bibtex string of the field. + """ + return self.name + " = " + self.value + + def to_dict( + self, + ) -> Dict[str, Union[str, int]]: + """Serielize the field object into a dictionary. + + Returns: + str: Dictionary of the field. + """ + value = extract_content_of_field(self.value) + return {self.name: int(value) if value.isdigit() else value} + + +class Author: + """Author name object.""" + + def __init__(self, first, last): + self.first = first + self.last = last + self.mid = [str] + + def abbreviate(self, middle: bool) -> str: + """Abbreviate the author name, with or without middle names. + + Args: + middle (bool): True if the middle names should also be abbreviated. + + Returns: + str: Full abbreviated name string. + """ + + def _abbreviate_name(name: str) -> str: + """Abbreviate a name if not already abbreviated. + + Args: + name (str): Name to be abbreviated. + + Returns: + str: Abbreviated name. + """ + if name.endswith("."): + if " " in name: # First III. + name_split = name.split(" ") + assert len(name_split) == 2 + return name_split[0][0].upper() + "." + " " + name_split[1] + else: # Name is allready abbreviated + return name + else: + return name[0].upper() + "." + + if " Jr." in self.last: + last = self.last.split(" ")[0] + else: + last = self.last + + name_short = last + ", " + _abbreviate_name(self.first) + + if middle: + for name in self.mid: + name_short += " " + _abbreviate_name(name) + + if " Jr." in self.last: + name_short += " Jr." 
+ + self.short = name_short + return name_short + + +class Author_field(Field): + """Dedicated field for the author information.""" + + def __init__(self, name, value): + super().__init__(name, value) + self.author_list = self.split_authorlist() + + def split_authorlist(self) -> List[Author]: + """Create a list of author objects from the value of the field. + + Returns: + List[Author]: List of author objects. + """ + author_list = [] + authors_str = extract_content_of_field(self.value) + + for author in authors_str.split(" and "): + if "," in author: + author_parts = author.split(", ") + + last = author_parts[0] + mid = author_parts[1:-1] + first = author_parts[-1] + else: + author_parts = author.split( + " " + ) # if author name is not comma seperated names are in order + first = author_parts[0] + mid = [] + last = author_parts[-1] + for name in author_parts[1:-1]: + if name.lower() == "von": + last = name + " " + last + else: + mid.append(name) + + author_obj = Author(first, last) + author_obj.mid = mid + author_list.append(author_obj) + + return author_list + + def abbreviate(self, middle: bool) -> List[str]: + """Abbreviate all authors from the author list. + + Args: + middle (bool): True if middle names should be included. + + Returns: + List[str]: List of abbreviated author names. 
+ """ + author_list_abbreviated = [] + for author in self.author_list: + author_list_abbreviated.append(author.abbreviate(middle)) + + self.author_list_abbreviated = author_list_abbreviated + return author_list_abbreviated + + def to_bibtex(self) -> str: + return self.name + " = " + self.value.replace(" and ", " and\n") + + +class Journal_field(Field): + """Dedicated field for journal information.""" + + def __init__(self, name, value): + super().__init__(name, value) + + # def abbreviate(TODO): + + +@dataclass +class Entry: + """Entry class representing a document in a BibTex bibliography.""" + + string: str = "" + fields: List[str] = field(default_factory=list) + + def add_field(self, field_name: str, value: str): + """Add a field to the entry to store information about the document. The field is added + according to the field type to store the information correctly. + + Args: + field_name (str): Field type. + value (str): Value of the field. + """ + if field_name not in STANDARD_FIELDS: + warnings.warn( + UserWarning(f'Warning: "{field_name}" is not a standard Bibtex field') + ) + if field_name == "author": + field = Author_field(field_name, value) + setattr(self, field_name, field) + elif field_name == "journal": + field = Journal_field(field_name, value) # type: ignore + setattr(self, field_name, field) + else: + field = Field(field_name, value) # type: ignore + setattr(self, field_name, field) + self.fields.append(field_name) + + def to_bibtex(self, fields: List[str] = []) -> str: + """Serialize the full Entry object into a BibTex string. + + Returns: + str: Bibtex string of the Entry. 
+ """ + bibtex = [] + assert len(self.fields) > 1 + if fields == []: + fields = self.fields + for field in fields: + if field in ["key", "type"]: + continue + bibtex.append(self.__getattribute__(field).to_bibtex()) + + return ",\n".join( + ["@" + self.type.value + "{" + self.key.value] + bibtex + ["}"] # type: ignore + ) + + def to_dict(self, fields: List[str] = []) -> Dict[str, Dict[str, Union[str, int]]]: + """Serielize the entry object into a dictionary. + + Returns: + str: Dictionary of all fields. + """ + assert len(self.fields) > 1 + bibtex: Dict[str, Any] = {} + if fields == []: + fields = self.fields + + for field in set(fields): + if field == "key": + continue + bibtex = {**bibtex, **self.__getattribute__(field).to_dict()} # join dicts + return {self.key.value: bibtex} # type: ignore + + # abbreviate_names() TODO + # abbreviate_journals() TODO + + +@dataclass +class Bibliography: + """Bibliography object representing the full BibTex bibliography.""" + + entries: List[Entry] = field(default_factory=list) + + def to_bibtex(self, fields: List[str] = []) -> str: + """Serialize the bibliography object into a BibTex string. + + Returns: + str: Bibtex string of the Bibliography. + """ + assert len(self.entries) > 1 + bibtex = [] + for entry in self.entries: + bibtex.append(entry.to_bibtex(fields)) + return "\n\n\n".join(bibtex) + + def to_bib(self, path: str, fields: List[str] = []): + """Write the bibliography into a .bib file. + + Args: + path (str): Path to the bib file to write to. + """ + with open(path, "w") as fout: + fout.write(self.to_bibtex(fields)) + + def to_json(self, path: str, fields: List[str] = []): + """Write the bibliography into a JSON file. + + Args: + path (str): Path to the JSON file. 
+ """ + bibtex: Dict[str, Any] = {} + assert len(self.entries) > 1 + for entry in self.entries: + bibtex = {**bibtex, **entry.to_dict(fields)} + + with open(path, "w") as fout: + json.dump(bibtex, fout, indent=4) + + # def abbreviate_names(TODO) + # def abbreviate_yournals(TODO) diff --git a/BibTexTools/cleaner.py b/BibTexTools/cleaner.py new file mode 100644 index 0000000..8d7e451 --- /dev/null +++ b/BibTexTools/cleaner.py @@ -0,0 +1,85 @@ +import logging +import time +from typing import Optional + +import requests + +from BibTexTools.bibliography import Bibliography +from BibTexTools.parser import Parser + + +class Cleaner: + """Clean a bibliography by searchin the title in the DBLP.""" + + def __init__(self, keep_keys: bool = False, keep_unknown: bool = False): + self.keep_keys = keep_keys + self.keep_unknown = keep_unknown + + def _search_publication(self, title: str) -> Optional[str]: + """Search the DBLP with title and retrieve the publication URL of the best match. + + Args: + title (str): The title of the publication. + + Returns: + str: URL of the publication site at DBLP or None if an error occured. + """ + url = f"https://dblp.org/search/publ/api?q={title}&format=json" + result = requests.get(url) + + if result.status_code != 200: + logging.info( + f'Info: Publication with the title "{title}" could not be found.' + ) + return None + + if result.json()["result"]["hits"].get("hit"): + return result.json()["result"]["hits"]["hit"][0]["info"]["url"] + else: + logging.info( + f'Info: Publication with the title "{title}" could not be found.' + ) + return None + + def _get_dblp_bibtext(self, url: str) -> Optional[str]: + """Get the bibtext reference from a dblp publikation site URL. + Args: + url (str): URL to the publication site. + Returns: + Optional[str]: Bibtex reference for the publication or None if an error occurred. 
+ """ + r = requests.get(url + ".bib") + if r.status_code == 200: + return r.text + else: + logging.error(f'Error: Could not retrieve citation frum URL:"{url}".') + return None + + def clean(self, bibliography: Bibliography) -> Bibliography: + """Clean a given bibliography with by searching the title in the DBLP and retrieving the citation from th ebest match. + + Args: + bibliography (Bibliography): Bibliography to be cleaned. + + Returns: + Bibliography: Cleaned bibliography. + """ + cleaned_bib = Bibliography() + parser = Parser() + assert len(bibliography.entries) > 0 + + for entry in bibliography.entries: + if publication_url := self._search_publication(entry.title.value): # type: ignore + if dblp_citation := self._get_dblp_bibtext(publication_url): + cleaned_entry = parser.parse(dblp_citation) + cleaned_entry = cleaned_entry.entries[0] # type: ignore + if self.keep_keys: + cleaned_entry.key.value = entry.key.value # type: ignore + cleaned_bib.entries.append(cleaned_entry) # type: ignore + elif self.keep_unknown: + cleaned_bib.entries.append(entry) + elif self.keep_unknown: + cleaned_bib.entries.append(entry) + + time.sleep(1) # abide dblp crawl-delay + return cleaned_bib diff --git a/BibTexTools/parser.py b/BibTexTools/parser.py new file mode 100644 index 0000000..8d1c6fb --- /dev/null +++ b/BibTexTools/parser.py @@ -0,0 +1,128 @@ +import re +from typing import Tuple + +from BibTexTools.bibliography import Bibliography, Entry + +TRAILING_WHITESPACES = re.compile(r"\s\s+") + + +def clean_line(line: str) -> str: + """Remove trailing white spaces and newlines from string. + + Args: + line (str): String to be cleaned. + + Returns: + str: Cleaned string. + """ + line = line.replace("\n", "") # clean newline + line = TRAILING_WHITESPACES.sub("", line) + + return line.strip() + + +def get_type(line: str) -> str: + """Extract the value of a BibTex type field following the "@" at the beginning of a BibTex reference. 
+ + Args: + line (str): Full line the type string is expected in. + + Returns: + str: Document type. + """ + results = re.match(r"@(.*){", line) + if results: + entry_type = results.groups()[0] + return entry_type + + +def get_key(line: str) -> str: + """Extract the citation key of a BibTex Document. + + Args: + line (str): Full line the key string is expected in. + + Returns: + str: Document citation key. + """ + results = re.match(r"@.*{(.*),", line) + if results: + key = results.groups()[0] + return key + + +def parse_field(line: str) -> Tuple[str, str]: + """Parse a BibTex field into its field name and field value. + + Args: + line (str): Full line containing the field and value. + + Returns: + Tuple[str, str]: Name of the BibTex field and its value. + """ + assert "=" in line + field_str = line.split("=") + field_name: str = field_str[0].strip() + value: str = field_str[-1] + value = value.strip(" ,") + return field_name, value + + +class Parser: + """Load a BibTex bibliography.""" + + def parse(self, bibtex_string: str) -> Bibliography: + """Parse a BibTex string into a BibTexTools bibliography. + + Args: + bibtex_string (str): Multiline string containing one or more BibTex entries to be parsed. + + Returns: + Bibliography: Bibliography object. 
+ """ + bibliography = Bibliography() + entry = Entry() + field_str: str = "" + + for line in bibtex_string.split("\n"): + line = clean_line(line) + if line == "}": + continue + elif not line.strip(): + continue + field_str += line + " " + + if field_str.startswith("@"): # entry start + if hasattr(entry, "key"): + bibliography.entries.append(entry) # add last entry + entry = Entry() + + entry.string += field_str + entry.add_field("type", get_type(field_str)) + entry.add_field("key", get_key(field_str)) + field_str = "" + + elif field_str.count("{") != field_str.count("}"): # incomplete field + continue + else: + field_name, value = parse_field(field_str) + entry.add_field(field_name, value) + entry.string += field_str + field_str = "" + + bibliography.entries.append(entry) + return bibliography + + def from_file(self, bibtes_path: str) -> Bibliography: + """Parse a BibTex file into a BibTexTools bibliography. + + Args: + bibtes_path (str): Path to the bibtex file. + + Returns: + Bibliography: Bibliography object. 
+ """ + with open(bibtes_path, "r") as fin: + bibtex_string = fin.read() + + return self.parse(bibtex_string) diff --git a/BibTexTools/tests/test_bibliography.py b/BibTexTools/tests/test_bibliography.py new file mode 100644 index 0000000..dc590d0 --- /dev/null +++ b/BibTexTools/tests/test_bibliography.py @@ -0,0 +1,91 @@ +import os +from BibTexTools.parser import Parser +import pytest +import json + +Bib_string = """@Atype{Akey, +author = {A1_First von A1_Last and + von A2_Last, A2_First}, +title = {A_Title}, +journal = {A_Journal}, +volume = {A_Volume}, +year = {A_Year}, +url = {A_Url}, +} + + +@Btype{Bkey, +author = {B1_First von B1_Last and + von B2_Last, B2_First}, +title = {B_Title}, +journal = {B_Journal}, +volume = {B_Volume}, +year = {B_Year}, +url = {B_Url}, +}""" + +Bib_string_fields = """@Atype{Akey, +author = {A1_First von A1_Last and + von A2_Last, A2_First}, +title = {A_Title}, +} + + +@Btype{Bkey, +author = {B1_First von B1_Last and + von B2_Last, B2_First}, +title = {B_Title}, +}""" + + +@pytest.fixture +def bib_obj_full(): + with pytest.warns(UserWarning): + parser_obj = Parser() + file_path = os.path.join("BibTexTools", "tests", "data", "full.bib") + parsed_bibtex = parser_obj.from_file(file_path) + return parsed_bibtex + + +class TestClassBibliography: + def test_to_bibtex(self, bib_obj_full): + bibtex_str = bib_obj_full.to_bibtex() + assert bibtex_str.replace(" ", "") == Bib_string.replace(" ", "") + + def test_to_bibtex_fields(self, bib_obj_full): + fields = ["author", "title"] + bibtex_str = bib_obj_full.to_bibtex(fields) + assert bibtex_str.replace(" ", "") == Bib_string_fields.replace(" ", "") + + def test_to_bib(self, bib_obj_full): + file_path = os.path.join("BibTexTools", "tests", "data", "to_bib.bib") + bib_obj_full.to_bib(file_path) + assert os.path.isfile(file_path) + os.remove(file_path) + + def test_to_bib_fields(self): + pass + # TODO + + def test_to_json_exists(self, bib_obj_full): + file_path = os.path.join("BibTexTools", "tests", 
"data", "to_json.json") + bib_obj_full.to_json(file_path) + assert os.path.isfile(file_path) + os.remove(file_path) + + def test_to_json_file(self, bib_obj_full): + file_path = os.path.join("BibTexTools", "tests", "data", "to_json.json") + bib_obj_full.to_json(file_path) + with open(file_path, "r") as fin: + new_file = json.load(fin) + + ref_file_path = os.path.join("BibTexTools", "tests", "data", "to_json_ref.json") + with open(ref_file_path, "r") as fin: + ref_file = json.load(fin) + + assert ref_file == new_file + os.remove(file_path) + + def test_to_json_fields(self): + pass + # TODO diff --git a/BibTexTools/tests/test_cleaner.py b/BibTexTools/tests/test_cleaner.py new file mode 100644 index 0000000..99e45de --- /dev/null +++ b/BibTexTools/tests/test_cleaner.py @@ -0,0 +1,90 @@ +import os + +import pytest +from BibTexTools.cleaner import Cleaner +from BibTexTools.parser import Parser + + +@pytest.fixture +def cleaner_obj_simple(): + cleaner = Cleaner() + return cleaner + + +@pytest.fixture +def cleaner_obj_keep_keys(): + cleaner = Cleaner(keep_keys=True) + return cleaner + + +@pytest.fixture +def cleaner_obj_keep_unknown(): + cleaner = Cleaner(keep_unknown=True) + return cleaner + + +@pytest.fixture +def cleaner_obj_ignore_unknown(): + cleaner = Cleaner(keep_unknown=False) + return cleaner + + +@pytest.fixture +def bib_bert(): + bert_path = os.path.join("BibTexTools", "tests", "data", "bert.bib") + parser = Parser() + bert_bib = parser.from_file(bert_path) + return bert_bib + + +@pytest.fixture +def bib_bert_short(): + bert_path = os.path.join("BibTexTools", "tests", "data", "bib_bert_short.bib") + parser = Parser() + bert_bib = parser.from_file(bert_path) + return bert_bib + + +@pytest.fixture +def bib_unknown(): + bib_unknown = os.path.join("BibTexTools", "tests", "data", "bib_unknown.bib") + parser = Parser() + bib_unknown = parser.from_file(bib_unknown) + return bib_unknown + + +class TestClassCleaner: + def test_clean_simple(self, cleaner_obj_simple, 
bib_bert_short): + with pytest.warns(UserWarning): + cleaned_bib = cleaner_obj_simple.clean(bib_bert_short) + cleaned_entry = cleaned_bib.entries[0] + assert cleaned_entry.year.value == "{2019}" + assert cleaned_entry.key.value == "DBLP:conf/naacl/DevlinCLT19" + assert ( + cleaned_entry.publisher.value + == r"{Association for Computational Linguistics}" + ) + + def test_clean_keep_keys(self, cleaner_obj_keep_keys, bib_bert_short): + with pytest.warns(UserWarning): + cleaned_bib = cleaner_obj_keep_keys.clean(bib_bert_short) + cleaned_entry = cleaned_bib.entries[0] + assert cleaned_entry.year.value == "{2019}" + assert cleaned_entry.key.value == "devlin2018bert" + assert ( + cleaned_entry.publisher.value + == r"{Association for Computational Linguistics}" + ) + + def test_keep_unknown(self, cleaner_obj_keep_unknown, bib_unknown): + with pytest.warns(UserWarning): + cleaned_bib = cleaner_obj_keep_unknown.clean(bib_unknown) + assert len(cleaned_bib.entries) == 2 + assert cleaned_bib.entries[0].key.value == "DBLP:conf/naacl/DevlinCLT19" + assert cleaned_bib.entries[1].key.value == "unknown" + + def test_ignore_unknown(self, cleaner_obj_ignore_unknown, bib_unknown): + with pytest.warns(UserWarning): + cleaned_bib = cleaner_obj_ignore_unknown.clean(bib_unknown) + assert len(cleaned_bib.entries) == 1 + assert cleaned_bib.entries[0].key.value == "DBLP:conf/naacl/DevlinCLT19" diff --git a/BibTexTools/tests/test_entry.py b/BibTexTools/tests/test_entry.py new file mode 100644 index 0000000..2857a28 --- /dev/null +++ b/BibTexTools/tests/test_entry.py @@ -0,0 +1,114 @@ +import os +from BibTexTools.parser import Parser +from BibTexTools.bibliography import ( + Author_field, + Entry, + Author_field, + Journal_field, + Author, +) +import pytest + +A_string = """@Atype{Akey, +author = {A1_First von A1_Last and + von A2_Last, A2_First}, +title = {A_Title}, +journal = {A_Journal}, +volume = {A_Volume}, +year = {A_Year}, +url = {A_Url}, +}""" + +A_string_fields = """@Atype{Akey, 
+author = {A1_First von A1_Last and + von A2_Last, A2_First}, +title = {A_Title}, +}""" + + +@pytest.fixture +def entry_obj(): + parser_obj = Parser() + file_path = os.path.join("BibTexTools", "tests", "data", "authors_abbreviate.bib") + parsed_bibtex = parser_obj.from_file(file_path) + entry = parsed_bibtex.entries[0] + return entry + + +@pytest.fixture +def entry_obj_full(): + with pytest.warns(UserWarning): + parser_obj = Parser() + file_path = os.path.join("BibTexTools", "tests", "data", "full.bib") + parsed_bibtex = parser_obj.from_file(file_path) + entry = parsed_bibtex.entries[0] + return entry + + +@pytest.fixture +def empty_entry(): + entry = Entry() + return entry + + +class TestClassEntety: + def test_add_field(self, empty_entry): + empty_entry.add_field("year", "2020") + assert empty_entry.year.value == "2020" + assert empty_entry.year.name == "year" + + def test_add_nonstandard_field(self, empty_entry): + with pytest.warns(UserWarning): + empty_entry.add_field("my_field", "value") + assert empty_entry.my_field.value == "value" + assert empty_entry.my_field.name == "my_field" + + def test_add_author_field(self, empty_entry): + empty_entry.add_field("author", "{first mid last}") + assert isinstance(empty_entry.author, Author_field) + assert isinstance(empty_entry.author.author_list[0], Author) + assert empty_entry.author.author_list[0].first == "first" + assert empty_entry.author.author_list[0].mid == ["mid"] + assert empty_entry.author.author_list[0].last == "last" + + def test_add_journal(self, empty_entry): + empty_entry.add_field("journal", "my_journal") + assert isinstance(empty_entry.journal, Journal_field) + + def test_author_abbreviation(self, entry_obj): + author_list_abbreviated = entry_obj.author.abbreviate(middle=True) + + assert author_list_abbreviated[0] == "A1_Last, A. B." + assert author_list_abbreviated[1] == "A2_Last, A. B." + assert author_list_abbreviated[2] == "A3_Last, A. III." + assert author_list_abbreviated[3] == "A4_Last, A. B. 
Jr." + + def test_to_bibtex(self, entry_obj_full): + bibtex_str = entry_obj_full.to_bibtex() + assert bibtex_str.replace(" ", "") == A_string.replace(" ", "") + + def test_to_bibtex_fields(self, entry_obj_full): + fields = ["type", "author", "title"] + bibtex_str = entry_obj_full.to_bibtex(fields) + assert bibtex_str.replace(" ", "") == A_string_fields.replace(" ", "") + + def test_to_dict(self, entry_obj_full): + ref_dict = { + "Akey": { + "type": "Atype", + "author": "A1_First von A1_Last and von A2_Last, A2_First", + "title": "A_Title", + "journal": "A_Journal", + "volume": "A_Volume", + "year": "A_Year", + "url": "A_Url", + } + } + entry_dict = entry_obj_full.to_dict() + assert ref_dict == entry_dict + + def test_to_dict_fields(self, entry_obj_full): + fields = ["type", "author", "title"] + entry_dict = entry_obj_full.to_dict(fields) + assert len(entry_dict["Akey"].keys()) == 3 + assert set(entry_dict["Akey"].keys()) == set(fields) diff --git a/BibTexTools/tests/test_parser.py b/BibTexTools/tests/test_parser.py new file mode 100644 index 0000000..19f3461 --- /dev/null +++ b/BibTexTools/tests/test_parser.py @@ -0,0 +1,91 @@ +import pytest +import os +from BibTexTools.parser import Parser + + +@pytest.fixture +def bib_simple(): + with open(os.path.join("BibTexTools", "tests", "data", "simple.bib"), "r") as fin: + bibtex_string = fin.read() + return bibtex_string + + +@pytest.fixture +def parser_obj(): + return Parser() + + +class TestClassParser: + def test_parse_len(self, parser_obj, bib_simple): + parsed_bibtex = parser_obj.parse(bib_simple) + assert isinstance(parsed_bibtex.entries, list) + assert len(parsed_bibtex.entries) == 1 + + def test_parse_from_string(self, parser_obj, bib_simple): + parsed_bibtex = parser_obj.parse(bib_simple) + entry = parsed_bibtex.entries[0] + + assert entry.key.value == "key" + assert entry.type.value == "type" + assert entry.journal.value == r"{myjournal}" + assert entry.title.value == r"{mytitle}" + + def 
test_file_not_exists(self, parser_obj): + with pytest.raises(FileNotFoundError): + parser_obj.from_file("not_there.bib") + + def test_parse_from_file(self, parser_obj): + file_path = os.path.join("BibTexTools", "tests", "data", "simple.bib") + parsed_bibtex = parser_obj.from_file(file_path) + entry = parsed_bibtex.entries[0] + + assert entry.key.value == "key" + assert entry.type.value == "type" + assert entry.journal.value == r"{myjournal}" + assert entry.title.value == r"{mytitle}" + + def test_not_standard_field(self, parser_obj): + file_path = os.path.join("BibTexTools", "tests", "data", "not_standard.bib") + with pytest.warns(UserWarning): + parser_obj.from_file(file_path) + + def test_authors(self, parser_obj): + file_path = os.path.join("BibTexTools", "tests", "data", "authors.bib") + parsed_bibtex = parser_obj.from_file(file_path) + entry = parsed_bibtex.entries[0] + + assert entry.author.author_list[0].first == "A1_First" + assert entry.author.author_list[0].last == "von A1_Last" + assert entry.author.author_list[0].mid == [] + assert entry.author.author_list[1].first == "A2_First" + assert entry.author.author_list[1].last == "von A2_Last" + assert entry.author.author_list[1].mid == [] + assert entry.author.author_list[2].first == "A3_First" + assert entry.author.author_list[2].last == "von A3_Last" + assert entry.author.author_list[2].mid == ["A3_Jr"] + assert entry.author.author_list[3].first == "A4_First" + assert entry.author.author_list[3].last == "A4_Last" + assert entry.author.author_list[3].mid == ["A4_Mid", "A4_Mid2"] + + def test_authors_abbreviate(self, parser_obj): + file_path = os.path.join( + "BibTexTools", "tests", "data", "authors_abbreviate.bib" + ) + parsed_bibtex = parser_obj.from_file(file_path) + entry = parsed_bibtex.entries[0] + + assert entry.author.author_list[0].first == "A1_First" + assert entry.author.author_list[0].last == "A1_Last" + assert entry.author.author_list[0].mid == ["B1_Mid"] + + assert 
entry.author.author_list[1].first == "A2_First" + assert entry.author.author_list[1].last == "A2_Last" + assert entry.author.author_list[1].mid == ["B."] + + assert entry.author.author_list[2].first == "A3_First III." + assert entry.author.author_list[2].last == "A3_Last" + assert entry.author.author_list[2].mid == [] + + assert entry.author.author_list[3].first == "A4_First" + assert entry.author.author_list[3].last == "A4_Last Jr." + assert entry.author.author_list[3].mid == ["B4_Mid"] diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..78b8cfa --- /dev/null +++ b/Pipfile @@ -0,0 +1,18 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +mypy = "*" +types-requests = "*" + +[dev-packages] +black = "*" +pytest = "*" +mypy = "*" +ipykernel = "*" +bibtextools = {editable = true, path = "."} + +[requires] +python_version = "3" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..1942de0 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,573 @@ +{ + "_meta": { + "hash": { + "sha256": "fb15753e83a62055755228a58e87aabef3a95c6646fc3209f3c375dd585daf7c" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "mypy": { + "hashes": [ + "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce", + "sha256:1171f2e0859cfff2d366da2c7092b06130f232c636a3f7301e3feb8b41f6377d", + "sha256:1b06268df7eb53a8feea99cbfff77a6e2b205e70bf31743e786678ef87ee8069", + "sha256:1b65714dc296a7991000b6ee59a35b3f550e0073411ac9d3202f6516621ba66c", + "sha256:1bf752559797c897cdd2c65f7b60c2b6969ffe458417b8d947b8340cc9cec08d", + "sha256:300717a07ad09525401a508ef5d105e6b56646f7942eb92715a1c8d610149714", + "sha256:3c5b42d0815e15518b1f0990cff7a705805961613e701db60387e6fb663fe78a", + "sha256:4365c60266b95a3f216a3047f1d8e3f895da6c7402e9e1ddfab96393122cc58d", + 
"sha256:50c7346a46dc76a4ed88f3277d4959de8a2bd0a0fa47fa87a4cde36fe247ac05", + "sha256:5b56154f8c09427bae082b32275a21f500b24d93c88d69a5e82f3978018a0266", + "sha256:74f7eccbfd436abe9c352ad9fb65872cc0f1f0a868e9d9c44db0893440f0c697", + "sha256:7b3f6f557ba4afc7f2ce6d3215d5db279bcf120b3cfd0add20a5d4f4abdae5bc", + "sha256:8c11003aaeaf7cc2d0f1bc101c1cc9454ec4cc9cb825aef3cafff8a5fdf4c799", + "sha256:8ca7f8c4b1584d63c9a0f827c37ba7a47226c19a23a753d52e5b5eddb201afcd", + "sha256:c89702cac5b302f0c5d33b172d2b55b5df2bede3344a2fbed99ff96bddb2cf00", + "sha256:d8f1ff62f7a879c9fe5917b3f9eb93a79b78aad47b533911b853a757223f72e7", + "sha256:d9d2b84b2007cea426e327d2483238f040c49405a6bf4074f605f0156c91a47a", + "sha256:e839191b8da5b4e5d805f940537efcaa13ea5dd98418f06dc585d2891d228cf0", + "sha256:f9fe20d0872b26c4bba1c1be02c5340de1019530302cf2dcc85c7f9fc3252ae0", + "sha256:ff3bf387c14c805ab1388185dd22d6b210824e164d4bb324b195ff34e322d166" + ], + "index": "pypi", + "version": "==0.931" + }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "version": "==0.4.3" + }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.1" + }, + "types-requests": { + "hashes": [ + "sha256:506279bad570c7b4b19ac1f22e50146538befbe0c133b2cea66a9b04a533a859", + "sha256:6a7ed24b21780af4a5b5e24c310b2cd885fb612df5fd95584d03d87e5f2a195a" + ], + "index": "pypi", + "version": "==2.27.11" + }, + "types-urllib3": { + "hashes": [ + "sha256:a26898f530e6c3f43f25b907f2b884486868ffd56a9faa94cbf9b3eb6e165d6a", + "sha256:d755278d5ecd7a7a6479a190e54230f241f1a99c19b81518b756b19dc69e518c" + ], + "version": "==1.26.10" + }, + "typing-extensions": { + "hashes": [ + 
"sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", + "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" + ], + "markers": "python_version >= '3.6'", + "version": "==4.1.1" + } + }, + "develop": { + "appnope": { + "hashes": [ + "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442", + "sha256:dd83cd4b5b460958838f6eb3000c660b1f9caf2a5b1de4264e941512f603258a" + ], + "markers": "platform_system == 'Darwin'", + "version": "==0.1.2" + }, + "asttokens": { + "hashes": [ + "sha256:0844691e88552595a6f4a4281a9f7f79b8dd45ca4ccea82e5e05b4bbdb76705c", + "sha256:9a54c114f02c7a9480d56550932546a3f1fe71d8a02f1bc7ccd0ee3ee35cf4d5" + ], + "version": "==2.0.5" + }, + "attrs": { + "hashes": [ + "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4", + "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.4.0" + }, + "backcall": { + "hashes": [ + "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", + "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255" + ], + "version": "==0.2.0" + }, + "bibtextools": { + "editable": true, + "path": "." 
+ }, + "black": { + "hashes": [ + "sha256:07e5c049442d7ca1a2fc273c79d1aecbbf1bc858f62e8184abe1ad175c4f7cc2", + "sha256:0e21e1f1efa65a50e3960edd068b6ae6d64ad6235bd8bfea116a03b21836af71", + "sha256:1297c63b9e1b96a3d0da2d85d11cd9bf8664251fd69ddac068b98dc4f34f73b6", + "sha256:228b5ae2c8e3d6227e4bde5920d2fc66cc3400fde7bcc74f480cb07ef0b570d5", + "sha256:2d6f331c02f0f40aa51a22e479c8209d37fcd520c77721c034517d44eecf5912", + "sha256:2ff96450d3ad9ea499fc4c60e425a1439c2120cbbc1ab959ff20f7c76ec7e866", + "sha256:3524739d76b6b3ed1132422bf9d82123cd1705086723bc3e235ca39fd21c667d", + "sha256:35944b7100af4a985abfcaa860b06af15590deb1f392f06c8683b4381e8eeaf0", + "sha256:373922fc66676133ddc3e754e4509196a8c392fec3f5ca4486673e685a421321", + "sha256:5fa1db02410b1924b6749c245ab38d30621564e658297484952f3d8a39fce7e8", + "sha256:6f2f01381f91c1efb1451998bd65a129b3ed6f64f79663a55fe0e9b74a5f81fd", + "sha256:742ce9af3086e5bd07e58c8feb09dbb2b047b7f566eb5f5bc63fd455814979f3", + "sha256:7835fee5238fc0a0baf6c9268fb816b5f5cd9b8793423a75e8cd663c48d073ba", + "sha256:8871fcb4b447206904932b54b567923e5be802b9b19b744fdff092bd2f3118d0", + "sha256:a7c0192d35635f6fc1174be575cb7915e92e5dd629ee79fdaf0dcfa41a80afb5", + "sha256:b1a5ed73ab4c482208d20434f700d514f66ffe2840f63a6252ecc43a9bc77e8a", + "sha256:c8226f50b8c34a14608b848dc23a46e5d08397d009446353dad45e04af0c8e28", + "sha256:ccad888050f5393f0d6029deea2a33e5ae371fd182a697313bdbd835d3edaf9c", + "sha256:dae63f2dbf82882fa3b2a3c49c32bffe144970a573cd68d247af6560fc493ae1", + "sha256:e2f69158a7d120fd641d1fa9a921d898e20d52e44a74a6fbbcc570a62a6bc8ab", + "sha256:efbadd9b52c060a8fc3b9658744091cb33c31f830b3f074422ed27bad2b18e8f", + "sha256:f5660feab44c2e3cb24b2419b998846cbb01c23c7fe645fee45087efa3da2d61", + "sha256:fdb8754b453fb15fad3f72cd9cad3e16776f0964d67cf30ebcbf10327a3777a3" + ], + "index": "pypi", + "version": "==22.1.0" + }, + "click": { + "hashes": [ + "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", + 
"sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb" + ], + "markers": "python_version >= '3.6'", + "version": "==8.0.4" + }, + "debugpy": { + "hashes": [ + "sha256:01e98c594b3e66d529e40edf314f849cd1a21f7a013298df58cd8e263bf8e184", + "sha256:16db27b4b91991442f91d73604d32080b30de655aca9ba821b1972ea8171021b", + "sha256:17a25ce9d7714f92fc97ef00cc06269d7c2b163094990ada30156ed31d9a5030", + "sha256:194f95dd3e84568b5489aab5689a3a2c044e8fdc06f1890b8b4f70b6b89f2778", + "sha256:1ec3a086e14bba6c472632025b8fe5bdfbaef2afa1ebd5c6615ce6ed8d89bc67", + "sha256:23df67fc56d59e386c342428a7953c2c06cc226d8525b11319153e96afb65b0c", + "sha256:26fbe53cca45a608679094791ce587b6e2798acd1d4777a8b303b07622e85182", + "sha256:2b073ad5e8d8c488fbb6a116986858bab0c9c4558f28deb8832c7a5a27405bd6", + "sha256:318f81f37341e4e054b4267d39896b73cddb3612ca13b39d7eea45af65165e1d", + "sha256:3a457ad9c0059a21a6c7d563c1f18e924f5cf90278c722bd50ede6f56b77c7fe", + "sha256:4404a62fb5332ea5c8c9132290eef50b3a0ba38cecacad5529e969a783bcbdd7", + "sha256:5d76a4fd028d8009c3faf1185b4b78ceb2273dd2499447664b03939e0368bb90", + "sha256:70b422c63a833630c33e3f9cdbd9b6971f8c5afd452697e464339a21bbe862ba", + "sha256:82f5f9ce93af6861a0713f804e62ab390bb12a17f113153e47fea8bbb1dfbe36", + "sha256:a2aa64f6d2ca7ded8a7e8a4e7cae3bc71866b09876b7b05cecad231779cb9156", + "sha256:b2df2c373e85871086bd55271c929670cd4e1dba63e94a08d442db830646203b", + "sha256:b5b3157372e0e0a1297a8b6b5280bcf1d35a40f436c7973771c972726d1e32d5", + "sha256:d2b09e91fbd1efa4f4fda121d49af89501beda50c18ed7499712c71a4bf3452e", + "sha256:d876db8c312eeb02d85611e0f696abe66a2c1515e6405943609e725d5ff36f2a", + "sha256:f3a3dca9104aa14fd4210edcce6d9ce2b65bd9618c0b222135a40b9d6e2a9eeb", + "sha256:f73988422b17f071ad3c4383551ace1ba5ed810cbab5f9c362783d22d40a08dc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.5.1" + }, + "decorator": { + "hashes": [ + 
"sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "entrypoints": { + "hashes": [ + "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4", + "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f" + ], + "markers": "python_version >= '3.6'", + "version": "==0.4" + }, + "executing": { + "hashes": [ + "sha256:c6554e21c6b060590a6d3be4b82fb78f8f0194d809de5ea7df1c093763311501", + "sha256:d1eef132db1b83649a3905ca6dd8897f71ac6f8cac79a7e58a1a09cf137546c9" + ], + "version": "==0.8.3" + }, + "iniconfig": { + "hashes": [ + "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", + "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" + ], + "version": "==1.1.1" + }, + "ipykernel": { + "hashes": [ + "sha256:4fae9df6e192837552b2406a6052d707046dd2e153860be73c68484bacba18ed", + "sha256:f95070a2dfd3147f8ab19f18ee46733310813758593745e07ec18fb08b409f1d" + ], + "index": "pypi", + "version": "==6.9.1" + }, + "ipython": { + "hashes": [ + "sha256:6f56bfaeaa3247aa3b9cd3b8cbab3a9c0abf7428392f97b21902d12b2f42a381", + "sha256:8138762243c9b3a3ffcf70b37151a2a35c23d3a29f9743878c33624f4207be3d" + ], + "markers": "python_version >= '3.8'", + "version": "==8.1.1" + }, + "jedi": { + "hashes": [ + "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d", + "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab" + ], + "markers": "python_version >= '3.6'", + "version": "==0.18.1" + }, + "jupyter-client": { + "hashes": [ + "sha256:4ea61033726c8e579edb55626d8ee2e6bf0a83158ddf3751b8dd46b2c5cd1e96", + "sha256:d56f1c57bef42ff31e61b1185d3348a5b2bcde7c9a05523ae4dbe5ee0871797c" + ], + "markers": "python_full_version >= '3.6.1'", + "version": "==7.1.2" + }, + "jupyter-core": { + "hashes": [ + 
"sha256:d69baeb9ffb128b8cd2657fcf2703f89c769d1673c851812119e3a2a0e93ad9a", + "sha256:f875e4d27e202590311d468fa55f90c575f201490bd0c18acabe4e318db4a46d" + ], + "markers": "python_version >= '3.6'", + "version": "==4.9.2" + }, + "matplotlib-inline": { + "hashes": [ + "sha256:a04bfba22e0d1395479f866853ec1ee28eea1485c1d69a6faf00dc3e24ff34ee", + "sha256:aed605ba3b72462d64d475a21a9296f400a19c4f74a31b59103d2a99ffd5aa5c" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.3" + }, + "mypy": { + "hashes": [ + "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce", + "sha256:1171f2e0859cfff2d366da2c7092b06130f232c636a3f7301e3feb8b41f6377d", + "sha256:1b06268df7eb53a8feea99cbfff77a6e2b205e70bf31743e786678ef87ee8069", + "sha256:1b65714dc296a7991000b6ee59a35b3f550e0073411ac9d3202f6516621ba66c", + "sha256:1bf752559797c897cdd2c65f7b60c2b6969ffe458417b8d947b8340cc9cec08d", + "sha256:300717a07ad09525401a508ef5d105e6b56646f7942eb92715a1c8d610149714", + "sha256:3c5b42d0815e15518b1f0990cff7a705805961613e701db60387e6fb663fe78a", + "sha256:4365c60266b95a3f216a3047f1d8e3f895da6c7402e9e1ddfab96393122cc58d", + "sha256:50c7346a46dc76a4ed88f3277d4959de8a2bd0a0fa47fa87a4cde36fe247ac05", + "sha256:5b56154f8c09427bae082b32275a21f500b24d93c88d69a5e82f3978018a0266", + "sha256:74f7eccbfd436abe9c352ad9fb65872cc0f1f0a868e9d9c44db0893440f0c697", + "sha256:7b3f6f557ba4afc7f2ce6d3215d5db279bcf120b3cfd0add20a5d4f4abdae5bc", + "sha256:8c11003aaeaf7cc2d0f1bc101c1cc9454ec4cc9cb825aef3cafff8a5fdf4c799", + "sha256:8ca7f8c4b1584d63c9a0f827c37ba7a47226c19a23a753d52e5b5eddb201afcd", + "sha256:c89702cac5b302f0c5d33b172d2b55b5df2bede3344a2fbed99ff96bddb2cf00", + "sha256:d8f1ff62f7a879c9fe5917b3f9eb93a79b78aad47b533911b853a757223f72e7", + "sha256:d9d2b84b2007cea426e327d2483238f040c49405a6bf4074f605f0156c91a47a", + "sha256:e839191b8da5b4e5d805f940537efcaa13ea5dd98418f06dc585d2891d228cf0", + "sha256:f9fe20d0872b26c4bba1c1be02c5340de1019530302cf2dcc85c7f9fc3252ae0", + 
"sha256:ff3bf387c14c805ab1388185dd22d6b210824e164d4bb324b195ff34e322d166" + ], + "index": "pypi", + "version": "==0.931" + }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "version": "==0.4.3" + }, + "nest-asyncio": { + "hashes": [ + "sha256:3fdd0d6061a2bb16f21fe8a9c6a7945be83521d81a0d15cff52e9edee50101d6", + "sha256:f969f6013a16fadb4adcf09d11a68a4f617c6049d7af7ac2c676110169a63abd" + ], + "markers": "python_version >= '3.5'", + "version": "==1.5.4" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "markers": "python_version >= '3.6'", + "version": "==21.3" + }, + "parso": { + "hashes": [ + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" + ], + "markers": "python_version >= '3.6'", + "version": "==0.8.3" + }, + "pathspec": { + "hashes": [ + "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", + "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1" + ], + "version": "==0.9.0" + }, + "pexpect": { + "hashes": [ + "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", + "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.8.0" + }, + "pickleshare": { + "hashes": [ + "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", + "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" + ], + "version": "==0.7.5" + }, + "platformdirs": { + "hashes": [ + "sha256:7535e70dfa32e84d4b34996ea99c5e432fa29a708d0f4e394bbcb2a8faa4f16d", + "sha256:bcae7cab893c2d310a711b70b24efb93334febe65f8de776ee320b517471e227" + ], 
+ "markers": "python_version >= '3.7'", + "version": "==2.5.1" + }, + "pluggy": { + "hashes": [ + "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159", + "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.0" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:30129d870dcb0b3b6a53efdc9d0a83ea96162ffd28ffe077e94215b233dc670c", + "sha256:9f1cd16b1e86c2968f2519d7fb31dd9d669916f515612c269d14e9ed52b51650" + ], + "markers": "python_full_version >= '3.6.2'", + "version": "==3.0.28" + }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "version": "==0.7.0" + }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, + "py": { + "hashes": [ + "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", + "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.11.0" + }, + "pygments": { + "hashes": [ + "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65", + "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a" + ], + "markers": "python_version >= '3.5'", + "version": "==2.11.2" + }, + "pyparsing": { + "hashes": [ + "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea", + "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484" + ], + "markers": "python_version >= '3.6'", + "version": "==3.0.7" + }, + "pytest": { + "hashes": [ + "sha256:9ce3ff477af913ecf6321fe337b93a2c0dcf2a0a1439c43f5452112c1e4280db", + 
"sha256:e30905a0c131d3d94b89624a1cc5afec3e0ba2fbdb151867d8e0ebd49850f171" + ], + "index": "pypi", + "version": "==7.0.1" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "pyzmq": { + "hashes": [ + "sha256:08c4e315a76ef26eb833511ebf3fa87d182152adf43dedee8d79f998a2162a0b", + "sha256:0ca6cd58f62a2751728016d40082008d3b3412a7f28ddfb4a2f0d3c130f69e74", + "sha256:1621e7a2af72cced1f6ec8ca8ca91d0f76ac236ab2e8828ac8fe909512d566cb", + "sha256:18cd854b423fce44951c3a4d3e686bac8f1243d954f579e120a1714096637cc0", + "sha256:2841997a0d85b998cbafecb4183caf51fd19c4357075dfd33eb7efea57e4c149", + "sha256:2b97502c16a5ec611cd52410bdfaab264997c627a46b0f98d3f666227fd1ea2d", + "sha256:3a4c9886d61d386b2b493377d980f502186cd71d501fffdba52bd2a0880cef4f", + "sha256:3c1895c95be92600233e476fe283f042e71cf8f0b938aabf21b7aafa62a8dac9", + "sha256:42abddebe2c6a35180ca549fadc7228d23c1e1f76167c5ebc8a936b5804ea2df", + "sha256:468bd59a588e276961a918a3060948ae68f6ff5a7fa10bb2f9160c18fe341067", + "sha256:480b9931bfb08bf8b094edd4836271d4d6b44150da051547d8c7113bf947a8b0", + "sha256:53f4fd13976789ffafedd4d46f954c7bb01146121812b72b4ddca286034df966", + "sha256:62bcade20813796c426409a3e7423862d50ff0639f5a2a95be4b85b09a618666", + "sha256:67db33bea0a29d03e6eeec55a8190e033318cee3cbc732ba8fd939617cbf762d", + "sha256:6b217b8f9dfb6628f74b94bdaf9f7408708cb02167d644edca33f38746ca12dd", + "sha256:7661fc1d5cb73481cf710a1418a4e1e301ed7d5d924f91c67ba84b2a1b89defd", + "sha256:76c532fd68b93998aab92356be280deec5de8f8fe59cd28763d2cc8a58747b7f", + "sha256:79244b9e97948eaf38695f4b8e6fc63b14b78cc37f403c6642ba555517ac1268", + "sha256:7c58f598d9fcc52772b89a92d72bf8829c12d09746a6d2c724c5b30076c1f11d", + 
"sha256:7dc09198e4073e6015d9a8ea093fc348d4e59de49382476940c3dd9ae156fba8", + "sha256:80e043a89c6cadefd3a0712f8a1322038e819ebe9dbac7eca3bce1721bcb63bf", + "sha256:851977788b9caa8ed011f5f643d3ee8653af02c5fc723fa350db5125abf2be7b", + "sha256:8eddc033e716f8c91c6a2112f0a8ebc5e00532b4a6ae1eb0ccc48e027f9c671c", + "sha256:902319cfe23366595d3fa769b5b751e6ee6750a0a64c5d9f757d624b2ac3519e", + "sha256:954e73c9cd4d6ae319f1c936ad159072b6d356a92dcbbabfd6e6204b9a79d356", + "sha256:ab888624ed68930442a3f3b0b921ad7439c51ba122dbc8c386e6487a658e4a4e", + "sha256:acebba1a23fb9d72b42471c3771b6f2f18dcd46df77482612054bd45c07dfa36", + "sha256:b4ebed0977f92320f6686c96e9e8dd29eed199eb8d066936bac991afc37cbb70", + "sha256:badb868fff14cfd0e200eaa845887b1011146a7d26d579aaa7f966c203736b92", + "sha256:be4e0f229cf3a71f9ecd633566bd6f80d9fa6afaaff5489492be63fe459ef98c", + "sha256:c0f84360dcca3481e8674393bdf931f9f10470988f87311b19d23cda869bb6b7", + "sha256:c1e41b32d6f7f9c26bc731a8b529ff592f31fc8b6ef2be9fa74abd05c8a342d7", + "sha256:c88fa7410e9fc471e0858638f403739ee869924dd8e4ae26748496466e27ac59", + "sha256:cf98fd7a6c8aaa08dbc699ffae33fd71175696d78028281bc7b832b26f00ca57", + "sha256:d072f7dfbdb184f0786d63bda26e8a0882041b1e393fbe98940395f7fab4c5e2", + "sha256:d1b5d457acbadcf8b27561deeaa386b0217f47626b29672fa7bd31deb6e91e1b", + "sha256:d3dcb5548ead4f1123851a5ced467791f6986d68c656bc63bfff1bf9e36671e2", + "sha256:d6157793719de168b199194f6b6173f0ccd3bf3499e6870fac17086072e39115", + "sha256:d728b08448e5ac3e4d886b165385a262883c34b84a7fe1166277fe675e1c197a", + "sha256:de8df0684398bd74ad160afdc2a118ca28384ac6f5e234eb0508858d8d2d9364", + "sha256:e6a02cf7271ee94674a44f4e62aa061d2d049001c844657740e156596298b70b", + "sha256:ea12133df25e3a6918718fbb9a510c6ee5d3fdd5a346320421aac3882f4feeea", + "sha256:ea5a79e808baef98c48c884effce05c31a0698c1057de8fc1c688891043c1ce1", + "sha256:f43b4a2e6218371dd4f41e547bd919ceeb6ebf4abf31a7a0669cd11cd91ea973", + "sha256:f762442bab706fd874064ca218b33a1d8e40d4938e96c24dafd9b12e28017f45", 
+ "sha256:f89468059ebc519a7acde1ee50b779019535db8dcf9b8c162ef669257fef7a93", + "sha256:f907c7359ce8bf7f7e63c82f75ad0223384105f5126f313400b7e8004d9b33c3" + ], + "markers": "python_version >= '3.6'", + "version": "==22.3.0" + }, + "setuptools": { + "hashes": [ + "sha256:2347b2b432c891a863acadca2da9ac101eae6169b1d3dfee2ec605ecd50dbfe5", + "sha256:e4f30b9f84e5ab3decf945113119649fec09c1fc3507c6ebffec75646c56e62b" + ], + "markers": "python_version >= '3.7'", + "version": "==60.9.3" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "stack-data": { + "hashes": [ + "sha256:45692d41bd633a9503a5195552df22b583caf16f0b27c4e58c98d88c8b648e12", + "sha256:999762f9c3132308789affa03e9271bbbe947bf78311851f4d485d8402ed858e" + ], + "version": "==0.2.0" + }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.1" + }, + "tornado": { + "hashes": [ + "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb", + "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c", + "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288", + "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95", + "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558", + "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe", + "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791", + "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d", + "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326", + 
"sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b", + "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4", + "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c", + "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910", + "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5", + "sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c", + "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0", + "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675", + "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd", + "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f", + "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c", + "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea", + "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6", + "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05", + "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd", + "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575", + "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a", + "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37", + "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795", + "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f", + "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32", + "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c", + "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01", + "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4", + "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2", + "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921", 
+ "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085", + "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df", + "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102", + "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5", + "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68", + "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5" + ], + "markers": "python_version >= '3.5'", + "version": "==6.1" + }, + "traitlets": { + "hashes": [ + "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7", + "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033" + ], + "markers": "python_version >= '3.7'", + "version": "==5.1.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42", + "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2" + ], + "markers": "python_version >= '3.6'", + "version": "==4.1.1" + }, + "wcwidth": { + "hashes": [ + "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", + "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" + ], + "version": "==0.2.5" + } + } +} diff --git a/app.py b/app.py deleted file mode 100644 index 05759ed..0000000 --- a/app.py +++ /dev/null @@ -1,95 +0,0 @@ -import gradio as gr -from clean_bibtex.clean_bibtex import get_url, get_dblp_bibtext, parse_bibtext_file_titles - -DEFAULT_TEXT = """@inproceedings{DBLP:conf/naacl/DevlinCLT19, - author = {Jacob Devlin and - Ming{-}Wei Chang and - Kenton Lee and - Kristina Toutanova}, - editor = {Jill Burstein and - Christy Doran and - Thamar Solorio}, - title = {{BERT:} Pre-training of Deep Bidirectional Transformers for Language - Understanding}, - booktitle = {Proceedings of the 2019 Conference of the North American Chapter of - the Association for Computational Linguistics: Human Language Technologies, - 
{NAACL-HLT} 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long - and Short Papers)}, - pages = {4171--4186}, - publisher = {Association for Computational Linguistics}, - year = {2019}, - url = {https://doi.org/10.18653/v1/n19-1423}, - doi = {10.18653/v1/n19-1423}, - timestamp = {Fri, 06 Aug 2021 00:41:31 +0200}, - biburl = {https://dblp.org/rec/conf/naacl/DevlinCLT19.bib}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} -""" - - -def parse_titles(bibtex): - titles = [] - lines = bibtex.split(",") - for line in lines: - if line.strip().startswith("title"): - title = "".join(line.split("=")[1:]) - title_clean = title.replace("{", "").replace("}", "").replace(",\n", "").strip() - titles.append(title_clean) - return titles - - -def cleaner(bibtex, file_obj): - dblp_citations = [] - errors = [] - - if file_obj: - titles = parse_bibtext_file_titles(file_obj.name) - - elif bibtex: - titles = parse_titles(bibtex) - - # request bibtex - for publication in titles: - if site_url := get_url(publication): - if dblp_citation := get_dblp_bibtext(site_url): - dblp_citations.append(dblp_citation) - else: - errors.append(" - " + publication) - else: - errors.append(" - " + publication) - - if dblp_citations: - filename = "cleaned.bib" - bibliography = "\n".join(dblp_citations) - with open(filename, "w") as outFile: - outFile.write(bibliography) - else: - filename = None - bibliography = None - errors.append("All") - - if errors: - errors = "Couldnt parse files: " + "\n".join(errors) - else: - errors = "Success!" - - return errors, filename, bibliography - - -iface = gr.Interface( - fn=cleaner, - title="BibTeX cleaner", - description="Clean a BibTeX file or string by dragging the incomplete or broken BibTeX file into the file box or pasting a BibTeX string into the string field. The titles are extracted, searched at the DBLP, compiled into a clean BibTeX file.", - article="

CLI and repo

", - inputs=[ - gr.inputs.Textbox(label="Paste a string here:", lines=1), - # gr.inputs.Checkbox(label="Keep original keys:"), - gr.inputs.File(label="Drag a Bibtex file here:", file_count="single", type="file", optional=True), - ], - outputs=[ - gr.outputs.Textbox(type="auto", label="Result Message:"), - gr.outputs.File(label="Cleaned bibtext file:"), - gr.outputs.Textbox(type="auto", label="Cleaned Bibliography:"), - ], -) -iface.launch() diff --git a/clean_bibtex/clean_bibtex.py b/clean_bibtex/clean_bibtex.py deleted file mode 100644 index d0f7298..0000000 --- a/clean_bibtex/clean_bibtex.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -"""This python script parses an incomplete BibTeX file to a BibTeX file with dblp references and styling. - -Example: - python bibtext_to_dblp -""" - -import requests -import click -from typing import Optional -import time - - -def parse_bibtext_file_titles(file_path: str) -> list[str]: - """Function to parse the titles of the publications from a BibTeX file. - - Args: - file_path (str): File path of the BibTeX file to parse. - - Returns: - list[str]: List with the parsed titles. - """ - try: - titles = [] - with open(file_path, "r") as inFile: - for line in inFile.readlines(): - if line.strip().startswith("title"): - title = "".join(line.split("=")[1:]) - title_clean = title.replace("{", "").replace("}", "").replace(",\n", "").strip() - titles.append(title_clean) - return titles - except OSError as err: - print("OS error: {0}".format(err)) - raise - except ValueError: - print("Could not parse, bibtext file is malformed.") - raise - except BaseException as err: - print(f"Unexpected {err}, {type(err)}") - raise - - -def get_url(title: str) -> Optional[str]: - """Search DBLP with a publication title and parse the pdf from the best result.json. - - Args: - title (str): Title of the publication to search for. - - Returns: - Optional[str]: URL of the DBLP page of the publication or None. 
- """ - url = f"https://dblp.org/search/publ/api?q={title}&format=json" - result = requests.get(url) - - try: - url = result.json()["result"]["hits"]["hit"][0]["info"]["url"] - return url - except: - return None - - -def get_dblp_bibtext(url: str) -> Optional[str]: - """Get the bibtext reference from a dblp publikation site url. - - Args: - url (str): Url to the publication site. - - Returns: - Optional[str]: Bibtex reference for the publication or None if an error occurred. - """ - r = requests.get(url + ".bib") - if r.status_code == 200: - return r.text - else: - return None - - -@click.command() -@click.argument("input_file") -@click.argument("outpu_file") -def clean_bibtex(outpu_file: str, input_file: str): - """Convert an incomplete BibTeX file into a complete BibTeX file with dblp styling. - - Args: - outpu_file (str): Destination for the new file. - input_file (str): Input file to parse bibtext citations from. - """ - titles = parse_bibtext_file_titles(input_file) - errors = [] - num_publications = str(len(titles)) - - click.echo("Requesting citation metadata for {num_publications} publications, this may take a while...") - with click.progressbar(length=len(titles)) as bar: - dblp_citations = [] - for publication in titles: - if site_url := get_url(publication): - if dblp_citation := get_dblp_bibtext(site_url): - dblp_citations.append(dblp_citation) - else: - errors.append(" - " + publication) - else: - errors.append(" - " + publication) - time.sleep(1) # abide dblp crawl-delay - bar.update(1) - - if dblp_citations: - with open(outpu_file, "w") as outFile: - outFile.write("\n".join(dblp_citations)) - click.echo(f"\nNew BibTeX file written to: {outpu_file}") - else: - click.echo("No citations to write.") - if errors: - click.echo("\nCould not create citations for:") - click.echo("\n".join(errors)) - - -if __name__ == "__main__": - clean_bibtex() diff --git a/setup.py b/setup.py index 8d9bde0..c0da123 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,14 @@ 
-from setuptools import setup, find_packages +from setuptools import setup, find_packages # type: ignore setup( - name="clean_bibtex", + name="BibTexTools", version="0.1.0", packages=find_packages(), include_package_data=True, - install_requires=["Click", "requests"], - entry_points={ - "console_scripts": [ - "clean_bibtex = clean_bibtex.clean_bibtex:clean_bibtex", - ], - }, + # install_requires=["Click", "requests"], + # entry_points={ + # "console_scripts": [ + # "BibTexTools = BibTexTools.clean_bibtex:clean_bibtex", + # ], + # }, ) diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..de38a76 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bibtes_path = \"BibTexTools/test/data/cleaned.bib\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert os.path.isfile(bibtes_path)\n", + "with open(bibtes_path, \"r\") as fin:\n", + " a = readlines(fin)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from BibTexTools import bibtex_parser" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from BibTexTools.bibtex_parser import Parser\n", + "\n", + "parser = Parser()\n", + "\n", + "bibtes_path = \"BibTexTools/tests/data/cleaned.bib\"\n", + "a = parser.from_file(bibtes_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open(bibtes_path, \"r\") as fin:\n", + " bibtex_string = fin.read()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "bib = parser.parse(bibtex_string)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'article'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.entries[0]._type" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'@article{DBLP:journals/corr/ZahediCW17, author= {Zohreh Zahedi and Rodrigo Costas and Paul Wouters}, title= {Mendeley readership as a filtering tool to identify highly cited publications}, journal= {CoRR}, volume= {abs/1703.07104}, year= {2017}, url= {http://arxiv.org/abs/1703.07104}, eprinttype = {arXiv}, eprint= {1703.07104}, timestamp = {Mon, 13 Aug 2018 16:48:04 +0200}, biburl= {https://dblp.org/rec/journals/corr/ZahediCW17.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.entries[0].field_str" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'@article{DBLP:journals/corr/ZahediCW17, author= {Zohreh Zahedi and Rodrigo Costas and Paul Wouters}, title= {Mendeley readership as a filtering tool to identify highly cited publications}, journal= {CoRR}, volume= {abs/1703.07104}, year= {2017}, url= {http://arxiv.org/abs/1703.07104}, eprinttype = {arXiv}, eprint= {1703.07104}, timestamp = {Mon, 13 Aug 2018 16:48:04 +0200}, biburl= {https://dblp.org/rec/journals/corr/ZahediCW17.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.entries[0].field_str" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/jueri/dev/clean_bibtex/BibTexTools/bibliography.py:217: UserWarning: Warning: \"url\" is not a standard Bibtex field\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from BibTexTools.bibtex_parser import Parser\n", + "\n", + "parser = Parser()\n", + "\n", + "with open(\n", + " os.path.join(\"BibTexTools\", \"tests\", \"data\", \"full.bib\"), \"r\"\n", + ") as fin:\n", + " bibtex_string = fin.read()\n", + "\n", + "parsed_bibtex = parser.parse(bibtex_string)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "parsed_bibtex.to_json(os.path.join(\"BibTexTools\", \"tests\", \"data\", \"to_json_ref.json\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Akey': {'type': 'Atype',\n", + " 'author': 'A1_First von A1_Last and von A2_Last, A2_First',\n", + " 'title': 'A_Title',\n", + " 'journal': 'A_Journal',\n", + " 'volume': 'A_Volume',\n", + " 'year': 'A_Year',\n", + " 'url': 'A_Url'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "entry = parsed_bibtex.to_dict()\n", + "entry.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BibTexTools.bibliography.Journal_field" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(entry.journal)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'parsed_bibtex' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [2]\u001b[0m, in 
\u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m parsed_bibtex\u001b[39m.\u001b[39mentries[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39myear\u001b[39m.\u001b[39mvalue\n", + "\u001b[0;31mNameError\u001b[0m: name 'parsed_bibtex' is not defined" + ] + } + ], + "source": [ + "parsed_bibtex.entries[0].year.value#.bibsource" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"2017\".isdigit()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'@article{DBLP:journals/ipm/Park97, author= {H. Park}, title= {Relevance of Science Information: Origins and Dimensions of Relevance and Their Implications to Information Retrieval}, journal= {Inf. Process. Manag.}, volume= {33}, number= {3}, pages= {339--352}, year= {1997}, url= {https://doi.org/10.1016/S0306-4573(96)00072-6}, doi= {10.1016/S0306-4573(96)00072-6}, timestamp = {Fri, 21 Feb 2020 13:11:34 +0100}, biburl= {https://dblp.org/rec/journals/ipm/Park97.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parsed_bibtex.entries[10].string" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass, field\n", + "\n", + "@dataclass\n", + "class Entry:\n", + " field_str: str = \"\"\n", + " _type: str = \"\"\n", + " key: str = \"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "e = Entry()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "''" + ] + }, + "execution_count": 14, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = list(e.__dict__.keys())[0]\n", + "e.__getattribute__(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "name = \"test\"\n", + "value = \"Hi\"\n", + "setattr(e, name, value)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'field_str': '', '_type': '', 'key': '', 'test': 'Hi'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "95fc40b8f07b755b1d46d3c633b6bcaf977032494613b4a25479a60eb79a03a2" + }, + "kernelspec": { + "display_name": "Python 3.8.9 ('clean_bibtex-IH1MGB3M')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test.json b/test.json new file mode 100644 index 0000000..53d4092 --- /dev/null +++ b/test.json @@ -0,0 +1,870 @@ +{ + "DBLP:journals/corr/ZahediCW17": { + "type": "article", + "author": "Zohreh Zahedi and Rodrigo Costas and Paul Wouters", + "title": "Mendeley readership as a filtering tool to identify highly cited publications", + "journal": "CoRR", + "volume": "abs/1703.07104", + "year": 2017, + "url": "http://arxiv.org/abs/1703.07104", + "eprinttype": "arXiv", + "eprint": "1703.07104", + "timestamp": "Mon, 13 Aug 2018 16:48:04 +0200", + "biburl": "https://dblp.org/rec/journals/corr/ZahediCW17.bib", + "bibsource": "dblp computer science bibliography, 
https://dblp.org" + }, + "DBLP:journals/jet/Dietrich15": { + "type": "article", + "author": "Franz Dietrich", + "title": "Aggregation theory and the relevance of some issues to others", + "journal": "J. Econ. Theory", + "volume": 160, + "pages": "463--493", + "year": 2015, + "url": "https://doi.org/10.1016/j.jet.2015.03.012", + "doi": "10.1016/j.jet.2015.03.012", + "timestamp": "Mon, 24 Feb 2020 15:54:33 +0100", + "biburl": "https://dblp.org/rec/journals/jet/Dietrich15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/spatialCognition/Tenbrink12": { + "type": "inproceedings", + "author": "Thora Tenbrink", + "editor": "Cyrill Stachniss and Kerstin Schill and David H. Uttal", + "title": "Relevance in Spatial Navigation and Communication", + "booktitle": "Spatial Cognition VIII - International Conference, Spatial Cognition 2012, Kloster Seeon, Germany, August 31 - September 3, 2012. Proceedings", + "series": "Lecture Notes in Computer Science", + "volume": 7463, + "pages": "358--377", + "publisher": "Springer", + "year": 2012, + "url": "https://doi.org/10.1007/978-3-642-32732-2\\_23", + "doi": "10.1007/978-3-642-32732-2\\_23", + "timestamp": "Sun, 02 Jun 2019 21:21:23 +0200", + "biburl": "https://dblp.org/rec/conf/spatialCognition/Tenbrink12.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/iiix/LarsenIK06": { + "type": "inproceedings", + "author": "Birger Larsen and Peter Ingwersen and Jaana Kek\\\"al\\\"ainen", + "editor": "Ian Ruthven", + "title": "The polyrepresentation continuum in IR", + "booktitle": "Proceedings of the 1st International Conference on Information Interaction in Context, IIiX 2006, Copenhagen, Denmark, October 18-20, 2006", + "pages": "88--96", + "publisher": "ACM", + "year": 2006, + "url": "https://doi.org/10.1145/1164820.1164840", + "doi": "10.1145/1164820.1164840", + "timestamp": "Tue, 06 Nov 2018 16:58:20 +0100", + "biburl": 
"https://dblp.org/rec/conf/iiix/LarsenIK06.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/sigir/Ingwersen94": { + "type": "inproceedings", + "author": "Peter Ingwersen", + "editor": "W. Bruce Croft and C. J. van Rijsbergen", + "title": "Polyrepresentation of Information Needs and Semantic Entities: Elements of a Cognitive Theory for Information Retrieval Interaction", + "booktitle": "Proceedings of the 17th Annual International ACM-SIGIR Conference on Research and Development in Information Retrieval. Dublin, Ireland, 3-6 July 1994 (Special Issue of the SIGIR Forum)", + "pages": "101--110", + "publisher": "ACM/Springer", + "year": 1994, + "url": "https://doi.org/10.1007/978-1-4471-2099-5\\_11", + "doi": "10.1007/978-1-4471-2099-5\\_11", + "timestamp": "Thu, 25 Jul 2019 17:43:05 +0200", + "biburl": "https://dblp.org/rec/conf/sigir/Ingwersen94.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/Wada20": { + "type": "article", + "author": "Tetsuo Wada", + "title": "When do the USPTO examiners cite as the EPO examiners? 
An analysis of examination spillovers through rejection citations at the international family-to-family level", + "journal": "Scientometrics", + "volume": 125, + "number": 2, + "pages": "1591--1615", + "year": 2020, + "url": "https://doi.org/10.1007/s11192-020-03674-4", + "doi": "10.1007/s11192-020-03674-4", + "timestamp": "Fri, 14 May 2021 08:32:27 +0200", + "biburl": "https://dblp.org/rec/journals/scientometrics/Wada20.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/Garfield98": { + "type": "article", + "author": "Eugene Garfield", + "title": "Random thoughts on citationology its theory and practice", + "journal": "Scientometrics", + "volume": 43, + "number": 1, + "pages": "69--76", + "year": 1998, + "url": "https://doi.org/10.1007/BF02458396", + "doi": "10.1007/BF02458396", + "timestamp": "Fri, 17 Jul 2020 06:39:13 +0200", + "biburl": "https://dblp.org/rec/journals/scientometrics/Garfield98.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/crl/Tyckoson15": { + "type": "article", + "author": "David A. Tyckoson", + "title": "Question-Negotiation and Information Seeking in Libraries: A Timeless Topic in a Timeless Article", + "journal": "Coll. Res. Libr.", + "volume": 76, + "number": 3, + "pages": "247--250", + "year": 2015, + "url": "https://doi.org/10.5860/crl.76.3.247", + "doi": "10.5860/crl.76.3.247", + "timestamp": "Thu, 25 Jun 2020 21:29:22 +0200", + "biburl": "https://dblp.org/rec/journals/crl/Tyckoson15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/issi/Holmberg15a": { + "type": "inproceedings", + "author": "Kim Holmberg", + "editor": "Albert Ali Salah and Yasar Tonta and Alkim Almila Akdag Salah and Cassidy R. 
Sugimoto and Umut Al", + "title": "Classifying Altmetrics by Level of Impact", + "booktitle": "Proceedings of the 15th International Conference on Scientometrics and Informetrics, Istanbul, Turkey, June 29 - July 3, 2015", + "publisher": "ISSI Society", + "year": 2015, + "timestamp": "Tue, 14 Apr 2020 11:09:56 +0200", + "biburl": "https://dblp.org/rec/conf/issi/Holmberg15a.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/ipm/SchamberEN90": { + "type": "article", + "author": "Linda Schamber and Michael B. Eisenberg and Michael Sanford Nilan", + "title": "A re-examination of relevance: toward a dynamic, situational definition", + "journal": "Inf. Process. Manag.", + "volume": 26, + "number": 6, + "pages": "755--776", + "year": 1990, + "url": "https://doi.org/10.1016/0306-4573(90)90050-C", + "doi": "10.1016/0306-4573(90)90050-C", + "timestamp": "Fri, 21 Feb 2020 13:11:07 +0100", + "biburl": "https://dblp.org/rec/journals/ipm/SchamberEN90.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/ipm/Park97": { + "type": "article", + "author": "H. Park", + "title": "Relevance of Science Information: Origins and Dimensions of Relevance and Their Implications to Information Retrieval", + "journal": "Inf. Process. Manag.", + "volume": 33, + "number": 3, + "pages": "339--352", + "year": 1997, + "url": "https://doi.org/10.1016/S0306-4573(96)00072-6", + "doi": "10.1016/S0306-4573(96)00072-6", + "timestamp": "Fri, 21 Feb 2020 13:11:34 +0100", + "biburl": "https://dblp.org/rec/journals/ipm/Park97.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/siamrev/ClausetSN09": { + "type": "article", + "author": "Aaron Clauset and Cosma Rohilla Shalizi and Mark E. J. 
Newman", + "title": "Power-Law Distributions in Empirical Data", + "journal": "SIAM Rev.", + "volume": 51, + "number": 4, + "pages": "661--703", + "year": 2009, + "url": "https://doi.org/10.1137/070710111", + "doi": "10.1137/070710111", + "timestamp": "Tue, 29 Sep 2020 10:58:15 +0200", + "biburl": "https://dblp.org/rec/journals/siamrev/ClausetSN09.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jasis/KoushaT17b": { + "type": "article", + "author": "Kayvan Kousha and Mike Thelwall", + "title": "News stories as evidence for research? BBC citations from articles, Books, and Wikipedia", + "journal": "J. Assoc. Inf. Sci. Technol.", + "volume": 68, + "number": 8, + "pages": "2017--2028", + "year": 2017, + "url": "https://doi.org/10.1002/asi.23862", + "doi": "10.1002/asi.23862", + "timestamp": "Mon, 02 Mar 2020 17:21:46 +0100", + "biburl": "https://dblp.org/rec/journals/jasis/KoushaT17b.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/RobertsDVHBLPM19": { + "type": "inproceedings", + "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar and Shubham Pant and Funda Meric-Bernstam", + "editor": "Ellen M. 
Voorhees and Angela Ellis", + "title": "Overview of the TREC 2019 Precision Medicine Track", + "booktitle": "Proceedings of the Twenty-Eighth Text REtrieval Conference, TREC 2019, Gaithersburg, Maryland, USA, November 13-15, 2019", + "series": "NIST Special Publication", + "volume": 1250, + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2019, + "url": "https://trec.nist.gov/pubs/trec28/papers/OVERVIEW.PM.pdf", + "timestamp": "Wed, 03 Feb 2021 08:31:24 +0100", + "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBLPM19.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/RobertsDVHBL18": { + "type": "inproceedings", + "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar", + "editor": "Ellen M. Voorhees and Angela Ellis", + "title": "Overview of the TREC 2018 Precision Medicine Track", + "booktitle": "Proceedings of the Twenty-Seventh Text REtrieval Conference, TREC 2018, Gaithersburg, Maryland, USA, November 14-16, 2018", + "series": "NIST Special Publication", + "volume": "500-331", + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2018, + "url": "https://trec.nist.gov/pubs/trec27/papers/Overview-PM.pdf", + "timestamp": "Wed, 03 Feb 2021 08:31:25 +0100", + "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBL18.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/RobertsDVHBLP17": { + "type": "inproceedings", + "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar and Shubham Pant", + "editor": "Ellen M. 
Voorhees and Angela Ellis", + "title": "Overview of the TREC 2017 Precision Medicine Track", + "booktitle": "Proceedings of The Twenty-Sixth Text REtrieval Conference, TREC 2017, Gaithersburg, Maryland, USA, November 15-17, 2017", + "series": "NIST Special Publication", + "volume": "500-324", + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2017, + "url": "https://trec.nist.gov/pubs/trec26/papers/Overview-PM.pdf", + "timestamp": "Wed, 07 Jul 2021 16:44:22 +0200", + "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBLP17.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/RobertsDVH16": { + "type": "inproceedings", + "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh", + "editor": "Ellen M. Voorhees and Angela Ellis", + "title": "Overview of the TREC 2016 Clinical Decision Support Track", + "booktitle": "Proceedings of The Twenty-Fifth Text REtrieval Conference, TREC 2016, Gaithersburg, Maryland, USA, November 15-18, 2016", + "series": "NIST Special Publication", + "volume": "500-321", + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2016, + "url": "http://trec.nist.gov/pubs/trec25/papers/Overview-CL.pdf", + "timestamp": "Wed, 03 Feb 2021 08:31:25 +0100", + "biburl": "https://dblp.org/rec/conf/trec/RobertsDVH16.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/RobertsSVH15": { + "type": "inproceedings", + "author": "Kirk Roberts and Matthew S. Simpson and Ellen M. Voorhees and William R. Hersh", + "editor": "Ellen M. 
Voorhees and Angela Ellis", + "title": "Overview of the TREC 2015 Clinical Decision Support Track", + "booktitle": "Proceedings of The Twenty-Fourth Text REtrieval Conference, TREC 2015, Gaithersburg, Maryland, USA, November 17-20, 2015", + "series": "NIST Special Publication", + "volume": "500-319", + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2015, + "url": "http://trec.nist.gov/pubs/trec24/papers/Overview-CL.pdf", + "timestamp": "Wed, 03 Feb 2021 08:31:23 +0100", + "biburl": "https://dblp.org/rec/conf/trec/RobertsSVH15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/trec/SimpsonVH14": { + "type": "inproceedings", + "author": "Matthew S. Simpson and Ellen M. Voorhees and William R. Hersh", + "editor": "Ellen M. Voorhees and Angela Ellis", + "title": "Overview of the TREC 2014 Clinical Decision Support Track", + "booktitle": "Proceedings of The Twenty-Third Text REtrieval Conference, TREC 2014, Gaithersburg, Maryland, USA, November 19-21, 2014", + "series": "NIST Special Publication", + "volume": "500-308", + "publisher": "National Institute of Standards and Technology (NIST)", + "year": 2014, + "url": "https://trec.nist.gov/pubs/trec23/papers/overview-clinical.pdf", + "timestamp": "Wed, 03 Feb 2021 08:31:24 +0100", + "biburl": "https://dblp.org/rec/conf/trec/SimpsonVH14.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/MutschkeMSS11": { + "type": "article", + "author": "Peter Mutschke and Philipp Mayr and Philipp Schaer and York Sure", + "title": "Science models as value-added services for scholarly information systems", + "journal": "Scientometrics", + "volume": 89, + "number": 1, + "pages": "349--364", + "year": 2011, + "url": "https://doi.org/10.1007/s11192-011-0430-x", + "doi": "10.1007/s11192-011-0430-x", + "timestamp": "Fri, 17 Jul 2020 06:40:46 +0200", + "biburl": 
"https://dblp.org/rec/journals/scientometrics/MutschkeMSS11.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jd/Langham95": { + "type": "article", + "author": "Thomas Langham", + "title": "Consistency in Referencing", + "journal": "J. Documentation", + "volume": 51, + "number": 4, + "pages": "360--369", + "year": 1995, + "url": "https://doi.org/10.1108/eb026955", + "doi": "10.1108/eb026955", + "timestamp": "Sun, 06 Sep 2020 16:55:56 +0200", + "biburl": "https://dblp.org/rec/journals/jd/Langham95.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/iiix/Ingwersen12": { + "type": "inproceedings", + "author": "Peter Ingwersen", + "editor": "Jaap Kamps and Wessel Kraaij and Norbert Fuhr", + "title": "Citations and references as keys to relevance ranking in interactive IR", + "booktitle": "Information Interaction in Context: 2012, IIix'12, Nijmegen, The Netherlands, August 21-24, 2012", + "pages": 1, + "publisher": "ACM", + "year": 2012, + "url": "https://doi.org/10.1145/2362724.2362726", + "doi": "10.1145/2362724.2362726", + "timestamp": "Tue, 06 Nov 2018 16:58:20 +0100", + "biburl": "https://dblp.org/rec/conf/iiix/Ingwersen12.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/tois/JarvelinK02": { + "type": "article", + "author": "Kalervo J\\\"arvelin and Jaana Kek\\\"al\\\"ainen", + "title": "Cumulated gain-based evaluation of IR techniques", + "journal": "ACM Trans. Inf. 
Syst.", + "volume": 20, + "number": 4, + "pages": "422--446", + "year": 2002, + "url": "http://doi.acm.org/10.1145/582415.582418", + "doi": "10.1145/582415.582418", + "timestamp": "Fri, 09 Jun 2017 11:03:19 +0200", + "biburl": "https://dblp.org/rec/journals/tois/JarvelinK02.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/ir/Fuhr08": { + "type": "article", + "author": "Norbert Fuhr", + "title": "A probability ranking principle for interactive information retrieval", + "journal": "Inf. Retr.", + "volume": 11, + "number": 3, + "pages": "251--265", + "year": 2008, + "url": "https://doi.org/10.1007/s10791-008-9045-0", + "doi": "10.1007/s10791-008-9045-0", + "timestamp": "Sat, 27 May 2017 14:25:37 +0200", + "biburl": "https://dblp.org/rec/journals/ir/Fuhr08.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:series/synthesis/2019Alonso": { + "type": "book", + "author": "Omar Alonso", + "title": "The Practice of Crowdsourcing", + "series": "Synthesis Lectures on Information Concepts, Retrieval, and Services", + "publisher": "Morgan \\& Claypool Publishers", + "year": 2019, + "url": "https://doi.org/10.2200/S00904ED1V01Y201903ICR066", + "doi": "10.2200/S00904ED1V01Y201903ICR066", + "timestamp": "Tue, 18 Jun 2019 11:34:18 +0200", + "biburl": "https://dblp.org/rec/series/synthesis/2019Alonso.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/clef/Schaer12": { + "type": "inproceedings", + "author": "Philipp Schaer", + "editor": "Tiziana Catarci and Pamela Forner and Djoerd Hiemstra and Anselmo Pe\\~nas and Giuseppe Santucci", + "title": "Better than Their Reputation? On the Reliability of Relevance Assessments with Students", + "booktitle": "Information Access Evaluation. Multilinguality, Multimodality, and Visual Analytics - Third International Conference of the CLEF Initiative, CLEF 2012, Rome, Italy, September 17-20, 2012. 
Proceedings", + "series": "Lecture Notes in Computer Science", + "volume": 7488, + "pages": "124--135", + "publisher": "Springer", + "year": 2012, + "url": "https://doi.org/10.1007/978-3-642-33247-0\\_14", + "doi": "10.1007/978-3-642-33247-0\\_14", + "timestamp": "Tue, 14 May 2019 10:00:50 +0200", + "biburl": "https://dblp.org/rec/conf/clef/Schaer12.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/ipm/White17": { + "type": "article", + "author": "Howard D. White", + "title": "Relevance theory and distributions of judgments in document retrieval", + "journal": "Inf. Process. Manag.", + "volume": 53, + "number": 5, + "pages": "1080--1102", + "year": 2017, + "url": "https://doi.org/10.1016/j.ipm.2017.02.010", + "doi": "10.1016/j.ipm.2017.02.010", + "timestamp": "Fri, 21 Feb 2020 13:11:26 +0100", + "biburl": "https://dblp.org/rec/journals/ipm/White17.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/birws/BreuerST20": { + "type": "inproceedings", + "author": "Timo Breuer and Philipp Schaer and Dirk Tunger", + "editor": "Guillaume Cabanac and Ingo Frommholz and Philipp Mayr", + "title": "Relations Between Relevance Assessments, Bibliometrics and Altmetrics", + "booktitle": "Proceedings of the 10th International Workshop on Bibliometric-enhanced Information Retrieval co-located with 42nd European Conference on Information Retrieval, BIR@ECIR 2020, Lisbon, Portugal, April 14th, 2020 [online only]", + "series": "CEUR Workshop Proceedings", + "volume": 2591, + "pages": "101--112", + "publisher": "CEUR-WS.org", + "year": 2020, + "url": "http://ceur-ws.org/Vol-2591/paper-10.pdf", + "timestamp": "Tue, 14 Apr 2020 17:42:07 +0200", + "biburl": "https://dblp.org/rec/conf/birws/BreuerST20.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jd/Ingwersen96": { + "type": "article", + "author": "Peter Ingwersen", + "title": "Cognitive Perspectives 
of Information Retrieval Interaction: Elements of a Cognitive IR Theory", + "journal": "J. Documentation", + "volume": 52, + "number": 1, + "pages": "3--50", + "year": 1996, + "url": "https://doi.org/10.1108/eb026960", + "doi": "10.1108/eb026960", + "timestamp": "Sun, 06 Sep 2020 16:56:08 +0200", + "biburl": "https://dblp.org/rec/journals/jd/Ingwersen96.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jasis/Mizzaro97": { + "type": "article", + "author": "Stefano Mizzaro", + "title": "Relevance: The Whole History", + "journal": "J. Am. Soc. Inf. Sci.", + "volume": 48, + "number": 9, + "pages": "810--832", + "year": 1997, + "url": "https://doi.org/10.1002/(SICI)1097-4571(199709)48:9\\%3C810::AID-ASI6\\%3E3.0.CO;2-U", + "doi": "10.1002/(SICI)1097-4571(199709)48:9\\%3C810::AID-ASI6\\%3E3.0.CO;2-U", + "timestamp": "Wed, 13 May 2020 17:19:45 +0200", + "biburl": "https://dblp.org/rec/journals/jasis/Mizzaro97.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/ClermontKT21": { + "type": "article", + "author": "Marcel Clermont and Johanna Krolak and Dirk Tunger", + "title": "Does the citation period have any effect on the informative value of selected citation indicators in research evaluations?", + "journal": "Scientometrics", + "volume": 126, + "number": 2, + "pages": "1019--1047", + "year": 2021, + "url": "https://doi.org/10.1007/s11192-020-03782-1", + "doi": "10.1007/s11192-020-03782-1", + "timestamp": "Fri, 14 May 2021 08:32:26 +0200", + "biburl": "https://dblp.org/rec/journals/scientometrics/ClermontKT21.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/joi/SungWHC15": { + "type": "article", + "author": "Hui-Yun Sung and Chun-Chieh Wang and Mu-Hsuan Huang and Dar-Zen Chen", + "title": "Measuring science-based science linkage and non-science-based linkage of patents through non-patent references", + "journal": "J. 
Informetrics", + "volume": 9, + "number": 3, + "pages": "488--498", + "year": 2015, + "url": "https://doi.org/10.1016/j.joi.2015.04.004", + "doi": "10.1016/j.joi.2015.04.004", + "timestamp": "Fri, 27 Mar 2020 08:32:34 +0100", + "biburl": "https://dblp.org/rec/journals/joi/SungWHC15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jasis/SmolinskyLM15": { + "type": "article", + "author": "Lawrence J. Smolinsky and Aaron J. Lercher and Andrew McDaniel", + "title": "Testing theories of preferential attachment in random networks of citations", + "journal": "J. Assoc. Inf. Sci. Technol.", + "volume": 66, + "number": 10, + "pages": "2132--2145", + "year": 2015, + "url": "https://doi.org/10.1002/asi.23312", + "doi": "10.1002/asi.23312", + "timestamp": "Thu, 29 Apr 2021 11:10:56 +0200", + "biburl": "https://dblp.org/rec/journals/jasis/SmolinskyLM15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jdis/Raan17": { + "type": "article", + "author": "Anthony F. J. van Raan", + "title": "Patent Citations Analysis and Its Value in Research Evaluation: A Review and a New Approach to Map Technology-relevant Research", + "journal": "J. Data Inf. Sci.", + "volume": 2, + "number": 1, + "pages": "13--50", + "year": 2017, + "url": "https://doi.org/10.1515/jdis-2017-0002", + "doi": "10.1515/jdis-2017-0002", + "timestamp": "Thu, 06 Jan 2022 14:19:59 +0100", + "biburl": "https://dblp.org/rec/journals/jdis/Raan17.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/chiir/2021birds": { + "type": "proceedings", + "editor": "Ingo Frommholz and Haiming Liu and Massimo Melucci and Nicholas J. Belkin and Gareth J. F. 
Jones and Noriko Kando and Gabriella Pasi", + "title": "Joint Proceedings of the Second Workshop on Bridging the Gap between Information Science, Information Retrieval and Data Science, and Third Workshop on Evaluation of Personalisation in Information Retrieval co-located with 6th ACM SIGIR Conference on Human Information Interaction and Retrieval (CHIIR 2021), Canberra, Australia (Virtual Event), March 19th, 2021", + "series": "CEUR Workshop Proceedings", + "volume": 2863, + "publisher": "CEUR-WS.org", + "year": 2021, + "url": "http://ceur-ws.org/Vol-2863", + "urn": "urn:nbn:de:0074-2863-0", + "timestamp": "Wed, 19 May 2021 16:09:08 +0200", + "biburl": "https://dblp.org/rec/conf/chiir/2021birds.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jd/Cronin81": { + "type": "article", + "author": "Blaise Cronin", + "title": "The Need for a Theory of citing", + "journal": "J. Documentation", + "volume": 37, + "number": 1, + "pages": "16--24", + "year": 1981, + "url": "https://doi.org/10.1108/eb026703", + "doi": "10.1108/eb026703", + "timestamp": "Sun, 06 Sep 2020 16:56:21 +0200", + "biburl": "https://dblp.org/rec/journals/jd/Cronin81.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/Larsen02": { + "type": "article", + "author": "Birger Larsen", + "title": "Exploiting citation overlaps for Information Retrieval: Generating a boomerang effect from the network of scientific papers", + "journal": "Scientometrics", + "volume": 54, + "number": 2, + "pages": "155--178", + "year": 2002, + "url": "https://doi.org/10.1023/A:1016011326300", + "doi": "10.1023/A:1016011326300", + "timestamp": "Fri, 17 Jul 2020 06:40:02 +0200", + "biburl": "https://dblp.org/rec/journals/scientometrics/Larsen02.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/LykkeLLI10": { + "type": "inproceedings", + "author": "Marianne Lykke and Birger 
Larsen and Haakon Lund and Peter Ingwersen", + "editor": "Cathal Gurrin and Yulan He and Gabriella Kazai and Udo Kruschwitz and Suzanne Little and Thomas Roelleke and Stefan M. R\\\"uger and Keith van Rijsbergen", + "title": "Developing a Test Collection for the Evaluation of Integrated Search", + "booktitle": "Advances in Information Retrieval, 32nd European Conference on IR Research, ECIR 2010, Milton Keynes, UK, March 28-31, 2010. Proceedings", + "series": "Lecture Notes in Computer Science", + "volume": 5993, + "pages": "627--630", + "publisher": "Springer", + "year": 2010, + "url": "https://doi.org/10.1007/978-3-642-12275-0\\_63", + "doi": "10.1007/978-3-642-12275-0\\_63", + "timestamp": "Sun, 25 Oct 2020 22:33:08 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/LykkeLLI10.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/BallT06": { + "type": "article", + "author": "Rafael Ball and Dirk Tunger", + "title": "Bibliometric analysis - A new business area for information professionals in libraries?", + "journal": "Scientometrics", + "volume": 66, + "number": 3, + "pages": "561--577", + "year": 2006, + "url": "https://doi.org/10.1007/s11192-006-0041-0", + "doi": "10.1007/s11192-006-0041-0", + "timestamp": "Mon, 26 Oct 2020 08:45:29 +0100", + "biburl": "https://dblp.org/rec/journals/scientometrics/BallT06.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/icis/Jain03": { + "type": "inproceedings", + "author": "Abhijit Jain", + "title": "Performance Paradox : Information Technology Investments and Administrative Performance in the Case of the 50 U.S. 
State Governments", + "booktitle": "Proceedings of the International Conference on Information Systems, ICIS 2003, December 14-17, 2003, Seattle, Washington, USA", + "pages": "389--400", + "publisher": "Association for Information Systems", + "year": 2003, + "url": "http://aisel.aisnet.org/icis2003/33", + "timestamp": "Sun, 29 Mar 2015 13:07:34 +0200", + "biburl": "https://dblp.org/rec/conf/icis/Jain03.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/ipm/SkovLI08": { + "type": "article", + "author": "Mette Skov and Birger Larsen and Peter Ingwersen", + "title": "Inter and intra-document contexts applied in polyrepresentation for best match IR", + "journal": "Inf. Process. Manag.", + "volume": 44, + "number": 5, + "pages": "1673--1683", + "year": 2008, + "url": "https://doi.org/10.1016/j.ipm.2008.05.006", + "doi": "10.1016/j.ipm.2008.05.006", + "timestamp": "Fri, 21 Feb 2020 13:11:05 +0100", + "biburl": "https://dblp.org/rec/journals/ipm/SkovLI08.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/issi/Tunger19": { + "type": "inproceedings", + "author": "Dirk Tunger", + "editor": "Giuseppe Catalano and Cinzia Daraio and Martina Gregori and Henk F. Moed and Giancarlo Ruocco", + "title": "Altmetrics - on the way to the \"economy of attention\"? 
Feasibility study Altmetrics for the German Ministry of Science and Research (BMBF)", + "booktitle": "Proceedings of the 17th International Conference on Scientometrics and Informetrics, ISSI 2019, Rome, Italy, September 2-5, 2019", + "pages": "2262--2272", + "publisher": "ISSI Society", + "year": 2019, + "timestamp": "Tue, 14 Apr 2020 11:09:56 +0200", + "biburl": "https://dblp.org/rec/conf/issi/Tunger19.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/Bessagnet18": { + "type": "inproceedings", + "author": "Marie-No\\\"elle Bessagnet", + "editor": "Philipp Mayr and Ingo Frommholz and Guillaume Cabanac", + "title": "A Generic Framework to Perform Comprehensive Analysis of Tweets", + "booktitle": "Proceedings of the 7th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2018) co-located with the 40th European Conference on Information Retrieval (ECIR 2018), Grenoble, France, March 26, 2018", + "series": "CEUR Workshop Proceedings", + "volume": 2080, + "pages": "80--85", + "publisher": "CEUR-WS.org", + "year": 2018, + "url": "http://ceur-ws.org/Vol-2080/paper9.pdf", + "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/Bessagnet18.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/SaierF19": { + "type": "inproceedings", + "author": "Tarek Saier and Michael F\\\"arber", + "editor": "Guillaume Cabanac and Ingo Frommholz and Philipp Mayr", + "title": "Bibliometric-Enhanced arXiv: A Data Set for Paper-Based and Citation-Based Tasks", + "booktitle": "Proceedings of the 8th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2019) co-located with the 41st European Conference on Information Retrieval (ECIR 2019), Cologne, Germany, April 14, 2019", + "series": "CEUR Workshop Proceedings", + "volume": 2345, + "pages": "14--26", + "publisher": "CEUR-WS.org", + "year": 2019, + "url": 
"http://ceur-ws.org/Vol-2345/paper2.pdf", + "timestamp": "Thu, 21 Jan 2021 17:36:28 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/SaierF19.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/DabrowskaL15": { + "type": "inproceedings", + "author": "Anna Dabrowska and Birger Larsen", + "editor": "Philipp Mayr and Ingo Frommholz and Peter Mutschke", + "title": "Exploiting Citation Contexts for Physics Retrieval", + "booktitle": "Proceedings of the Second Workshop on Bibliometric-enhanced Information Retrieval co-located with the 37th European Conference on Information Retrieval (ECIR 2015), Vienna, Austria, March 29th, 2015", + "series": "CEUR Workshop Proceedings", + "volume": 1344, + "pages": "14--21", + "publisher": "CEUR-WS.org", + "year": 2015, + "url": "http://ceur-ws.org/Vol-1344/paper2.pdf", + "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/DabrowskaL15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/aslib/CostasZW15": { + "type": "article", + "author": "Rodrigo Costas and Zohreh Zahedi and Paul Wouters", + "title": "The thematic orientation of publications mentioned on social media: Large-scale disciplinary comparison of social media metrics with citations", + "journal": "Aslib J. Inf. 
Manag.", + "volume": 67, + "number": 3, + "pages": "260--288", + "year": 2015, + "url": "https://doi.org/10.1108/AJIM-12-2014-0173", + "doi": "10.1108/AJIM-12-2014-0173", + "timestamp": "Mon, 03 Jan 2022 22:07:53 +0100", + "biburl": "https://dblp.org/rec/journals/aslib/CostasZW15.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/scientometrics/MayrS15a": { + "type": "article", + "author": "Philipp Mayr and Andrea Scharnhorst", + "title": "Scientometrics and information retrieval: weak-links revitalized", + "journal": "Scientometrics", + "volume": 102, + "number": 3, + "pages": "2193--2199", + "year": 2015, + "url": "https://doi.org/10.1007/s11192-014-1484-3", + "doi": "10.1007/s11192-014-1484-3", + "timestamp": "Fri, 17 Jul 2020 06:39:21 +0200", + "biburl": "https://dblp.org/rec/journals/scientometrics/MayrS15a.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/sigir/Azzopardi11": { + "type": "inproceedings", + "author": "Leif Azzopardi", + "editor": "Wei-Ying Ma and Jian-Yun Nie and Ricardo Baeza-Yates and Tat-Seng Chua and W. Bruce Croft", + "title": "The economics in interactive information retrieval", + "booktitle": "Proceeding of the 34th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2011, Beijing, China, July 25-29, 2011", + "pages": "15--24", + "publisher": "ACM", + "year": 2011, + "url": "https://doi.org/10.1145/2009916.2009923", + "doi": "10.1145/2009916.2009923", + "timestamp": "Sun, 22 Sep 2019 18:15:38 +0200", + "biburl": "https://dblp.org/rec/conf/sigir/Azzopardi11.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/sigir/LarsenI02": { + "type": "inproceedings", + "author": "Birger Larsen and Peter Ingwersen", + "editor": "Kalervo J\\\"arvelin and Micheline Beaulieu and Ricardo A. 
Baeza-Yates and Sung-Hyon Myaeng", + "title": "The boomerang effect: retrieving scientific documents via the network of references and citations", + "booktitle": "SIGIR 2002: Proceedings of the 25th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, August 11-15, 2002, Tampere, Finland", + "pages": "397--398", + "publisher": "ACM", + "year": 2002, + "url": "https://doi.org/10.1145/564376.564462", + "doi": "10.1145/564376.564462", + "timestamp": "Wed, 07 Nov 2018 14:52:44 +0100", + "biburl": "https://dblp.org/rec/conf/sigir/LarsenI02.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/isu/MeierT18": { + "type": "article", + "author": "Andreas Meier and Dirk Tunger", + "title": "Investigating the transparency and influenceability of altmetrics using the example of the RG score and the ResearchGate platform", + "journal": "Inf. Serv. Use", + "volume": 38, + "number": "1-2", + "pages": "99--110", + "year": 2018, + "url": "https://doi.org/10.3233/ISU-180001", + "doi": "10.3233/ISU-180001", + "timestamp": "Mon, 11 May 2020 15:37:55 +0200", + "biburl": "https://dblp.org/rec/journals/isu/MeierT18.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/KacemM18": { + "type": "inproceedings", + "author": "Ameni Kacem and Philipp Mayr", + "editor": "Philipp Mayr and Ingo Frommholz and Guillaume Cabanac", + "title": "Users are not Influenced by High Impact and Core Journals while Searching", + "booktitle": "Proceedings of the 7th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2018) co-located with the 40th European Conference on Information Retrieval (ECIR 2018), Grenoble, France, March 26, 2018", + "series": "CEUR Workshop Proceedings", + "volume": 2080, + "pages": "63--75", + "publisher": "CEUR-WS.org", + "year": 2018, + "url": "http://ceur-ws.org/Vol-2080/paper7.pdf", + "timestamp": "Wed, 12 Feb 2020 16:44:12 
+0100", + "biburl": "https://dblp.org/rec/conf/ecir/KacemM18.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/CarevicS14": { + "type": "inproceedings", + "author": "Zeljko Carevic and Philipp Schaer", + "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", + "title": "On the Connection Between Citation-based and Topical Relevance Ranking: Results of a Pretest using iSearch", + "booktitle": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", + "series": "CEUR Workshop Proceedings", + "volume": 1143, + "pages": "37--44", + "publisher": "CEUR-WS.org", + "year": 2014, + "url": "http://ceur-ws.org/Vol-1143/paper5.pdf", + "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/CarevicS14.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/ecir/JackLHK14": { + "type": "inproceedings", + "author": "Kris Jack and Pablo L\\'opez-Garc\\'\\ia and Maya Hristakeva and Roman Kern", + "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", + "title": "\\\\citation needed\\\\: Filling in Wikipedia's Citation Shaped Holes", + "booktitle": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", + "series": "CEUR Workshop Proceedings", + "volume": 1143, + "pages": "45--52", + "publisher": "CEUR-WS.org", + "year": 2014, + "url": "http://ceur-ws.org/Vol-1143/paper6.pdf", + "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/JackLHK14.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + 
"DBLP:conf/ecir/2014bir": { + "type": "proceedings", + "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", + "title": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", + "series": "CEUR Workshop Proceedings", + "volume": 1143, + "publisher": "CEUR-WS.org", + "year": 2014, + "url": "http://ceur-ws.org/Vol-1143", + "urn": "urn:nbn:de:0074-1143-7", + "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", + "biburl": "https://dblp.org/rec/conf/ecir/2014bir.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/jasis/Cole11": { + "type": "article", + "author": "Charles Cole", + "title": "A theory of information need for information retrieval that connects information to knowledge", + "journal": "J. Assoc. Inf. Sci. Technol.", + "volume": 62, + "number": 7, + "pages": "1216--1231", + "year": 2011, + "url": "https://doi.org/10.1002/asi.21541", + "doi": "10.1002/asi.21541", + "timestamp": "Mon, 02 Mar 2020 17:22:25 +0100", + "biburl": "https://dblp.org/rec/journals/jasis/Cole11.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:conf/icsm/GayHMM09": { + "type": "inproceedings", + "author": "Gregory Gay and Sonia Haiduc and Andrian Marcus and Tim Menzies", + "title": "On the use of relevance feedback in IR-based concept location", + "booktitle": "25th IEEE International Conference on Software Maintenance (ICSM 2009), September 20-26, 2009, Edmonton, Alberta, Canada", + "pages": "351--360", + "publisher": "IEEE Computer Society", + "year": 2009, + "url": "https://doi.org/10.1109/ICSM.2009.5306315", + "doi": "10.1109/ICSM.2009.5306315", + "timestamp": "Thu, 14 Oct 2021 10:27:26 +0200", + "biburl": "https://dblp.org/rec/conf/icsm/GayHMM09.bib", + "bibsource": "dblp computer science bibliography, 
https://dblp.org" + }, + "DBLP:journals/corr/HeckS13": { + "type": "article", + "author": "Tamara Heck and Philipp Schaer", + "title": "Performing Informetric Analysis on Information Retrieval Test Collections: Preliminary Experiments in the Physics Domain", + "journal": "CoRR", + "volume": "abs/1306.1743", + "year": 2013, + "url": "http://arxiv.org/abs/1306.1743", + "eprinttype": "arXiv", + "eprint": "1306.1743", + "timestamp": "Mon, 13 Aug 2018 16:48:24 +0200", + "biburl": "https://dblp.org/rec/journals/corr/HeckS13.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + }, + "DBLP:journals/cacm/Voorhees07": { + "type": "article", + "author": "Ellen M. Voorhees", + "title": "TREC: Continuing information retrieval's tradition of experimentation", + "journal": "Commun. ACM", + "volume": 50, + "number": 11, + "pages": "51--54", + "year": 2007, + "url": "https://doi.org/10.1145/1297797.1297822", + "doi": "10.1145/1297797.1297822", + "timestamp": "Thu, 14 Oct 2021 09:00:21 +0200", + "biburl": "https://dblp.org/rec/journals/cacm/Voorhees07.bib", + "bibsource": "dblp computer science bibliography, https://dblp.org" + } +} \ No newline at end of file From 91364656b1eca86c7733003d046dbe2339553893 Mon Sep 17 00:00:00 2001 From: Jueri Date: Fri, 4 Mar 2022 08:59:22 +0100 Subject: [PATCH 02/15] cleanup --- test.ipynb | 387 ------------------------ test.json | 870 ----------------------------------------------------- 2 files changed, 1257 deletions(-) delete mode 100644 test.ipynb delete mode 100644 test.json diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index de38a76..0000000 --- a/test.ipynb +++ /dev/null @@ -1,387 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bibtes_path = \"BibTexTools/test/data/cleaned.bib\"" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "assert os.path.isfile(bibtes_path)\n", - "with open(bibtes_path, \"r\") as fin:\n", - " a = readlines(fin)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from BibTexTools import bibtex_parser" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from BibTexTools.bibtex_parser import Parser\n", - "\n", - "parser = Parser()\n", - "\n", - "bibtes_path = \"BibTexTools/tests/data/cleaned.bib\"\n", - "a = parser.from_file(bibtes_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "with open(bibtes_path, \"r\") as fin:\n", - " bibtex_string = fin.read()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "bib = parser.parse(bibtex_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'article'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a.entries[0]._type" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'@article{DBLP:journals/corr/ZahediCW17, author= {Zohreh Zahedi and Rodrigo Costas and Paul Wouters}, title= {Mendeley readership as a filtering tool to identify highly cited publications}, journal= {CoRR}, volume= {abs/1703.07104}, year= {2017}, url= {http://arxiv.org/abs/1703.07104}, eprinttype = {arXiv}, eprint= {1703.07104}, timestamp = {Mon, 13 Aug 2018 16:48:04 +0200}, biburl= {https://dblp.org/rec/journals/corr/ZahediCW17.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - 
"source": [ - "a.entries[0].field_str" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'@article{DBLP:journals/corr/ZahediCW17, author= {Zohreh Zahedi and Rodrigo Costas and Paul Wouters}, title= {Mendeley readership as a filtering tool to identify highly cited publications}, journal= {CoRR}, volume= {abs/1703.07104}, year= {2017}, url= {http://arxiv.org/abs/1703.07104}, eprinttype = {arXiv}, eprint= {1703.07104}, timestamp = {Mon, 13 Aug 2018 16:48:04 +0200}, biburl= {https://dblp.org/rec/journals/corr/ZahediCW17.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a.entries[0].field_str" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jueri/dev/clean_bibtex/BibTexTools/bibliography.py:217: UserWarning: Warning: \"url\" is not a standard Bibtex field\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "from BibTexTools.bibtex_parser import Parser\n", - "\n", - "parser = Parser()\n", - "\n", - "with open(\n", - " os.path.join(\"BibTexTools\", \"tests\", \"data\", \"full.bib\"), \"r\"\n", - ") as fin:\n", - " bibtex_string = fin.read()\n", - "\n", - "parsed_bibtex = parser.parse(bibtex_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "parsed_bibtex.to_json(os.path.join(\"BibTexTools\", \"tests\", \"data\", \"to_json_ref.json\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Akey': {'type': 'Atype',\n", - " 'author': 'A1_First von A1_Last and von A2_Last, A2_First',\n", - " 'title': 'A_Title',\n", - " 'journal': 'A_Journal',\n", - " 'volume': 'A_Volume',\n", - " 'year': 
'A_Year',\n", - " 'url': 'A_Url'}}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "entry = parsed_bibtex.to_dict()\n", - "entry.to_dict()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BibTexTools.bibliography.Journal_field" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(entry.journal)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'parsed_bibtex' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [2]\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0m parsed_bibtex\u001b[39m.\u001b[39mentries[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39myear\u001b[39m.\u001b[39mvalue\n", - "\u001b[0;31mNameError\u001b[0m: name 'parsed_bibtex' is not defined" - ] - } - ], - "source": [ - "parsed_bibtex.entries[0].year.value#.bibsource" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"2017\".isdigit()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'@article{DBLP:journals/ipm/Park97, author= {H. Park}, title= {Relevance of Science Information: Origins and Dimensions of Relevance and Their Implications to Information Retrieval}, journal= {Inf. Process. 
Manag.}, volume= {33}, number= {3}, pages= {339--352}, year= {1997}, url= {https://doi.org/10.1016/S0306-4573(96)00072-6}, doi= {10.1016/S0306-4573(96)00072-6}, timestamp = {Fri, 21 Feb 2020 13:11:34 +0100}, biburl= {https://dblp.org/rec/journals/ipm/Park97.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} '" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "parsed_bibtex.entries[10].string" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "from dataclasses import dataclass, field\n", - "\n", - "@dataclass\n", - "class Entry:\n", - " field_str: str = \"\"\n", - " _type: str = \"\"\n", - " key: str = \"\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "e = Entry()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "''" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = list(e.__dict__.keys())[0]\n", - "e.__getattribute__(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "name = \"test\"\n", - "value = \"Hi\"\n", - "setattr(e, name, value)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'field_str': '', '_type': '', 'key': '', 'test': 'Hi'}" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "e.__dict__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "interpreter": { - "hash": "95fc40b8f07b755b1d46d3c633b6bcaf977032494613b4a25479a60eb79a03a2" - }, - "kernelspec": { - "display_name": "Python 3.8.9 
('clean_bibtex-IH1MGB3M')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/test.json b/test.json deleted file mode 100644 index 53d4092..0000000 --- a/test.json +++ /dev/null @@ -1,870 +0,0 @@ -{ - "DBLP:journals/corr/ZahediCW17": { - "type": "article", - "author": "Zohreh Zahedi and Rodrigo Costas and Paul Wouters", - "title": "Mendeley readership as a filtering tool to identify highly cited publications", - "journal": "CoRR", - "volume": "abs/1703.07104", - "year": 2017, - "url": "http://arxiv.org/abs/1703.07104", - "eprinttype": "arXiv", - "eprint": "1703.07104", - "timestamp": "Mon, 13 Aug 2018 16:48:04 +0200", - "biburl": "https://dblp.org/rec/journals/corr/ZahediCW17.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jet/Dietrich15": { - "type": "article", - "author": "Franz Dietrich", - "title": "Aggregation theory and the relevance of some issues to others", - "journal": "J. Econ. Theory", - "volume": 160, - "pages": "463--493", - "year": 2015, - "url": "https://doi.org/10.1016/j.jet.2015.03.012", - "doi": "10.1016/j.jet.2015.03.012", - "timestamp": "Mon, 24 Feb 2020 15:54:33 +0100", - "biburl": "https://dblp.org/rec/journals/jet/Dietrich15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/spatialCognition/Tenbrink12": { - "type": "inproceedings", - "author": "Thora Tenbrink", - "editor": "Cyrill Stachniss and Kerstin Schill and David H. 
Uttal", - "title": "Relevance in Spatial Navigation and Communication", - "booktitle": "Spatial Cognition VIII - International Conference, Spatial Cognition 2012, Kloster Seeon, Germany, August 31 - September 3, 2012. Proceedings", - "series": "Lecture Notes in Computer Science", - "volume": 7463, - "pages": "358--377", - "publisher": "Springer", - "year": 2012, - "url": "https://doi.org/10.1007/978-3-642-32732-2\\_23", - "doi": "10.1007/978-3-642-32732-2\\_23", - "timestamp": "Sun, 02 Jun 2019 21:21:23 +0200", - "biburl": "https://dblp.org/rec/conf/spatialCognition/Tenbrink12.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/iiix/LarsenIK06": { - "type": "inproceedings", - "author": "Birger Larsen and Peter Ingwersen and Jaana Kek\\\"al\\\"ainen", - "editor": "Ian Ruthven", - "title": "The polyrepresentation continuum in IR", - "booktitle": "Proceedings of the 1st International Conference on Information Interaction in Context, IIiX 2006, Copenhagen, Denmark, October 18-20, 2006", - "pages": "88--96", - "publisher": "ACM", - "year": 2006, - "url": "https://doi.org/10.1145/1164820.1164840", - "doi": "10.1145/1164820.1164840", - "timestamp": "Tue, 06 Nov 2018 16:58:20 +0100", - "biburl": "https://dblp.org/rec/conf/iiix/LarsenIK06.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/sigir/Ingwersen94": { - "type": "inproceedings", - "author": "Peter Ingwersen", - "editor": "W. Bruce Croft and C. J. van Rijsbergen", - "title": "Polyrepresentation of Information Needs and Semantic Entities: Elements of a Cognitive Theory for Information Retrieval Interaction", - "booktitle": "Proceedings of the 17th Annual International ACM-SIGIR Conference on Research and Development in Information Retrieval. 
Dublin, Ireland, 3-6 July 1994 (Special Issue of the SIGIR Forum)", - "pages": "101--110", - "publisher": "ACM/Springer", - "year": 1994, - "url": "https://doi.org/10.1007/978-1-4471-2099-5\\_11", - "doi": "10.1007/978-1-4471-2099-5\\_11", - "timestamp": "Thu, 25 Jul 2019 17:43:05 +0200", - "biburl": "https://dblp.org/rec/conf/sigir/Ingwersen94.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/Wada20": { - "type": "article", - "author": "Tetsuo Wada", - "title": "When do the USPTO examiners cite as the EPO examiners? An analysis of examination spillovers through rejection citations at the international family-to-family level", - "journal": "Scientometrics", - "volume": 125, - "number": 2, - "pages": "1591--1615", - "year": 2020, - "url": "https://doi.org/10.1007/s11192-020-03674-4", - "doi": "10.1007/s11192-020-03674-4", - "timestamp": "Fri, 14 May 2021 08:32:27 +0200", - "biburl": "https://dblp.org/rec/journals/scientometrics/Wada20.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/Garfield98": { - "type": "article", - "author": "Eugene Garfield", - "title": "Random thoughts on citationology its theory and practice", - "journal": "Scientometrics", - "volume": 43, - "number": 1, - "pages": "69--76", - "year": 1998, - "url": "https://doi.org/10.1007/BF02458396", - "doi": "10.1007/BF02458396", - "timestamp": "Fri, 17 Jul 2020 06:39:13 +0200", - "biburl": "https://dblp.org/rec/journals/scientometrics/Garfield98.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/crl/Tyckoson15": { - "type": "article", - "author": "David A. Tyckoson", - "title": "Question-Negotiation and Information Seeking in Libraries: A Timeless Topic in a Timeless Article", - "journal": "Coll. Res. 
Libr.", - "volume": 76, - "number": 3, - "pages": "247--250", - "year": 2015, - "url": "https://doi.org/10.5860/crl.76.3.247", - "doi": "10.5860/crl.76.3.247", - "timestamp": "Thu, 25 Jun 2020 21:29:22 +0200", - "biburl": "https://dblp.org/rec/journals/crl/Tyckoson15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/issi/Holmberg15a": { - "type": "inproceedings", - "author": "Kim Holmberg", - "editor": "Albert Ali Salah and Yasar Tonta and Alkim Almila Akdag Salah and Cassidy R. Sugimoto and Umut Al", - "title": "Classifying Altmetrics by Level of Impact", - "booktitle": "Proceedings of the 15th International Conference on Scientometrics and Informetrics, Istanbul, Turkey, June 29 - July 3, 2015", - "publisher": "ISSI Society", - "year": 2015, - "timestamp": "Tue, 14 Apr 2020 11:09:56 +0200", - "biburl": "https://dblp.org/rec/conf/issi/Holmberg15a.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/ipm/SchamberEN90": { - "type": "article", - "author": "Linda Schamber and Michael B. Eisenberg and Michael Sanford Nilan", - "title": "A re-examination of relevance: toward a dynamic, situational definition", - "journal": "Inf. Process. Manag.", - "volume": 26, - "number": 6, - "pages": "755--776", - "year": 1990, - "url": "https://doi.org/10.1016/0306-4573(90)90050-C", - "doi": "10.1016/0306-4573(90)90050-C", - "timestamp": "Fri, 21 Feb 2020 13:11:07 +0100", - "biburl": "https://dblp.org/rec/journals/ipm/SchamberEN90.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/ipm/Park97": { - "type": "article", - "author": "H. Park", - "title": "Relevance of Science Information: Origins and Dimensions of Relevance and Their Implications to Information Retrieval", - "journal": "Inf. Process. 
Manag.", - "volume": 33, - "number": 3, - "pages": "339--352", - "year": 1997, - "url": "https://doi.org/10.1016/S0306-4573(96)00072-6", - "doi": "10.1016/S0306-4573(96)00072-6", - "timestamp": "Fri, 21 Feb 2020 13:11:34 +0100", - "biburl": "https://dblp.org/rec/journals/ipm/Park97.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/siamrev/ClausetSN09": { - "type": "article", - "author": "Aaron Clauset and Cosma Rohilla Shalizi and Mark E. J. Newman", - "title": "Power-Law Distributions in Empirical Data", - "journal": "SIAM Rev.", - "volume": 51, - "number": 4, - "pages": "661--703", - "year": 2009, - "url": "https://doi.org/10.1137/070710111", - "doi": "10.1137/070710111", - "timestamp": "Tue, 29 Sep 2020 10:58:15 +0200", - "biburl": "https://dblp.org/rec/journals/siamrev/ClausetSN09.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jasis/KoushaT17b": { - "type": "article", - "author": "Kayvan Kousha and Mike Thelwall", - "title": "News stories as evidence for research? BBC citations from articles, Books, and Wikipedia", - "journal": "J. Assoc. Inf. Sci. Technol.", - "volume": 68, - "number": 8, - "pages": "2017--2028", - "year": 2017, - "url": "https://doi.org/10.1002/asi.23862", - "doi": "10.1002/asi.23862", - "timestamp": "Mon, 02 Mar 2020 17:21:46 +0100", - "biburl": "https://dblp.org/rec/journals/jasis/KoushaT17b.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/RobertsDVHBLPM19": { - "type": "inproceedings", - "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar and Shubham Pant and Funda Meric-Bernstam", - "editor": "Ellen M. 
Voorhees and Angela Ellis", - "title": "Overview of the TREC 2019 Precision Medicine Track", - "booktitle": "Proceedings of the Twenty-Eighth Text REtrieval Conference, TREC 2019, Gaithersburg, Maryland, USA, November 13-15, 2019", - "series": "NIST Special Publication", - "volume": 1250, - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2019, - "url": "https://trec.nist.gov/pubs/trec28/papers/OVERVIEW.PM.pdf", - "timestamp": "Wed, 03 Feb 2021 08:31:24 +0100", - "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBLPM19.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/RobertsDVHBL18": { - "type": "inproceedings", - "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar", - "editor": "Ellen M. Voorhees and Angela Ellis", - "title": "Overview of the TREC 2018 Precision Medicine Track", - "booktitle": "Proceedings of the Twenty-Seventh Text REtrieval Conference, TREC 2018, Gaithersburg, Maryland, USA, November 14-16, 2018", - "series": "NIST Special Publication", - "volume": "500-331", - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2018, - "url": "https://trec.nist.gov/pubs/trec27/papers/Overview-PM.pdf", - "timestamp": "Wed, 03 Feb 2021 08:31:25 +0100", - "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBL18.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/RobertsDVHBLP17": { - "type": "inproceedings", - "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh and Steven Bedrick and Alexander J. Lazar and Shubham Pant", - "editor": "Ellen M. 
Voorhees and Angela Ellis", - "title": "Overview of the TREC 2017 Precision Medicine Track", - "booktitle": "Proceedings of The Twenty-Sixth Text REtrieval Conference, TREC 2017, Gaithersburg, Maryland, USA, November 15-17, 2017", - "series": "NIST Special Publication", - "volume": "500-324", - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2017, - "url": "https://trec.nist.gov/pubs/trec26/papers/Overview-PM.pdf", - "timestamp": "Wed, 07 Jul 2021 16:44:22 +0200", - "biburl": "https://dblp.org/rec/conf/trec/RobertsDVHBLP17.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/RobertsDVH16": { - "type": "inproceedings", - "author": "Kirk Roberts and Dina Demner-Fushman and Ellen M. Voorhees and William R. Hersh", - "editor": "Ellen M. Voorhees and Angela Ellis", - "title": "Overview of the TREC 2016 Clinical Decision Support Track", - "booktitle": "Proceedings of The Twenty-Fifth Text REtrieval Conference, TREC 2016, Gaithersburg, Maryland, USA, November 15-18, 2016", - "series": "NIST Special Publication", - "volume": "500-321", - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2016, - "url": "http://trec.nist.gov/pubs/trec25/papers/Overview-CL.pdf", - "timestamp": "Wed, 03 Feb 2021 08:31:25 +0100", - "biburl": "https://dblp.org/rec/conf/trec/RobertsDVH16.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/RobertsSVH15": { - "type": "inproceedings", - "author": "Kirk Roberts and Matthew S. Simpson and Ellen M. Voorhees and William R. Hersh", - "editor": "Ellen M. 
Voorhees and Angela Ellis", - "title": "Overview of the TREC 2015 Clinical Decision Support Track", - "booktitle": "Proceedings of The Twenty-Fourth Text REtrieval Conference, TREC 2015, Gaithersburg, Maryland, USA, November 17-20, 2015", - "series": "NIST Special Publication", - "volume": "500-319", - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2015, - "url": "http://trec.nist.gov/pubs/trec24/papers/Overview-CL.pdf", - "timestamp": "Wed, 03 Feb 2021 08:31:23 +0100", - "biburl": "https://dblp.org/rec/conf/trec/RobertsSVH15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/trec/SimpsonVH14": { - "type": "inproceedings", - "author": "Matthew S. Simpson and Ellen M. Voorhees and William R. Hersh", - "editor": "Ellen M. Voorhees and Angela Ellis", - "title": "Overview of the TREC 2014 Clinical Decision Support Track", - "booktitle": "Proceedings of The Twenty-Third Text REtrieval Conference, TREC 2014, Gaithersburg, Maryland, USA, November 19-21, 2014", - "series": "NIST Special Publication", - "volume": "500-308", - "publisher": "National Institute of Standards and Technology (NIST)", - "year": 2014, - "url": "https://trec.nist.gov/pubs/trec23/papers/overview-clinical.pdf", - "timestamp": "Wed, 03 Feb 2021 08:31:24 +0100", - "biburl": "https://dblp.org/rec/conf/trec/SimpsonVH14.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/MutschkeMSS11": { - "type": "article", - "author": "Peter Mutschke and Philipp Mayr and Philipp Schaer and York Sure", - "title": "Science models as value-added services for scholarly information systems", - "journal": "Scientometrics", - "volume": 89, - "number": 1, - "pages": "349--364", - "year": 2011, - "url": "https://doi.org/10.1007/s11192-011-0430-x", - "doi": "10.1007/s11192-011-0430-x", - "timestamp": "Fri, 17 Jul 2020 06:40:46 +0200", - "biburl": 
"https://dblp.org/rec/journals/scientometrics/MutschkeMSS11.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jd/Langham95": { - "type": "article", - "author": "Thomas Langham", - "title": "Consistency in Referencing", - "journal": "J. Documentation", - "volume": 51, - "number": 4, - "pages": "360--369", - "year": 1995, - "url": "https://doi.org/10.1108/eb026955", - "doi": "10.1108/eb026955", - "timestamp": "Sun, 06 Sep 2020 16:55:56 +0200", - "biburl": "https://dblp.org/rec/journals/jd/Langham95.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/iiix/Ingwersen12": { - "type": "inproceedings", - "author": "Peter Ingwersen", - "editor": "Jaap Kamps and Wessel Kraaij and Norbert Fuhr", - "title": "Citations and references as keys to relevance ranking in interactive IR", - "booktitle": "Information Interaction in Context: 2012, IIix'12, Nijmegen, The Netherlands, August 21-24, 2012", - "pages": 1, - "publisher": "ACM", - "year": 2012, - "url": "https://doi.org/10.1145/2362724.2362726", - "doi": "10.1145/2362724.2362726", - "timestamp": "Tue, 06 Nov 2018 16:58:20 +0100", - "biburl": "https://dblp.org/rec/conf/iiix/Ingwersen12.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/tois/JarvelinK02": { - "type": "article", - "author": "Kalervo J\\\"arvelin and Jaana Kek\\\"al\\\"ainen", - "title": "Cumulated gain-based evaluation of IR techniques", - "journal": "ACM Trans. Inf. 
Syst.", - "volume": 20, - "number": 4, - "pages": "422--446", - "year": 2002, - "url": "http://doi.acm.org/10.1145/582415.582418", - "doi": "10.1145/582415.582418", - "timestamp": "Fri, 09 Jun 2017 11:03:19 +0200", - "biburl": "https://dblp.org/rec/journals/tois/JarvelinK02.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/ir/Fuhr08": { - "type": "article", - "author": "Norbert Fuhr", - "title": "A probability ranking principle for interactive information retrieval", - "journal": "Inf. Retr.", - "volume": 11, - "number": 3, - "pages": "251--265", - "year": 2008, - "url": "https://doi.org/10.1007/s10791-008-9045-0", - "doi": "10.1007/s10791-008-9045-0", - "timestamp": "Sat, 27 May 2017 14:25:37 +0200", - "biburl": "https://dblp.org/rec/journals/ir/Fuhr08.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:series/synthesis/2019Alonso": { - "type": "book", - "author": "Omar Alonso", - "title": "The Practice of Crowdsourcing", - "series": "Synthesis Lectures on Information Concepts, Retrieval, and Services", - "publisher": "Morgan \\& Claypool Publishers", - "year": 2019, - "url": "https://doi.org/10.2200/S00904ED1V01Y201903ICR066", - "doi": "10.2200/S00904ED1V01Y201903ICR066", - "timestamp": "Tue, 18 Jun 2019 11:34:18 +0200", - "biburl": "https://dblp.org/rec/series/synthesis/2019Alonso.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/clef/Schaer12": { - "type": "inproceedings", - "author": "Philipp Schaer", - "editor": "Tiziana Catarci and Pamela Forner and Djoerd Hiemstra and Anselmo Pe\\~nas and Giuseppe Santucci", - "title": "Better than Their Reputation? On the Reliability of Relevance Assessments with Students", - "booktitle": "Information Access Evaluation. Multilinguality, Multimodality, and Visual Analytics - Third International Conference of the CLEF Initiative, CLEF 2012, Rome, Italy, September 17-20, 2012. 
Proceedings", - "series": "Lecture Notes in Computer Science", - "volume": 7488, - "pages": "124--135", - "publisher": "Springer", - "year": 2012, - "url": "https://doi.org/10.1007/978-3-642-33247-0\\_14", - "doi": "10.1007/978-3-642-33247-0\\_14", - "timestamp": "Tue, 14 May 2019 10:00:50 +0200", - "biburl": "https://dblp.org/rec/conf/clef/Schaer12.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/ipm/White17": { - "type": "article", - "author": "Howard D. White", - "title": "Relevance theory and distributions of judgments in document retrieval", - "journal": "Inf. Process. Manag.", - "volume": 53, - "number": 5, - "pages": "1080--1102", - "year": 2017, - "url": "https://doi.org/10.1016/j.ipm.2017.02.010", - "doi": "10.1016/j.ipm.2017.02.010", - "timestamp": "Fri, 21 Feb 2020 13:11:26 +0100", - "biburl": "https://dblp.org/rec/journals/ipm/White17.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/birws/BreuerST20": { - "type": "inproceedings", - "author": "Timo Breuer and Philipp Schaer and Dirk Tunger", - "editor": "Guillaume Cabanac and Ingo Frommholz and Philipp Mayr", - "title": "Relations Between Relevance Assessments, Bibliometrics and Altmetrics", - "booktitle": "Proceedings of the 10th International Workshop on Bibliometric-enhanced Information Retrieval co-located with 42nd European Conference on Information Retrieval, BIR@ECIR 2020, Lisbon, Portugal, April 14th, 2020 [online only]", - "series": "CEUR Workshop Proceedings", - "volume": 2591, - "pages": "101--112", - "publisher": "CEUR-WS.org", - "year": 2020, - "url": "http://ceur-ws.org/Vol-2591/paper-10.pdf", - "timestamp": "Tue, 14 Apr 2020 17:42:07 +0200", - "biburl": "https://dblp.org/rec/conf/birws/BreuerST20.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jd/Ingwersen96": { - "type": "article", - "author": "Peter Ingwersen", - "title": "Cognitive Perspectives 
of Information Retrieval Interaction: Elements of a Cognitive IR Theory", - "journal": "J. Documentation", - "volume": 52, - "number": 1, - "pages": "3--50", - "year": 1996, - "url": "https://doi.org/10.1108/eb026960", - "doi": "10.1108/eb026960", - "timestamp": "Sun, 06 Sep 2020 16:56:08 +0200", - "biburl": "https://dblp.org/rec/journals/jd/Ingwersen96.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jasis/Mizzaro97": { - "type": "article", - "author": "Stefano Mizzaro", - "title": "Relevance: The Whole History", - "journal": "J. Am. Soc. Inf. Sci.", - "volume": 48, - "number": 9, - "pages": "810--832", - "year": 1997, - "url": "https://doi.org/10.1002/(SICI)1097-4571(199709)48:9\\%3C810::AID-ASI6\\%3E3.0.CO;2-U", - "doi": "10.1002/(SICI)1097-4571(199709)48:9\\%3C810::AID-ASI6\\%3E3.0.CO;2-U", - "timestamp": "Wed, 13 May 2020 17:19:45 +0200", - "biburl": "https://dblp.org/rec/journals/jasis/Mizzaro97.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/ClermontKT21": { - "type": "article", - "author": "Marcel Clermont and Johanna Krolak and Dirk Tunger", - "title": "Does the citation period have any effect on the informative value of selected citation indicators in research evaluations?", - "journal": "Scientometrics", - "volume": 126, - "number": 2, - "pages": "1019--1047", - "year": 2021, - "url": "https://doi.org/10.1007/s11192-020-03782-1", - "doi": "10.1007/s11192-020-03782-1", - "timestamp": "Fri, 14 May 2021 08:32:26 +0200", - "biburl": "https://dblp.org/rec/journals/scientometrics/ClermontKT21.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/joi/SungWHC15": { - "type": "article", - "author": "Hui-Yun Sung and Chun-Chieh Wang and Mu-Hsuan Huang and Dar-Zen Chen", - "title": "Measuring science-based science linkage and non-science-based linkage of patents through non-patent references", - "journal": "J. 
Informetrics", - "volume": 9, - "number": 3, - "pages": "488--498", - "year": 2015, - "url": "https://doi.org/10.1016/j.joi.2015.04.004", - "doi": "10.1016/j.joi.2015.04.004", - "timestamp": "Fri, 27 Mar 2020 08:32:34 +0100", - "biburl": "https://dblp.org/rec/journals/joi/SungWHC15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jasis/SmolinskyLM15": { - "type": "article", - "author": "Lawrence J. Smolinsky and Aaron J. Lercher and Andrew McDaniel", - "title": "Testing theories of preferential attachment in random networks of citations", - "journal": "J. Assoc. Inf. Sci. Technol.", - "volume": 66, - "number": 10, - "pages": "2132--2145", - "year": 2015, - "url": "https://doi.org/10.1002/asi.23312", - "doi": "10.1002/asi.23312", - "timestamp": "Thu, 29 Apr 2021 11:10:56 +0200", - "biburl": "https://dblp.org/rec/journals/jasis/SmolinskyLM15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jdis/Raan17": { - "type": "article", - "author": "Anthony F. J. van Raan", - "title": "Patent Citations Analysis and Its Value in Research Evaluation: A Review and a New Approach to Map Technology-relevant Research", - "journal": "J. Data Inf. Sci.", - "volume": 2, - "number": 1, - "pages": "13--50", - "year": 2017, - "url": "https://doi.org/10.1515/jdis-2017-0002", - "doi": "10.1515/jdis-2017-0002", - "timestamp": "Thu, 06 Jan 2022 14:19:59 +0100", - "biburl": "https://dblp.org/rec/journals/jdis/Raan17.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/chiir/2021birds": { - "type": "proceedings", - "editor": "Ingo Frommholz and Haiming Liu and Massimo Melucci and Nicholas J. Belkin and Gareth J. F. 
Jones and Noriko Kando and Gabriella Pasi", - "title": "Joint Proceedings of the Second Workshop on Bridging the Gap between Information Science, Information Retrieval and Data Science, and Third Workshop on Evaluation of Personalisation in Information Retrieval co-located with 6th ACM SIGIR Conference on Human Information Interaction and Retrieval (CHIIR 2021), Canberra, Australia (Virtual Event), March 19th, 2021", - "series": "CEUR Workshop Proceedings", - "volume": 2863, - "publisher": "CEUR-WS.org", - "year": 2021, - "url": "http://ceur-ws.org/Vol-2863", - "urn": "urn:nbn:de:0074-2863-0", - "timestamp": "Wed, 19 May 2021 16:09:08 +0200", - "biburl": "https://dblp.org/rec/conf/chiir/2021birds.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jd/Cronin81": { - "type": "article", - "author": "Blaise Cronin", - "title": "The Need for a Theory of citing", - "journal": "J. Documentation", - "volume": 37, - "number": 1, - "pages": "16--24", - "year": 1981, - "url": "https://doi.org/10.1108/eb026703", - "doi": "10.1108/eb026703", - "timestamp": "Sun, 06 Sep 2020 16:56:21 +0200", - "biburl": "https://dblp.org/rec/journals/jd/Cronin81.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/Larsen02": { - "type": "article", - "author": "Birger Larsen", - "title": "Exploiting citation overlaps for Information Retrieval: Generating a boomerang effect from the network of scientific papers", - "journal": "Scientometrics", - "volume": 54, - "number": 2, - "pages": "155--178", - "year": 2002, - "url": "https://doi.org/10.1023/A:1016011326300", - "doi": "10.1023/A:1016011326300", - "timestamp": "Fri, 17 Jul 2020 06:40:02 +0200", - "biburl": "https://dblp.org/rec/journals/scientometrics/Larsen02.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/LykkeLLI10": { - "type": "inproceedings", - "author": "Marianne Lykke and Birger 
Larsen and Haakon Lund and Peter Ingwersen", - "editor": "Cathal Gurrin and Yulan He and Gabriella Kazai and Udo Kruschwitz and Suzanne Little and Thomas Roelleke and Stefan M. R\\\"uger and Keith van Rijsbergen", - "title": "Developing a Test Collection for the Evaluation of Integrated Search", - "booktitle": "Advances in Information Retrieval, 32nd European Conference on IR Research, ECIR 2010, Milton Keynes, UK, March 28-31, 2010. Proceedings", - "series": "Lecture Notes in Computer Science", - "volume": 5993, - "pages": "627--630", - "publisher": "Springer", - "year": 2010, - "url": "https://doi.org/10.1007/978-3-642-12275-0\\_63", - "doi": "10.1007/978-3-642-12275-0\\_63", - "timestamp": "Sun, 25 Oct 2020 22:33:08 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/LykkeLLI10.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/BallT06": { - "type": "article", - "author": "Rafael Ball and Dirk Tunger", - "title": "Bibliometric analysis - A new business area for information professionals in libraries?", - "journal": "Scientometrics", - "volume": 66, - "number": 3, - "pages": "561--577", - "year": 2006, - "url": "https://doi.org/10.1007/s11192-006-0041-0", - "doi": "10.1007/s11192-006-0041-0", - "timestamp": "Mon, 26 Oct 2020 08:45:29 +0100", - "biburl": "https://dblp.org/rec/journals/scientometrics/BallT06.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/icis/Jain03": { - "type": "inproceedings", - "author": "Abhijit Jain", - "title": "Performance Paradox : Information Technology Investments and Administrative Performance in the Case of the 50 U.S. 
State Governments", - "booktitle": "Proceedings of the International Conference on Information Systems, ICIS 2003, December 14-17, 2003, Seattle, Washington, USA", - "pages": "389--400", - "publisher": "Association for Information Systems", - "year": 2003, - "url": "http://aisel.aisnet.org/icis2003/33", - "timestamp": "Sun, 29 Mar 2015 13:07:34 +0200", - "biburl": "https://dblp.org/rec/conf/icis/Jain03.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/ipm/SkovLI08": { - "type": "article", - "author": "Mette Skov and Birger Larsen and Peter Ingwersen", - "title": "Inter and intra-document contexts applied in polyrepresentation for best match IR", - "journal": "Inf. Process. Manag.", - "volume": 44, - "number": 5, - "pages": "1673--1683", - "year": 2008, - "url": "https://doi.org/10.1016/j.ipm.2008.05.006", - "doi": "10.1016/j.ipm.2008.05.006", - "timestamp": "Fri, 21 Feb 2020 13:11:05 +0100", - "biburl": "https://dblp.org/rec/journals/ipm/SkovLI08.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/issi/Tunger19": { - "type": "inproceedings", - "author": "Dirk Tunger", - "editor": "Giuseppe Catalano and Cinzia Daraio and Martina Gregori and Henk F. Moed and Giancarlo Ruocco", - "title": "Altmetrics - on the way to the \"economy of attention\"? 
Feasibility study Altmetrics for the German Ministry of Science and Research (BMBF)", - "booktitle": "Proceedings of the 17th International Conference on Scientometrics and Informetrics, ISSI 2019, Rome, Italy, September 2-5, 2019", - "pages": "2262--2272", - "publisher": "ISSI Society", - "year": 2019, - "timestamp": "Tue, 14 Apr 2020 11:09:56 +0200", - "biburl": "https://dblp.org/rec/conf/issi/Tunger19.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/Bessagnet18": { - "type": "inproceedings", - "author": "Marie-No\\\"elle Bessagnet", - "editor": "Philipp Mayr and Ingo Frommholz and Guillaume Cabanac", - "title": "A Generic Framework to Perform Comprehensive Analysis of Tweets", - "booktitle": "Proceedings of the 7th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2018) co-located with the 40th European Conference on Information Retrieval (ECIR 2018), Grenoble, France, March 26, 2018", - "series": "CEUR Workshop Proceedings", - "volume": 2080, - "pages": "80--85", - "publisher": "CEUR-WS.org", - "year": 2018, - "url": "http://ceur-ws.org/Vol-2080/paper9.pdf", - "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/Bessagnet18.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/SaierF19": { - "type": "inproceedings", - "author": "Tarek Saier and Michael F\\\"arber", - "editor": "Guillaume Cabanac and Ingo Frommholz and Philipp Mayr", - "title": "Bibliometric-Enhanced arXiv: A Data Set for Paper-Based and Citation-Based Tasks", - "booktitle": "Proceedings of the 8th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2019) co-located with the 41st European Conference on Information Retrieval (ECIR 2019), Cologne, Germany, April 14, 2019", - "series": "CEUR Workshop Proceedings", - "volume": 2345, - "pages": "14--26", - "publisher": "CEUR-WS.org", - "year": 2019, - "url": 
"http://ceur-ws.org/Vol-2345/paper2.pdf", - "timestamp": "Thu, 21 Jan 2021 17:36:28 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/SaierF19.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/DabrowskaL15": { - "type": "inproceedings", - "author": "Anna Dabrowska and Birger Larsen", - "editor": "Philipp Mayr and Ingo Frommholz and Peter Mutschke", - "title": "Exploiting Citation Contexts for Physics Retrieval", - "booktitle": "Proceedings of the Second Workshop on Bibliometric-enhanced Information Retrieval co-located with the 37th European Conference on Information Retrieval (ECIR 2015), Vienna, Austria, March 29th, 2015", - "series": "CEUR Workshop Proceedings", - "volume": 1344, - "pages": "14--21", - "publisher": "CEUR-WS.org", - "year": 2015, - "url": "http://ceur-ws.org/Vol-1344/paper2.pdf", - "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/DabrowskaL15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/aslib/CostasZW15": { - "type": "article", - "author": "Rodrigo Costas and Zohreh Zahedi and Paul Wouters", - "title": "The thematic orientation of publications mentioned on social media: Large-scale disciplinary comparison of social media metrics with citations", - "journal": "Aslib J. Inf. 
Manag.", - "volume": 67, - "number": 3, - "pages": "260--288", - "year": 2015, - "url": "https://doi.org/10.1108/AJIM-12-2014-0173", - "doi": "10.1108/AJIM-12-2014-0173", - "timestamp": "Mon, 03 Jan 2022 22:07:53 +0100", - "biburl": "https://dblp.org/rec/journals/aslib/CostasZW15.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/scientometrics/MayrS15a": { - "type": "article", - "author": "Philipp Mayr and Andrea Scharnhorst", - "title": "Scientometrics and information retrieval: weak-links revitalized", - "journal": "Scientometrics", - "volume": 102, - "number": 3, - "pages": "2193--2199", - "year": 2015, - "url": "https://doi.org/10.1007/s11192-014-1484-3", - "doi": "10.1007/s11192-014-1484-3", - "timestamp": "Fri, 17 Jul 2020 06:39:21 +0200", - "biburl": "https://dblp.org/rec/journals/scientometrics/MayrS15a.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/sigir/Azzopardi11": { - "type": "inproceedings", - "author": "Leif Azzopardi", - "editor": "Wei-Ying Ma and Jian-Yun Nie and Ricardo Baeza-Yates and Tat-Seng Chua and W. Bruce Croft", - "title": "The economics in interactive information retrieval", - "booktitle": "Proceeding of the 34th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2011, Beijing, China, July 25-29, 2011", - "pages": "15--24", - "publisher": "ACM", - "year": 2011, - "url": "https://doi.org/10.1145/2009916.2009923", - "doi": "10.1145/2009916.2009923", - "timestamp": "Sun, 22 Sep 2019 18:15:38 +0200", - "biburl": "https://dblp.org/rec/conf/sigir/Azzopardi11.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/sigir/LarsenI02": { - "type": "inproceedings", - "author": "Birger Larsen and Peter Ingwersen", - "editor": "Kalervo J\\\"arvelin and Micheline Beaulieu and Ricardo A. 
Baeza-Yates and Sung-Hyon Myaeng", - "title": "The boomerang effect: retrieving scientific documents via the network of references and citations", - "booktitle": "SIGIR 2002: Proceedings of the 25th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, August 11-15, 2002, Tampere, Finland", - "pages": "397--398", - "publisher": "ACM", - "year": 2002, - "url": "https://doi.org/10.1145/564376.564462", - "doi": "10.1145/564376.564462", - "timestamp": "Wed, 07 Nov 2018 14:52:44 +0100", - "biburl": "https://dblp.org/rec/conf/sigir/LarsenI02.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/isu/MeierT18": { - "type": "article", - "author": "Andreas Meier and Dirk Tunger", - "title": "Investigating the transparency and influenceability of altmetrics using the example of the RG score and the ResearchGate platform", - "journal": "Inf. Serv. Use", - "volume": 38, - "number": "1-2", - "pages": "99--110", - "year": 2018, - "url": "https://doi.org/10.3233/ISU-180001", - "doi": "10.3233/ISU-180001", - "timestamp": "Mon, 11 May 2020 15:37:55 +0200", - "biburl": "https://dblp.org/rec/journals/isu/MeierT18.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/KacemM18": { - "type": "inproceedings", - "author": "Ameni Kacem and Philipp Mayr", - "editor": "Philipp Mayr and Ingo Frommholz and Guillaume Cabanac", - "title": "Users are not Influenced by High Impact and Core Journals while Searching", - "booktitle": "Proceedings of the 7th International Workshop on Bibliometric-enhanced Information Retrieval (BIR 2018) co-located with the 40th European Conference on Information Retrieval (ECIR 2018), Grenoble, France, March 26, 2018", - "series": "CEUR Workshop Proceedings", - "volume": 2080, - "pages": "63--75", - "publisher": "CEUR-WS.org", - "year": 2018, - "url": "http://ceur-ws.org/Vol-2080/paper7.pdf", - "timestamp": "Wed, 12 Feb 2020 16:44:12 
+0100", - "biburl": "https://dblp.org/rec/conf/ecir/KacemM18.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/CarevicS14": { - "type": "inproceedings", - "author": "Zeljko Carevic and Philipp Schaer", - "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", - "title": "On the Connection Between Citation-based and Topical Relevance Ranking: Results of a Pretest using iSearch", - "booktitle": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", - "series": "CEUR Workshop Proceedings", - "volume": 1143, - "pages": "37--44", - "publisher": "CEUR-WS.org", - "year": 2014, - "url": "http://ceur-ws.org/Vol-1143/paper5.pdf", - "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/CarevicS14.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/ecir/JackLHK14": { - "type": "inproceedings", - "author": "Kris Jack and Pablo L\\'opez-Garc\\'\\ia and Maya Hristakeva and Roman Kern", - "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", - "title": "\\\\citation needed\\\\: Filling in Wikipedia's Citation Shaped Holes", - "booktitle": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", - "series": "CEUR Workshop Proceedings", - "volume": 1143, - "pages": "45--52", - "publisher": "CEUR-WS.org", - "year": 2014, - "url": "http://ceur-ws.org/Vol-1143/paper6.pdf", - "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/JackLHK14.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - 
"DBLP:conf/ecir/2014bir": { - "type": "proceedings", - "editor": "Philipp Mayr and Philipp Schaer and Andrea Scharnhorst and Birger Larsen and Peter Mutschke", - "title": "Proceedings of the First Workshop on Bibliometric-enhanced Information Retrieval co-located with 36th European Conference on Information Retrieval (ECIR 2014), Amsterdam, The Netherlands, April 13, 2014", - "series": "CEUR Workshop Proceedings", - "volume": 1143, - "publisher": "CEUR-WS.org", - "year": 2014, - "url": "http://ceur-ws.org/Vol-1143", - "urn": "urn:nbn:de:0074-1143-7", - "timestamp": "Wed, 12 Feb 2020 16:44:12 +0100", - "biburl": "https://dblp.org/rec/conf/ecir/2014bir.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/jasis/Cole11": { - "type": "article", - "author": "Charles Cole", - "title": "A theory of information need for information retrieval that connects information to knowledge", - "journal": "J. Assoc. Inf. Sci. Technol.", - "volume": 62, - "number": 7, - "pages": "1216--1231", - "year": 2011, - "url": "https://doi.org/10.1002/asi.21541", - "doi": "10.1002/asi.21541", - "timestamp": "Mon, 02 Mar 2020 17:22:25 +0100", - "biburl": "https://dblp.org/rec/journals/jasis/Cole11.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:conf/icsm/GayHMM09": { - "type": "inproceedings", - "author": "Gregory Gay and Sonia Haiduc and Andrian Marcus and Tim Menzies", - "title": "On the use of relevance feedback in IR-based concept location", - "booktitle": "25th IEEE International Conference on Software Maintenance (ICSM 2009), September 20-26, 2009, Edmonton, Alberta, Canada", - "pages": "351--360", - "publisher": "IEEE Computer Society", - "year": 2009, - "url": "https://doi.org/10.1109/ICSM.2009.5306315", - "doi": "10.1109/ICSM.2009.5306315", - "timestamp": "Thu, 14 Oct 2021 10:27:26 +0200", - "biburl": "https://dblp.org/rec/conf/icsm/GayHMM09.bib", - "bibsource": "dblp computer science bibliography, 
https://dblp.org" - }, - "DBLP:journals/corr/HeckS13": { - "type": "article", - "author": "Tamara Heck and Philipp Schaer", - "title": "Performing Informetric Analysis on Information Retrieval Test Collections: Preliminary Experiments in the Physics Domain", - "journal": "CoRR", - "volume": "abs/1306.1743", - "year": 2013, - "url": "http://arxiv.org/abs/1306.1743", - "eprinttype": "arXiv", - "eprint": "1306.1743", - "timestamp": "Mon, 13 Aug 2018 16:48:24 +0200", - "biburl": "https://dblp.org/rec/journals/corr/HeckS13.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - }, - "DBLP:journals/cacm/Voorhees07": { - "type": "article", - "author": "Ellen M. Voorhees", - "title": "TREC: Continuing information retrieval's tradition of experimentation", - "journal": "Commun. ACM", - "volume": 50, - "number": 11, - "pages": "51--54", - "year": 2007, - "url": "https://doi.org/10.1145/1297797.1297822", - "doi": "10.1145/1297797.1297822", - "timestamp": "Thu, 14 Oct 2021 09:00:21 +0200", - "biburl": "https://dblp.org/rec/journals/cacm/Voorhees07.bib", - "bibsource": "dblp computer science bibliography, https://dblp.org" - } -} \ No newline at end of file From ff8b0583938df0cfbae6271435f833eb768a1486 Mon Sep 17 00:00:00 2001 From: Jueri Date: Fri, 4 Mar 2022 12:19:11 +0100 Subject: [PATCH 03/15] add name abbreviation to entry and bibliography objects. 
--- BibTexTools/bibliography.py | 61 ++++++++++++++++++++++---- BibTexTools/tests/test_bibliography.py | 49 ++++++++++++++++++--- BibTexTools/tests/test_entry.py | 10 ++--- 3 files changed, 100 insertions(+), 20 deletions(-) diff --git a/BibTexTools/bibliography.py b/BibTexTools/bibliography.py index 5243510..4e6b57c 100644 --- a/BibTexTools/bibliography.py +++ b/BibTexTools/bibliography.py @@ -1,3 +1,4 @@ +from __future__ import annotations import json import warnings from dataclasses import dataclass, field @@ -79,7 +80,7 @@ def __init__(self, first, last): self.last = last self.mid = [str] - def abbreviate(self, middle: bool) -> str: + def abbreviate(self, middle: bool) -> Author: """Abbreviate the author name, with or without middle names. Args: @@ -113,17 +114,21 @@ def _abbreviate_name(name: str) -> str: else: last = self.last + self.first_short = _abbreviate_name(self.first) name_short = last + ", " + _abbreviate_name(self.first) if middle: + mid_short = [] for name in self.mid: + mid_short.append(_abbreviate_name(name)) name_short += " " + _abbreviate_name(name) + self.mid_short = mid_short if " Jr." in self.last: name_short += " Jr." - self.short = name_short - return name_short + self.name_short = name_short + return self class Author_field(Field): @@ -168,7 +173,7 @@ def split_authorlist(self) -> List[Author]: return author_list - def abbreviate(self, middle: bool) -> List[str]: + def abbreviate(self, middle: bool) -> Author_field: """Abbreviate all authors from the author list. Args: @@ -182,9 +187,14 @@ def abbreviate(self, middle: bool) -> List[str]: author_list_abbreviated.append(author.abbreviate(middle)) self.author_list_abbreviated = author_list_abbreviated - return author_list_abbreviated + return self def to_bibtex(self) -> str: + """Serielize into a BibTex string. + + Returns: + str: Field as BibTex string. 
+ """ return self.name + " = " + self.value.replace(" and ", " and\n") @@ -263,8 +273,23 @@ def to_dict(self, fields: List[str] = []) -> Dict[str, Dict[str, Union[str, int] bibtex = {**bibtex, **self.__getattribute__(field).to_dict()} # join dicts return {self.key.value: bibtex} # type: ignore - # abbreviate_names() TODO - # abbreviate_journals() TODO + def abbreviate_names(self, middle: bool) -> Entry: + """Abbreviate all author names from entry. The full names are preserved as `authors_full`. + + Args: + middle (bool): Abbriviate or delete the moddle names. + + Returns: + Entry: The full entry with the abbreviated and full names. + """ + assert "author" in self.fields + author_full = self.author # type: ignore + self.author = self.author.abbreviate(middle=middle) # type:ignore + self.author_full = author_full + return self + + # def abbreviate_journals() + # TODO @dataclass @@ -308,5 +333,23 @@ def to_json(self, path: str, fields: List[str] = []): with open(path, "w") as fout: json.dump(bibtex, fout, indent=4) - # def abbreviate_names(TODO) - # def abbreviate_yournals(TODO) + def abbreviate_names(self, middle: bool) -> Bibliography: + """Abbreviate all author names from all entries. + + Args: + middle (bool): Abbreviate the middle name. + + Returns: + Bibliography: The full bibliography with abbreviated names. 
+ """ + abbreviated_entries = [] + for entry in self.entries: + if "author" in entry.fields: + abbreviated_entries.append(entry.abbreviate_names(middle)) + else: + abbreviated_entries.append(entry) + self.entries = abbreviated_entries + return self + + # def abbreviate_journals( + # TODO) diff --git a/BibTexTools/tests/test_bibliography.py b/BibTexTools/tests/test_bibliography.py index dc590d0..82deaff 100644 --- a/BibTexTools/tests/test_bibliography.py +++ b/BibTexTools/tests/test_bibliography.py @@ -63,9 +63,18 @@ def test_to_bib(self, bib_obj_full): assert os.path.isfile(file_path) os.remove(file_path) - def test_to_bib_fields(self): - pass - # TODO + def test_to_bib_fields(self, bib_obj_full): + fields = ["author", "title"] + file_path = os.path.join("BibTexTools", "tests", "data", "to_bib.bib") + bib_obj_full.to_bib(file_path, fields) + + parser_obj = Parser() + parsed_bibtex = parser_obj.from_file(file_path) + all_fields = [] + for entry in parsed_bibtex.entries: + all_fields += entry.fields + fields += ["key", "type"] # allways present + assert set(fields) == set(all_fields) def test_to_json_exists(self, bib_obj_full): file_path = os.path.join("BibTexTools", "tests", "data", "to_json.json") @@ -86,6 +95,34 @@ def test_to_json_file(self, bib_obj_full): assert ref_file == new_file os.remove(file_path) - def test_to_json_fields(self): - pass - # TODO + def test_to_json_fields(self, bib_obj_full): + fields = ["author", "title"] + file_path = os.path.join("BibTexTools", "tests", "data", "to_json_fields.json") + bib_obj_full.to_json(file_path, fields) + with open(file_path, "r") as fin: + to_json_fields = json.load(fin) + + docs = list(to_json_fields.keys()) + all_keys = [] + for doc in docs: + all_keys += list(to_json_fields[doc].keys()) + assert set(all_keys) == set(fields) + + def test_abbreviate_names(self, bib_obj_full): + bib_obj_full = bib_obj_full.abbreviate_names(True) + assert ( + bib_obj_full.entries[0].author.author_list[0].name_short + == "von 
A1_Last, A." + ) + assert ( + bib_obj_full.entries[0].author.author_list[1].name_short + == "von A2_Last, A." + ) + assert ( + bib_obj_full.entries[1].author.author_list[0].name_short + == "von B1_Last, B." + ) + assert ( + bib_obj_full.entries[1].author.author_list[1].name_short + == "von B2_Last, B." + ) diff --git a/BibTexTools/tests/test_entry.py b/BibTexTools/tests/test_entry.py index 2857a28..c13bf70 100644 --- a/BibTexTools/tests/test_entry.py +++ b/BibTexTools/tests/test_entry.py @@ -76,12 +76,12 @@ def test_add_journal(self, empty_entry): assert isinstance(empty_entry.journal, Journal_field) def test_author_abbreviation(self, entry_obj): - author_list_abbreviated = entry_obj.author.abbreviate(middle=True) + entry_obj = entry_obj.abbreviate_names(middle=True) - assert author_list_abbreviated[0] == "A1_Last, A. B." - assert author_list_abbreviated[1] == "A2_Last, A. B." - assert author_list_abbreviated[2] == "A3_Last, A. III." - assert author_list_abbreviated[3] == "A4_Last, A. B. Jr." + assert entry_obj.author.author_list[0].name_short == "A1_Last, A. B." + assert entry_obj.author.author_list[1].name_short == "A2_Last, A. B." + assert entry_obj.author.author_list[2].name_short == "A3_Last, A. III." + assert entry_obj.author.author_list[3].name_short == "A4_Last, A. B. Jr." def test_to_bibtex(self, entry_obj_full): bibtex_str = entry_obj_full.to_bibtex() From 51350ebee8821e947782409777e74b0eccc1eb7e Mon Sep 17 00:00:00 2001 From: Jueri Date: Fri, 4 Mar 2022 13:19:29 +0100 Subject: [PATCH 04/15] update export functions to obey abbreviations. 
--- BibTexTools/bibliography.py | 92 ++++++++++++++++---------- BibTexTools/tests/test_bibliography.py | 16 ++--- BibTexTools/tests/test_entry.py | 15 +++-- 3 files changed, 74 insertions(+), 49 deletions(-) diff --git a/BibTexTools/bibliography.py b/BibTexTools/bibliography.py index 4e6b57c..fa18678 100644 --- a/BibTexTools/bibliography.py +++ b/BibTexTools/bibliography.py @@ -45,40 +45,17 @@ def extract_content_of_field(field_value: str) -> str: return field_value.replace("{", "").replace("}", "") -class Field: - """BibTex field object containing a field name and a field value.""" - - def __init__(self, name, value): - self.name: str = name - self.value: str = value - - def to_bibtex(self) -> str: - """Serialize the field object into a BibTex string. - - Returns: - str: Bibtex string of the field. - """ - return self.name + " = " + self.value - - def to_dict( - self, - ) -> Dict[str, Union[str, int]]: - """Serielize the field object into a dictionary. - - Returns: - str: Dictionary of the field. - """ - value = extract_content_of_field(self.value) - return {self.name: int(value) if value.isdigit() else value} - - class Author: """Author name object.""" def __init__(self, first, last): self.first = first self.last = last - self.mid = [str] + self.mid = [] + self.name_string = self.make_name() + + def make_name(self): + return " ".join([self.first] + self.mid + [self.last]) def abbreviate(self, middle: bool) -> Author: """Abbreviate the author name, with or without middle names. 
@@ -113,24 +90,55 @@ def _abbreviate_name(name: str) -> str: last = self.last.split(" ")[0] else: last = self.last - - self.first_short = _abbreviate_name(self.first) - name_short = last + ", " + _abbreviate_name(self.first) + first_full = self.first + self.first = _abbreviate_name(self.first) + self.first_full = first_full + name_short = last + ", " + self.first if middle: mid_short = [] for name in self.mid: mid_short.append(_abbreviate_name(name)) name_short += " " + _abbreviate_name(name) - self.mid_short = mid_short + self.mid_full = self.mid + self.mid = mid_short if " Jr." in self.last: name_short += " Jr." - self.name_short = name_short + self.name_string_full = self.name_string + self.name_string = name_short + return self +class Field: + """BibTex field object containing a field name and a field value.""" + + def __init__(self, name, value): + self.name: str = name + self.value: str = value + + def to_bibtex(self) -> str: + """Serialize the field object into a BibTex string. + + Returns: + str: Bibtex string of the field. + """ + return self.name + " = " + self.value + + def to_dict( + self, + ) -> Dict[str, Any]: + """Serielize the field object into a dictionary. + + Returns: + str: Dictionary of the field. + """ + value = extract_content_of_field(self.value) + return {self.name: int(value) if value.isdigit() else value} + + class Author_field(Field): """Dedicated field for the author information.""" @@ -195,7 +203,23 @@ def to_bibtex(self) -> str: Returns: str: Field as BibTex string. """ - return self.name + " = " + self.value.replace(" and ", " and\n") + return ( + self.name + + " = {" + + " and\n".join([author.name_string for author in self.author_list]) + + "}" + ) + + def to_dict( + self, + ) -> Dict[str, List[str]]: + """Serielize the field object into a dictionary. + + Returns: + str: Dictionary of the field. 
+ """ + authors = [author.name_string for author in self.author_list] + return {self.name: authors} class Journal_field(Field): diff --git a/BibTexTools/tests/test_bibliography.py b/BibTexTools/tests/test_bibliography.py index 82deaff..fbd9613 100644 --- a/BibTexTools/tests/test_bibliography.py +++ b/BibTexTools/tests/test_bibliography.py @@ -5,7 +5,7 @@ Bib_string = """@Atype{Akey, author = {A1_First von A1_Last and - von A2_Last, A2_First}, + A2_First von A2_Last}, title = {A_Title}, journal = {A_Journal}, volume = {A_Volume}, @@ -16,7 +16,7 @@ @Btype{Bkey, author = {B1_First von B1_Last and - von B2_Last, B2_First}, + B2_First von B2_Last}, title = {B_Title}, journal = {B_Journal}, volume = {B_Volume}, @@ -26,14 +26,14 @@ Bib_string_fields = """@Atype{Akey, author = {A1_First von A1_Last and - von A2_Last, A2_First}, + A2_First von A2_Last}, title = {A_Title}, } @Btype{Bkey, author = {B1_First von B1_Last and - von B2_Last, B2_First}, + B2_First von B2_Last}, title = {B_Title}, }""" @@ -111,18 +111,18 @@ def test_to_json_fields(self, bib_obj_full): def test_abbreviate_names(self, bib_obj_full): bib_obj_full = bib_obj_full.abbreviate_names(True) assert ( - bib_obj_full.entries[0].author.author_list[0].name_short + bib_obj_full.entries[0].author.author_list[0].name_string == "von A1_Last, A." ) assert ( - bib_obj_full.entries[0].author.author_list[1].name_short + bib_obj_full.entries[0].author.author_list[1].name_string == "von A2_Last, A." ) assert ( - bib_obj_full.entries[1].author.author_list[0].name_short + bib_obj_full.entries[1].author.author_list[0].name_string == "von B1_Last, B." ) assert ( - bib_obj_full.entries[1].author.author_list[1].name_short + bib_obj_full.entries[1].author.author_list[1].name_string == "von B2_Last, B." 
) diff --git a/BibTexTools/tests/test_entry.py b/BibTexTools/tests/test_entry.py index c13bf70..5474601 100644 --- a/BibTexTools/tests/test_entry.py +++ b/BibTexTools/tests/test_entry.py @@ -11,7 +11,7 @@ A_string = """@Atype{Akey, author = {A1_First von A1_Last and - von A2_Last, A2_First}, + A2_First von A2_Last}, title = {A_Title}, journal = {A_Journal}, volume = {A_Volume}, @@ -21,7 +21,7 @@ A_string_fields = """@Atype{Akey, author = {A1_First von A1_Last and - von A2_Last, A2_First}, + A2_First von A2_Last}, title = {A_Title}, }""" @@ -78,10 +78,10 @@ def test_add_journal(self, empty_entry): def test_author_abbreviation(self, entry_obj): entry_obj = entry_obj.abbreviate_names(middle=True) - assert entry_obj.author.author_list[0].name_short == "A1_Last, A. B." - assert entry_obj.author.author_list[1].name_short == "A2_Last, A. B." - assert entry_obj.author.author_list[2].name_short == "A3_Last, A. III." - assert entry_obj.author.author_list[3].name_short == "A4_Last, A. B. Jr." + assert entry_obj.author.author_list[0].name_string == "A1_Last, A. B." + assert entry_obj.author.author_list[1].name_string == "A2_Last, A. B." + assert entry_obj.author.author_list[2].name_string == "A3_Last, A. III." + assert entry_obj.author.author_list[3].name_string == "A4_Last, A. B. Jr." 
def test_to_bibtex(self, entry_obj_full): bibtex_str = entry_obj_full.to_bibtex() @@ -96,7 +96,7 @@ def test_to_dict(self, entry_obj_full): ref_dict = { "Akey": { "type": "Atype", - "author": "A1_First von A1_Last and von A2_Last, A2_First", + "author": ["A1_First von A1_Last", "A2_First von A2_Last"], "title": "A_Title", "journal": "A_Journal", "volume": "A_Volume", @@ -111,4 +111,5 @@ def test_to_dict_fields(self, entry_obj_full): fields = ["type", "author", "title"] entry_dict = entry_obj_full.to_dict(fields) assert len(entry_dict["Akey"].keys()) == 3 + assert set(entry_dict["Akey"].keys()) == set(fields) From 06d2991c58cd18cd7f58ea18447d8b23413a884f Mon Sep 17 00:00:00 2001 From: Jueri Date: Sat, 5 Mar 2022 10:44:49 +0100 Subject: [PATCH 05/15] add test action --- .github/workflows/pytest.yml | 35 +++++++++ Pipfile | 6 ++ Pipfile.lock | 141 ++++++++++++++++++++++++++++++++++- setup.py | 12 +-- 4 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/pytest.yml diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 0000000..8b67598 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,35 @@ +# .github/workflows/app.yaml +name: PyTest +on: push + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Check out repository code + uses: actions/checkout@v2 + + # Setup Python (faster than using Python container) + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + + - name: Install pipenv + run: | + python -m pip install --upgrade pipenv wheel + - id: cache-pipenv + uses: actions/cache@v1 + with: + path: ~/.local/share/virtualenvs + key: ${{ runner.os }}-pipenv-${{ hashFiles('**/Pipfile.lock') }} + + - name: Install dependencies + if: steps.cache-pipenv.outputs.cache-hit != 'true' + run: | + pipenv install --deploy --dev + - name: Run test suite + run: | + pipenv run test -v diff --git a/Pipfile b/Pipfile index 
78b8cfa..6617803 100644 --- a/Pipfile +++ b/Pipfile @@ -6,6 +6,8 @@ name = "pypi" [packages] mypy = "*" types-requests = "*" +click = "*" +bibtextools = {editable = true, path = "."} [dev-packages] black = "*" @@ -13,6 +15,10 @@ pytest = "*" mypy = "*" ipykernel = "*" bibtextools = {editable = true, path = "."} +coverage = "*" [requires] python_version = "3" + +[scripts] +test = "pytest" diff --git a/Pipfile.lock b/Pipfile.lock index 1942de0..c368b3d 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "fb15753e83a62055755228a58e87aabef3a95c6646fc3209f3c375dd585daf7c" + "sha256": "a3eb815c20ce28f8248b0f4264b11dd90a2c1dad7cd6c64e7c2b36b040ffcc2c" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,41 @@ ] }, "default": { + "bibtextools": { + "editable": true, + "path": "." + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, + "click": { + "hashes": [ + "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", + "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb" + ], + "index": "pypi", + "version": "==8.0.4" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, "mypy": { "hashes": [ "sha256:0038b21890867793581e4cb0d810829f5fd4441aa75796b53033af3aa30430ce", @@ -49,6 +84,14 @@ ], "version": "==0.4.3" }, + "requests": { + "hashes": [ + 
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.27.1" + }, "tomli": { "hashes": [ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", @@ -79,6 +122,14 @@ ], "markers": "python_version >= '3.6'", "version": "==4.1.1" + }, + "urllib3": { + "hashes": [ + "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed", + "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.8" } }, "develop": { @@ -145,14 +196,76 @@ "index": "pypi", "version": "==22.1.0" }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, "click": { "hashes": [ "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb" ], - "markers": "python_version >= '3.6'", + "index": "pypi", "version": "==8.0.4" }, + "coverage": { + "hashes": [ + "sha256:03e2a7826086b91ef345ff18742ee9fc47a6839ccd517061ef8fa1976e652ce9", + "sha256:07e6db90cd9686c767dcc593dff16c8c09f9814f5e9c51034066cad3373b914d", + "sha256:18d520c6860515a771708937d2f78f63cc47ab3b80cb78e86573b0a760161faf", + "sha256:1ebf730d2381158ecf3dfd4453fbca0613e16eaa547b4170e2450c9707665ce7", + 
"sha256:21b7745788866028adeb1e0eca3bf1101109e2dc58456cb49d2d9b99a8c516e6", + "sha256:26e2deacd414fc2f97dd9f7676ee3eaecd299ca751412d89f40bc01557a6b1b4", + "sha256:2c6dbb42f3ad25760010c45191e9757e7dce981cbfb90e42feef301d71540059", + "sha256:2fea046bfb455510e05be95e879f0e768d45c10c11509e20e06d8fcaa31d9e39", + "sha256:34626a7eee2a3da12af0507780bb51eb52dca0e1751fd1471d0810539cefb536", + "sha256:37d1141ad6b2466a7b53a22e08fe76994c2d35a5b6b469590424a9953155afac", + "sha256:46191097ebc381fbf89bdce207a6c107ac4ec0890d8d20f3360345ff5976155c", + "sha256:4dd8bafa458b5c7d061540f1ee9f18025a68e2d8471b3e858a9dad47c8d41903", + "sha256:4e21876082ed887baed0146fe222f861b5815455ada3b33b890f4105d806128d", + "sha256:58303469e9a272b4abdb9e302a780072c0633cdcc0165db7eec0f9e32f901e05", + "sha256:5ca5aeb4344b30d0bec47481536b8ba1181d50dbe783b0e4ad03c95dc1296684", + "sha256:68353fe7cdf91f109fc7d474461b46e7f1f14e533e911a2a2cbb8b0fc8613cf1", + "sha256:6f89d05e028d274ce4fa1a86887b071ae1755082ef94a6740238cd7a8178804f", + "sha256:7a15dc0a14008f1da3d1ebd44bdda3e357dbabdf5a0b5034d38fcde0b5c234b7", + "sha256:8bdde1177f2311ee552f47ae6e5aa7750c0e3291ca6b75f71f7ffe1f1dab3dca", + "sha256:8ce257cac556cb03be4a248d92ed36904a59a4a5ff55a994e92214cde15c5bad", + "sha256:8cf5cfcb1521dc3255d845d9dca3ff204b3229401994ef8d1984b32746bb45ca", + "sha256:8fbbdc8d55990eac1b0919ca69eb5a988a802b854488c34b8f37f3e2025fa90d", + "sha256:9548f10d8be799551eb3a9c74bbf2b4934ddb330e08a73320123c07f95cc2d92", + "sha256:96f8a1cb43ca1422f36492bebe63312d396491a9165ed3b9231e778d43a7fca4", + "sha256:9b27d894748475fa858f9597c0ee1d4829f44683f3813633aaf94b19cb5453cf", + "sha256:9baff2a45ae1f17c8078452e9e5962e518eab705e50a0aa8083733ea7d45f3a6", + "sha256:a2a8b8bcc399edb4347a5ca8b9b87e7524c0967b335fbb08a83c8421489ddee1", + "sha256:acf53bc2cf7282ab9b8ba346746afe703474004d9e566ad164c91a7a59f188a4", + "sha256:b0be84e5a6209858a1d3e8d1806c46214e867ce1b0fd32e4ea03f4bd8b2e3359", + "sha256:b31651d018b23ec463e95cf10070d0b2c548aa950a03d0b559eaa11c7e5a6fa3", 
+ "sha256:b78e5afb39941572209f71866aa0b206c12f0109835aa0d601e41552f9b3e620", + "sha256:c76aeef1b95aff3905fb2ae2d96e319caca5b76fa41d3470b19d4e4a3a313512", + "sha256:dd035edafefee4d573140a76fdc785dc38829fe5a455c4bb12bac8c20cfc3d69", + "sha256:dd6fe30bd519694b356cbfcaca9bd5c1737cddd20778c6a581ae20dc8c04def2", + "sha256:e5f4e1edcf57ce94e5475fe09e5afa3e3145081318e5fd1a43a6b4539a97e518", + "sha256:ec6bc7fe73a938933d4178c9b23c4e0568e43e220aef9472c4f6044bfc6dd0f0", + "sha256:f1555ea6d6da108e1999b2463ea1003fe03f29213e459145e70edbaf3e004aaa", + "sha256:f5fa5803f47e095d7ad8443d28b01d48c0359484fec1b9d8606d0e3282084bc4", + "sha256:f7331dbf301b7289013175087636bbaf5b2405e57259dd2c42fdcc9fcc47325e", + "sha256:f9987b0354b06d4df0f4d3e0ec1ae76d7ce7cbca9a2f98c25041eb79eec766f1", + "sha256:fd9e830e9d8d89b20ab1e5af09b32d33e1a08ef4c4e14411e559556fd788e6b2" + ], + "index": "pypi", + "version": "==6.3.2" + }, "debugpy": { "hashes": [ "sha256:01e98c594b3e66d529e40edf314f849cd1a21f7a013298df58cd8e263bf8e184", @@ -203,6 +316,14 @@ ], "version": "==0.8.3" }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, "iniconfig": { "hashes": [ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", @@ -468,6 +589,14 @@ "markers": "python_version >= '3.6'", "version": "==22.3.0" }, + "requests": { + "hashes": [ + "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.27.1" + }, "setuptools": { "hashes": [ "sha256:2347b2b432c891a863acadca2da9ac101eae6169b1d3dfee2ec605ecd50dbfe5", @@ -562,6 +691,14 @@ "markers": "python_version >= '3.6'", "version": "==4.1.1" }, + "urllib3": { + 
"hashes": [ + "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed", + "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.8" + }, "wcwidth": { "hashes": [ "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", diff --git a/setup.py b/setup.py index c0da123..aa23cee 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ version="0.1.0", packages=find_packages(), include_package_data=True, - # install_requires=["Click", "requests"], - # entry_points={ - # "console_scripts": [ - # "BibTexTools = BibTexTools.clean_bibtex:clean_bibtex", - # ], - # }, + install_requires=["Click", "requests"], + entry_points={ + "console_scripts": [ + "BibTexTools = BibTexTools.CLI:cli", + ], + }, ) From c51fcf5353971df9a550a73c25822798d5bd0d08 Mon Sep 17 00:00:00 2001 From: Jueri Date: Sat, 5 Mar 2022 10:48:14 +0100 Subject: [PATCH 06/15] unignore the data dir for tests --- .gitignore | 3 +- BibTexTools/tests/data/authors.bib | 6 + BibTexTools/tests/data/authors_abbreviate.bib | 6 + BibTexTools/tests/data/bib_bert_short.bib | 5 + BibTexTools/tests/data/bib_unknown.bib | 9 + BibTexTools/tests/data/cleaned.bib | 1207 +++++++++++++++++ BibTexTools/tests/data/full.bib | 20 + BibTexTools/tests/data/not_standard.bib | 5 + BibTexTools/tests/data/simple.bib | 6 + BibTexTools/tests/data/to_bib.bib | 12 + BibTexTools/tests/data/to_json_fields.json | 16 + BibTexTools/tests/data/to_json_ref.json | 26 + 12 files changed, 1319 insertions(+), 2 deletions(-) create mode 100644 BibTexTools/tests/data/authors.bib create mode 100644 BibTexTools/tests/data/authors_abbreviate.bib create mode 100644 BibTexTools/tests/data/bib_bert_short.bib create mode 100644 BibTexTools/tests/data/bib_unknown.bib create mode 100644 BibTexTools/tests/data/cleaned.bib create mode 100644 BibTexTools/tests/data/full.bib create 
mode 100644 BibTexTools/tests/data/not_standard.bib create mode 100644 BibTexTools/tests/data/simple.bib create mode 100644 BibTexTools/tests/data/to_bib.bib create mode 100644 BibTexTools/tests/data/to_json_fields.json create mode 100644 BibTexTools/tests/data/to_json_ref.json diff --git a/.gitignore b/.gitignore index 6d5feb7..01aa896 100644 --- a/.gitignore +++ b/.gitignore @@ -140,7 +140,6 @@ cython_debug/ # My Files -data/ .devcontainer/.env .vscode -build/ \ No newline at end of file +build/ diff --git a/BibTexTools/tests/data/authors.bib b/BibTexTools/tests/data/authors.bib new file mode 100644 index 0000000..85fd5ae --- /dev/null +++ b/BibTexTools/tests/data/authors.bib @@ -0,0 +1,6 @@ +@type{key, + author = {A1_First von A1_Last and + von A2_Last, A2_First and + von A3_Last, A3_Jr, A3_First and + A4_Last, A4_Mid, A4_Mid2, A4_First}, + } \ No newline at end of file diff --git a/BibTexTools/tests/data/authors_abbreviate.bib b/BibTexTools/tests/data/authors_abbreviate.bib new file mode 100644 index 0000000..775841b --- /dev/null +++ b/BibTexTools/tests/data/authors_abbreviate.bib @@ -0,0 +1,6 @@ +@type{key, + author = {A1_First B1_Mid A1_Last and + A2_Last, B., A2_First and + A3_Last, A3_First III. 
and + A4_Last Jr., B4_Mid, A4_First}, + } \ No newline at end of file diff --git a/BibTexTools/tests/data/bib_bert_short.bib b/BibTexTools/tests/data/bib_bert_short.bib new file mode 100644 index 0000000..0baeff1 --- /dev/null +++ b/BibTexTools/tests/data/bib_bert_short.bib @@ -0,0 +1,5 @@ +@article{devlin2018bert, + title={Bert: Pre-training of deep bidirectional transformers for language understanding}, + author={Devlin, and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + year={2018} +} \ No newline at end of file diff --git a/BibTexTools/tests/data/bib_unknown.bib b/BibTexTools/tests/data/bib_unknown.bib new file mode 100644 index 0000000..a34a0e2 --- /dev/null +++ b/BibTexTools/tests/data/bib_unknown.bib @@ -0,0 +1,9 @@ +@article{devlin2018bert, + title={Bert: Pre-training of deep bidirectional transformers for language understanding}, + author={Devlin, and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + year={2018} +} + +@article{unknown, + title={_} +} \ No newline at end of file diff --git a/BibTexTools/tests/data/cleaned.bib b/BibTexTools/tests/data/cleaned.bib new file mode 100644 index 0000000..395dd3e --- /dev/null +++ b/BibTexTools/tests/data/cleaned.bib @@ -0,0 +1,1207 @@ +@article{DBLP:journals/corr/ZahediCW17, + author = {Zohreh Zahedi and + Rodrigo Costas and + Paul Wouters}, + title = {Mendeley readership as a filtering tool to identify highly cited publications}, + journal = {CoRR}, + volume = {abs/1703.07104}, + year = {2017}, + url = {http://arxiv.org/abs/1703.07104}, + eprinttype = {arXiv}, + eprint = {1703.07104}, + timestamp = {Mon, 13 Aug 2018 16:48:04 +0200}, + biburl = {https://dblp.org/rec/journals/corr/ZahediCW17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jet/Dietrich15, + author = {Franz Dietrich}, + title = {Aggregation theory and the relevance of some issues to others}, + journal = {J. Econ. 
Theory}, + volume = {160}, + pages = {463--493}, + year = {2015}, + url = {https://doi.org/10.1016/j.jet.2015.03.012}, + doi = {10.1016/j.jet.2015.03.012}, + timestamp = {Mon, 24 Feb 2020 15:54:33 +0100}, + biburl = {https://dblp.org/rec/journals/jet/Dietrich15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/spatialCognition/Tenbrink12, + author = {Thora Tenbrink}, + editor = {Cyrill Stachniss and + Kerstin Schill and + David H. Uttal}, + title = {Relevance in Spatial Navigation and Communication}, + booktitle = {Spatial Cognition {VIII} - International Conference, Spatial Cognition + 2012, Kloster Seeon, Germany, August 31 - September 3, 2012. Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {7463}, + pages = {358--377}, + publisher = {Springer}, + year = {2012}, + url = {https://doi.org/10.1007/978-3-642-32732-2\_23}, + doi = {10.1007/978-3-642-32732-2\_23}, + timestamp = {Sun, 02 Jun 2019 21:21:23 +0200}, + biburl = {https://dblp.org/rec/conf/spatialCognition/Tenbrink12.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/iiix/LarsenIK06, + author = {Birger Larsen and + Peter Ingwersen and + Jaana Kek{\"{a}}l{\"{a}}inen}, + editor = {Ian Ruthven}, + title = {The polyrepresentation continuum in {IR}}, + booktitle = {Proceedings of the 1st International Conference on Information Interaction + in Context, IIiX 2006, Copenhagen, Denmark, October 18-20, 2006}, + pages = {88--96}, + publisher = {{ACM}}, + year = {2006}, + url = {https://doi.org/10.1145/1164820.1164840}, + doi = {10.1145/1164820.1164840}, + timestamp = {Tue, 06 Nov 2018 16:58:20 +0100}, + biburl = {https://dblp.org/rec/conf/iiix/LarsenIK06.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/sigir/Ingwersen94, + author = {Peter Ingwersen}, + editor = {W. Bruce Croft and + C. J. 
van Rijsbergen}, + title = {Polyrepresentation of Information Needs and Semantic Entities: Elements + of a Cognitive Theory for Information Retrieval Interaction}, + booktitle = {Proceedings of the 17th Annual International {ACM-SIGIR} Conference + on Research and Development in Information Retrieval. Dublin, Ireland, + 3-6 July 1994 (Special Issue of the {SIGIR} Forum)}, + pages = {101--110}, + publisher = {ACM/Springer}, + year = {1994}, + url = {https://doi.org/10.1007/978-1-4471-2099-5\_11}, + doi = {10.1007/978-1-4471-2099-5\_11}, + timestamp = {Thu, 25 Jul 2019 17:43:05 +0200}, + biburl = {https://dblp.org/rec/conf/sigir/Ingwersen94.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/Wada20, + author = {Tetsuo Wada}, + title = {When do the {USPTO} examiners cite as the {EPO} examiners? An analysis + of examination spillovers through rejection citations at the international + family-to-family level}, + journal = {Scientometrics}, + volume = {125}, + number = {2}, + pages = {1591--1615}, + year = {2020}, + url = {https://doi.org/10.1007/s11192-020-03674-4}, + doi = {10.1007/s11192-020-03674-4}, + timestamp = {Fri, 14 May 2021 08:32:27 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/Wada20.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/Garfield98, + author = {Eugene Garfield}, + title = {Random thoughts on citationology its theory and practice}, + journal = {Scientometrics}, + volume = {43}, + number = {1}, + pages = {69--76}, + year = {1998}, + url = {https://doi.org/10.1007/BF02458396}, + doi = {10.1007/BF02458396}, + timestamp = {Fri, 17 Jul 2020 06:39:13 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/Garfield98.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/crl/Tyckoson15, + author = {David A. 
Tyckoson}, + title = {Question-Negotiation and Information Seeking in Libraries: {A} Timeless + Topic in a Timeless Article}, + journal = {Coll. Res. Libr.}, + volume = {76}, + number = {3}, + pages = {247--250}, + year = {2015}, + url = {https://doi.org/10.5860/crl.76.3.247}, + doi = {10.5860/crl.76.3.247}, + timestamp = {Thu, 25 Jun 2020 21:29:22 +0200}, + biburl = {https://dblp.org/rec/journals/crl/Tyckoson15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/issi/Holmberg15a, + author = {Kim Holmberg}, + editor = {Albert Ali Salah and + Yasar Tonta and + Alkim Almila Akdag Salah and + Cassidy R. Sugimoto and + Umut Al}, + title = {Classifying Altmetrics by Level of Impact}, + booktitle = {Proceedings of the 15th International Conference on Scientometrics + and Informetrics, Istanbul, Turkey, June 29 - July 3, 2015}, + publisher = {{ISSI} Society}, + year = {2015}, + timestamp = {Tue, 14 Apr 2020 11:09:56 +0200}, + biburl = {https://dblp.org/rec/conf/issi/Holmberg15a.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/ipm/SchamberEN90, + author = {Linda Schamber and + Michael B. Eisenberg and + Michael Sanford Nilan}, + title = {A re-examination of relevance: toward a dynamic, situational definition}, + journal = {Inf. Process. Manag.}, + volume = {26}, + number = {6}, + pages = {755--776}, + year = {1990}, + url = {https://doi.org/10.1016/0306-4573(90)90050-C}, + doi = {10.1016/0306-4573(90)90050-C}, + timestamp = {Fri, 21 Feb 2020 13:11:07 +0100}, + biburl = {https://dblp.org/rec/journals/ipm/SchamberEN90.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/ipm/Park97, + author = {H. Park}, + title = {Relevance of Science Information: Origins and Dimensions of Relevance + and Their Implications to Information Retrieval}, + journal = {Inf. Process. 
Manag.}, + volume = {33}, + number = {3}, + pages = {339--352}, + year = {1997}, + url = {https://doi.org/10.1016/S0306-4573(96)00072-6}, + doi = {10.1016/S0306-4573(96)00072-6}, + timestamp = {Fri, 21 Feb 2020 13:11:34 +0100}, + biburl = {https://dblp.org/rec/journals/ipm/Park97.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/siamrev/ClausetSN09, + author = {Aaron Clauset and + Cosma Rohilla Shalizi and + Mark E. J. Newman}, + title = {Power-Law Distributions in Empirical Data}, + journal = {{SIAM} Rev.}, + volume = {51}, + number = {4}, + pages = {661--703}, + year = {2009}, + url = {https://doi.org/10.1137/070710111}, + doi = {10.1137/070710111}, + timestamp = {Tue, 29 Sep 2020 10:58:15 +0200}, + biburl = {https://dblp.org/rec/journals/siamrev/ClausetSN09.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jasis/KoushaT17b, + author = {Kayvan Kousha and + Mike Thelwall}, + title = {News stories as evidence for research? {BBC} citations from articles, + Books, and Wikipedia}, + journal = {J. Assoc. Inf. Sci. Technol.}, + volume = {68}, + number = {8}, + pages = {2017--2028}, + year = {2017}, + url = {https://doi.org/10.1002/asi.23862}, + doi = {10.1002/asi.23862}, + timestamp = {Mon, 02 Mar 2020 17:21:46 +0100}, + biburl = {https://dblp.org/rec/journals/jasis/KoushaT17b.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/RobertsDVHBLPM19, + author = {Kirk Roberts and + Dina Demner{-}Fushman and + Ellen M. Voorhees and + William R. Hersh and + Steven Bedrick and + Alexander J. Lazar and + Shubham Pant and + Funda Meric{-}Bernstam}, + editor = {Ellen M. 
Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2019 Precision Medicine Track}, + booktitle = {Proceedings of the Twenty-Eighth Text REtrieval Conference, {TREC} + 2019, Gaithersburg, Maryland, USA, November 13-15, 2019}, + series = {{NIST} Special Publication}, + volume = {1250}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2019}, + url = {https://trec.nist.gov/pubs/trec28/papers/OVERVIEW.PM.pdf}, + timestamp = {Wed, 03 Feb 2021 08:31:24 +0100}, + biburl = {https://dblp.org/rec/conf/trec/RobertsDVHBLPM19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/RobertsDVHBL18, + author = {Kirk Roberts and + Dina Demner{-}Fushman and + Ellen M. Voorhees and + William R. Hersh and + Steven Bedrick and + Alexander J. Lazar}, + editor = {Ellen M. Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2018 Precision Medicine Track}, + booktitle = {Proceedings of the Twenty-Seventh Text REtrieval Conference, {TREC} + 2018, Gaithersburg, Maryland, USA, November 14-16, 2018}, + series = {{NIST} Special Publication}, + volume = {500-331}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2018}, + url = {https://trec.nist.gov/pubs/trec27/papers/Overview-PM.pdf}, + timestamp = {Wed, 03 Feb 2021 08:31:25 +0100}, + biburl = {https://dblp.org/rec/conf/trec/RobertsDVHBL18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/RobertsDVHBLP17, + author = {Kirk Roberts and + Dina Demner{-}Fushman and + Ellen M. Voorhees and + William R. Hersh and + Steven Bedrick and + Alexander J. Lazar and + Shubham Pant}, + editor = {Ellen M. 
Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2017 Precision Medicine Track}, + booktitle = {Proceedings of The Twenty-Sixth Text REtrieval Conference, {TREC} + 2017, Gaithersburg, Maryland, USA, November 15-17, 2017}, + series = {{NIST} Special Publication}, + volume = {500-324}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2017}, + url = {https://trec.nist.gov/pubs/trec26/papers/Overview-PM.pdf}, + timestamp = {Wed, 07 Jul 2021 16:44:22 +0200}, + biburl = {https://dblp.org/rec/conf/trec/RobertsDVHBLP17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/RobertsDVH16, + author = {Kirk Roberts and + Dina Demner{-}Fushman and + Ellen M. Voorhees and + William R. Hersh}, + editor = {Ellen M. Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2016 Clinical Decision Support Track}, + booktitle = {Proceedings of The Twenty-Fifth Text REtrieval Conference, {TREC} + 2016, Gaithersburg, Maryland, USA, November 15-18, 2016}, + series = {{NIST} Special Publication}, + volume = {500-321}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2016}, + url = {http://trec.nist.gov/pubs/trec25/papers/Overview-CL.pdf}, + timestamp = {Wed, 03 Feb 2021 08:31:25 +0100}, + biburl = {https://dblp.org/rec/conf/trec/RobertsDVH16.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/RobertsSVH15, + author = {Kirk Roberts and + Matthew S. Simpson and + Ellen M. Voorhees and + William R. Hersh}, + editor = {Ellen M. 
Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2015 Clinical Decision Support Track}, + booktitle = {Proceedings of The Twenty-Fourth Text REtrieval Conference, {TREC} + 2015, Gaithersburg, Maryland, USA, November 17-20, 2015}, + series = {{NIST} Special Publication}, + volume = {500-319}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2015}, + url = {http://trec.nist.gov/pubs/trec24/papers/Overview-CL.pdf}, + timestamp = {Wed, 03 Feb 2021 08:31:23 +0100}, + biburl = {https://dblp.org/rec/conf/trec/RobertsSVH15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/trec/SimpsonVH14, + author = {Matthew S. Simpson and + Ellen M. Voorhees and + William R. Hersh}, + editor = {Ellen M. Voorhees and + Angela Ellis}, + title = {Overview of the {TREC} 2014 Clinical Decision Support Track}, + booktitle = {Proceedings of The Twenty-Third Text REtrieval Conference, {TREC} + 2014, Gaithersburg, Maryland, USA, November 19-21, 2014}, + series = {{NIST} Special Publication}, + volume = {500-308}, + publisher = {National Institute of Standards and Technology {(NIST)}}, + year = {2014}, + url = {https://trec.nist.gov/pubs/trec23/papers/overview-clinical.pdf}, + timestamp = {Wed, 03 Feb 2021 08:31:24 +0100}, + biburl = {https://dblp.org/rec/conf/trec/SimpsonVH14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/MutschkeMSS11, + author = {Peter Mutschke and + Philipp Mayr and + Philipp Schaer and + York Sure}, + title = {Science models as value-added services for scholarly information systems}, + journal = {Scientometrics}, + volume = {89}, + number = {1}, + pages = {349--364}, + year = {2011}, + url = {https://doi.org/10.1007/s11192-011-0430-x}, + doi = {10.1007/s11192-011-0430-x}, + timestamp = {Fri, 17 Jul 2020 06:40:46 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/MutschkeMSS11.bib}, + 
bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jd/Langham95, + author = {Thomas Langham}, + title = {Consistency in Referencing}, + journal = {J. Documentation}, + volume = {51}, + number = {4}, + pages = {360--369}, + year = {1995}, + url = {https://doi.org/10.1108/eb026955}, + doi = {10.1108/eb026955}, + timestamp = {Sun, 06 Sep 2020 16:55:56 +0200}, + biburl = {https://dblp.org/rec/journals/jd/Langham95.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jet/Dietrich15, + author = {Franz Dietrich}, + title = {Aggregation theory and the relevance of some issues to others}, + journal = {J. Econ. Theory}, + volume = {160}, + pages = {463--493}, + year = {2015}, + url = {https://doi.org/10.1016/j.jet.2015.03.012}, + doi = {10.1016/j.jet.2015.03.012}, + timestamp = {Mon, 24 Feb 2020 15:54:33 +0100}, + biburl = {https://dblp.org/rec/journals/jet/Dietrich15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/iiix/Ingwersen12, + author = {Peter Ingwersen}, + editor = {Jaap Kamps and + Wessel Kraaij and + Norbert Fuhr}, + title = {Citations and references as keys to relevance ranking in interactive + {IR}}, + booktitle = {Information Interaction in Context: 2012, IIix'12, Nijmegen, The Netherlands, + August 21-24, 2012}, + pages = {1}, + publisher = {{ACM}}, + year = {2012}, + url = {https://doi.org/10.1145/2362724.2362726}, + doi = {10.1145/2362724.2362726}, + timestamp = {Tue, 06 Nov 2018 16:58:20 +0100}, + biburl = {https://dblp.org/rec/conf/iiix/Ingwersen12.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/tois/JarvelinK02, + author = {Kalervo J{\"{a}}rvelin and + Jaana Kek{\"{a}}l{\"{a}}inen}, + title = {Cumulated gain-based evaluation of {IR} techniques}, + journal = {{ACM} Trans. Inf. 
Syst.}, + volume = {20}, + number = {4}, + pages = {422--446}, + year = {2002}, + url = {http://doi.acm.org/10.1145/582415.582418}, + doi = {10.1145/582415.582418}, + timestamp = {Fri, 09 Jun 2017 11:03:19 +0200}, + biburl = {https://dblp.org/rec/journals/tois/JarvelinK02.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/ir/Fuhr08, + author = {Norbert Fuhr}, + title = {A probability ranking principle for interactive information retrieval}, + journal = {Inf. Retr.}, + volume = {11}, + number = {3}, + pages = {251--265}, + year = {2008}, + url = {https://doi.org/10.1007/s10791-008-9045-0}, + doi = {10.1007/s10791-008-9045-0}, + timestamp = {Sat, 27 May 2017 14:25:37 +0200}, + biburl = {https://dblp.org/rec/journals/ir/Fuhr08.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@book{DBLP:series/synthesis/2019Alonso, + author = {Omar Alonso}, + title = {The Practice of Crowdsourcing}, + series = {Synthesis Lectures on Information Concepts, Retrieval, and Services}, + publisher = {Morgan {\&} Claypool Publishers}, + year = {2019}, + url = {https://doi.org/10.2200/S00904ED1V01Y201903ICR066}, + doi = {10.2200/S00904ED1V01Y201903ICR066}, + timestamp = {Tue, 18 Jun 2019 11:34:18 +0200}, + biburl = {https://dblp.org/rec/series/synthesis/2019Alonso.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/clef/Schaer12, + author = {Philipp Schaer}, + editor = {Tiziana Catarci and + Pamela Forner and + Djoerd Hiemstra and + Anselmo Pe{\~{n}}as and + Giuseppe Santucci}, + title = {Better than Their Reputation? On the Reliability of Relevance Assessments + with Students}, + booktitle = {Information Access Evaluation. Multilinguality, Multimodality, and + Visual Analytics - Third International Conference of the {CLEF} Initiative, + {CLEF} 2012, Rome, Italy, September 17-20, 2012. 
Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {7488}, + pages = {124--135}, + publisher = {Springer}, + year = {2012}, + url = {https://doi.org/10.1007/978-3-642-33247-0\_14}, + doi = {10.1007/978-3-642-33247-0\_14}, + timestamp = {Tue, 14 May 2019 10:00:50 +0200}, + biburl = {https://dblp.org/rec/conf/clef/Schaer12.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/ipm/White17, + author = {Howard D. White}, + title = {Relevance theory and distributions of judgments in document retrieval}, + journal = {Inf. Process. Manag.}, + volume = {53}, + number = {5}, + pages = {1080--1102}, + year = {2017}, + url = {https://doi.org/10.1016/j.ipm.2017.02.010}, + doi = {10.1016/j.ipm.2017.02.010}, + timestamp = {Fri, 21 Feb 2020 13:11:26 +0100}, + biburl = {https://dblp.org/rec/journals/ipm/White17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/birws/BreuerST20, + author = {Timo Breuer and + Philipp Schaer and + Dirk Tunger}, + editor = {Guillaume Cabanac and + Ingo Frommholz and + Philipp Mayr}, + title = {Relations Between Relevance Assessments, Bibliometrics and Altmetrics}, + booktitle = {Proceedings of the 10th International Workshop on Bibliometric-enhanced + Information Retrieval co-located with 42nd European Conference on + Information Retrieval, BIR@ECIR 2020, Lisbon, Portugal, April 14th, + 2020 [online only]}, + series = {{CEUR} Workshop Proceedings}, + volume = {2591}, + pages = {101--112}, + publisher = {CEUR-WS.org}, + year = {2020}, + url = {http://ceur-ws.org/Vol-2591/paper-10.pdf}, + timestamp = {Tue, 14 Apr 2020 17:42:07 +0200}, + biburl = {https://dblp.org/rec/conf/birws/BreuerST20.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/birws/BreuerST20, + author = {Timo Breuer and + Philipp Schaer and + Dirk Tunger}, + editor = {Guillaume Cabanac and + Ingo 
Frommholz and + Philipp Mayr}, + title = {Relations Between Relevance Assessments, Bibliometrics and Altmetrics}, + booktitle = {Proceedings of the 10th International Workshop on Bibliometric-enhanced + Information Retrieval co-located with 42nd European Conference on + Information Retrieval, BIR@ECIR 2020, Lisbon, Portugal, April 14th, + 2020 [online only]}, + series = {{CEUR} Workshop Proceedings}, + volume = {2591}, + pages = {101--112}, + publisher = {CEUR-WS.org}, + year = {2020}, + url = {http://ceur-ws.org/Vol-2591/paper-10.pdf}, + timestamp = {Tue, 14 Apr 2020 17:42:07 +0200}, + biburl = {https://dblp.org/rec/conf/birws/BreuerST20.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jd/Ingwersen96, + author = {Peter Ingwersen}, + title = {Cognitive Perspectives of Information Retrieval Interaction: Elements + of a Cognitive {IR} Theory}, + journal = {J. Documentation}, + volume = {52}, + number = {1}, + pages = {3--50}, + year = {1996}, + url = {https://doi.org/10.1108/eb026960}, + doi = {10.1108/eb026960}, + timestamp = {Sun, 06 Sep 2020 16:56:08 +0200}, + biburl = {https://dblp.org/rec/journals/jd/Ingwersen96.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jasis/Mizzaro97, + author = {Stefano Mizzaro}, + title = {Relevance: The Whole History}, + journal = {J. Am. Soc. Inf. 
Sci.}, + volume = {48}, + number = {9}, + pages = {810--832}, + year = {1997}, + url = {https://doi.org/10.1002/(SICI)1097-4571(199709)48:9\%3C810::AID-ASI6\%3E3.0.CO;2-U}, + doi = {10.1002/(SICI)1097-4571(199709)48:9\%3C810::AID-ASI6\%3E3.0.CO;2-U}, + timestamp = {Wed, 13 May 2020 17:19:45 +0200}, + biburl = {https://dblp.org/rec/journals/jasis/Mizzaro97.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/ClermontKT21, + author = {Marcel Clermont and + Johanna Krolak and + Dirk Tunger}, + title = {Does the citation period have any effect on the informative value + of selected citation indicators in research evaluations?}, + journal = {Scientometrics}, + volume = {126}, + number = {2}, + pages = {1019--1047}, + year = {2021}, + url = {https://doi.org/10.1007/s11192-020-03782-1}, + doi = {10.1007/s11192-020-03782-1}, + timestamp = {Fri, 14 May 2021 08:32:26 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/ClermontKT21.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/joi/SungWHC15, + author = {Hui{-}Yun Sung and + Chun{-}Chieh Wang and + Mu{-}Hsuan Huang and + Dar{-}Zen Chen}, + title = {Measuring science-based science linkage and non-science-based linkage + of patents through non-patent references}, + journal = {J. Informetrics}, + volume = {9}, + number = {3}, + pages = {488--498}, + year = {2015}, + url = {https://doi.org/10.1016/j.joi.2015.04.004}, + doi = {10.1016/j.joi.2015.04.004}, + timestamp = {Fri, 27 Mar 2020 08:32:34 +0100}, + biburl = {https://dblp.org/rec/journals/joi/SungWHC15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jasis/SmolinskyLM15, + author = {Lawrence J. Smolinsky and + Aaron J. Lercher and + Andrew McDaniel}, + title = {Testing theories of preferential attachment in random networks of + citations}, + journal = {J. Assoc. Inf. Sci. 
Technol.}, + volume = {66}, + number = {10}, + pages = {2132--2145}, + year = {2015}, + url = {https://doi.org/10.1002/asi.23312}, + doi = {10.1002/asi.23312}, + timestamp = {Thu, 29 Apr 2021 11:10:56 +0200}, + biburl = {https://dblp.org/rec/journals/jasis/SmolinskyLM15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jdis/Raan17, + author = {Anthony F. J. van Raan}, + title = {Patent Citations Analysis and Its Value in Research Evaluation: {A} + Review and a New Approach to Map Technology-relevant Research}, + journal = {J. Data Inf. Sci.}, + volume = {2}, + number = {1}, + pages = {13--50}, + year = {2017}, + url = {https://doi.org/10.1515/jdis-2017-0002}, + doi = {10.1515/jdis-2017-0002}, + timestamp = {Thu, 06 Jan 2022 14:19:59 +0100}, + biburl = {https://dblp.org/rec/journals/jdis/Raan17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@proceedings{DBLP:conf/chiir/2021birds, + editor = {Ingo Frommholz and + Haiming Liu and + Massimo Melucci and + Nicholas J. Belkin and + Gareth J. F. 
Jones and + Noriko Kando and + Gabriella Pasi}, + title = {Joint Proceedings of the Second Workshop on Bridging the Gap between + Information Science, Information Retrieval and Data Science, and Third + Workshop on Evaluation of Personalisation in Information Retrieval + co-located with 6th {ACM} {SIGIR} Conference on Human Information + Interaction and Retrieval {(CHIIR} 2021), Canberra, Australia (Virtual + Event), March 19th, 2021}, + series = {{CEUR} Workshop Proceedings}, + volume = {2863}, + publisher = {CEUR-WS.org}, + year = {2021}, + url = {http://ceur-ws.org/Vol-2863}, + urn = {urn:nbn:de:0074-2863-0}, + timestamp = {Wed, 19 May 2021 16:09:08 +0200}, + biburl = {https://dblp.org/rec/conf/chiir/2021birds.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jd/Cronin81, + author = {Blaise Cronin}, + title = {The Need for a Theory of citing}, + journal = {J. Documentation}, + volume = {37}, + number = {1}, + pages = {16--24}, + year = {1981}, + url = {https://doi.org/10.1108/eb026703}, + doi = {10.1108/eb026703}, + timestamp = {Sun, 06 Sep 2020 16:56:21 +0200}, + biburl = {https://dblp.org/rec/journals/jd/Cronin81.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/Larsen02, + author = {Birger Larsen}, + title = {Exploiting citation overlaps for Information Retrieval: Generating + a boomerang effect from the network of scientific papers}, + journal = {Scientometrics}, + volume = {54}, + number = {2}, + pages = {155--178}, + year = {2002}, + url = {https://doi.org/10.1023/A:1016011326300}, + doi = {10.1023/A:1016011326300}, + timestamp = {Fri, 17 Jul 2020 06:40:02 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/Larsen02.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/LykkeLLI10, + author = {Marianne Lykke and + Birger Larsen and + Haakon Lund and + Peter 
Ingwersen}, + editor = {Cathal Gurrin and + Yulan He and + Gabriella Kazai and + Udo Kruschwitz and + Suzanne Little and + Thomas Roelleke and + Stefan M. R{\"{u}}ger and + Keith van Rijsbergen}, + title = {Developing a Test Collection for the Evaluation of Integrated Search}, + booktitle = {Advances in Information Retrieval, 32nd European Conference on {IR} + Research, {ECIR} 2010, Milton Keynes, UK, March 28-31, 2010. Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {5993}, + pages = {627--630}, + publisher = {Springer}, + year = {2010}, + url = {https://doi.org/10.1007/978-3-642-12275-0\_63}, + doi = {10.1007/978-3-642-12275-0\_63}, + timestamp = {Sun, 25 Oct 2020 22:33:08 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/LykkeLLI10.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/BallT06, + author = {Rafael Ball and + Dirk Tunger}, + title = {Bibliometric analysis - {A} new business area for information professionals + in libraries?}, + journal = {Scientometrics}, + volume = {66}, + number = {3}, + pages = {561--577}, + year = {2006}, + url = {https://doi.org/10.1007/s11192-006-0041-0}, + doi = {10.1007/s11192-006-0041-0}, + timestamp = {Mon, 26 Oct 2020 08:45:29 +0100}, + biburl = {https://dblp.org/rec/journals/scientometrics/BallT06.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/icis/Jain03, + author = {Abhijit Jain}, + title = {Performance Paradox : Information Technology Investments and Administrative + Performance in the Case of the 50 {U.S.} State Governments}, + booktitle = {Proceedings of the International Conference on Information Systems, + {ICIS} 2003, December 14-17, 2003, Seattle, Washington, {USA}}, + pages = {389--400}, + publisher = {Association for Information Systems}, + year = {2003}, + url = {http://aisel.aisnet.org/icis2003/33}, + timestamp = {Sun, 29 Mar 2015 13:07:34 +0200}, + biburl 
= {https://dblp.org/rec/conf/icis/Jain03.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/ipm/SkovLI08, + author = {Mette Skov and + Birger Larsen and + Peter Ingwersen}, + title = {Inter and intra-document contexts applied in polyrepresentation for + best match {IR}}, + journal = {Inf. Process. Manag.}, + volume = {44}, + number = {5}, + pages = {1673--1683}, + year = {2008}, + url = {https://doi.org/10.1016/j.ipm.2008.05.006}, + doi = {10.1016/j.ipm.2008.05.006}, + timestamp = {Fri, 21 Feb 2020 13:11:05 +0100}, + biburl = {https://dblp.org/rec/journals/ipm/SkovLI08.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/issi/Tunger19, + author = {Dirk Tunger}, + editor = {Giuseppe Catalano and + Cinzia Daraio and + Martina Gregori and + Henk F. Moed and + Giancarlo Ruocco}, + title = {Altmetrics - on the way to the "economy of attention"? Feasibility + study Altmetrics for the German Ministry of Science and Research {(BMBF)}}, + booktitle = {Proceedings of the 17th International Conference on Scientometrics + and Informetrics, {ISSI} 2019, Rome, Italy, September 2-5, 2019}, + pages = {2262--2272}, + publisher = {{ISSI} Society}, + year = {2019}, + timestamp = {Tue, 14 Apr 2020 11:09:56 +0200}, + biburl = {https://dblp.org/rec/conf/issi/Tunger19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/Bessagnet18, + author = {Marie{-}No{\"{e}}lle Bessagnet}, + editor = {Philipp Mayr and + Ingo Frommholz and + Guillaume Cabanac}, + title = {A Generic Framework to Perform Comprehensive Analysis of Tweets}, + booktitle = {Proceedings of the 7th International Workshop on Bibliometric-enhanced + Information Retrieval {(BIR} 2018) co-located with the 40th European + Conference on Information Retrieval {(ECIR} 2018), Grenoble, France, + March 26, 2018}, + series = {{CEUR} Workshop Proceedings}, + volume = 
{2080}, + pages = {80--85}, + publisher = {CEUR-WS.org}, + year = {2018}, + url = {http://ceur-ws.org/Vol-2080/paper9.pdf}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/Bessagnet18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/SaierF19, + author = {Tarek Saier and + Michael F{\"{a}}rber}, + editor = {Guillaume Cabanac and + Ingo Frommholz and + Philipp Mayr}, + title = {Bibliometric-Enhanced arXiv: {A} Data Set for Paper-Based and Citation-Based + Tasks}, + booktitle = {Proceedings of the 8th International Workshop on Bibliometric-enhanced + Information Retrieval {(BIR} 2019) co-located with the 41st European + Conference on Information Retrieval {(ECIR} 2019), Cologne, Germany, + April 14, 2019}, + series = {{CEUR} Workshop Proceedings}, + volume = {2345}, + pages = {14--26}, + publisher = {CEUR-WS.org}, + year = {2019}, + url = {http://ceur-ws.org/Vol-2345/paper2.pdf}, + timestamp = {Thu, 21 Jan 2021 17:36:28 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/SaierF19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/DabrowskaL15, + author = {Anna Dabrowska and + Birger Larsen}, + editor = {Philipp Mayr and + Ingo Frommholz and + Peter Mutschke}, + title = {Exploiting Citation Contexts for Physics Retrieval}, + booktitle = {Proceedings of the Second Workshop on Bibliometric-enhanced Information + Retrieval co-located with the 37th European Conference on Information + Retrieval {(ECIR} 2015), Vienna, Austria, March 29th, 2015}, + series = {{CEUR} Workshop Proceedings}, + volume = {1344}, + pages = {14--21}, + publisher = {CEUR-WS.org}, + year = {2015}, + url = {http://ceur-ws.org/Vol-1344/paper2.pdf}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/DabrowskaL15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + 
+@article{DBLP:journals/aslib/CostasZW15, + author = {Rodrigo Costas and + Zohreh Zahedi and + Paul Wouters}, + title = {The thematic orientation of publications mentioned on social media: + Large-scale disciplinary comparison of social media metrics with citations}, + journal = {Aslib J. Inf. Manag.}, + volume = {67}, + number = {3}, + pages = {260--288}, + year = {2015}, + url = {https://doi.org/10.1108/AJIM-12-2014-0173}, + doi = {10.1108/AJIM-12-2014-0173}, + timestamp = {Mon, 03 Jan 2022 22:07:53 +0100}, + biburl = {https://dblp.org/rec/journals/aslib/CostasZW15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/MutschkeMSS11, + author = {Peter Mutschke and + Philipp Mayr and + Philipp Schaer and + York Sure}, + title = {Science models as value-added services for scholarly information systems}, + journal = {Scientometrics}, + volume = {89}, + number = {1}, + pages = {349--364}, + year = {2011}, + url = {https://doi.org/10.1007/s11192-011-0430-x}, + doi = {10.1007/s11192-011-0430-x}, + timestamp = {Fri, 17 Jul 2020 06:40:46 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/MutschkeMSS11.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/scientometrics/MayrS15a, + author = {Philipp Mayr and + Andrea Scharnhorst}, + title = {Scientometrics and information retrieval: weak-links revitalized}, + journal = {Scientometrics}, + volume = {102}, + number = {3}, + pages = {2193--2199}, + year = {2015}, + url = {https://doi.org/10.1007/s11192-014-1484-3}, + doi = {10.1007/s11192-014-1484-3}, + timestamp = {Fri, 17 Jul 2020 06:39:21 +0200}, + biburl = {https://dblp.org/rec/journals/scientometrics/MayrS15a.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/sigir/Azzopardi11, + author = {Leif Azzopardi}, + editor = {Wei{-}Ying Ma and + Jian{-}Yun Nie and + Ricardo Baeza{-}Yates and 
+ Tat{-}Seng Chua and + W. Bruce Croft}, + title = {The economics in interactive information retrieval}, + booktitle = {Proceeding of the 34th International {ACM} {SIGIR} Conference on Research + and Development in Information Retrieval, {SIGIR} 2011, Beijing, China, + July 25-29, 2011}, + pages = {15--24}, + publisher = {{ACM}}, + year = {2011}, + url = {https://doi.org/10.1145/2009916.2009923}, + doi = {10.1145/2009916.2009923}, + timestamp = {Sun, 22 Sep 2019 18:15:38 +0200}, + biburl = {https://dblp.org/rec/conf/sigir/Azzopardi11.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jdis/Raan17, + author = {Anthony F. J. van Raan}, + title = {Patent Citations Analysis and Its Value in Research Evaluation: {A} + Review and a New Approach to Map Technology-relevant Research}, + journal = {J. Data Inf. Sci.}, + volume = {2}, + number = {1}, + pages = {13--50}, + year = {2017}, + url = {https://doi.org/10.1515/jdis-2017-0002}, + doi = {10.1515/jdis-2017-0002}, + timestamp = {Thu, 06 Jan 2022 14:19:59 +0100}, + biburl = {https://dblp.org/rec/journals/jdis/Raan17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/sigir/LarsenI02, + author = {Birger Larsen and + Peter Ingwersen}, + editor = {Kalervo J{\"{a}}rvelin and + Micheline Beaulieu and + Ricardo A. 
Baeza{-}Yates and + Sung{-}Hyon Myaeng}, + title = {The boomerang effect: retrieving scientific documents via the network + of references and citations}, + booktitle = {{SIGIR} 2002: Proceedings of the 25th Annual International {ACM} {SIGIR} + Conference on Research and Development in Information Retrieval, August + 11-15, 2002, Tampere, Finland}, + pages = {397--398}, + publisher = {{ACM}}, + year = {2002}, + url = {https://doi.org/10.1145/564376.564462}, + doi = {10.1145/564376.564462}, + timestamp = {Wed, 07 Nov 2018 14:52:44 +0100}, + biburl = {https://dblp.org/rec/conf/sigir/LarsenI02.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/isu/MeierT18, + author = {Andreas Meier and + Dirk Tunger}, + title = {Investigating the transparency and influenceability of altmetrics + using the example of the {RG} score and the ResearchGate platform}, + journal = {Inf. Serv. Use}, + volume = {38}, + number = {1-2}, + pages = {99--110}, + year = {2018}, + url = {https://doi.org/10.3233/ISU-180001}, + doi = {10.3233/ISU-180001}, + timestamp = {Mon, 11 May 2020 15:37:55 +0200}, + biburl = {https://dblp.org/rec/journals/isu/MeierT18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/KacemM18, + author = {Ameni Kacem and + Philipp Mayr}, + editor = {Philipp Mayr and + Ingo Frommholz and + Guillaume Cabanac}, + title = {Users are not Influenced by High Impact and Core Journals while Searching}, + booktitle = {Proceedings of the 7th International Workshop on Bibliometric-enhanced + Information Retrieval {(BIR} 2018) co-located with the 40th European + Conference on Information Retrieval {(ECIR} 2018), Grenoble, France, + March 26, 2018}, + series = {{CEUR} Workshop Proceedings}, + volume = {2080}, + pages = {63--75}, + publisher = {CEUR-WS.org}, + year = {2018}, + url = {http://ceur-ws.org/Vol-2080/paper7.pdf}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + 
biburl = {https://dblp.org/rec/conf/ecir/KacemM18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/CarevicS14, + author = {Zeljko Carevic and + Philipp Schaer}, + editor = {Philipp Mayr and + Philipp Schaer and + Andrea Scharnhorst and + Birger Larsen and + Peter Mutschke}, + title = {On the Connection Between Citation-based and Topical Relevance Ranking: + Results of a Pretest using iSearch}, + booktitle = {Proceedings of the First Workshop on Bibliometric-enhanced Information + Retrieval co-located with 36th European Conference on Information + Retrieval {(ECIR} 2014), Amsterdam, The Netherlands, April 13, 2014}, + series = {{CEUR} Workshop Proceedings}, + volume = {1143}, + pages = {37--44}, + publisher = {CEUR-WS.org}, + year = {2014}, + url = {http://ceur-ws.org/Vol-1143/paper5.pdf}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/CarevicS14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/ecir/JackLHK14, + author = {Kris Jack and + Pablo L{\'{o}}pez{-}Garc{\'{\i}}a and + Maya Hristakeva and + Roman Kern}, + editor = {Philipp Mayr and + Philipp Schaer and + Andrea Scharnhorst and + Birger Larsen and + Peter Mutschke}, + title = {\{\{citation needed\}\}: Filling in Wikipedia's Citation Shaped Holes}, + booktitle = {Proceedings of the First Workshop on Bibliometric-enhanced Information + Retrieval co-located with 36th European Conference on Information + Retrieval {(ECIR} 2014), Amsterdam, The Netherlands, April 13, 2014}, + series = {{CEUR} Workshop Proceedings}, + volume = {1143}, + pages = {45--52}, + publisher = {CEUR-WS.org}, + year = {2014}, + url = {http://ceur-ws.org/Vol-1143/paper6.pdf}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/JackLHK14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + 
+@proceedings{DBLP:conf/ecir/2014bir, + editor = {Philipp Mayr and + Philipp Schaer and + Andrea Scharnhorst and + Birger Larsen and + Peter Mutschke}, + title = {Proceedings of the First Workshop on Bibliometric-enhanced Information + Retrieval co-located with 36th European Conference on Information + Retrieval {(ECIR} 2014), Amsterdam, The Netherlands, April 13, 2014}, + series = {{CEUR} Workshop Proceedings}, + volume = {1143}, + publisher = {CEUR-WS.org}, + year = {2014}, + url = {http://ceur-ws.org/Vol-1143}, + urn = {urn:nbn:de:0074-1143-7}, + timestamp = {Wed, 12 Feb 2020 16:44:12 +0100}, + biburl = {https://dblp.org/rec/conf/ecir/2014bir.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/jasis/Cole11, + author = {Charles Cole}, + title = {A theory of information need for information retrieval that connects + information to knowledge}, + journal = {J. Assoc. Inf. Sci. Technol.}, + volume = {62}, + number = {7}, + pages = {1216--1231}, + year = {2011}, + url = {https://doi.org/10.1002/asi.21541}, + doi = {10.1002/asi.21541}, + timestamp = {Mon, 02 Mar 2020 17:22:25 +0100}, + biburl = {https://dblp.org/rec/journals/jasis/Cole11.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{DBLP:conf/icsm/GayHMM09, + author = {Gregory Gay and + Sonia Haiduc and + Andrian Marcus and + Tim Menzies}, + title = {On the use of relevance feedback in IR-based concept location}, + booktitle = {25th {IEEE} International Conference on Software Maintenance {(ICSM} + 2009), September 20-26, 2009, Edmonton, Alberta, Canada}, + pages = {351--360}, + publisher = {{IEEE} Computer Society}, + year = {2009}, + url = {https://doi.org/10.1109/ICSM.2009.5306315}, + doi = {10.1109/ICSM.2009.5306315}, + timestamp = {Thu, 14 Oct 2021 10:27:26 +0200}, + biburl = {https://dblp.org/rec/conf/icsm/GayHMM09.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + 
+@article{DBLP:journals/corr/HeckS13, + author = {Tamara Heck and + Philipp Schaer}, + title = {Performing Informetric Analysis on Information Retrieval Test Collections: + Preliminary Experiments in the Physics Domain}, + journal = {CoRR}, + volume = {abs/1306.1743}, + year = {2013}, + url = {http://arxiv.org/abs/1306.1743}, + eprinttype = {arXiv}, + eprint = {1306.1743}, + timestamp = {Mon, 13 Aug 2018 16:48:24 +0200}, + biburl = {https://dblp.org/rec/journals/corr/HeckS13.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@article{DBLP:journals/cacm/Voorhees07, + author = {Ellen M. Voorhees}, + title = {{TREC:} Continuing information retrieval's tradition of experimentation}, + journal = {Commun. {ACM}}, + volume = {50}, + number = {11}, + pages = {51--54}, + year = {2007}, + url = {https://doi.org/10.1145/1297797.1297822}, + doi = {10.1145/1297797.1297822}, + timestamp = {Thu, 14 Oct 2021 09:00:21 +0200}, + biburl = {https://dblp.org/rec/journals/cacm/Voorhees07.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + diff --git a/BibTexTools/tests/data/full.bib b/BibTexTools/tests/data/full.bib new file mode 100644 index 0000000..a006397 --- /dev/null +++ b/BibTexTools/tests/data/full.bib @@ -0,0 +1,20 @@ +@Atype{Akey, + author = {A1_First von A1_Last and + von A2_Last, A2_First}, + title = {A_Title}, + journal = {A_Journal}, + volume = {A_Volume}, + year = {A_Year}, + url = {A_Url}, +} + + +@Btype{Bkey, + author = {B1_First von B1_Last and + von B2_Last, B2_First}, + title = {B_Title}, + journal = {B_Journal}, + volume = {B_Volume}, + year = {B_Year}, + url = {B_Url}, +} \ No newline at end of file diff --git a/BibTexTools/tests/data/not_standard.bib b/BibTexTools/tests/data/not_standard.bib new file mode 100644 index 0000000..942e614 --- /dev/null +++ b/BibTexTools/tests/data/not_standard.bib @@ -0,0 +1,5 @@ +@type{key, + author = {first1 last1 and + first2 last2}, + myfield = {myvalue}, +} \ No newline 
at end of file diff --git a/BibTexTools/tests/data/simple.bib b/BibTexTools/tests/data/simple.bib new file mode 100644 index 0000000..4040832 --- /dev/null +++ b/BibTexTools/tests/data/simple.bib @@ -0,0 +1,6 @@ +@type{key, + author = {first1 last1 and + first2 last2}, + title = {mytitle}, + journal = {myjournal}, +} \ No newline at end of file diff --git a/BibTexTools/tests/data/to_bib.bib b/BibTexTools/tests/data/to_bib.bib new file mode 100644 index 0000000..b8613f1 --- /dev/null +++ b/BibTexTools/tests/data/to_bib.bib @@ -0,0 +1,12 @@ +@Atype{Akey, +author = {A1_First von A1_Last and +A2_First von A2_Last}, +title = {A_Title}, +} + + +@Btype{Bkey, +author = {B1_First von B1_Last and +B2_First von B2_Last}, +title = {B_Title}, +} \ No newline at end of file diff --git a/BibTexTools/tests/data/to_json_fields.json b/BibTexTools/tests/data/to_json_fields.json new file mode 100644 index 0000000..53f0802 --- /dev/null +++ b/BibTexTools/tests/data/to_json_fields.json @@ -0,0 +1,16 @@ +{ + "Akey": { + "title": "A_Title", + "author": [ + "A1_First von A1_Last", + "A2_First von A2_Last" + ] + }, + "Bkey": { + "title": "B_Title", + "author": [ + "B1_First von B1_Last", + "B2_First von B2_Last" + ] + } +} \ No newline at end of file diff --git a/BibTexTools/tests/data/to_json_ref.json b/BibTexTools/tests/data/to_json_ref.json new file mode 100644 index 0000000..267456a --- /dev/null +++ b/BibTexTools/tests/data/to_json_ref.json @@ -0,0 +1,26 @@ +{ + "Akey": { + "type": "Atype", + "author": [ + "A1_First von A1_Last", + "A2_First von A2_Last" + ], + "title": "A_Title", + "journal": "A_Journal", + "volume": "A_Volume", + "year": "A_Year", + "url": "A_Url" + }, + "Bkey": { + "type": "Btype", + "author": [ + "B1_First von B1_Last", + "B2_First von B2_Last" + ], + "title": "B_Title", + "journal": "B_Journal", + "volume": "B_Volume", + "year": "B_Year", + "url": "B_Url" + } +} \ No newline at end of file From 548a68ad689f1d923038e81a3d182eccf0a70883 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 09:59:01 +0100 Subject: [PATCH 07/15] Update README.md --- README.md | 71 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 2c583d3..0880b6e 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,73 @@ -# Clean BibTeX references -**Clean BibTeX** is a handy tool to resolve incomplete or misleading BibTeX references in computer science. -The CLI tool parses publication titles from a BibTeX and retrieves high-quality references from [dblp](https://dblp.uni-trier.de/). - -
Spaces Demo +# 📚 BibTexTools +
+ ![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools)
+ +**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). + +If you encounter any error or parsing mistake, feel free to open a new (issue)[https://github.com/jueri/BibTexTools/issues/new].
-## Installation: -**Clean BibTeX** can be installed using pip and this repository: +## ⚙️ Installation: +**BibTexTools** can be installed using pip and this repository: ``` -pip install git+https://github.com/jueri/clean_bibtex.git +pip install git+https://github.com/jueri/BibTexTools.git ``` -Alternatively, you can clone this repository and install it from source. +Alternatively, you can clone this repository and install it from the source. 1. Clone the repository: -` git clone https://github.com/jueri/clean_bibtex.git` +` git clone https://github.com/jueri/BibTexTools.git` 2. Change working directory: -`cd clean_bibtex` +`cd BibTexTools` 3. install with: -`pip install -e .` +`pip install .`
-## Usage: -To resolve a `.bib` file, simply call `clean_bibtex` and specify an input and output file: +## 📖 Usage: +BibTexTools provides the following commands: ``` -clean_bibtex [input file] [output file] +Usage: BibTexTools [OPTIONS] COMMAND [ARGS]... + +Options: + --help Show this message and exit. + +Commands: + abbreviate-authors Abbreviate the author names of a BibTex bibliography + clean Clean a BibTex bibliography +``` + +A bibliography file as input and an output destination need to be specified for all operations. + +### Abbreviate-authors: +The `abbreviate-authors` command will abbreviate all author names from a bibliography. The middle names are also included if the `-m` flag is set. +``` +Usage: BibTexTools abbreviate-authors [OPTIONS] INPUT OUTPUT + + Abbreviate the author names of a BibTex bibliography + +Options: + -m, --middle_names Include the middle names + --help Show this message and exit. +``` + +### Clean: +The `clean` command may help resolve incomplete references by retrieving high-quality references from [dblp](https://dblp.uni-trier.de/). +``` +Usage: BibTexTools clean [OPTIONS] INPUT OUTPUT + + Clean a BibTex bibliography + +Options: + -k, --keep_keys Keep original keys + -u, --keep_unknown Keep entries that can not be cleaned + --help Show this message and exit. ```
-## Example: +## ✨ Example: Imagine you found an interesting paper online and saved it to your collection. Unluckily, in addition to the paper itself, you only got incomplete metadata like this: ```BibTeX @article{devlin2018bert, @@ -41,7 +77,7 @@ Imagine you found an interesting paper online and saved it to your collection. U } ``` -**Clean BibTeX** will extract the name of the paper and retrieve complete BibTeX metadata from dblp, resolving the reference into: +**BibTexTools** will extract the title of the paper and retrieve complete BibTeX metadata from dblp, resolving the reference into: ```BibTeX @inproceedings{DBLP:conf/naacl/DevlinCLT19, @@ -67,5 +103,4 @@ Imagine you found an interesting paper online and saved it to your collection. U biburl = {https://dblp.org/rec/conf/naacl/DevlinCLT19.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } - ``` From 8a7e97974300b918c1b4ebb189c0dd221c29fd5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:00:08 +0100 Subject: [PATCH 08/15] Update README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0880b6e..6a07403 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # 📚 BibTexTools -
- ![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools) -
+
![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools)
**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). -If you encounter any error or parsing mistake, feel free to open a new (issue)[https://github.com/jueri/BibTexTools/issues/new]. +If you encounter any error or parsing mistake, feel free to open a new [issue](https://github.com/jueri/BibTexTools/issues/new).
## ⚙️ Installation: From 53aaec23302df73a19218b388d0f49a4f88b0984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:02:30 +0100 Subject: [PATCH 09/15] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6a07403..f3b28aa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # 📚 BibTexTools -
![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools)
- +
+![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools) **BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). If you encounter any error or parsing mistake, feel free to open a new [issue](https://github.com/jueri/BibTexTools/issues/new). @@ -25,7 +25,7 @@ Alternatively, you can clone this repository and install it from the source.
## 📖 Usage: -BibTexTools provides the following commands: +**BibTexTools** provides the following commands: ``` Usage: BibTexTools [OPTIONS] COMMAND [ARGS]... From 789a8745bdc4244d2bf73becf6db383419d60132 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:03:31 +0100 Subject: [PATCH 10/15] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index f3b28aa..d959c08 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ # 📚 BibTexTools -
-![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/jueri/BibTexTools) +
GitHub Pipenv locked Python version
**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). If you encounter any error or parsing mistake, feel free to open a new [issue](https://github.com/jueri/BibTexTools/issues/new). From 201996c1da853c880b84899f5bdcba128472369a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:03:54 +0100 Subject: [PATCH 11/15] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d959c08..d6943e9 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # 📚 BibTexTools
GitHub Pipenv locked Python version
+ **BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). If you encounter any error or parsing mistake, feel free to open a new [issue](https://github.com/jueri/BibTexTools/issues/new). From 09ad025409f3a3b91046c7d7fc0c13db4355b97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:05:02 +0100 Subject: [PATCH 12/15] Update README.md --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d6943e9..c36fe31 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ -# 📚 BibTexTools -
GitHub Pipenv locked Python version
+
+

📚 BibTexTools


+ GitHub Pipenv locked Python version + +
**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). From 064a1c426323465f6b46f705241efd450dfddef2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:05:45 +0100 Subject: [PATCH 13/15] Update README.md --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c36fe31..3055e9d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,5 @@ -
-

📚 BibTexTools


- GitHub Pipenv locked Python version - -
+📚 BibTexTools +
GitHub Pipenv locked Python version
**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/). From 4f0c5d718a355883acc6940fcbd1898ab2159156 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCri=20Keller?= Date: Mon, 7 Mar 2022 10:05:55 +0100 Subject: [PATCH 14/15] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3055e9d..d6943e9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -📚 BibTexTools +# 📚 BibTexTools
GitHub Pipenv locked Python version
**BibTexTools** is a handy tool to parse and manipulate BibTex bibliographies and references. **BibTexTools** can abbreviate author names and resolve incomplete references based on the reference title and the computer science bibliography [dblp](https://dblp.uni-trier.de/).

From 5eec3e84fae702e154fd5bbd6bf8ed87e456c768 Mon Sep 17 00:00:00 2001
From: Jueri
Date: Mon, 7 Mar 2022 10:11:35 +0100
Subject: [PATCH 15/15] add CLI

---
 BibTexTools/CLI.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 BibTexTools/CLI.py

diff --git a/BibTexTools/CLI.py b/BibTexTools/CLI.py
new file mode 100644
index 0000000..9b4b5df
--- /dev/null
+++ b/BibTexTools/CLI.py
@@ -0,0 +1,60 @@
+# Command line interface of BibTexTools, implemented with click.
+#
+# click uses a command function's docstring as its --help text, so the
+# docstrings below stay short and further notes are written as comments.
+import click
+from BibTexTools.parser import Parser
+from BibTexTools.cleaner import Cleaner
+
+
+# Root command group; the commands below register on it via @cli.command().
+@click.group()
+def cli():
+    pass
+
+
+# clean: parse INPUT, run the bibliography through Cleaner and write the
+# result to OUTPUT in BibTex format.
+@cli.command()
+@click.argument("input", type=click.Path(exists=True))
+@click.option("--keep_keys", "-k", is_flag=True, help="Keep original keys")
+@click.option(
+    "--keep_unknown", "-u", is_flag=True, help="Keep entries that can not be cleaned"
+)
+@click.argument("output", type=click.File("w"))
+def clean(input, keep_keys, keep_unknown, output):
+    """Clean a BibTex bibliography"""
+    # parse
+    parser_obj = Parser()
+    bib = parser_obj.from_file(input)
+
+    # process
+    # Static text on purpose: a "{...}" placeholder in a plain (non-f) string
+    # would be echoed literally to the user.
+    click.echo("Requesting citation metadata, this may take a while...")
+    cleaner_obj = Cleaner(keep_keys=keep_keys, keep_unknown=keep_unknown)
+    processed_bib = cleaner_obj.clean(bib)
+
+    # write
+    bibtex_str = processed_bib.to_bibtex()
+    output.write(bibtex_str)
+
+
+# abbreviate-authors: parse INPUT, abbreviate the author names and write the
+# result to OUTPUT in BibTex format.
+@cli.command()
+@click.argument("input", type=click.Path(exists=True))
+@click.option("--middle_names", "-m", is_flag=True, help="Include the middle names")
+@click.argument("output", type=click.File("w"))
+def abbreviate_authors(input, middle_names, output):
+    """Abbreviate the author names of a BibTex bibliography"""
+    # parse
+    parser_obj = Parser()
+    bib = parser_obj.from_file(input)
+
+    # process
+    processed_bib = bib.abbreviate_names(middle_names)
+
+    # write
+    bibtex_str = processed_bib.to_bibtex()
+    output.write(bibtex_str)