This repository has been archived by the owner on Aug 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a script to create provider API script template (#128)
This script will make it easier for new contributors to add provider API scripts by giving them the skeletons of the script, workflow file and tests Signed-off-by: Olga Bulat <obulat@gmail.com>
- Loading branch information
Showing
9 changed files
with
731 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,20 @@ | ||
# flake8: noqa | ||
from .licenses import constants | ||
from .licenses.licenses import ( | ||
get_license_info, LicenseInfo, is_valid_license_info | ||
get_license_info, | ||
get_license_info_from_license_pair, | ||
is_valid_license_info, | ||
LicenseInfo, | ||
) | ||
from .storage.image import ( | ||
Image, ImageStore, MockImageStore | ||
Image, | ||
ImageStore, | ||
MockImageStore, | ||
) | ||
from .storage.audio import ( | ||
Audio, AudioStore, MockAudioStore | ||
Audio, | ||
AudioStore, | ||
MockAudioStore | ||
) | ||
from .storage import columns | ||
from .requester import DelayedRequester |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
## Adding new provider API script | ||
|
||
Openverse Catalog uses the APIs of sites that share openly-licensed media to collect data about the media and save it to the database. We call the scripts that pull the data from these APIs "Provider API scripts". You can find examples in the [`provider_api_scripts` folder](../dags/provider_api_scripts). | ||
|
||
To add a Provider API script using this template, you will need to have Python 3 installed on your machine (preferably version 3.9). You will also need to know the name of the provider and the type of media you are going to collect (`image` or `audio`). | ||
|
||
To add a script for collecting audio data from a provider named "MyProvider", open your terminal and run: | ||
```bash | ||
python3 src/cc_catalog_airflow/templates/create_api_script.py MyProvider -m audio | ||
``` | ||
You should see output similar to this: | ||
```bash | ||
Creating files in path/to/openverse-catalog | ||
API script: src/cc_catalog_airflow/dags/provider_api_scripts/myprovider.py | ||
API script test: src/cc_catalog_airflow/dags/provider_api_scripts/test_myprovider.py | ||
Airflow workflow file: src/cc_catalog_airflow/dags/myprovider_workflow.py | ||
|
||
``` | ||
The following files have been created: | ||
1. Airflow workflow file. You will probably NOT need to edit it. | ||
2. `myprovider.py` script. This is a template that simplifies creating an API provider script by providing the basic structure. The scripts use small and easily-testable functions. Follow the instructions within the script comments, and complete all the TODOs. Make sure to look at sample `.json` files that will be saved for testing. | ||
3. `test_myprovider.py`. This is a skeleton for your tests. Write tests for the functions in your Provider API script, using the `json` files with sample API responses. |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import argparse | ||
from pathlib import Path | ||
|
||
|
||
# Source lines substituted for the bare `media_store_init` marker in a
# template, one per supported media type.
IMAGE_STORE_INIT = 'image_store = ImageStore(provider=PROVIDER)'
AUDIO_STORE_INIT = 'audio_store = AudioStore(provider=PROVIDER)'


def _get_filled_template(template_path, provider, media_type='image'):
    """Return the text of a template file with its placeholders filled in.

    The following markers are substituted, in this order:

    * ``{provider_title_case}`` -> ``provider.title()``
    * ``{provider_upper_case}`` -> ``provider.upper()``
    * ``{provider}`` -> ``provider.lower()``
    * ``media_store_init`` (matched without braces) -> the store
      initialisation line for the media type
    * ``{media_store}`` -> ``'audio_store'`` or ``'image_store'``
    * ``{media_type}`` -> ``media_type``

    :param template_path: path of the template file to read (UTF-8).
    :param provider: provider name as typed by the user.
    :param media_type: ``'audio'`` selects the audio store; any other
        value selects the image store.  ``None`` is treated as the
        default ``'image'``.
    :return: the filled-in template as a string.
    """
    # `None` is what argparse produces when `-m` is omitted.  Without this
    # guard it would fall into the image branch and then crash in
    # `str.replace('{media_type}', None)` with a TypeError.
    if media_type is None:
        media_type = 'image'

    with open(template_path, 'r', encoding='utf8') as template:
        template_string = template.read()

    if media_type == 'audio':
        media_store_init = AUDIO_STORE_INIT
        media_store = 'audio_store'
    else:
        media_store_init = IMAGE_STORE_INIT
        media_store = 'image_store'

    # Applied sequentially: later substitutions see the output of earlier
    # ones, so the order is part of the behaviour and must be kept.
    replacements = (
        ('{provider_title_case}', provider.title()),
        ('{provider_upper_case}', provider.upper()),
        ('{provider}', provider.lower()),
        ('media_store_init', media_store_init),
        ('{media_store}', media_store),
        ('{media_type}', media_type),
    )
    script_string = template_string
    for placeholder, value in replacements:
        script_string = script_string.replace(placeholder, value)

    return script_string
|
||
|
||
def _write_filled_template(output_path, template_path, *template_args):
    """Render ``template_path`` and write the result to ``output_path``.

    ``template_args`` are forwarded unchanged to ``_get_filled_template``.
    """
    # Render BEFORE opening the output file: opening with 'w' truncates it,
    # so a missing or unreadable template must fail first instead of
    # leaving behind an empty (or wiped) script.
    contents = _get_filled_template(template_path, *template_args)
    with open(output_path, 'w', encoding='utf8') as output_file:
        output_file.write(contents)


def fill_template(provider, media_type, templates_path):
    """Create the API script, its test and the workflow file for a provider.

    Three files are generated from the templates in ``templates_path``:

    * ``<dags>/provider_api_scripts/<filename>.py``
    * ``<dags>/provider_api_scripts/test_<filename>.py``
    * ``<dags>/<filename>_workflow.py``

    where ``<dags>`` is the ``dags`` directory next to ``templates_path``
    and ``<filename>`` is the lower-cased provider name with spaces turned
    into underscores.  Existing files are overwritten.  The path of every
    written file is printed relative to the project root (three levels
    above ``templates_path``).

    :param provider: provider name as typed by the user.
    :param media_type: media type passed to the script and test templates.
    :param templates_path: ``pathlib.Path`` of the templates directory.
    """
    project_path = templates_path.parent.parent.parent
    print(f"Creating files in {project_path}")

    dags_path = templates_path.parent / 'dags'
    api_path = dags_path / 'provider_api_scripts'
    filename = provider.replace(" ", '_').lower()

    api_script_path = api_path / f"{filename}.py"
    _write_filled_template(
        api_script_path,
        templates_path / 'template_provider.py_template',
        provider,
        media_type,
    )
    print(f"API script: {api_script_path.relative_to(project_path)}")

    test_script_path = api_path / f"test_{filename}.py"
    _write_filled_template(
        test_script_path,
        templates_path / 'template_test.py_template',
        provider,
        media_type,
    )
    print(f"API script test: {test_script_path.relative_to(project_path)}")

    workflow_path = dags_path / f"{filename}_workflow.py"
    # NOTE(review): the workflow template is rendered without `media_type`,
    # i.e. with the helper's default -- kept as in the original; confirm
    # that the workflow template does not depend on the media type.
    _write_filled_template(
        workflow_path,
        templates_path / 'workflow.py_template',
        provider,
    )
    print("Airflow workflow file: "
          f"{workflow_path.relative_to(project_path)}")
|
||
|
||
def main():
    """Command-line entry point: parse arguments and generate the files.

    Takes one positional argument (the provider name) and an optional
    ``-m/--media`` flag restricted to ``image`` or ``audio``.  When the
    flag is omitted a notice is printed and ``image`` is used.  Files are
    generated from the templates located next to this script.
    """
    arg_parser = argparse.ArgumentParser(
        add_help=True,
        description='Create a new provider API script',
    )
    arg_parser.add_argument(
        "provider",
        help='Create the script for this provider (eg. "Wikimedia").',
    )
    media_help = (
        "Script will collect media of this type"
        " ('audio'/'image'). Default value is 'image'"
    )
    arg_parser.add_argument(
        '-m',
        '--media',
        type=str,
        choices=['image', 'audio'],
        help=media_help,
    )
    parsed = arg_parser.parse_args()

    chosen_media = parsed.media
    if chosen_media not in ('audio', 'image'):
        # argparse leaves the option as None when it is not supplied.
        print("No media type given, assuming it's `image`")
        chosen_media = 'image'

    fill_template(parsed.provider, chosen_media, Path(__file__).parent)


if __name__ == "__main__":
    main()
Oops, something went wrong.