diff --git a/.circleci/config.yml b/.circleci/config.yml index 4880e30a1..756d9e78e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,7 @@ jobs: command: | python3 -m venv venv . venv/bin/activate + pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" pip install -r dev-requirements.txt pip install -r requirements.txt pip install -e . diff --git a/Dockerfile b/Dockerfile index 20e15cd48..aad32e81a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ COPY bin/supervisord.conf /etc/supervisor/conf.d/supervisord.conf # Setup dataservice COPY requirements.txt /app/ +RUN pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" RUN pip install -r /app/requirements.txt COPY manage.py setup.py config.py /app/ diff --git a/README.md b/README.md index 04787621c..e90833ebd 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,11 @@ https://realpython.com/intro-to-pyenv/ # Setup python environment and install dependencies pyenv virtualenv 3.7.11 dataservice_venv pyenv local dataservice_venv + +# Important but *temporary* +# See https://github.com/yaml/pyyaml/issues/724 +pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" + pip install -r dev-requirements.txt pip install -r requirements.txt pip install -e . @@ -103,7 +108,7 @@ source ./env_local.sh flask db upgrade # Run the flask web application -flask run +./manage.py ``` ## Database diff --git a/config.py b/config.py index e986da2e0..07da194b7 100644 --- a/config.py +++ b/config.py @@ -17,9 +17,9 @@ class Config: PG_USER, PG_PASS, PG_HOST, PG_PORT, PG_NAME) # Default number of results per request - DEFAULT_PAGE_LIMIT = 10 + DEFAULT_PAGE_LIMIT = 100 # Determines the maximum number of results per request - MAX_PAGE_LIMIT = 100 + MAX_PAGE_LIMIT = 1000 INDEXD_URL = os.environ.get('INDEXD_URL', None) INDEXD_USER = os.environ.get('INDEXD_USER', 'test') diff --git a/env_local.sh b/env_local.sh old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt index 7aa62515b..184302297 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,3 @@ boto3==1.7.8 botocore==1.10.8 Jinja2==2.10 requests==2.24.0 -PyYAML==5.4 diff --git a/tests/conftest.py b/tests/conftest.py index 93dc9f056..0902c9494 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -69,6 +69,9 @@ ENDPOINT_ENTITY_MAP = {v: k for k, v in ENTITY_ENDPOINT_MAP.items()} ENDPOINTS = list(ENTITY_ENDPOINT_MAP.values()) +DEFAULT_PAGE_LIMIT = 100 +MAX_PAGE_LIMIT = 1000 + def _create_entity_params(filepath): # Read data from file diff --git a/tests/genomic_file/test_genomic_file_resources.py b/tests/genomic_file/test_genomic_file_resources.py index b372592cc..6c222b463 100644 --- a/tests/genomic_file/test_genomic_file_resources.py +++ b/tests/genomic_file/test_genomic_file_resources.py @@ -15,7 +15,7 @@ from dataservice.api.sequencing_experiment.models import SequencingExperiment from dataservice.api.genomic_file.models import GenomicFile from tests.conftest import make_entities -from tests.conftest import ENTITY_TOTAL +from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT from tests.mocks import MockIndexd @@ -159,8 +159,8 @@ def test_get_list(client, indexd, genomic_files): assert resp['_status']['code'] == 200 assert resp['total'] == GenomicFile.query.count() - assert len(resp['results']) == 10 - assert indexd.get.call_count == 10 + assert len(resp['results']) == DEFAULT_PAGE_LIMIT + assert indexd.get.call_count == DEFAULT_PAGE_LIMIT def test_get_list_with_missing_files(client, indexd, genomic_files): diff --git a/tests/study_file/test_study_file_resources.py b/tests/study_file/test_study_file_resources.py index e16ae7569..adff3bf84 100644 --- a/tests/study_file/test_study_file_resources.py +++ b/tests/study_file/test_study_file_resources.py @@ -10,8 +10,7 @@ from dataservice.api.study_file.models import StudyFile from dataservice.api.study.models import Study from tests.utils import FlaskTestCase -from tests.conftest import make_entities -from tests.conftest import ENTITY_TOTAL +from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT, make_entities from unittest.mock import MagicMock, patch from tests.mocks import MockIndexd @@ -132,8 +131,8 @@ def test_get_list(client, indexd, study_files): assert resp['_status']['code'] == 200 assert resp['total'] == StudyFile.query.count() - assert len(resp['results']) == 10 - assert indexd.get.call_count == 10 + assert len(resp['results']) == DEFAULT_PAGE_LIMIT + assert indexd.get.call_count == DEFAULT_PAGE_LIMIT def test_get_list_with_missing_files(client, indexd, study_files): diff --git a/tests/test_filter_params.py b/tests/test_filter_params.py index c0ee9d865..43f794315 100644 --- a/tests/test_filter_params.py +++ b/tests/test_filter_params.py @@ -6,7 +6,9 @@ from tests.conftest import ( ENTITY_ENDPOINT_MAP, ENDPOINTS, - ENTITY_PARAMS + ENTITY_PARAMS, + MAX_PAGE_LIMIT, + DEFAULT_PAGE_LIMIT ) @@ -56,8 +58,8 @@ def test_filter_params(self, client, entities, model): # Check status code assert response.status_code == 200 - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total # All results have correct field values for result in resp['results']: @@ -122,8 +124,8 @@ def test_unknown_filter_params(self, client, entities, model): assert response.status_code == 200 # Check content resp = json.loads(response.data.decode('utf-8')) - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total @pytest.mark.parametrize('field', ['created_at', 'modified_at']) @@ -154,8 +156,8 @@ def test_generated_date_filters(self, client, entities, model, field): assert response.status_code == 200 # Check content resp = json.loads(response.data.decode('utf-8')) - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total # All results have correct field values for result in resp['results']: diff --git a/tests/test_pagination.py b/tests/test_pagination.py index b0e7cb4de..0521de998 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -32,7 +32,7 @@ from unittest.mock import MagicMock, patch from tests.mocks import MockIndexd -from tests.conftest import ENDPOINTS +from tests.conftest import ENDPOINTS, MAX_PAGE_LIMIT, DEFAULT_PAGE_LIMIT pytest_plugins = ['tests.mocks'] @@ -46,7 +46,7 @@ class TestPagination: def participants(client): # Add a bunch of studies for pagination - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): params = { 'external_id': f'Study_{i}', 'short_code': f'KF_{i}' @@ -54,26 +54,26 @@ def participants(client): s = Study(**params) db.session.add(s) - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): ca = CavaticaApp(name='app', revision=0) db.session.add(ca) # Add a bunch of study files s0 = Study.query.filter_by(external_id='Study_0').one() s1 = Study.query.filter_by(external_id='Study_1').one() - for i in range(101): + for i in range(MAX_PAGE_LIMIT+1): sf = StudyFile(file_name='blah', study_id=s0.kf_id) db.session.add(sf) # Add a bunch of investigators - for _ in range(102): + for _ in range(MAX_PAGE_LIMIT + 2): inv = Investigator(name='test') inv.studies.extend([s0, s1]) db.session.add(inv) # Add a bunch of families families = [] - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): families.append(Family(external_id='Family_{}'.format(i))) db.session.add_all(families) db.session.flush() @@ -82,9 +82,9 @@ def participants(client): f0 = Family.query.filter_by(external_id='Family_0').one() f1 = Family.query.filter_by(external_id='Family_1').one() seq_cen = None - for i in range(102): - f = f0 if i < 50 else f1 - s = s0 if i < 50 else s1 + for i in range(MAX_PAGE_LIMIT + 2): + f = f0 if i < int(MAX_PAGE_LIMIT/2) else f1 + s = s0 if i < int(MAX_PAGE_LIMIT/2) else s1 data = { 'external_id': "test", 'is_proband': True, @@ -168,26 +168,26 @@ def participants(client): db.session.commit() @pytest.mark.parametrize('endpoint, expected_total', [ - ('/participants', 50), - ('/study-files', 101), + ('/participants', int(MAX_PAGE_LIMIT/2)), + ('/study-files', MAX_PAGE_LIMIT+1), ('/investigators', 1), - ('/biospecimens', 50), - ('/sequencing-experiments', 50), - ('/diagnoses', 50), - ('/outcomes', 50), - ('/phenotypes', 50), + ('/biospecimens', int(MAX_PAGE_LIMIT/2)), + ('/sequencing-experiments', int(MAX_PAGE_LIMIT/2)), + ('/diagnoses', int(MAX_PAGE_LIMIT/2)), + ('/outcomes', int(MAX_PAGE_LIMIT/2)), + ('/phenotypes', int(MAX_PAGE_LIMIT/2)), ('/families', 1), - ('/family-relationships', 50), - ('/genomic-files', 50), - ('/read-groups', 50), + ('/family-relationships', int(MAX_PAGE_LIMIT/2)), + ('/genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/read-groups', int(MAX_PAGE_LIMIT/2)), ('/sequencing-centers', 1), ('/cavatica-apps', 1), - ('/tasks', 50), - ('/task-genomic-files', 50), - ('/read-group-genomic-files', 50), - ('/sequencing-experiment-genomic-files', 50), - ('/biospecimen-genomic-files', 50), - ('/biospecimen-diagnoses', 50) + ('/tasks', int(MAX_PAGE_LIMIT/2)), + ('/task-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/read-group-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/sequencing-experiment-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/biospecimen-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/biospecimen-diagnoses', int(MAX_PAGE_LIMIT/2)) ]) def test_study_filter(self, client, participants, endpoint, expected_total): @@ -198,8 +198,8 @@ def test_study_filter(self, client, participants, endpoint = '{}?study_id={}'.format(endpoint, s.kf_id) resp = client.get(endpoint) resp = json.loads(resp.data.decode('utf-8')) - assert len(resp['results']) == min(expected_total, 10) - assert resp['limit'] == 10 + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == expected_total ids_seen = [] @@ -233,7 +233,7 @@ def test_non_exist_study_filter(self, client, participants, resp = json.loads(resp.data.decode('utf-8')) assert len(resp['results']) == 0 - assert resp['limit'] == 10 + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == 0 @pytest.mark.parametrize('study_id', ['blah', 3489, 'PT_00001111']) @@ -261,32 +261,32 @@ def test_invalid_study_filter(self, client, participants, assert 'study_id' in resp['_status']['message'] @pytest.mark.parametrize('endpoint, expected_total', [ - ('/studies', 101), - ('/investigators', 102), - ('/participants', 102), - ('/outcomes', 102), - ('/phenotypes', 102), - ('/diagnoses', 102), - ('/family-relationships', 101), - ('/study-files', 101), - ('/families', 101), - ('/read-groups', 102), + ('/studies', MAX_PAGE_LIMIT+1), + ('/investigators', MAX_PAGE_LIMIT+2), + ('/participants', MAX_PAGE_LIMIT+2), + ('/outcomes', MAX_PAGE_LIMIT+2), + ('/phenotypes', MAX_PAGE_LIMIT+2), + ('/diagnoses', MAX_PAGE_LIMIT+2), + ('/family-relationships', MAX_PAGE_LIMIT+1), + ('/study-files', MAX_PAGE_LIMIT+1), + ('/families', MAX_PAGE_LIMIT+1), + ('/read-groups', MAX_PAGE_LIMIT+2), ('/sequencing-centers', 1), - ('/cavatica-apps', 101), - ('/tasks', 102), - ('/task-genomic-files', 102), - ('/read-group-genomic-files', 102), - ('/sequencing-experiment-genomic-files', 102), - ('/biospecimen-genomic-files', 102), - ('/biospecimen-diagnoses', 102) + ('/cavatica-apps', MAX_PAGE_LIMIT+1), + ('/tasks', MAX_PAGE_LIMIT+2), + ('/task-genomic-files', MAX_PAGE_LIMIT+2), + ('/read-group-genomic-files', MAX_PAGE_LIMIT+2), + ('/sequencing-experiment-genomic-files', MAX_PAGE_LIMIT+2), + ('/biospecimen-genomic-files', MAX_PAGE_LIMIT+2), + ('/biospecimen-diagnoses', MAX_PAGE_LIMIT+2) ]) def test_pagination(self, client, participants, endpoint, expected_total): """ Test pagination of resource """ resp = client.get(endpoint) resp = json.loads(resp.data.decode('utf-8')) - assert len(resp['results']) == min(expected_total, 10) - assert resp['limit'] == 10 + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == expected_total ids_seen = [] @@ -311,7 +311,7 @@ def test_same_created_at(self, client): """ created_at = datetime.now() studies = [Study(external_id=f'Study_{i}', short_code=f'KF-ST{i}') - for i in range(50)] + for i in range(int(MAX_PAGE_LIMIT/2))] db.session.add_all(studies) db.session.flush() for study in studies: @@ -343,12 +343,12 @@ def test_limit(self, client, participants, endpoint): assert len(response['results']) == 5 assert response['limit'] == 5 - response = client.get(endpoint + '?limit=200') + response = client.get(endpoint + f'?limit={MAX_PAGE_LIMIT * 2}') response = json.loads(response.data.decode('utf-8')) if '/sequencing-centers' in endpoint: assert len(response['results']) == 1 else: - assert len(response['results']) == 100 + assert len(response['results']) == MAX_PAGE_LIMIT # Check unexpected limit param uses default response = client.get(endpoint + '?limit=dog') @@ -356,8 +356,8 @@ def test_limit(self, client, participants, endpoint): if '/sequencing-centers' in endpoint: assert len(response['results']) == 1 else: - assert len(response['results']) == 10 - assert response['limit'] == 10 + assert len(response['results']) == DEFAULT_PAGE_LIMIT + assert response['limit'] == DEFAULT_PAGE_LIMIT @pytest.mark.parametrize('endpoint', [ (ept) for ept in ENDPOINTS