From e76435c9e46594f5f5efd5b8727eaad308b02048 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 8 Aug 2023 15:53:03 -0400 Subject: [PATCH 1/5] :bug: Thought I fixed this --- README.md | 2 +- env_local.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 env_local.sh diff --git a/README.md b/README.md index 04787621..2bc4d72d 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ source ./env_local.sh flask db upgrade # Run the flask web application -flask run +./manage.py ``` ## Database diff --git a/env_local.sh b/env_local.sh old mode 100644 new mode 100755 From 5ceede9efcce235fe68ead76c7389a5dec3a5fbf Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 8 Aug 2023 15:53:20 -0400 Subject: [PATCH 2/5] :wrench: Increase default and max page size --- config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index e986da2e..07da194b 100644 --- a/config.py +++ b/config.py @@ -17,9 +17,9 @@ class Config: PG_USER, PG_PASS, PG_HOST, PG_PORT, PG_NAME) # Default number of results per request - DEFAULT_PAGE_LIMIT = 10 + DEFAULT_PAGE_LIMIT = 100 # Determines the maximum number of results per request - MAX_PAGE_LIMIT = 100 + MAX_PAGE_LIMIT = 1000 INDEXD_URL = os.environ.get('INDEXD_URL', None) INDEXD_USER = os.environ.get('INDEXD_USER', 'test') From d81913fd48c64498490e2e39c7075311e27b8965 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Tue, 8 Aug 2023 18:38:04 -0400 Subject: [PATCH 3/5] :arrow_down: Use pyyaml 5.3.1 until cython3 issue fixed See https://github.com/yaml/pyyaml/issues/724 for details --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7aa62515..d34fe5e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,4 @@ boto3==1.7.8 botocore==1.10.8 Jinja2==2.10 requests==2.24.0 -PyYAML==5.4 +PyYAML==5.3.1 From b77f12af52a790c10d706cdd1c0276f050e385f0 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 9 Aug 2023 12:04:34 -0400 Subject: [PATCH 4/5] :pushpin: Pin cython<3.0.0, pyyaml==5.4.0 --- .circleci/config.yml | 1 + Dockerfile | 1 + README.md | 5 +++++ requirements.txt | 1 - 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4880e30a..756d9e78 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,6 +31,7 @@ jobs: command: | python3 -m venv venv . venv/bin/activate + pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" pip install -r dev-requirements.txt pip install -r requirements.txt pip install -e . diff --git a/Dockerfile b/Dockerfile index 20e15cd4..aad32e81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ COPY bin/supervisord.conf /etc/supervisor/conf.d/supervisord.conf # Setup dataservice COPY requirements.txt /app/ +RUN pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" RUN pip install -r /app/requirements.txt COPY manage.py setup.py config.py /app/ diff --git a/README.md b/README.md index 2bc4d72d..e90833eb 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,11 @@ https://realpython.com/intro-to-pyenv/ # Setup python environment and install dependencies pyenv virtualenv 3.7.11 dataservice_venv pyenv local dataservice_venv + +# Important but *temporary* +# See https://github.com/yaml/pyyaml/issues/724 +pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" + pip install -r dev-requirements.txt pip install -r requirements.txt pip install -e . diff --git a/requirements.txt b/requirements.txt index d34fe5e4..18430229 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,3 @@ boto3==1.7.8 botocore==1.10.8 Jinja2==2.10 requests==2.24.0 -PyYAML==5.3.1 From 464900f38d0a0d6861cc227f70466791f61b69db Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Wed, 9 Aug 2023 16:07:45 -0400 Subject: [PATCH 5/5] :white_check_mark: Update tests for limit default, max --- tests/conftest.py | 3 + .../test_genomic_file_resources.py | 6 +- tests/study_file/test_study_file_resources.py | 7 +- tests/test_filter_params.py | 16 +-- tests/test_pagination.py | 104 +++++++++--------- 5 files changed, 70 insertions(+), 66 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 93dc9f05..0902c949 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -69,6 +69,9 @@ ENDPOINT_ENTITY_MAP = {v: k for k, v in ENTITY_ENDPOINT_MAP.items()} ENDPOINTS = list(ENTITY_ENDPOINT_MAP.values()) +DEFAULT_PAGE_LIMIT = 100 +MAX_PAGE_LIMIT = 1000 + def _create_entity_params(filepath): # Read data from file diff --git a/tests/genomic_file/test_genomic_file_resources.py b/tests/genomic_file/test_genomic_file_resources.py index b372592c..6c222b46 100644 --- a/tests/genomic_file/test_genomic_file_resources.py +++ b/tests/genomic_file/test_genomic_file_resources.py @@ -15,7 +15,7 @@ from dataservice.api.sequencing_experiment.models import SequencingExperiment from dataservice.api.genomic_file.models import GenomicFile from tests.conftest import make_entities -from tests.conftest import ENTITY_TOTAL +from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT from tests.mocks import MockIndexd @@ -159,8 +159,8 @@ def test_get_list(client, indexd, genomic_files): assert resp['_status']['code'] == 200 assert resp['total'] == GenomicFile.query.count() - assert len(resp['results']) == 10 - assert indexd.get.call_count == 10 + assert len(resp['results']) == DEFAULT_PAGE_LIMIT + assert indexd.get.call_count == DEFAULT_PAGE_LIMIT def test_get_list_with_missing_files(client, indexd, genomic_files): diff --git a/tests/study_file/test_study_file_resources.py b/tests/study_file/test_study_file_resources.py index e16ae756..adff3bf8 100644 --- a/tests/study_file/test_study_file_resources.py +++ b/tests/study_file/test_study_file_resources.py @@ -10,8 +10,7 @@ from dataservice.api.study_file.models import StudyFile from dataservice.api.study.models import Study from tests.utils import FlaskTestCase -from tests.conftest import make_entities -from tests.conftest import ENTITY_TOTAL +from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT, make_entities from unittest.mock import MagicMock, patch from tests.mocks import MockIndexd @@ -132,8 +131,8 @@ def test_get_list(client, indexd, study_files): assert resp['_status']['code'] == 200 assert resp['total'] == StudyFile.query.count() - assert len(resp['results']) == 10 - assert indexd.get.call_count == 10 + assert len(resp['results']) == DEFAULT_PAGE_LIMIT + assert indexd.get.call_count == DEFAULT_PAGE_LIMIT def test_get_list_with_missing_files(client, indexd, study_files): diff --git a/tests/test_filter_params.py b/tests/test_filter_params.py index c0ee9d86..43f79431 100644 --- a/tests/test_filter_params.py +++ b/tests/test_filter_params.py @@ -6,7 +6,9 @@ from tests.conftest import ( ENTITY_ENDPOINT_MAP, ENDPOINTS, - ENTITY_PARAMS + ENTITY_PARAMS, + MAX_PAGE_LIMIT, + DEFAULT_PAGE_LIMIT ) @@ -56,8 +58,8 @@ def test_filter_params(self, client, entities, model): # Check status code assert response.status_code == 200 - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total # All results have correct field values for result in resp['results']: @@ -122,8 +124,8 @@ def test_unknown_filter_params(self, client, entities, model): assert response.status_code == 200 # Check content resp = json.loads(response.data.decode('utf-8')) - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total @pytest.mark.parametrize('field', ['created_at', 'modified_at']) @@ -154,8 +156,8 @@ def test_generated_date_filters(self, client, entities, model, field): assert response.status_code == 200 # Check content resp = json.loads(response.data.decode('utf-8')) - assert resp['limit'] == 10 - assert len(resp['results']) == min(expected_total, 10) + assert resp['limit'] == DEFAULT_PAGE_LIMIT + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) assert resp['total'] == expected_total # All results have correct field values for result in resp['results']: diff --git a/tests/test_pagination.py b/tests/test_pagination.py index b0e7cb4d..0521de99 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -32,7 +32,7 @@ from unittest.mock import MagicMock, patch from tests.mocks import MockIndexd -from tests.conftest import ENDPOINTS +from tests.conftest import ENDPOINTS, MAX_PAGE_LIMIT, DEFAULT_PAGE_LIMIT pytest_plugins = ['tests.mocks'] @@ -46,7 +46,7 @@ class TestPagination: def participants(client): # Add a bunch of studies for pagination - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): params = { 'external_id': f'Study_{i}', 'short_code': f'KF_{i}' @@ -54,26 +54,26 @@ def participants(client): s = Study(**params) db.session.add(s) - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): ca = CavaticaApp(name='app', revision=0) db.session.add(ca) # Add a bunch of study files s0 = Study.query.filter_by(external_id='Study_0').one() s1 = Study.query.filter_by(external_id='Study_1').one() - for i in range(101): + for i in range(MAX_PAGE_LIMIT+1): sf = StudyFile(file_name='blah', study_id=s0.kf_id) db.session.add(sf) # Add a bunch of investigators - for _ in range(102): + for _ in range(MAX_PAGE_LIMIT + 2): inv = Investigator(name='test') inv.studies.extend([s0, s1]) db.session.add(inv) # Add a bunch of families families = [] - for i in range(101): + for i in range(MAX_PAGE_LIMIT + 1): families.append(Family(external_id='Family_{}'.format(i))) db.session.add_all(families) db.session.flush() @@ -82,9 +82,9 @@ def participants(client): f0 = Family.query.filter_by(external_id='Family_0').one() f1 = Family.query.filter_by(external_id='Family_1').one() seq_cen = None - for i in range(102): - f = f0 if i < 50 else f1 - s = s0 if i < 50 else s1 + for i in range(MAX_PAGE_LIMIT + 2): + f = f0 if i < int(MAX_PAGE_LIMIT/2) else f1 + s = s0 if i < int(MAX_PAGE_LIMIT/2) else s1 data = { 'external_id': "test", 'is_proband': True, @@ -168,26 +168,26 @@ def participants(client): db.session.commit() @pytest.mark.parametrize('endpoint, expected_total', [ - ('/participants', 50), - ('/study-files', 101), + ('/participants', int(MAX_PAGE_LIMIT/2)), + ('/study-files', MAX_PAGE_LIMIT+1), ('/investigators', 1), - ('/biospecimens', 50), - ('/sequencing-experiments', 50), - ('/diagnoses', 50), - ('/outcomes', 50), - ('/phenotypes', 50), + ('/biospecimens', int(MAX_PAGE_LIMIT/2)), + ('/sequencing-experiments', int(MAX_PAGE_LIMIT/2)), + ('/diagnoses', int(MAX_PAGE_LIMIT/2)), + ('/outcomes', int(MAX_PAGE_LIMIT/2)), + ('/phenotypes', int(MAX_PAGE_LIMIT/2)), ('/families', 1), - ('/family-relationships', 50), - ('/genomic-files', 50), - ('/read-groups', 50), + ('/family-relationships', int(MAX_PAGE_LIMIT/2)), + ('/genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/read-groups', int(MAX_PAGE_LIMIT/2)), ('/sequencing-centers', 1), ('/cavatica-apps', 1), - ('/tasks', 50), - ('/task-genomic-files', 50), - ('/read-group-genomic-files', 50), - ('/sequencing-experiment-genomic-files', 50), - ('/biospecimen-genomic-files', 50), - ('/biospecimen-diagnoses', 50) + ('/tasks', int(MAX_PAGE_LIMIT/2)), + ('/task-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/read-group-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/sequencing-experiment-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/biospecimen-genomic-files', int(MAX_PAGE_LIMIT/2)), + ('/biospecimen-diagnoses', int(MAX_PAGE_LIMIT/2)) ]) def test_study_filter(self, client, participants, endpoint, expected_total): @@ -198,8 +198,8 @@ def test_study_filter(self, client, participants, endpoint = '{}?study_id={}'.format(endpoint, s.kf_id) resp = client.get(endpoint) resp = json.loads(resp.data.decode('utf-8')) - assert len(resp['results']) == min(expected_total, 10) - assert resp['limit'] == 10 + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == expected_total ids_seen = [] @@ -233,7 +233,7 @@ def test_non_exist_study_filter(self, client, participants, resp = json.loads(resp.data.decode('utf-8')) assert len(resp['results']) == 0 - assert resp['limit'] == 10 + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == 0 @pytest.mark.parametrize('study_id', ['blah', 3489, 'PT_00001111']) @@ -261,32 +261,32 @@ def test_invalid_study_filter(self, client, participants, assert 'study_id' in resp['_status']['message'] @pytest.mark.parametrize('endpoint, expected_total', [ - ('/studies', 101), - ('/investigators', 102), - ('/participants', 102), - ('/outcomes', 102), - ('/phenotypes', 102), - ('/diagnoses', 102), - ('/family-relationships', 101), - ('/study-files', 101), - ('/families', 101), - ('/read-groups', 102), + ('/studies', MAX_PAGE_LIMIT+1), + ('/investigators', MAX_PAGE_LIMIT+2), + ('/participants', MAX_PAGE_LIMIT+2), + ('/outcomes', MAX_PAGE_LIMIT+2), + ('/phenotypes', MAX_PAGE_LIMIT+2), + ('/diagnoses', MAX_PAGE_LIMIT+2), + ('/family-relationships', MAX_PAGE_LIMIT+1), + ('/study-files', MAX_PAGE_LIMIT+1), + ('/families', MAX_PAGE_LIMIT+1), + ('/read-groups', MAX_PAGE_LIMIT+2), ('/sequencing-centers', 1), - ('/cavatica-apps', 101), - ('/tasks', 102), - ('/task-genomic-files', 102), - ('/read-group-genomic-files', 102), - ('/sequencing-experiment-genomic-files', 102), - ('/biospecimen-genomic-files', 102), - ('/biospecimen-diagnoses', 102) + ('/cavatica-apps', MAX_PAGE_LIMIT+1), + ('/tasks', MAX_PAGE_LIMIT+2), + ('/task-genomic-files', MAX_PAGE_LIMIT+2), + ('/read-group-genomic-files', MAX_PAGE_LIMIT+2), + ('/sequencing-experiment-genomic-files', MAX_PAGE_LIMIT+2), + ('/biospecimen-genomic-files', MAX_PAGE_LIMIT+2), + ('/biospecimen-diagnoses', MAX_PAGE_LIMIT+2) ]) def test_pagination(self, client, participants, endpoint, expected_total): """ Test pagination of resource """ resp = client.get(endpoint) resp = json.loads(resp.data.decode('utf-8')) - assert len(resp['results']) == min(expected_total, 10) - assert resp['limit'] == 10 + assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT) + assert resp['limit'] == DEFAULT_PAGE_LIMIT assert resp['total'] == expected_total ids_seen = [] @@ -311,7 +311,7 @@ def test_same_created_at(self, client): """ created_at = datetime.now() studies = [Study(external_id=f'Study_{i}', short_code=f'KF-ST{i}') - for i in range(50)] + for i in range(int(MAX_PAGE_LIMIT/2))] db.session.add_all(studies) db.session.flush() for study in studies: @@ -343,12 +343,12 @@ def test_limit(self, client, participants, endpoint): assert len(response['results']) == 5 assert response['limit'] == 5 - response = client.get(endpoint + '?limit=200') + response = client.get(endpoint + f'?limit={MAX_PAGE_LIMIT * 2}') response = json.loads(response.data.decode('utf-8')) if '/sequencing-centers' in endpoint: assert len(response['results']) == 1 else: - assert len(response['results']) == 100 + assert len(response['results']) == MAX_PAGE_LIMIT # Check unexpected limit param uses default response = client.get(endpoint + '?limit=dog') @@ -356,8 +356,8 @@ def test_limit(self, client, participants, endpoint): if '/sequencing-centers' in endpoint: assert len(response['results']) == 1 else: - assert len(response['results']) == 10 - assert response['limit'] == 10 + assert len(response['results']) == DEFAULT_PAGE_LIMIT + assert response['limit'] == DEFAULT_PAGE_LIMIT @pytest.mark.parametrize('endpoint', [ (ept) for ept in ENDPOINTS