From e76435c9e46594f5f5efd5b8727eaad308b02048 Mon Sep 17 00:00:00 2001
From: Natasha Singh <dukedesi22@gmail.com>
Date: Tue, 8 Aug 2023 15:53:03 -0400
Subject: [PATCH 1/5] :bug: Thought I fixed this

---
 README.md    | 2 +-
 env_local.sh | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 mode change 100644 => 100755 env_local.sh

diff --git a/README.md b/README.md
index 04787621..2bc4d72d 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,7 @@ source ./env_local.sh
 flask db upgrade
 
 # Run the flask web application
-flask run
+./manage.py
 ```
 
 ## Database
diff --git a/env_local.sh b/env_local.sh
old mode 100644
new mode 100755

From 5ceede9efcce235fe68ead76c7389a5dec3a5fbf Mon Sep 17 00:00:00 2001
From: Natasha Singh <dukedesi22@gmail.com>
Date: Tue, 8 Aug 2023 15:53:20 -0400
Subject: [PATCH 2/5] :wrench: Increase default and max page size

---
 config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.py b/config.py
index e986da2e..07da194b 100644
--- a/config.py
+++ b/config.py
@@ -17,9 +17,9 @@ class Config:
         PG_USER, PG_PASS, PG_HOST, PG_PORT, PG_NAME)
 
     # Default number of results per request
-    DEFAULT_PAGE_LIMIT = 10
+    DEFAULT_PAGE_LIMIT = 100
     # Determines the maximum number of results per request
-    MAX_PAGE_LIMIT = 100
+    MAX_PAGE_LIMIT = 1000
 
     INDEXD_URL = os.environ.get('INDEXD_URL', None)
     INDEXD_USER = os.environ.get('INDEXD_USER', 'test')

From d81913fd48c64498490e2e39c7075311e27b8965 Mon Sep 17 00:00:00 2001
From: Natasha Singh <dukedesi22@gmail.com>
Date: Tue, 8 Aug 2023 18:38:04 -0400
Subject: [PATCH 3/5] :arrow_down: Use pyyaml 5.3.1 until cython3 issue fixed

See https://github.com/yaml/pyyaml/issues/724 for details
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 7aa62515..d34fe5e4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,4 +24,4 @@ boto3==1.7.8
 botocore==1.10.8
 Jinja2==2.10
 requests==2.24.0
-PyYAML==5.4
+PyYAML==5.3.1

From b77f12af52a790c10d706cdd1c0276f050e385f0 Mon Sep 17 00:00:00 2001
From: Natasha Singh <dukedesi22@gmail.com>
Date: Wed, 9 Aug 2023 12:04:34 -0400
Subject: [PATCH 4/5] :pushpin: Pin cython<3.0.0, pyyaml==5.4.0

---
 .circleci/config.yml | 1 +
 Dockerfile           | 1 +
 README.md            | 5 +++++
 requirements.txt     | 1 -
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4880e30a..756d9e78 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,6 +31,7 @@ jobs:
           command: |
             python3 -m venv venv
             . venv/bin/activate
+            pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0"
             pip install -r dev-requirements.txt
             pip install -r requirements.txt
             pip install -e .
diff --git a/Dockerfile b/Dockerfile
index 20e15cd4..aad32e81 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,6 +27,7 @@ COPY        bin/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 
 # Setup dataservice
 COPY        requirements.txt /app/
+RUN         pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0" 
 RUN         pip install -r /app/requirements.txt
 
 COPY        manage.py setup.py config.py /app/
diff --git a/README.md b/README.md
index 2bc4d72d..e90833eb 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,11 @@ https://realpython.com/intro-to-pyenv/
 # Setup python environment and install dependencies
 pyenv virtualenv 3.7.11 dataservice_venv
 pyenv local dataservice_venv
+
+# Important but *temporary*
+# See https://github.com/yaml/pyyaml/issues/724
+pip install "cython<3.0.0" && pip install --no-build-isolation "pyyaml==5.4.0"
+
 pip install -r dev-requirements.txt
 pip install -r requirements.txt
 pip install -e .
diff --git a/requirements.txt b/requirements.txt
index d34fe5e4..18430229 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,4 +24,3 @@ boto3==1.7.8
 botocore==1.10.8
 Jinja2==2.10
 requests==2.24.0
-PyYAML==5.3.1

From 464900f38d0a0d6861cc227f70466791f61b69db Mon Sep 17 00:00:00 2001
From: Natasha Singh <dukedesi22@gmail.com>
Date: Wed, 9 Aug 2023 16:07:45 -0400
Subject: [PATCH 5/5] :white_check_mark: Update tests for limit default, max

---
 tests/conftest.py                             |   3 +
 .../test_genomic_file_resources.py            |   6 +-
 tests/study_file/test_study_file_resources.py |   7 +-
 tests/test_filter_params.py                   |  16 +--
 tests/test_pagination.py                      | 104 +++++++++---------
 5 files changed, 70 insertions(+), 66 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 93dc9f05..0902c949 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -69,6 +69,9 @@
 ENDPOINT_ENTITY_MAP = {v: k for k, v in ENTITY_ENDPOINT_MAP.items()}
 ENDPOINTS = list(ENTITY_ENDPOINT_MAP.values())
 
+DEFAULT_PAGE_LIMIT = 100
+MAX_PAGE_LIMIT = 1000
+
 
 def _create_entity_params(filepath):
     # Read data from file
diff --git a/tests/genomic_file/test_genomic_file_resources.py b/tests/genomic_file/test_genomic_file_resources.py
index b372592c..6c222b46 100644
--- a/tests/genomic_file/test_genomic_file_resources.py
+++ b/tests/genomic_file/test_genomic_file_resources.py
@@ -15,7 +15,7 @@
 from dataservice.api.sequencing_experiment.models import SequencingExperiment
 from dataservice.api.genomic_file.models import GenomicFile
 from tests.conftest import make_entities
-from tests.conftest import ENTITY_TOTAL
+from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT
 from tests.mocks import MockIndexd
 
 
@@ -159,8 +159,8 @@ def test_get_list(client, indexd, genomic_files):
 
     assert resp['_status']['code'] == 200
     assert resp['total'] == GenomicFile.query.count()
-    assert len(resp['results']) == 10
-    assert indexd.get.call_count == 10
+    assert len(resp['results']) == DEFAULT_PAGE_LIMIT
+    assert indexd.get.call_count == DEFAULT_PAGE_LIMIT
 
 
 def test_get_list_with_missing_files(client, indexd, genomic_files):
diff --git a/tests/study_file/test_study_file_resources.py b/tests/study_file/test_study_file_resources.py
index e16ae756..adff3bf8 100644
--- a/tests/study_file/test_study_file_resources.py
+++ b/tests/study_file/test_study_file_resources.py
@@ -10,8 +10,7 @@
 from dataservice.api.study_file.models import StudyFile
 from dataservice.api.study.models import Study
 from tests.utils import FlaskTestCase
-from tests.conftest import make_entities
-from tests.conftest import ENTITY_TOTAL
+from tests.conftest import ENTITY_TOTAL, DEFAULT_PAGE_LIMIT, make_entities
 from unittest.mock import MagicMock, patch
 from tests.mocks import MockIndexd
 
@@ -132,8 +131,8 @@ def test_get_list(client, indexd, study_files):
 
     assert resp['_status']['code'] == 200
     assert resp['total'] == StudyFile.query.count()
-    assert len(resp['results']) == 10
-    assert indexd.get.call_count == 10
+    assert len(resp['results']) == DEFAULT_PAGE_LIMIT
+    assert indexd.get.call_count == DEFAULT_PAGE_LIMIT
 
 
 def test_get_list_with_missing_files(client, indexd, study_files):
diff --git a/tests/test_filter_params.py b/tests/test_filter_params.py
index c0ee9d86..43f79431 100644
--- a/tests/test_filter_params.py
+++ b/tests/test_filter_params.py
@@ -6,7 +6,9 @@
 from tests.conftest import (
     ENTITY_ENDPOINT_MAP,
     ENDPOINTS,
-    ENTITY_PARAMS
+    ENTITY_PARAMS,
+    MAX_PAGE_LIMIT,
+    DEFAULT_PAGE_LIMIT
 )
 
 
@@ -56,8 +58,8 @@ def test_filter_params(self, client, entities, model):
 
         # Check status code
         assert response.status_code == 200
-        assert resp['limit'] == 10
-        assert len(resp['results']) == min(expected_total, 10)
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
+        assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
         assert resp['total'] == expected_total
         # All results have correct field values
         for result in resp['results']:
@@ -122,8 +124,8 @@ def test_unknown_filter_params(self, client, entities, model):
         assert response.status_code == 200
         # Check content
         resp = json.loads(response.data.decode('utf-8'))
-        assert resp['limit'] == 10
-        assert len(resp['results']) == min(expected_total, 10)
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
+        assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
         assert resp['total'] == expected_total
 
     @pytest.mark.parametrize('field', ['created_at', 'modified_at'])
@@ -154,8 +156,8 @@ def test_generated_date_filters(self, client, entities, model, field):
         assert response.status_code == 200
         # Check content
         resp = json.loads(response.data.decode('utf-8'))
-        assert resp['limit'] == 10
-        assert len(resp['results']) == min(expected_total, 10)
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
+        assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
         assert resp['total'] == expected_total
         # All results have correct field values
         for result in resp['results']:
diff --git a/tests/test_pagination.py b/tests/test_pagination.py
index b0e7cb4d..0521de99 100644
--- a/tests/test_pagination.py
+++ b/tests/test_pagination.py
@@ -32,7 +32,7 @@
 
 from unittest.mock import MagicMock, patch
 from tests.mocks import MockIndexd
-from tests.conftest import ENDPOINTS
+from tests.conftest import ENDPOINTS, MAX_PAGE_LIMIT, DEFAULT_PAGE_LIMIT
 
 pytest_plugins = ['tests.mocks']
 
@@ -46,7 +46,7 @@ class TestPagination:
     def participants(client):
 
         # Add a bunch of studies for pagination
-        for i in range(101):
+        for i in range(MAX_PAGE_LIMIT + 1):
             params = {
                 'external_id': f'Study_{i}',
                 'short_code': f'KF_{i}'
@@ -54,26 +54,26 @@ def participants(client):
             s = Study(**params)
             db.session.add(s)
 
-        for i in range(101):
+        for i in range(MAX_PAGE_LIMIT + 1):
             ca = CavaticaApp(name='app', revision=0)
             db.session.add(ca)
 
         # Add a bunch of study files
         s0 = Study.query.filter_by(external_id='Study_0').one()
         s1 = Study.query.filter_by(external_id='Study_1').one()
-        for i in range(101):
+        for i in range(MAX_PAGE_LIMIT+1):
             sf = StudyFile(file_name='blah', study_id=s0.kf_id)
             db.session.add(sf)
 
         # Add a bunch of investigators
-        for _ in range(102):
+        for _ in range(MAX_PAGE_LIMIT + 2):
             inv = Investigator(name='test')
             inv.studies.extend([s0, s1])
             db.session.add(inv)
 
         # Add a bunch of families
         families = []
-        for i in range(101):
+        for i in range(MAX_PAGE_LIMIT + 1):
             families.append(Family(external_id='Family_{}'.format(i)))
         db.session.add_all(families)
         db.session.flush()
@@ -82,9 +82,9 @@ def participants(client):
         f0 = Family.query.filter_by(external_id='Family_0').one()
         f1 = Family.query.filter_by(external_id='Family_1').one()
         seq_cen = None
-        for i in range(102):
-            f = f0 if i < 50 else f1
-            s = s0 if i < 50 else s1
+        for i in range(MAX_PAGE_LIMIT + 2):
+            f = f0 if i < int(MAX_PAGE_LIMIT/2) else f1
+            s = s0 if i < int(MAX_PAGE_LIMIT/2) else s1
             data = {
                 'external_id': "test",
                 'is_proband': True,
@@ -168,26 +168,26 @@ def participants(client):
         db.session.commit()
 
     @pytest.mark.parametrize('endpoint, expected_total', [
-        ('/participants', 50),
-        ('/study-files', 101),
+        ('/participants', int(MAX_PAGE_LIMIT/2)),
+        ('/study-files', MAX_PAGE_LIMIT+1),
         ('/investigators', 1),
-        ('/biospecimens', 50),
-        ('/sequencing-experiments', 50),
-        ('/diagnoses', 50),
-        ('/outcomes', 50),
-        ('/phenotypes', 50),
+        ('/biospecimens', int(MAX_PAGE_LIMIT/2)),
+        ('/sequencing-experiments', int(MAX_PAGE_LIMIT/2)),
+        ('/diagnoses', int(MAX_PAGE_LIMIT/2)),
+        ('/outcomes', int(MAX_PAGE_LIMIT/2)),
+        ('/phenotypes', int(MAX_PAGE_LIMIT/2)),
         ('/families', 1),
-        ('/family-relationships', 50),
-        ('/genomic-files', 50),
-        ('/read-groups', 50),
+        ('/family-relationships', int(MAX_PAGE_LIMIT/2)),
+        ('/genomic-files', int(MAX_PAGE_LIMIT/2)),
+        ('/read-groups', int(MAX_PAGE_LIMIT/2)),
         ('/sequencing-centers', 1),
         ('/cavatica-apps', 1),
-        ('/tasks', 50),
-        ('/task-genomic-files', 50),
-        ('/read-group-genomic-files', 50),
-        ('/sequencing-experiment-genomic-files', 50),
-        ('/biospecimen-genomic-files', 50),
-        ('/biospecimen-diagnoses', 50)
+        ('/tasks', int(MAX_PAGE_LIMIT/2)),
+        ('/task-genomic-files', int(MAX_PAGE_LIMIT/2)),
+        ('/read-group-genomic-files', int(MAX_PAGE_LIMIT/2)),
+        ('/sequencing-experiment-genomic-files', int(MAX_PAGE_LIMIT/2)),
+        ('/biospecimen-genomic-files', int(MAX_PAGE_LIMIT/2)),
+        ('/biospecimen-diagnoses', int(MAX_PAGE_LIMIT/2))
     ])
     def test_study_filter(self, client, participants,
                           endpoint, expected_total):
@@ -198,8 +198,8 @@ def test_study_filter(self, client, participants,
         endpoint = '{}?study_id={}'.format(endpoint, s.kf_id)
         resp = client.get(endpoint)
         resp = json.loads(resp.data.decode('utf-8'))
-        assert len(resp['results']) == min(expected_total, 10)
-        assert resp['limit'] == 10
+        assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
         assert resp['total'] == expected_total
 
         ids_seen = []
@@ -233,7 +233,7 @@ def test_non_exist_study_filter(self, client, participants,
         resp = json.loads(resp.data.decode('utf-8'))
 
         assert len(resp['results']) == 0
-        assert resp['limit'] == 10
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
         assert resp['total'] == 0
 
     @pytest.mark.parametrize('study_id', ['blah', 3489, 'PT_00001111'])
@@ -261,32 +261,32 @@ def test_invalid_study_filter(self, client, participants,
         assert 'study_id' in resp['_status']['message']
 
     @pytest.mark.parametrize('endpoint, expected_total', [
-        ('/studies', 101),
-        ('/investigators', 102),
-        ('/participants', 102),
-        ('/outcomes', 102),
-        ('/phenotypes', 102),
-        ('/diagnoses', 102),
-        ('/family-relationships', 101),
-        ('/study-files', 101),
-        ('/families', 101),
-        ('/read-groups', 102),
+        ('/studies', MAX_PAGE_LIMIT+1),
+        ('/investigators', MAX_PAGE_LIMIT+2),
+        ('/participants', MAX_PAGE_LIMIT+2),
+        ('/outcomes', MAX_PAGE_LIMIT+2),
+        ('/phenotypes', MAX_PAGE_LIMIT+2),
+        ('/diagnoses', MAX_PAGE_LIMIT+2),
+        ('/family-relationships', MAX_PAGE_LIMIT+1),
+        ('/study-files', MAX_PAGE_LIMIT+1),
+        ('/families', MAX_PAGE_LIMIT+1),
+        ('/read-groups', MAX_PAGE_LIMIT+2),
         ('/sequencing-centers', 1),
-        ('/cavatica-apps', 101),
-        ('/tasks', 102),
-        ('/task-genomic-files', 102),
-        ('/read-group-genomic-files', 102),
-        ('/sequencing-experiment-genomic-files', 102),
-        ('/biospecimen-genomic-files', 102),
-        ('/biospecimen-diagnoses', 102)
+        ('/cavatica-apps', MAX_PAGE_LIMIT+1),
+        ('/tasks', MAX_PAGE_LIMIT+2),
+        ('/task-genomic-files', MAX_PAGE_LIMIT+2),
+        ('/read-group-genomic-files', MAX_PAGE_LIMIT+2),
+        ('/sequencing-experiment-genomic-files', MAX_PAGE_LIMIT+2),
+        ('/biospecimen-genomic-files', MAX_PAGE_LIMIT+2),
+        ('/biospecimen-diagnoses', MAX_PAGE_LIMIT+2)
     ])
     def test_pagination(self, client, participants, endpoint, expected_total):
         """ Test pagination of resource """
         resp = client.get(endpoint)
         resp = json.loads(resp.data.decode('utf-8'))
 
-        assert len(resp['results']) == min(expected_total, 10)
-        assert resp['limit'] == 10
+        assert len(resp['results']) == min(expected_total, DEFAULT_PAGE_LIMIT)
+        assert resp['limit'] == DEFAULT_PAGE_LIMIT
         assert resp['total'] == expected_total
 
         ids_seen = []
@@ -311,7 +311,7 @@ def test_same_created_at(self, client):
         """
         created_at = datetime.now()
         studies = [Study(external_id=f'Study_{i}', short_code=f'KF-ST{i}')
-                   for i in range(50)]
+                   for i in range(int(MAX_PAGE_LIMIT/2))]
         db.session.add_all(studies)
         db.session.flush()
         for study in studies:
@@ -343,12 +343,12 @@ def test_limit(self, client, participants, endpoint):
             assert len(response['results']) == 5
         assert response['limit'] == 5
 
-        response = client.get(endpoint + '?limit=200')
+        response = client.get(endpoint + f'?limit={MAX_PAGE_LIMIT * 2}')
         response = json.loads(response.data.decode('utf-8'))
         if '/sequencing-centers' in endpoint:
             assert len(response['results']) == 1
         else:
-            assert len(response['results']) == 100
+            assert len(response['results']) == MAX_PAGE_LIMIT
 
         # Check unexpected limit param uses default
         response = client.get(endpoint + '?limit=dog')
@@ -356,8 +356,8 @@ def test_limit(self, client, participants, endpoint):
         if '/sequencing-centers' in endpoint:
             assert len(response['results']) == 1
         else:
-            assert len(response['results']) == 10
-        assert response['limit'] == 10
+            assert len(response['results']) == DEFAULT_PAGE_LIMIT
+        assert response['limit'] == DEFAULT_PAGE_LIMIT
 
     @pytest.mark.parametrize('endpoint', [
         (ept) for ept in ENDPOINTS