Commit cc11650

fixes

jim-sheldon committed May 18, 2022
1 parent 4a0e9c9 commit cc11650

Showing 8 changed files with 17 additions and 65 deletions.
4 changes: 2 additions & 2 deletions data-serving/scripts/prune-uploads/hooks/aggregate.py

@@ -8,8 +8,8 @@
 
 AWS_REGION = os.getenv("GDH_AGGREGATE_AWS_REGION", "eu-central-1")
 # Job definition names are of the form PREFIX-<env>
-PREFIX = "gdh-map-aggregation"
-JOB_QUEUE = "gdh-map-aggregation"
+PREFIX = os.getenv("JOB_DEF_PREFIX", "gdh-map-aggregation")
+JOB_QUEUE = os.getenv("AGG_JOB_QUEUE", "gdh-map-aggregation-fargate")
 
 
 def run(sources: list[dict[str, Any]], env: str, dry_run: bool = False):
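
The comment above notes that job definition names follow PREFIX-<env>. A minimal sketch of how the now-configurable names come together when submitting a job — submit_aggregation is a hypothetical helper, but the boto3 Batch call mirrors the one this codebase uses:

import os

import boto3

AWS_REGION = os.getenv("GDH_AGGREGATE_AWS_REGION", "eu-central-1")
PREFIX = os.getenv("JOB_DEF_PREFIX", "gdh-map-aggregation")
JOB_QUEUE = os.getenv("AGG_JOB_QUEUE", "gdh-map-aggregation-fargate")


def submit_aggregation(env: str, dry_run: bool = False) -> None:
    # Job definition names are of the form PREFIX-<env>,
    # e.g. gdh-map-aggregation-prod when env is "prod".
    jobdef = f"{PREFIX}-{env}"
    if dry_run:
        return
    batch = boto3.client("batch", region_name=AWS_REGION)
    batch.submit_job(jobName=jobdef, jobDefinition=jobdef, jobQueue=JOB_QUEUE)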
6 changes: 5 additions & 1 deletion data-serving/scripts/prune-uploads/hooks/country_export.py

@@ -3,13 +3,17 @@
 
 from functools import cache
 import logging
+import os
 from typing import Any
 import unicodedata
 
 import boto3
 import pycountry
 
 
+JOB_QUEUE = os.getenv("EXP_JOB_QUEUE", "export-queue")
+
+
 # We do not always use the pycountry names, here's a list of exceptions
 _QUIRKS = {
     "DEMOCRATIC REPUBLIC OF THE CONGO": "CD",
@@ -84,7 +88,7 @@ def run(sources: list[dict[str, Any]], env: str, dry_run: bool = False):
         logging.info(f"Submitting job for {jobdef} ...")
         if not dry_run:
             batch.submit_job(
-                jobName=jobdef, jobDefinition=jobdef, jobQueue="export-queue"
+                jobName=jobdef, jobDefinition=jobdef, jobQueue=JOB_QUEUE
             )
     except Exception as e:
         logging.exception(f"Error occurred while trying to submit {jobdef}")
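
Since JOB_QUEUE is resolved once at module import, EXP_JOB_QUEUE has to be set in the environment before the hook is loaded. A sketch of overriding the queue for a test run — the queue name and import path here are assumptions for illustration, not names from this repository:

import os

# Set the override before the hook module is imported;
# setting it afterwards has no effect on JOB_QUEUE.
os.environ["EXP_JOB_QUEUE"] = "export-queue-dev"  # hypothetical queue name

from hooks import country_export  # JOB_QUEUE now reads "export-queue-dev"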
58 changes: 6 additions & 52 deletions docs/data_landscape.md

@@ -6,68 +6,22 @@ What we have, and where it's stored. This is organised by somewhat-physical, som
 
 MongoDB Atlas stores line list case data (including revision history), user records, data ingestion source records including ingestion histories, maps of Mapbox administrative area codes to names and front-end session tokens for the line list portal. There are two projects:
 
-1. Covid19Map-Dev has one cluster, cluster-0, which is hosted in AWS us-east-1. It holds development data which is mostly based on historical snapshots of production.
-2. Covid19Map-Prod has one cluster, covid19-map-cluster01, also hosted in AWS us-east-1. It holds production data.
+1. Covid19Map-Dev has one cluster, cluster-0, which is hosted in AWS eu-central-1. It holds development data which is mostly based on historical snapshots of production.
+2. Covid19Map-Prod has one cluster, covid19-map-cluster01, also hosted in AWS eu-central-1. It holds production data.
 
 ## S3 stores
 
-Various buckets (data containers) are used for both temporary and long-term storage of G.h data. Unless otherwise noted, all S3 buckets are in eu-central-1.
-
-### Unknown use
-
-* config-bucket-612888738066 (contains logs relating to secrets management of the AWS Lambda infrastructure. This doesn't only relate to the old ADI implementation, so check whether this is still needed. In us-east-2)
-* dev-vocviz-sample (old map code, probably not required, in us-east-2)
-* ncov19 (us-east-1)
-
-### Aggregates
-
-Aggregated data from the line list used by the map visualisation.
-
-* covid-19-aggregates
-* covid-19-aggregates-dev
-
-### Export
-
-Country specific (country-) and full (data-) export files in various formats
-
-* covid-19-country-export
-* covid-19-country-export-dev
-* covid-19-data-export
-* covid-19-data-export-dev
-
-### Map
-
-Map is a static site exported to an S3 bucket
-
-* dev-covid-19.global.health (only one of dev/dev-map is used, in us-east-2)
-* dev-map.covid-19.global.health
-* map.covid-19.global.health
-* dev-react-map.covid-19.global.health (us-east-2, should move to dev-map)
-* react-map.covid-19.global.health (should move to map.covid-19.global.health)
-* qa-covid-19.global.health
-
-### Ingestion
-
-* gdh-credentials (used to authenticate against backend, should move to API keys)
-* gdh-sources (raw files downloaded from source URLs, was epid-ingestion-raw)
-
-### Miscellaneous
-
-* gdh-terraform-state-main (terraform state for our stack)
-* gdh-metrics (telemetry on UI and Map)
-* h1n1.global.health (us-east-2, H1N1 map)
+Various buckets (data containers) are used for both temporary and long-term storage of G.h data. All S3 buckets except the one storing terraform state are in eu-central-1.
 
 ## Application logs
 
-All of the "backend" components log to CloudWatch log streams in us-east-1 with no automatic rotation or expiration.
+All of the "backend" components log to CloudWatch log streams in eu-central-1 with no automatic rotation or expiration.
 
 ## Computing servers
 
-The kubernetes cluster (i.e. the backend services for the line list app) runs on four EC2 instances in us-east-1. No application data is stored here.
-
-Ingestion and export both run on AWS Batch "serverless" architecture, both in us-east-1. No application data is stored here.
+The kubernetes cluster (i.e. the backend services for the line list app) runs on Fargate in eu-central-1. No application data is stored here.
 
-Data export has until recently run on AWS Lambda, again no application data is stored here. This is on its way out but mentioned for completeness.
+Ingestion and export both run on AWS Batch in eu-central-1. No application data is stored here.
 
 ## Anything else?
4 changes: 2 additions & 2 deletions ingestion/monitoring/completeness.py

@@ -114,8 +114,8 @@ def setup_logger():
 setup_logger()
 endpoint_url = os.getenv("ENDPOINT_URL")
 objects = data_files(
-    os.getenv("COUNTRY_EXPORT_BUCKET", "covid-19-country-export"),
+    os.getenv("COUNTRY_EXPORT_BUCKET", "covid-19-country-export-eu"),
     endpoint_url=endpoint_url,
 )
 data = completeness_s3_many(objects, endpoint_url)
-upload(data, os.getenv("METRICS_BUCKET", "covid-19-aggregates"), endpoint_url)
+upload(data, os.getenv("METRICS_BUCKET", "covid-19-aggregates-eu"), endpoint_url)
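
The monitoring scripts in this commit share one pattern: bucket names default to the new -eu buckets but can be overridden through the environment, and ENDPOINT_URL can point boto3 at a local S3-compatible store for testing. A rough sketch of that pattern (the listing loop is illustrative, not the script's actual logic):

import os

import boto3

endpoint_url = os.getenv("ENDPOINT_URL")  # e.g. a local S3-compatible endpoint
bucket = os.getenv("COUNTRY_EXPORT_BUCKET", "covid-19-country-export-eu")

s3 = boto3.client("s3", endpoint_url=endpoint_url)
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    for obj in page.get("Contents", []):
        print(obj["Key"], obj["Size"])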
2 changes: 1 addition & 1 deletion ingestion/monitoring/daily_metrics.py

@@ -11,7 +11,7 @@
 import boto3
 
 
-BUCKET = "covid-19-aggregates"
+BUCKET = "covid-19-aggregates-eu"
 WEBHOOK_URL = os.environ.get("SLACK_WEBHOOK_METRICS_URL", None)
 
 logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion ingestion/monitoring/freshness.py

@@ -91,7 +91,7 @@ def setup_logger():
 setup_logger()
 if not (api_key := os.getenv("GDH_API_KEY")):
     raise ValueError("Set GDH_API_KEY to your Global.health API key")
-bucket = os.getenv("BUCKET", "covid-19-aggregates")
+bucket = os.getenv("BUCKET", "covid-19-aggregates-eu")
 s3_endpoint = os.getenv("S3_ENDPOINT")
 instance = os.getenv("GDH_URL", DEFAULT_INSTANCE)
 if sources := fetch_sources(api_key, instance):
@@ -54,7 +54,6 @@ describe('Bulk upload form', function () {
 
         // Case data
         cy.contains('www.bulksource.com');
-        cy.contains('sourceEntryId');
         cy.contains('superuser@test.com');
         cy.contains('Data upload IDs')
             .parent()
@@ -200,10 +200,6 @@ describe('Curator', function () {
                 'www.example.com',
             );
         });
-        cy.get('input[name="caseReference.sourceEntryId"]').should(
-            'have.value',
-            'testSourceEntryID123',
-        );
 
         // Demographics.
         cy.get('input[name="gender"]').should('have.value', 'Female');
@@ -308,7 +304,6 @@ describe('Curator', function () {
         cy.contains('td', 'www.example.com').click({ force: true });
         // Case data.
         cy.contains('www.example.com');
-        cy.contains('testSourceEntryID123');
         cy.contains('superuser@test.com');
         cy.contains('VERIFIED');
         // Demographics.
