From 3972963bd7223f82bfecfd19e03c45238008ad2b Mon Sep 17 00:00:00 2001 From: Abhishek Dasgupta Date: Mon, 25 Apr 2022 11:29:35 +0100 Subject: [PATCH] export: remove notes field #2673 --- .../scripts/export-data/data_dictionary.txt | 94 +++++++++---------- data-serving/scripts/export-data/fields.txt | 1 - .../test_transform_mongoexport.csv | 6 +- .../test_transform_mongoexport_expected.csv | 6 +- .../test_transform_mongoexport_expected.json | 4 +- .../test_transform_mongoexport_expected.tsv | 6 +- .../test_transform_mongoexport_header.csv | 2 +- data-serving/scripts/export-data/transform.py | 3 +- 8 files changed, 57 insertions(+), 65 deletions(-) diff --git a/data-serving/scripts/export-data/data_dictionary.txt b/data-serving/scripts/export-data/data_dictionary.txt index 3b9e85223..83f5d04d3 100644 --- a/data-serving/scripts/export-data/data_dictionary.txt +++ b/data-serving/scripts/export-data/data_dictionary.txt @@ -168,65 +168,59 @@ LOCATION Name of the place this location refers to (example: Boston Children's Hospital) -NOTES - -34. notes - - Notes from upstream source - PATHOGENS -35. pathogens [] +34. pathogens [] Pathogens other than SARS-CoV-2 PRE-EXISTING CONDITIONS -36. preexistingConditions.hasPreexistingConditions +35. preexistingConditions.hasPreexistingConditions Whether the patient has pre-existing conditions Boolean: True | False -37. preexistingConditions.values [] +36. preexistingConditions.values [] List of pre-existing conditions REVISION METADATA -38. revisionMetadata.creationMetadata.date +37. revisionMetadata.creationMetadata.date Date this case was first created -39. revisionMetadata.creationMetadata.notes +38. revisionMetadata.creationMetadata.notes Notes added by the curator for this case -40. revisionMetadata.editMetadata.date +39. revisionMetadata.editMetadata.date Date this case was last edited -41. revisionMetadata.editMetadata.notes +40. revisionMetadata.editMetadata.notes Notes added by the curator for last edit -42. revisionMetadata.revisionNumber +41. revisionMetadata.revisionNumber Revision number of the case (positive integer) SGTF -43. SGTF +42. SGTF S-Gene Target failure (0 = no deletion, 1 = deletion (S-)) SYMPTOMS -44. symptoms.status +43. symptoms.status Symptom status (Asymptomatic | Symptomatic | Presymptomatic | null) -45. symptoms.values [] +44. symptoms.values [] List of symptoms @@ -234,96 +228,96 @@ TRANSMISSION How this case got infected and by who if known -46. transmission.linkedCaseIds [] +45. transmission.linkedCaseIds [] UUID of a related case in the system -47. transmission.places [] +46. transmission.places [] Places where transmission occurred -48. transmission.routes [] +47. transmission.routes [] Routes of transmission TRAVEL HISTORY -49. travelHistory.travel.dateRange.end -50. travelHistory.travel.dateRange.start +48. travelHistory.travel.dateRange.end +49. travelHistory.travel.dateRange.start Start and end dates for travel history -51. travelHistory.travel.location.administrativeAreaLevel1 [] -52. travelHistory.travel.location.administrativeAreaLevel2 [] -53. travelHistory.travel.location.administrativeAreaLevel3 [] -54. travelHistory.travel.location.country [] -55. travelHistory.travel.location.geoResolution [] +50. travelHistory.travel.location.administrativeAreaLevel1 [] +51. travelHistory.travel.location.administrativeAreaLevel2 [] +52. travelHistory.travel.location.administrativeAreaLevel3 [] +53. travelHistory.travel.location.country [] +54. travelHistory.travel.location.geoResolution [] These have the same meaning as in LOCATION, except that these pertain to travel history of the individual. Unlike the fields in location, the fields here are all comma-separated arrays, with each item corresponding to a travel location in the last 30 days. -56. travelHistory.travel.location.geometry.coordinates [] +55. travelHistory.travel.location.geometry.coordinates [] Comma-separated tuples of latitude and longitude. If the individual visited latitude m1 and longitude n1 this would be represented as "(m1, n1)". If there was another travel coordinate (m2, n2), then this would be represented as "(m1, n1),(m2, n2)" -57. travelHistory.travel.location.name [] -58. travelHistory.travel.location.place [] +56. travelHistory.travel.location.name [] +57. travelHistory.travel.location.place [] Same as LOCATION, except these are arrays -59. travelHistory.travel.methods [] +58. travelHistory.travel.methods [] Corresponding travel methods (such as air, ship, rail ...) -60. travelHistory.travel.purpose [] +59. travelHistory.travel.purpose [] Purpose of travel -61. travelHistory.traveledPrior30Days +60. travelHistory.traveledPrior30Days Whether the patient has travelled in the past 30 days Boolean: True | False VACCINES -62. vaccines.0.batch +61. vaccines.0.batch First vaccine batch -63. vaccines.0.date +62. vaccines.0.date Date of first vaccine -64. vaccines.0.name +63. vaccines.0.name Name of first vaccine -65. vaccines.0.sideEffects [] +64. vaccines.0.sideEffects [] List of side-effects experienced after vaccine -66. vaccines.1.batch -67. vaccines.1.date -68. vaccines.1.name -69. vaccines.1.sideEffects -70. vaccines.2.batch -71. vaccines.2.date -72. vaccines.2.name -73. vaccines.2.sideEffects -74. vaccines.3.batch -75. vaccines.3.date -76. vaccines.3.name -77. vaccines.3.sideEffects +65. vaccines.1.batch +66. vaccines.1.date +67. vaccines.1.name +68. vaccines.1.sideEffects +69. vaccines.2.batch +70. vaccines.2.date +71. vaccines.2.name +72. vaccines.2.sideEffects +73. vaccines.3.batch +74. vaccines.3.date +75. vaccines.3.name +76. vaccines.3.sideEffects Same as before, for subsequent vaccines taken by the same individual VARIANT OF CONCERN -78. variantOfConcern +77. variantOfConcern Variant of concern that was detected. This uses the Pango lineage. diff --git a/data-serving/scripts/export-data/fields.txt b/data-serving/scripts/export-data/fields.txt index 907bc2106..9dd1a942a 100644 --- a/data-serving/scripts/export-data/fields.txt +++ b/data-serving/scripts/export-data/fields.txt @@ -21,7 +21,6 @@ location.geometry.longitude location.name location.place location.query -notes pathogens preexistingConditions.hasPreexistingConditions preexistingConditions.values diff --git a/data-serving/scripts/export-data/test_transform_mongoexport.csv b/data-serving/scripts/export-data/test_transform_mongoexport.csv index 2883bfbad..731f340a9 100644 --- a/data-serving/scripts/export-data/test_transform_mongoexport.csv +++ b/data-serving/scripts/export-data/test_transform_mongoexport.csv @@ -1,3 +1,3 @@ -_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geoResolution,location.geometry.latitude,location.geometry.longitude,location.name,location.place,location.query,notes,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.curator,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.curator,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.name,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.name,vaccines.0.batch,vaccines.0.date,vaccines.0.sideEffects,vaccines.1.name,vaccines.1.batch,vaccines.1.date,vaccines.1.sideEffects,vaccines.2.name,vaccines.2.batch,vaccines.2.date,vaccines.2.sideEffects,vaccines.3.name,vaccines.3.batch,vaccines.3.date,vaccines.3.sideEffects -ObjectId(6817283abaa89324a90109aa),[],,787123878aa90909811aaff1,http://foo/bar.csv,"[""bb12399abbb19230900aa123""]",UNVERIFIED,69,60,,Male,[],,"[{""name"":""confirmed"",""dateRange"":{""start"":{""$date"":""2021-10-01T00:00:00.000Z""},""end"":{""$date"":""2021-01-01T00:00:00.000Z""}}}]",,,,Antarctica,Country,-79.402,0.323,Antarctica,,,,[],,,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,,0,,,,,,,,,,,true,,,,,,,,,,,,,,,, -ObjectId(798989a98998acc98989a1bb),[],,787123878aa90909811aaff1,http://foo/bar.csv,"[""bb12399abbb19230900aa123""]",UNVERIFIED,29,20,,Female,[],,"[{""name"":""confirmed"",""dateRange"":{""start"":{""$date"":""2021-01-05T00:00:00.000Z""},""end"":{""$date"":""2021-01-05T00:00:00.000Z""}}}]",,,,Antarctica,Country,-79.402,0.323,Antarctica,,,,[],,,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,,0,,,,,,,,,,,true,,,,,,,,,,,,,,,, +_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geoResolution,location.geometry.latitude,location.geometry.longitude,location.name,location.place,location.query,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.curator,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.curator,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.name,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.name,vaccines.0.batch,vaccines.0.date,vaccines.0.sideEffects,vaccines.1.name,vaccines.1.batch,vaccines.1.date,vaccines.1.sideEffects,vaccines.2.name,vaccines.2.batch,vaccines.2.date,vaccines.2.sideEffects,vaccines.3.name,vaccines.3.batch,vaccines.3.date,vaccines.3.sideEffects +ObjectId(6817283abaa89324a90109aa),[],,787123878aa90909811aaff1,http://foo/bar.csv,"[""bb12399abbb19230900aa123""]",UNVERIFIED,69,60,,Male,[],,"[{""name"":""confirmed"",""dateRange"":{""start"":{""$date"":""2021-10-01T00:00:00.000Z""},""end"":{""$date"":""2021-01-01T00:00:00.000Z""}}}]",,,,Antarctica,Country,-79.402,0.323,Antarctica,,,,[],,,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,,0,,,,,,,,,,true,,,,,,,,,,,,,,,, +ObjectId(798989a98998acc98989a1bb),[],,787123878aa90909811aaff1,http://foo/bar.csv,"[""bb12399abbb19230900aa123""]",UNVERIFIED,29,20,,Female,[],,"[{""name"":""confirmed"",""dateRange"":{""start"":{""$date"":""2021-01-05T00:00:00.000Z""},""end"":{""$date"":""2021-01-05T00:00:00.000Z""}}}]",,,,Antarctica,Country,-79.402,0.323,Antarctica,,,,[],,,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,,0,,,,,,,,,,true,,,,,,,,,,,,,,,, diff --git a/data-serving/scripts/export-data/test_transform_mongoexport_expected.csv b/data-serving/scripts/export-data/test_transform_mongoexport_expected.csv index 10eda2138..cf96dba24 100644 --- a/data-serving/scripts/export-data/test_transform_mongoexport_expected.csv +++ b/data-serving/scripts/export-data/test_transform_mongoexport_expected.csv @@ -1,3 +1,3 @@ -_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events.confirmed.date,events.confirmed.value,events.firstClinicalConsultation.date,events.hospitalAdmission.date,events.hospitalAdmission.value,events.icuAdmission.date,events.icuAdmission.value,events.onsetSymptoms.date,events.outcome.date,events.outcome.value,events.selfIsolation.date,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geometry.latitude,location.geometry.longitude,location.geoResolution,location.name,location.place,notes,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.administrativeAreaLevel1,travelHistory.travel.location.administrativeAreaLevel2,travelHistory.travel.location.administrativeAreaLevel3,travelHistory.travel.location.country,travelHistory.travel.location.geometry.coordinates,travelHistory.travel.location.geoResolution,travelHistory.travel.location.name,travelHistory.travel.location.place,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.batch,vaccines.0.date,vaccines.0.name,vaccines.0.sideEffects,vaccines.1.batch,vaccines.1.date,vaccines.1.name,vaccines.1.sideEffects,vaccines.2.batch,vaccines.2.date,vaccines.2.name,vaccines.2.sideEffects,vaccines.3.batch,vaccines.3.date,vaccines.3.name,vaccines.3.sideEffects,variantOfConcern -ObjectId(6817283abaa89324a90109aa),,,787123878aa90909811aaff1,http://foo/bar.csv,bb12399abbb19230900aa123,UNVERIFIED,69,60,,Male,,,2021-01-01,,,,,,,,,,,,,,Antarctica,-79.402,0.323,Country,Antarctica,,,,,,2021-01-02T13:42:34.991Z,,,,0,NA,,,,,,,,,,,,,,,,,true,,,,,,,,,,,,,,,,,, -ObjectId(798989a98998acc98989a1bb),,,787123878aa90909811aaff1,http://foo/bar.csv,bb12399abbb19230900aa123,UNVERIFIED,29,20,,Female,,,2021-01-05,,,,,,,,,,,,,,Antarctica,-79.402,0.323,Country,Antarctica,,,,,,2021-01-02T13:42:34.991Z,,,,0,NA,,,,,,,,,,,,,,,,,true,,,,,,,,,,,,,,,,,, +_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events.confirmed.date,events.confirmed.value,events.firstClinicalConsultation.date,events.hospitalAdmission.date,events.hospitalAdmission.value,events.icuAdmission.date,events.icuAdmission.value,events.onsetSymptoms.date,events.outcome.date,events.outcome.value,events.selfIsolation.date,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geometry.latitude,location.geometry.longitude,location.geoResolution,location.name,location.place,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.administrativeAreaLevel1,travelHistory.travel.location.administrativeAreaLevel2,travelHistory.travel.location.administrativeAreaLevel3,travelHistory.travel.location.country,travelHistory.travel.location.geometry.coordinates,travelHistory.travel.location.geoResolution,travelHistory.travel.location.name,travelHistory.travel.location.place,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.batch,vaccines.0.date,vaccines.0.name,vaccines.0.sideEffects,vaccines.1.batch,vaccines.1.date,vaccines.1.name,vaccines.1.sideEffects,vaccines.2.batch,vaccines.2.date,vaccines.2.name,vaccines.2.sideEffects,vaccines.3.batch,vaccines.3.date,vaccines.3.name,vaccines.3.sideEffects,variantOfConcern +ObjectId(6817283abaa89324a90109aa),,,787123878aa90909811aaff1,http://foo/bar.csv,bb12399abbb19230900aa123,UNVERIFIED,69,60,,Male,,,2021-01-01,,,,,,,,,,,,,,Antarctica,-79.402,0.323,Country,Antarctica,,,[],,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,0,,,,,,,,,,,,,,,,,true,,,,,,,,,,,,,,,,,, +ObjectId(798989a98998acc98989a1bb),,,787123878aa90909811aaff1,http://foo/bar.csv,bb12399abbb19230900aa123,UNVERIFIED,29,20,,Female,,,2021-01-05,,,,,,,,,,,,,,Antarctica,-79.402,0.323,Country,Antarctica,,,[],,ingestion@example.com,2021-01-02T13:42:34.991Z,,,,0,,,,,,,,,,,,,,,,,true,,,,,,,,,,,,,,,,,, diff --git a/data-serving/scripts/export-data/test_transform_mongoexport_expected.json b/data-serving/scripts/export-data/test_transform_mongoexport_expected.json index 27d277715..b04e3e19a 100644 --- a/data-serving/scripts/export-data/test_transform_mongoexport_expected.json +++ b/data-serving/scripts/export-data/test_transform_mongoexport_expected.json @@ -1,4 +1,4 @@ [ - {"SGTF": "NA", "_id": "ObjectId(6817283abaa89324a90109aa)", "caseReference.additionalSources": null, "caseReference.sourceEntryId": "", "caseReference.sourceId": "787123878aa90909811aaff1", "caseReference.sourceUrl": "http://foo/bar.csv", "caseReference.uploadIds": "bb12399abbb19230900aa123", "caseReference.verificationStatus": "UNVERIFIED", "demographics.ageRange.end": "69", "demographics.ageRange.start": "60", "demographics.ethnicity": "", "demographics.gender": "Male", "demographics.nationalities": null, "demographics.occupation": "", "events.confirmed.date": "2021-01-01", "events.confirmed.value": null, "events.firstClinicalConsultation.date": "", "events.hospitalAdmission.date": "", "events.hospitalAdmission.value": "", "events.icuAdmission.date": "", "events.icuAdmission.value": "", "events.onsetSymptoms.date": "", "events.outcome.date": "", "events.outcome.value": "", "events.selfIsolation.date": "", "location.administrativeAreaLevel1": "", "location.administrativeAreaLevel2": "", "location.administrativeAreaLevel3": "", "location.country": "Antarctica", "location.geoResolution": "Country", "location.geometry.latitude": "-79.402", "location.geometry.longitude": "0.323", "location.name": "Antarctica", "location.place": "", "notes": "", "pathogens": null, "preexistingConditions.hasPreexistingConditions": "", "preexistingConditions.values": "", "revisionMetadata.creationMetadata.date": "2021-01-02T13:42:34.991Z", "revisionMetadata.creationMetadata.notes": "", "revisionMetadata.editMetadata.date": "", "revisionMetadata.editMetadata.notes": "", "revisionMetadata.revisionNumber": "0", "symptoms.status": "", "symptoms.values": "", "transmission.linkedCaseIds": "", "transmission.places": "", "transmission.routes": "", "travelHistory.travel.dateRange.end": "", "travelHistory.travel.dateRange.start": "", "travelHistory.travel.location.administrativeAreaLevel1": "", "travelHistory.travel.location.administrativeAreaLevel2": "", "travelHistory.travel.location.administrativeAreaLevel3": "", "travelHistory.travel.location.country": "", "travelHistory.travel.location.geoResolution": "", "travelHistory.travel.location.geometry.coordinates": "", "travelHistory.travel.location.name": "", "travelHistory.travel.location.place": "", "travelHistory.travel.methods": "", "travelHistory.travel.purpose": "true", "travelHistory.traveledPrior30Days": "", "vaccines.0.batch": "", "vaccines.0.date": "", "vaccines.0.name": "", "vaccines.0.sideEffects": "", "vaccines.1.batch": "", "vaccines.1.date": "", "vaccines.1.name": "", "vaccines.1.sideEffects": "", "vaccines.2.batch": "", "vaccines.2.date": "", "vaccines.2.name": "", "vaccines.2.sideEffects": "", "vaccines.3.batch": "", "vaccines.3.date": "", "vaccines.3.name": "", "vaccines.3.sideEffects": null, "variantOfConcern": ""} -, {"SGTF": "NA", "_id": "ObjectId(798989a98998acc98989a1bb)", "caseReference.additionalSources": null, "caseReference.sourceEntryId": "", "caseReference.sourceId": "787123878aa90909811aaff1", "caseReference.sourceUrl": "http://foo/bar.csv", "caseReference.uploadIds": "bb12399abbb19230900aa123", "caseReference.verificationStatus": "UNVERIFIED", "demographics.ageRange.end": "29", "demographics.ageRange.start": "20", "demographics.ethnicity": "", "demographics.gender": "Female", "demographics.nationalities": null, "demographics.occupation": "", "events.confirmed.date": "2021-01-05", "events.confirmed.value": null, "events.firstClinicalConsultation.date": "", "events.hospitalAdmission.date": "", "events.hospitalAdmission.value": "", "events.icuAdmission.date": "", "events.icuAdmission.value": "", "events.onsetSymptoms.date": "", "events.outcome.date": "", "events.outcome.value": "", "events.selfIsolation.date": "", "location.administrativeAreaLevel1": "", "location.administrativeAreaLevel2": "", "location.administrativeAreaLevel3": "", "location.country": "Antarctica", "location.geoResolution": "Country", "location.geometry.latitude": "-79.402", "location.geometry.longitude": "0.323", "location.name": "Antarctica", "location.place": "", "notes": "", "pathogens": null, "preexistingConditions.hasPreexistingConditions": "", "preexistingConditions.values": "", "revisionMetadata.creationMetadata.date": "2021-01-02T13:42:34.991Z", "revisionMetadata.creationMetadata.notes": "", "revisionMetadata.editMetadata.date": "", "revisionMetadata.editMetadata.notes": "", "revisionMetadata.revisionNumber": "0", "symptoms.status": "", "symptoms.values": "", "transmission.linkedCaseIds": "", "transmission.places": "", "transmission.routes": "", "travelHistory.travel.dateRange.end": "", "travelHistory.travel.dateRange.start": "", "travelHistory.travel.location.administrativeAreaLevel1": "", "travelHistory.travel.location.administrativeAreaLevel2": "", "travelHistory.travel.location.administrativeAreaLevel3": "", "travelHistory.travel.location.country": "", "travelHistory.travel.location.geoResolution": "", "travelHistory.travel.location.geometry.coordinates": "", "travelHistory.travel.location.name": "", "travelHistory.travel.location.place": "", "travelHistory.travel.methods": "", "travelHistory.travel.purpose": "true", "travelHistory.traveledPrior30Days": "", "vaccines.0.batch": "", "vaccines.0.date": "", "vaccines.0.name": "", "vaccines.0.sideEffects": "", "vaccines.1.batch": "", "vaccines.1.date": "", "vaccines.1.name": "", "vaccines.1.sideEffects": "", "vaccines.2.batch": "", "vaccines.2.date": "", "vaccines.2.name": "", "vaccines.2.sideEffects": "", "vaccines.3.batch": "", "vaccines.3.date": "", "vaccines.3.name": "", "vaccines.3.sideEffects": null, "variantOfConcern": ""} + {"SGTF": "0", "_id": "ObjectId(6817283abaa89324a90109aa)", "caseReference.additionalSources": null, "caseReference.sourceEntryId": "", "caseReference.sourceId": "787123878aa90909811aaff1", "caseReference.sourceUrl": "http://foo/bar.csv", "caseReference.uploadIds": "bb12399abbb19230900aa123", "caseReference.verificationStatus": "UNVERIFIED", "demographics.ageRange.end": "69", "demographics.ageRange.start": "60", "demographics.ethnicity": "", "demographics.gender": "Male", "demographics.nationalities": null, "demographics.occupation": "", "events.confirmed.date": "2021-01-01", "events.confirmed.value": null, "events.firstClinicalConsultation.date": "", "events.hospitalAdmission.date": "", "events.hospitalAdmission.value": "", "events.icuAdmission.date": "", "events.icuAdmission.value": "", "events.onsetSymptoms.date": "", "events.outcome.date": "", "events.outcome.value": "", "events.selfIsolation.date": "", "location.administrativeAreaLevel1": "", "location.administrativeAreaLevel2": "", "location.administrativeAreaLevel3": "", "location.country": "Antarctica", "location.geoResolution": "Country", "location.geometry.latitude": "-79.402", "location.geometry.longitude": "0.323", "location.name": "Antarctica", "location.place": "", "pathogens": "", "preexistingConditions.hasPreexistingConditions": "[]", "preexistingConditions.values": "", "revisionMetadata.creationMetadata.date": "ingestion@example.com", "revisionMetadata.creationMetadata.notes": "2021-01-02T13:42:34.991Z", "revisionMetadata.editMetadata.date": "", "revisionMetadata.editMetadata.notes": "", "revisionMetadata.revisionNumber": "", "symptoms.status": "", "symptoms.values": "", "transmission.linkedCaseIds": "", "transmission.places": "", "transmission.routes": "", "travelHistory.travel.dateRange.end": "", "travelHistory.travel.dateRange.start": "", "travelHistory.travel.location.administrativeAreaLevel1": "", "travelHistory.travel.location.administrativeAreaLevel2": "", "travelHistory.travel.location.administrativeAreaLevel3": "", "travelHistory.travel.location.country": "", "travelHistory.travel.location.geoResolution": "", "travelHistory.travel.location.geometry.coordinates": "", "travelHistory.travel.location.name": "", "travelHistory.travel.location.place": "", "travelHistory.travel.methods": "", "travelHistory.travel.purpose": "true", "travelHistory.traveledPrior30Days": "", "vaccines.0.batch": "", "vaccines.0.date": "", "vaccines.0.name": "", "vaccines.0.sideEffects": "", "vaccines.1.batch": "", "vaccines.1.date": "", "vaccines.1.name": "", "vaccines.1.sideEffects": "", "vaccines.2.batch": "", "vaccines.2.date": "", "vaccines.2.name": "", "vaccines.2.sideEffects": "", "vaccines.3.batch": "", "vaccines.3.date": "", "vaccines.3.name": "", "vaccines.3.sideEffects": null, "variantOfConcern": ""} +, {"SGTF": "0", "_id": "ObjectId(798989a98998acc98989a1bb)", "caseReference.additionalSources": null, "caseReference.sourceEntryId": "", "caseReference.sourceId": "787123878aa90909811aaff1", "caseReference.sourceUrl": "http://foo/bar.csv", "caseReference.uploadIds": "bb12399abbb19230900aa123", "caseReference.verificationStatus": "UNVERIFIED", "demographics.ageRange.end": "29", "demographics.ageRange.start": "20", "demographics.ethnicity": "", "demographics.gender": "Female", "demographics.nationalities": null, "demographics.occupation": "", "events.confirmed.date": "2021-01-05", "events.confirmed.value": null, "events.firstClinicalConsultation.date": "", "events.hospitalAdmission.date": "", "events.hospitalAdmission.value": "", "events.icuAdmission.date": "", "events.icuAdmission.value": "", "events.onsetSymptoms.date": "", "events.outcome.date": "", "events.outcome.value": "", "events.selfIsolation.date": "", "location.administrativeAreaLevel1": "", "location.administrativeAreaLevel2": "", "location.administrativeAreaLevel3": "", "location.country": "Antarctica", "location.geoResolution": "Country", "location.geometry.latitude": "-79.402", "location.geometry.longitude": "0.323", "location.name": "Antarctica", "location.place": "", "pathogens": "", "preexistingConditions.hasPreexistingConditions": "[]", "preexistingConditions.values": "", "revisionMetadata.creationMetadata.date": "ingestion@example.com", "revisionMetadata.creationMetadata.notes": "2021-01-02T13:42:34.991Z", "revisionMetadata.editMetadata.date": "", "revisionMetadata.editMetadata.notes": "", "revisionMetadata.revisionNumber": "", "symptoms.status": "", "symptoms.values": "", "transmission.linkedCaseIds": "", "transmission.places": "", "transmission.routes": "", "travelHistory.travel.dateRange.end": "", "travelHistory.travel.dateRange.start": "", "travelHistory.travel.location.administrativeAreaLevel1": "", "travelHistory.travel.location.administrativeAreaLevel2": "", "travelHistory.travel.location.administrativeAreaLevel3": "", "travelHistory.travel.location.country": "", "travelHistory.travel.location.geoResolution": "", "travelHistory.travel.location.geometry.coordinates": "", "travelHistory.travel.location.name": "", "travelHistory.travel.location.place": "", "travelHistory.travel.methods": "", "travelHistory.travel.purpose": "true", "travelHistory.traveledPrior30Days": "", "vaccines.0.batch": "", "vaccines.0.date": "", "vaccines.0.name": "", "vaccines.0.sideEffects": "", "vaccines.1.batch": "", "vaccines.1.date": "", "vaccines.1.name": "", "vaccines.1.sideEffects": "", "vaccines.2.batch": "", "vaccines.2.date": "", "vaccines.2.name": "", "vaccines.2.sideEffects": "", "vaccines.3.batch": "", "vaccines.3.date": "", "vaccines.3.name": "", "vaccines.3.sideEffects": null, "variantOfConcern": ""} ] diff --git a/data-serving/scripts/export-data/test_transform_mongoexport_expected.tsv b/data-serving/scripts/export-data/test_transform_mongoexport_expected.tsv index 7d3955599..fd990e3f7 100644 --- a/data-serving/scripts/export-data/test_transform_mongoexport_expected.tsv +++ b/data-serving/scripts/export-data/test_transform_mongoexport_expected.tsv @@ -1,3 +1,3 @@ -_id caseReference.additionalSources caseReference.sourceEntryId caseReference.sourceId caseReference.sourceUrl caseReference.uploadIds caseReference.verificationStatus demographics.ageRange.end demographics.ageRange.start demographics.ethnicity demographics.gender demographics.nationalities demographics.occupation events.confirmed.date events.confirmed.value events.firstClinicalConsultation.date events.hospitalAdmission.date events.hospitalAdmission.value events.icuAdmission.date events.icuAdmission.value events.onsetSymptoms.date events.outcome.date events.outcome.value events.selfIsolation.date location.administrativeAreaLevel1 location.administrativeAreaLevel2 location.administrativeAreaLevel3 location.country location.geometry.latitude location.geometry.longitude location.geoResolution location.name location.place notes pathogens preexistingConditions.hasPreexistingConditions preexistingConditions.values revisionMetadata.creationMetadata.date revisionMetadata.creationMetadata.notes revisionMetadata.editMetadata.date revisionMetadata.editMetadata.notes revisionMetadata.revisionNumber SGTF symptoms.status symptoms.values transmission.linkedCaseIds transmission.places transmission.routes travelHistory.travel.dateRange.end travelHistory.travel.dateRange.start travelHistory.travel.location.administrativeAreaLevel1 travelHistory.travel.location.administrativeAreaLevel2 travelHistory.travel.location.administrativeAreaLevel3 travelHistory.travel.location.country travelHistory.travel.location.geometry.coordinates travelHistory.travel.location.geoResolution travelHistory.travel.location.name travelHistory.travel.location.place travelHistory.travel.methods travelHistory.travel.purpose travelHistory.traveledPrior30Days vaccines.0.batch vaccines.0.date vaccines.0.name vaccines.0.sideEffects vaccines.1.batch vaccines.1.date vaccines.1.name vaccines.1.sideEffects vaccines.2.batch vaccines.2.date vaccines.2.name vaccines.2.sideEffects vaccines.3.batch vaccines.3.date vaccines.3.name vaccines.3.sideEffects variantOfConcern -ObjectId(6817283abaa89324a90109aa) 787123878aa90909811aaff1 http://foo/bar.csv bb12399abbb19230900aa123 UNVERIFIED 69 60 Male 2021-01-01 Antarctica -79.402 0.323 Country Antarctica 2021-01-02T13:42:34.991Z 0 NA true -ObjectId(798989a98998acc98989a1bb) 787123878aa90909811aaff1 http://foo/bar.csv bb12399abbb19230900aa123 UNVERIFIED 29 20 Female 2021-01-05 Antarctica -79.402 0.323 Country Antarctica 2021-01-02T13:42:34.991Z 0 NA true +_id caseReference.additionalSources caseReference.sourceEntryId caseReference.sourceId caseReference.sourceUrl caseReference.uploadIds caseReference.verificationStatus demographics.ageRange.end demographics.ageRange.start demographics.ethnicity demographics.gender demographics.nationalities demographics.occupation events.confirmed.date events.confirmed.value events.firstClinicalConsultation.date events.hospitalAdmission.date events.hospitalAdmission.value events.icuAdmission.date events.icuAdmission.value events.onsetSymptoms.date events.outcome.date events.outcome.value events.selfIsolation.date location.administrativeAreaLevel1 location.administrativeAreaLevel2 location.administrativeAreaLevel3 location.country location.geometry.latitude location.geometry.longitude location.geoResolution location.name location.place pathogens preexistingConditions.hasPreexistingConditions preexistingConditions.values revisionMetadata.creationMetadata.date revisionMetadata.creationMetadata.notes revisionMetadata.editMetadata.date revisionMetadata.editMetadata.notes revisionMetadata.revisionNumber SGTF symptoms.status symptoms.values transmission.linkedCaseIds transmission.places transmission.routes travelHistory.travel.dateRange.end travelHistory.travel.dateRange.start travelHistory.travel.location.administrativeAreaLevel1 travelHistory.travel.location.administrativeAreaLevel2 travelHistory.travel.location.administrativeAreaLevel3 travelHistory.travel.location.country travelHistory.travel.location.geometry.coordinates travelHistory.travel.location.geoResolution travelHistory.travel.location.name travelHistory.travel.location.place travelHistory.travel.methods travelHistory.travel.purpose travelHistory.traveledPrior30Days vaccines.0.batch vaccines.0.date vaccines.0.name vaccines.0.sideEffects vaccines.1.batch vaccines.1.date vaccines.1.name vaccines.1.sideEffects vaccines.2.batch vaccines.2.date vaccines.2.name vaccines.2.sideEffects vaccines.3.batch vaccines.3.date vaccines.3.name vaccines.3.sideEffects variantOfConcern +ObjectId(6817283abaa89324a90109aa) 787123878aa90909811aaff1 http://foo/bar.csv bb12399abbb19230900aa123 UNVERIFIED 69 60 Male 2021-01-01 Antarctica -79.402 0.323 Country Antarctica [] ingestion@example.com 2021-01-02T13:42:34.991Z 0 true +ObjectId(798989a98998acc98989a1bb) 787123878aa90909811aaff1 http://foo/bar.csv bb12399abbb19230900aa123 UNVERIFIED 29 20 Female 2021-01-05 Antarctica -79.402 0.323 Country Antarctica [] ingestion@example.com 2021-01-02T13:42:34.991Z 0 true diff --git a/data-serving/scripts/export-data/test_transform_mongoexport_header.csv b/data-serving/scripts/export-data/test_transform_mongoexport_header.csv index 5437f13d0..facdb2b40 100644 --- a/data-serving/scripts/export-data/test_transform_mongoexport_header.csv +++ b/data-serving/scripts/export-data/test_transform_mongoexport_header.csv @@ -1 +1 @@ -_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events,genomeSequences,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geoResolution,location.geometry.latitude,location.geometry.longitude,location.name,location.place,location.query,notes,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.curator,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.curator,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.name,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.name,vaccines.0.batch,vaccines.0.date,vaccines.0.sideEffects,vaccines.1.name,vaccines.1.batch,vaccines.1.date,vaccines.1.sideEffects,vaccines.2.name,vaccines.2.batch,vaccines.2.date,vaccines.2.sideEffects,vaccines.3.name,vaccines.3.batch,vaccines.3.date,vaccines.3.sideEffects +_id,caseReference.additionalSources,caseReference.sourceEntryId,caseReference.sourceId,caseReference.sourceUrl,caseReference.uploadIds,caseReference.verificationStatus,demographics.ageRange.end,demographics.ageRange.start,demographics.ethnicity,demographics.gender,demographics.nationalities,demographics.occupation,events,genomeSequences,location.administrativeAreaLevel1,location.administrativeAreaLevel2,location.administrativeAreaLevel3,location.country,location.geoResolution,location.geometry.latitude,location.geometry.longitude,location.name,location.place,location.query,pathogens,preexistingConditions.hasPreexistingConditions,preexistingConditions.values,revisionMetadata.creationMetadata.curator,revisionMetadata.creationMetadata.date,revisionMetadata.creationMetadata.notes,revisionMetadata.editMetadata.curator,revisionMetadata.editMetadata.date,revisionMetadata.editMetadata.notes,revisionMetadata.revisionNumber,SGTF,symptoms.status,symptoms.values,transmission.linkedCaseIds,transmission.places,transmission.routes,travelHistory.travel.dateRange.end,travelHistory.travel.dateRange.start,travelHistory.travel.location.name,travelHistory.travel.methods,travelHistory.travel.purpose,travelHistory.traveledPrior30Days,vaccines.0.name,vaccines.0.batch,vaccines.0.date,vaccines.0.sideEffects,vaccines.1.name,vaccines.1.batch,vaccines.1.date,vaccines.1.sideEffects,vaccines.2.name,vaccines.2.batch,vaccines.2.date,vaccines.2.sideEffects,vaccines.3.name,vaccines.3.batch,vaccines.3.date,vaccines.3.sideEffects diff --git a/data-serving/scripts/export-data/transform.py b/data-serving/scripts/export-data/transform.py index bf58beae5..a5d18743a 100644 --- a/data-serving/scripts/export-data/transform.py +++ b/data-serving/scripts/export-data/transform.py @@ -37,6 +37,7 @@ "revisionMetadata.creationMetadata.curator", "revisionMetadata.editMetadata.curator", "events", + "notes", "travelHistory.travel", ] @@ -219,8 +220,6 @@ def age_range(case_buckets: str, buckets: [dict[str, Any]]) -> (int, int): def convert_row(row: dict[str, Any], buckets: [dict[str, Any]]) -> Optional[dict[str, Any]]: if "ObjectId" not in row["_id"]: return None - if type(row.get("notes")) == str: - row["notes"] = row["notes"].replace("\n", ", ") for arr_field in __ARRAYS: if row.get(arr_field): row[arr_field] = convert_string_list(row[arr_field])