diff --git a/resources/etls/BCRImport.xml b/resources/etls/BCRImport.xml deleted file mode 100644 index 0c2cc962b..000000000 --- a/resources/etls/BCRImport.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - Import Dataspace BCR Data - Populate BCR tables - - - - - - - - - - diff --git a/resources/etls/LoadApplication.xml b/resources/etls/LoadApplication.xml index fa5e69e91..9892921e9 100644 --- a/resources/etls/LoadApplication.xml +++ b/resources/etls/LoadApplication.xml @@ -19,10 +19,6 @@ - - - - diff --git a/resources/schemas/cds.xml b/resources/schemas/cds.xml index 8eef61d41..2ff17fd7e 100644 --- a/resources/schemas/cds.xml +++ b/resources/schemas/cds.xml @@ -337,21 +337,6 @@ - - - - - - - - - - - - - - -
diff --git a/resources/schemas/dbscripts/postgresql/cds-23.006-23.007.sql b/resources/schemas/dbscripts/postgresql/cds-23.006-23.007.sql new file mode 100644 index 000000000..d34309dad --- /dev/null +++ b/resources/schemas/dbscripts/postgresql/cds-23.006-23.007.sql @@ -0,0 +1,41 @@ +TRUNCATE TABLE cds.MAbMix CASCADE; +TRUNCATE TABLE cds.mabmetadata CASCADE; +TRUNCATE TABLE cds.sequence_germline CASCADE; +TRUNCATE TABLE cds.allele_sequence CASCADE; +TRUNCATE TABLE cds.alignment CASCADE; +TRUNCATE TABLE cds.alignment_run CASCADE; +TRUNCATE TABLE cds.sequence_header CASCADE; +TRUNCATE TABLE cds.header_source CASCADE; +TRUNCATE TABLE cds.antibody_sequence CASCADE; +TRUNCATE TABLE cds.antibody_class CASCADE; +TRUNCATE TABLE cds.sequence CASCADE; + +ALTER TABLE cds.sequence_germline ADD CONSTRAINT FK_cds_sequence_germline_allele FOREIGN KEY (allele) REFERENCES cds.allele_sequence (allele); +CREATE INDEX IX_cds_sequence_germline_allele ON cds.sequence_germline (allele); + +ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_v_call FOREIGN KEY (v_call) REFERENCES cds.allele_sequence (allele); +CREATE INDEX IX_cds_alignment_v_call ON cds.alignment (v_call); + +ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_d_call FOREIGN KEY (d_call) REFERENCES cds.allele_sequence (allele); +CREATE INDEX IX_cds_alignment_d_call ON cds.alignment (d_call); + +ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_j_call FOREIGN KEY (j_call) REFERENCES cds.allele_sequence (allele); +CREATE INDEX IX_cds_alignment_j_call ON cds.alignment (j_call); + +ALTER TABLE cds.sequence_germline ADD CONSTRAINT FK_cds_sequence_germline_run_application FOREIGN KEY (run_application) REFERENCES cds.alignment_run (run_application); +CREATE INDEX IX_cds_sequence_germline_run_application ON cds.sequence_germline (run_application); + +ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_run_application FOREIGN KEY (run_application) REFERENCES cds.alignment_run (run_application); +CREATE 
INDEX IX_cds_alignment_run_application ON cds.alignment (run_application); + +ALTER TABLE cds.sequence_header ADD CONSTRAINT FK_cds_sequence_header_source_id FOREIGN KEY (source_id) REFERENCES cds.header_source (source_id); +CREATE INDEX IX_cds_sequence_header_source_id ON cds.sequence_header(source_id); + +ALTER TABLE cds.antibody_sequence ADD CONSTRAINT FK_cds_antibody_sequence_mab_id FOREIGN KEY (container, mab_id) REFERENCES cds.mabmetadata (container, mab_id); +CREATE INDEX IX_cds_antibody_sequence_mab_id ON cds.antibody_sequence (container, mab_id); + +ALTER TABLE cds.mabmetadata ADD CONSTRAINT FK_cds_mabmetadata_mab_class_id FOREIGN KEY (mab_class_id) REFERENCES cds.antibody_class (mab_class_id); +CREATE INDEX IX_cds_mabmetadata_mab_class_id ON cds.mabmetadata (mab_class_id); + +ALTER TABLE cds.import_MAbMix DROP CONSTRAINT import_MAbMix_mab_id_fkey; +DROP TABLE cds.import_mabmetadata; \ No newline at end of file diff --git a/src/org/labkey/cds/CDSManager.java b/src/org/labkey/cds/CDSManager.java index 31acde965..ebcfea0f0 100644 --- a/src/org/labkey/cds/CDSManager.java +++ b/src/org/labkey/cds/CDSManager.java @@ -148,7 +148,6 @@ public void cleanContainer(Container c) "StudyPublication", "Publication", "StudyRelationship", - "MAbMetadata", "MAbMixMetadata", "Study", "Assay", @@ -204,7 +203,6 @@ public void cleanContainer(Container c) "import_publication", "import_personnel", "import_document", - "import_mabmetadata", "import_mabmixmetadata", "import_lab", "import_study", @@ -224,6 +222,7 @@ public void cleanContainer(Container c) "sequence_header", "sequence_germline", "antibody_sequence", + "MAbMetadata", "alignment", "preferred_allele", "sequence", diff --git a/src/org/labkey/cds/CDSModule.java b/src/org/labkey/cds/CDSModule.java index 33c5329d5..7379bf027 100644 --- a/src/org/labkey/cds/CDSModule.java +++ b/src/org/labkey/cds/CDSModule.java @@ -184,7 +184,7 @@ public String getName() @Override public @Nullable Double getSchemaVersion() { - return 23.006; 
+ return 23.007; } @Override diff --git a/src/org/labkey/cds/data/TSVCopyConfig.java b/src/org/labkey/cds/data/TSVCopyConfig.java index 13149a34b..003ec5f98 100644 --- a/src/org/labkey/cds/data/TSVCopyConfig.java +++ b/src/org/labkey/cds/data/TSVCopyConfig.java @@ -26,7 +26,14 @@ public class TSVCopyConfig extends CDSImportCopyConfig { public TSVCopyConfig(String table, String fileName) { - super("#TSV#", table, "cds", "import_" + table, fileName); + // With the current setup, there are 2 steps: + // Step 1: ETL data from a tab-separated .txt or a .csv file to an "import_table". + // Step 2: Run another ETL to copy data from an "import_table" to its corresponding actual table. + // This causes an issue when importing BCR data (via Step 1) since a BCR table (cds.antibody_sequence) has a FK + // to the mabmetadata table, but the mabmetadata table is not populated during Step 1, so the import fails with an FK violation. + // To get around this, we are going to ETL data from mabmetadata.txt directly into the mabmetadata table during Step 1 + // (and drop the import_mabmetadata table altogether since it is not referenced in any queries or reports). + super("#TSV#", table, "cds", (table.equalsIgnoreCase("mabmetadata") ? 
table : ("import_" + table)), fileName); } public TSVCopyConfig(String table) diff --git a/src/org/labkey/cds/data/steps/CDSImportTask.java b/src/org/labkey/cds/data/steps/CDSImportTask.java index 2688057b3..5e1f71f7d 100644 --- a/src/org/labkey/cds/data/steps/CDSImportTask.java +++ b/src/org/labkey/cds/data/steps/CDSImportTask.java @@ -16,13 +16,25 @@ package org.labkey.cds.data.steps; import org.labkey.cds.data.CDSImportCopyConfig; +import org.labkey.cds.data.CSVCopyConfig; import org.labkey.cds.data.TSVCopyConfig; public class CDSImportTask extends ImportTask { private static CDSImportCopyConfig[] dataspaceTables = new CDSImportCopyConfig[] { - // Core Tables + // bcr data, order matters due to FKs + new CSVCopyConfig("sequence"), + new CSVCopyConfig("alignment_run"), + new CSVCopyConfig("allele_sequence"), + new CSVCopyConfig("alignment"), + new CSVCopyConfig("header_source"), + new CSVCopyConfig("sequence_header"), + new CSVCopyConfig("sequence_germline"), + new CSVCopyConfig("preferred_allele"), + new CSVCopyConfig("antibody_class"), + + // Core Tables new TSVCopyConfig("Study"), new TSVCopyConfig("StudyGroups"), new TSVCopyConfig("Product"), @@ -34,6 +46,8 @@ public class CDSImportTask extends ImportTask new TSVCopyConfig("MAbMetadata"), new TSVCopyConfig("MAbMixMetadata"), + new CSVCopyConfig("antibody_sequence"), // bcr data, order matters due to FKs + // Dependent Tables new TSVCopyConfig("StudyPartGroupArm"), new TSVCopyConfig("StudyPartGroupArmProduct"), diff --git a/test/sampledata/dataspace/bcrimport/alignment.csv b/test/sampledata/dataspace/cdsimport/alignment.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/alignment.csv rename to test/sampledata/dataspace/cdsimport/alignment.csv diff --git a/test/sampledata/dataspace/bcrimport/alignment_run.csv b/test/sampledata/dataspace/cdsimport/alignment_run.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/alignment_run.csv rename to 
test/sampledata/dataspace/cdsimport/alignment_run.csv diff --git a/test/sampledata/dataspace/bcrimport/allele_sequence.csv b/test/sampledata/dataspace/cdsimport/allele_sequence.csv similarity index 54% rename from test/sampledata/dataspace/bcrimport/allele_sequence.csv rename to test/sampledata/dataspace/cdsimport/allele_sequence.csv index 0f6e97123..a45100e7d 100644 --- a/test/sampledata/dataspace/bcrimport/allele_sequence.csv +++ b/test/sampledata/dataspace/cdsimport/allele_sequence.csv @@ -6,4 +6,10 @@ POPO32*02,AACGGTCCCCAGGGAAGGGTTTACAGG,1/20/2023 RRRS31A-37*01,CGGGAAACCCCGGACTGGTGAAGCCTTCGGGGG,1/20/2023 RRRR31D-37*01,GGGCAGCCTCCACGACTCCTAAGGGGTGTATTTGGCC,1/20/2023 POPOK32*01,CAAGAAGTCAGAGCCTCCTGTGGAATAAGAACGAT,1/20/2023 -POPOK32*02,CAGAAAGTCAGAGCCTCCTGTGGAATAAGGGCAGT,1/20/2023 \ No newline at end of file +POPOK32*02,CAGAAAGTCAGAGCCTCCTGTGGAATAAGGGCAGT,1/20/2023 +IGHV7-2*02,CAGAAAGTCAGAGTAAGGGCAGT,1/20/2023 +IGHD1-76*01,TTTTTTTTTACAGGAGTCGGGCCTTCCCCGGA,1/20/2023 +IGHJ7*07,TTCCTTCCCCGGACTGGTGAAGCCTTCGGAATCCCGGA,1/20/2023 +IGHD5-24*07,TTCCTTATTGATTGTACGCTAAATTGGATTCGTCTGGCCCCCGGAAGCCTTCGGAATCCCGGA,1/20/2023 +IGHD7-7*07,ATTGATTGTACGCTAAATTGGATTCGTCTGGCCCCCGGA,1/20/2023 +IGHJ2*07,GTGATTACAATTGGGACTTCGAACACTGGGGCCGGGG,1/20/2023 \ No newline at end of file diff --git a/test/sampledata/dataspace/bcrimport/antibody_class.csv b/test/sampledata/dataspace/cdsimport/antibody_class.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/antibody_class.csv rename to test/sampledata/dataspace/cdsimport/antibody_class.csv diff --git a/test/sampledata/dataspace/bcrimport/antibody_sequence.csv b/test/sampledata/dataspace/cdsimport/antibody_sequence.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/antibody_sequence.csv rename to test/sampledata/dataspace/cdsimport/antibody_sequence.csv diff --git a/test/sampledata/dataspace/bcrimport/header_source.csv b/test/sampledata/dataspace/cdsimport/header_source.csv similarity index 100% rename 
from test/sampledata/dataspace/bcrimport/header_source.csv rename to test/sampledata/dataspace/cdsimport/header_source.csv diff --git a/test/sampledata/dataspace/bcrimport/preferred_allele.csv b/test/sampledata/dataspace/cdsimport/preferred_allele.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/preferred_allele.csv rename to test/sampledata/dataspace/cdsimport/preferred_allele.csv diff --git a/test/sampledata/dataspace/bcrimport/sequence.csv b/test/sampledata/dataspace/cdsimport/sequence.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/sequence.csv rename to test/sampledata/dataspace/cdsimport/sequence.csv diff --git a/test/sampledata/dataspace/bcrimport/sequence_germline.csv b/test/sampledata/dataspace/cdsimport/sequence_germline.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/sequence_germline.csv rename to test/sampledata/dataspace/cdsimport/sequence_germline.csv diff --git a/test/sampledata/dataspace/bcrimport/sequence_header.csv b/test/sampledata/dataspace/cdsimport/sequence_header.csv similarity index 100% rename from test/sampledata/dataspace/bcrimport/sequence_header.csv rename to test/sampledata/dataspace/cdsimport/sequence_header.csv