Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Ticket 47498: New Table Spec for BCR Data - add FKs, combine ETLs #613

Merged
merged 7 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions resources/etls/BCRImport.xml

This file was deleted.

4 changes: 0 additions & 4 deletions resources/etls/LoadApplication.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
<source schemaName="cds" queryName="ds_lab" />
<destination schemaName="cds" queryName="lab" targetOption="truncate"/>
</transform>
<transform id="PopulateMAbMetadata" type="org.labkey.di.pipeline.TransformTask">
<source schemaName="cds" queryName="import_MAbMetadata" />
<destination schemaName="cds" queryName="MAbMetadata" targetOption="truncate" />
</transform>
<transform id="PopulateMAbMixMetadata" type="org.labkey.di.pipeline.TransformTask">
<source schemaName="cds" queryName="import_MAbMixMetadata" />
<destination schemaName="cds" queryName="MAbMixMetadata" targetOption="truncate" />
Expand Down
15 changes: 0 additions & 15 deletions resources/schemas/cds.xml
Original file line number Diff line number Diff line change
Expand Up @@ -337,21 +337,6 @@
<column columnName="container"/>
</columns>
</table>
<table tableName="import_MAbMetadata" tableDbType="TABLE">
<columns>
<column columnName="container"/>
<column columnName="mab_id"/>
<column columnName="mab_name_std"/>
<column columnName="mab_lanlid"/>
<column columnName="mab_hxb2_location"/>
<column columnName="mab_ab_binding_type"/>
<column columnName="mab_isotype"/>
<column columnName="mab_donorid"/>
<column columnName="mab_donor_species"/>
<column columnName="mab_donor_clade"/>
<column columnName="mab_class_id"/>
</columns>
</table>
<table tableName="import_MAbMixMetadata" tableDbType="TABLE">
<columns>
<column columnName="container"/>
Expand Down
41 changes: 41 additions & 0 deletions resources/schemas/dbscripts/postgresql/cds-23.006-23.007.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Upgrade script for the cds schema, version 23.006 -> 23.007.
-- Adds foreign keys (each paired with an index on the referencing columns) among the
-- sequence / alignment / antibody tables and mabmetadata, then drops the import_mabmetadata table.

-- Empty every table touched by the new constraints. CASCADE additionally truncates any table
-- that references one of these tables through a foreign key.
-- NOTE(review): presumably done so that pre-existing rows cannot violate the constraints added
-- below; this discards the current contents, so confirm the data is expected to be re-imported.
TRUNCATE TABLE cds.MAbMix CASCADE;
TRUNCATE TABLE cds.mabmetadata CASCADE;
TRUNCATE TABLE cds.sequence_germline CASCADE;
TRUNCATE TABLE cds.allele_sequence CASCADE;
TRUNCATE TABLE cds.alignment CASCADE;
TRUNCATE TABLE cds.alignment_run CASCADE;
TRUNCATE TABLE cds.sequence_header CASCADE;
TRUNCATE TABLE cds.header_source CASCADE;
TRUNCATE TABLE cds.antibody_sequence CASCADE;
TRUNCATE TABLE cds.antibody_class CASCADE;
TRUNCATE TABLE cds.sequence CASCADE;

-- sequence_germline.allele -> allele_sequence.allele
ALTER TABLE cds.sequence_germline ADD CONSTRAINT FK_cds_sequence_germline_allele FOREIGN KEY (allele) REFERENCES cds.allele_sequence (allele);
CREATE INDEX IX_cds_sequence_germline_allele ON cds.sequence_germline (allele);

-- alignment.v_call / d_call / j_call each reference allele_sequence.allele
ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_v_call FOREIGN KEY (v_call) REFERENCES cds.allele_sequence (allele);
CREATE INDEX IX_cds_alignment_v_call ON cds.alignment (v_call);

ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_d_call FOREIGN KEY (d_call) REFERENCES cds.allele_sequence (allele);
CREATE INDEX IX_cds_alignment_d_call ON cds.alignment (d_call);

ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_j_call FOREIGN KEY (j_call) REFERENCES cds.allele_sequence (allele);
CREATE INDEX IX_cds_alignment_j_call ON cds.alignment (j_call);

-- sequence_germline.run_application and alignment.run_application -> alignment_run.run_application
ALTER TABLE cds.sequence_germline ADD CONSTRAINT FK_cds_sequence_germline_run_application FOREIGN KEY (run_application) REFERENCES cds.alignment_run (run_application);
CREATE INDEX IX_cds_sequence_germline_run_application ON cds.sequence_germline (run_application);

ALTER TABLE cds.alignment ADD CONSTRAINT FK_cds_alignment_run_application FOREIGN KEY (run_application) REFERENCES cds.alignment_run (run_application);
CREATE INDEX IX_cds_alignment_run_application ON cds.alignment (run_application);

-- sequence_header.source_id -> header_source.source_id
ALTER TABLE cds.sequence_header ADD CONSTRAINT FK_cds_sequence_header_source_id FOREIGN KEY (source_id) REFERENCES cds.header_source (source_id);
CREATE INDEX IX_cds_sequence_header_source_id ON cds.sequence_header(source_id);

-- antibody_sequence (container, mab_id) -> mabmetadata (container, mab_id); composite key, so the
-- referenced mabmetadata row must exist in the same container.
ALTER TABLE cds.antibody_sequence ADD CONSTRAINT FK_cds_antibody_sequence_mab_id FOREIGN KEY (container, mab_id) REFERENCES cds.mabmetadata (container, mab_id);
CREATE INDEX IX_cds_antibody_sequence_mab_id ON cds.antibody_sequence (container, mab_id);

-- mabmetadata.mab_class_id -> antibody_class.mab_class_id
ALTER TABLE cds.mabmetadata ADD CONSTRAINT FK_cds_mabmetadata_mab_class_id FOREIGN KEY (mab_class_id) REFERENCES cds.antibody_class (mab_class_id);
CREATE INDEX IX_cds_mabmetadata_mab_class_id ON cds.mabmetadata (mab_class_id);

-- Remove the import_mabmetadata table.
-- NOTE(review): import_MAbMix_mab_id_fkey presumably references import_mabmetadata and therefore
-- has to be dropped before the table can be; confirm against the script that created it.
ALTER TABLE cds.import_MAbMix DROP CONSTRAINT import_MAbMix_mab_id_fkey;
DROP TABLE cds.import_mabmetadata;
3 changes: 1 addition & 2 deletions src/org/labkey/cds/CDSManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ public void cleanContainer(Container c)
"StudyPublication",
"Publication",
"StudyRelationship",
"MAbMetadata",
"MAbMixMetadata",
"Study",
"Assay",
Expand Down Expand Up @@ -204,7 +203,6 @@ public void cleanContainer(Container c)
"import_publication",
"import_personnel",
"import_document",
"import_mabmetadata",
"import_mabmixmetadata",
"import_lab",
"import_study",
Expand All @@ -224,6 +222,7 @@ public void cleanContainer(Container c)
"sequence_header",
"sequence_germline",
"antibody_sequence",
"MAbMetadata",
"alignment",
"preferred_allele",
"sequence",
Expand Down
2 changes: 1 addition & 1 deletion src/org/labkey/cds/CDSModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public String getName()
@Override
public @Nullable Double getSchemaVersion()
{
return 23.006;
return 23.007;
}

@Override
Expand Down
9 changes: 8 additions & 1 deletion src/org/labkey/cds/data/TSVCopyConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ public class TSVCopyConfig extends CDSImportCopyConfig
{
public TSVCopyConfig(String table, String fileName)
{
super("#TSV#", table, "cds", "import_" + table, fileName);
// With the current setup, there are 2 steps:
// Step 1: ETL data from a tab separated .txt or a .csv file to an "import_table".
// Step 2: Run another ETL to copy data from an "import_table" to its corresponding actual table.
// This breaks the BCR data import (Step 1): a BCR table (cds.antibody_sequence) has a FK to the mabmetadata table,
// but the mabmetadata table is not populated during Step 1, so the import fails with a foreign key violation.
// To get around this, we are going to ETL data from mabmetadata.txt directly into the mabmetadata table during Step 1
// (and drop import_mabmetadata table altogether since it is not referenced in any queries or reports).
super("#TSV#", table, "cds", (table.equalsIgnoreCase("mabmetadata") ? table : ("import_" + table)), fileName);
}

public TSVCopyConfig(String table)
Expand Down
16 changes: 15 additions & 1 deletion src/org/labkey/cds/data/steps/CDSImportTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,25 @@
package org.labkey.cds.data.steps;

import org.labkey.cds.data.CDSImportCopyConfig;
import org.labkey.cds.data.CSVCopyConfig;
import org.labkey.cds.data.TSVCopyConfig;

public class CDSImportTask extends ImportTask
{
private static CDSImportCopyConfig[] dataspaceTables = new CDSImportCopyConfig[]
{
// Core Tables
// bcr data, order matters due to FKs
new CSVCopyConfig("sequence"),
new CSVCopyConfig("alignment_run"),
new CSVCopyConfig("allele_sequence"),
new CSVCopyConfig("alignment"),
new CSVCopyConfig("header_source"),
new CSVCopyConfig("sequence_header"),
new CSVCopyConfig("sequence_germline"),
new CSVCopyConfig("preferred_allele"),
new CSVCopyConfig("antibody_class"),

// Core Tables
new TSVCopyConfig("Study"),
new TSVCopyConfig("StudyGroups"),
new TSVCopyConfig("Product"),
Expand All @@ -34,6 +46,8 @@ public class CDSImportTask extends ImportTask
new TSVCopyConfig("MAbMetadata"),
new TSVCopyConfig("MAbMixMetadata"),

new CSVCopyConfig("antibody_sequence"), // bcr data, order matters due to FKs

// Dependent Tables
new TSVCopyConfig("StudyPartGroupArm"),
new TSVCopyConfig("StudyPartGroupArmProduct"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ POPO32*02,AACGGTCCCCAGGGAAGGGTTTACAGG,1/20/2023
RRRS31A-37*01,CGGGAAACCCCGGACTGGTGAAGCCTTCGGGGG,1/20/2023
RRRR31D-37*01,GGGCAGCCTCCACGACTCCTAAGGGGTGTATTTGGCC,1/20/2023
POPOK32*01,CAAGAAGTCAGAGCCTCCTGTGGAATAAGAACGAT,1/20/2023
POPOK32*02,CAGAAAGTCAGAGCCTCCTGTGGAATAAGGGCAGT,1/20/2023
POPOK32*02,CAGAAAGTCAGAGCCTCCTGTGGAATAAGGGCAGT,1/20/2023
IGHV7-2*02,CAGAAAGTCAGAGTAAGGGCAGT,1/20/2023
IGHD1-76*01,TTTTTTTTTACAGGAGTCGGGCCTTCCCCGGA,1/20/2023
IGHJ7*07,TTCCTTCCCCGGACTGGTGAAGCCTTCGGAATCCCGGA,1/20/2023
IGHD5-24*07,TTCCTTATTGATTGTACGCTAAATTGGATTCGTCTGGCCCCCGGAAGCCTTCGGAATCCCGGA,1/20/2023
IGHD7-7*07,ATTGATTGTACGCTAAATTGGATTCGTCTGGCCCCCGGA,1/20/2023
IGHJ2*07,GTGATTACAATTGGGACTTCGAACACTGGGGCCGGGG,1/20/2023