Skip to content

Commit

Permalink
Add categorical patient level filtering for generic-assay-data-counts (
Browse files Browse the repository at this point in the history
…#11155)

* Add patient level filtering for aggregation

* Patient level filtering works for non-NA

* Categorical patient level filtering & clean up

* Use new generic assay table schema
  • Loading branch information
fuzhaoyuan authored Dec 3, 2024
1 parent cc16f2c commit b72ddaa
Show file tree
Hide file tree
Showing 12 changed files with 239 additions and 88 deletions.
7 changes: 7 additions & 0 deletions src/main/java/org/cbioportal/model/GenericAssayDataCount.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ public class GenericAssayDataCount implements Serializable {
private String value;
private Integer count;

/** Creates an empty count; {@code value} and {@code count} are left {@code null}. */
public GenericAssayDataCount() {}

/**
 * Creates a count for a single data value.
 *
 * @param value the data value being counted, e.g. a category label such as {@code "Loss"} or {@code "NA"}
 * @param count the count associated with {@code value}
 */
public GenericAssayDataCount(String value, Integer count) {
this.value = value;
this.count = count;
}

/**
 * Returns the data value this count refers to.
 *
 * @return the stored value; {@code null} if it was never assigned
 */
public String getValue() {
return value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ public class GenericAssayDataCountItem implements Serializable {
private String stableId;
private List<GenericAssayDataCount> counts;

/** Creates an empty item; {@code stableId} and {@code counts} are left {@code null}. */
public GenericAssayDataCountItem() {}

/**
 * Creates an item that groups the per-value counts of one generic assay entity.
 *
 * @param stableId stable id of the generic assay entity the counts belong to
 * @param counts   the per-value counts; the list is stored as given, without copying
 */
public GenericAssayDataCountItem(String stableId, List<GenericAssayDataCount> counts) {
this.stableId = stableId;
this.counts = counts;
}

/**
 * Returns the stable id of the generic assay entity these counts belong to.
 *
 * @return the stored stable id; {@code null} if it was never assigned
 */
public String getStableId() {
return stableId;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,28 @@ private CategorizedGenericAssayDataCountFilter extractGenericAssayDataCountFilte

CategorizedGenericAssayDataCountFilter.Builder builder = CategorizedGenericAssayDataCountFilter.getBuilder();

// TODO: Support patient level profiles and data filtering
List<String> sampleCategoricalProfileTypes = genericAssayProfilesMap.get(DataSource.SAMPLE)
.stream().filter(profile -> profile.getDatatype().equals("CATEGORICAL") || profile.getDatatype().equals("BINARY"))
.map(profile -> profile.getStableId().replace(profile.getCancerStudyIdentifier() + "_", ""))
.toList();
// No BINARY in the database yet
List<String> sampleNumericalProfileTypes = genericAssayProfilesMap.get(DataSource.SAMPLE)
.stream().filter(profile -> profile.getDatatype().equals("LIMIT-VALUE"))
.map(profile -> profile.getStableId().replace(profile.getCancerStudyIdentifier() + "_", ""))
.toList();
builder.setSampleNumericalGenericAssayDataFilters(studyViewFilter.getGenericAssayDataFilters().stream()
.filter(genericAssayDataFilter -> sampleNumericalProfileTypes.contains(genericAssayDataFilter.getProfileType()))
.toList());
List<String> sampleCategoricalProfileTypes = genericAssayProfilesMap.get(DataSource.SAMPLE)
.stream().filter(profile -> profile.getDatatype().equals("CATEGORICAL") || profile.getDatatype().equals("BINARY"))
.map(profile -> profile.getStableId().replace(profile.getCancerStudyIdentifier() + "_", ""))
.toList();
builder.setSampleCategoricalGenericAssayDataFilters(studyViewFilter.getGenericAssayDataFilters().stream()
.filter(genericAssayDataFilter -> sampleCategoricalProfileTypes.contains(genericAssayDataFilter.getProfileType()))
.toList());
List<String> patientCategoricalProfileTypes = genericAssayProfilesMap.get(DataSource.PATIENT)
.stream().filter(profile -> profile.getDatatype().equals("CATEGORICAL") || profile.getDatatype().equals("BINARY"))
.map(profile -> profile.getStableId().replace(profile.getCancerStudyIdentifier() + "_", ""))
.toList();
builder.setPatientCategoricalGenericAssayDataFilters(studyViewFilter.getGenericAssayDataFilters().stream()
.filter(genericAssayDataFilter -> patientCategoricalProfileTypes.contains(genericAssayDataFilter.getProfileType()))
.toList());
return builder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ public class GenericAssayDataFilter extends DataFilter implements Serializable {
private String stableId;
private String profileType;

/** Creates an empty filter; {@code stableId} and {@code profileType} are left {@code null}. */
public GenericAssayDataFilter() {}

/**
 * Creates a filter that targets one generic assay entity within a profile type.
 *
 * @param stableId    stable id of the generic assay entity to filter on
 * @param profileType profile type to filter on, e.g. {@code "armlevel_cna"}
 */
public GenericAssayDataFilter(String stableId, String profileType) {
this.stableId = stableId;
this.profileType = profileType;
}

/**
 * Returns the stable id of the generic assay entity this filter targets.
 *
 * @return the stored stable id; {@code null} if it was never assigned
 */
public String getStableId() {
return stableId;
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/db-scripts/clickhouse/clickhouse.sql
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ CREATE TABLE IF NOT EXISTS generic_assay_data_derived
profile_type String
)
ENGINE = MergeTree()
ORDER BY (profile_type, entity_stable_id, sample_unique_id);
ORDER BY (profile_type, entity_stable_id, patient_unique_id, sample_unique_id);

INSERT INTO TABLE generic_assay_data_derived
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@
<include refid="applySampleTreatmentFilter"/>
</if>
<!-- Apply Genomic Data Filter -->
<!-- TODO: add patient level data filtering -->
<if test="studyViewFilterHelper.studyViewFilter.genomicDataFilters != null and !studyViewFilterHelper.studyViewFilter.genomicDataFilters.isEmpty()">
<foreach item="genomicDataFilter" collection="studyViewFilterHelper.studyViewFilter.genomicDataFilters" open="INTERSECT" separator="INTERSECT">
(
Expand All @@ -154,25 +153,9 @@
</foreach>
</if>
<!-- Apply Generic Assay Data Filter -->
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
(
<include refid="numericalGenericAssayDataFilter">
<property name="unique_id" value="sample_unique_id"/>
<property name="table_name" value="generic_assay_data_derived"/>
</include>
)
</foreach>
</if>
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
<include refid="categoricalGenericAssayDataCountFilter">
<property name="unique_id" value="sample_unique_id"/>
<property name="table_name" value="generic_assay_data_derived"/>
</include>
</foreach>
</if>

<if test="studyViewFilterHelper.studyViewFilter.genericAssayDataFilters != null and !studyViewFilterHelper.studyViewFilter.genericAssayDataFilters.isEmpty()">
<include refid="applyGenericAssayDataFilter"/>
</if>
<!-- Apply Clinical Data Filter -->
<if test="studyViewFilterHelper.studyViewFilter.clinicalDataFilters != null and !studyViewFilterHelper.studyViewFilter.clinicalDataFilters.isEmpty()">
<include refid="applyClinicalDataCountFilter"/>
Expand Down Expand Up @@ -534,8 +517,39 @@
</foreach>
</sql>

<!--
  Emits one parenthesised sub-query per generic assay data filter, each introduced by INTERSECT.
  The filters come from studyViewFilterHelper.categorizedGenericAssayDataCountFilter and are
  handled in three groups: sample numerical, sample categorical and patient categorical.
  A group is skipped entirely when its filter list is null or empty.
  NOTE(review): because every group opens with INTERSECT, this fragment presumably has to be
  included directly after an existing SELECT; confirm against the including statement.
-->
<sql id="applyGenericAssayDataFilter">
<!-- Sample-level numerical filters -->
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleNumericalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
(
<include refid="numericalGenericAssayDataFilter">
<property name="type" value="sample"/>
</include>
)
</foreach>
</if>
<!-- Sample-level categorical filters: the included fragment receives type = sample -->
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getSampleCategoricalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
(
<include refid="categoricalGenericAssayDataCountFilter">
<property name="type" value="sample"/>
</include>
)
</foreach>
</if>
-- patient level profile only have categorical for now
<!-- Patient-level categorical filters: same included fragment, but with type = patient -->
<if test="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getPatientCategoricalGenericAssayDataFilters() != null and !studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getPatientCategoricalGenericAssayDataFilters().isEmpty()">
<foreach item="genericAssayDataFilter" collection="studyViewFilterHelper.categorizedGenericAssayDataCountFilter.getPatientCategoricalGenericAssayDataFilters()" open="INTERSECT" separator="INTERSECT">
(
<include refid="categoricalGenericAssayDataCountFilter">
<property name="type" value="patient"/>
</include>
)
</foreach>
</if>
</sql>

<sql id="selectAllGenericAssays">
SELECT sample_unique_id, value, datatype
SELECT sample_unique_id, patient_unique_id, value, datatype
FROM generic_assay_data_derived
WHERE profile_type = #{genericAssayDataFilter.profileType}
AND entity_stable_id = #{genericAssayDataFilter.stableId}
Expand Down Expand Up @@ -590,10 +604,18 @@
</sql>

<sql id="categoricalGenericAssayDataCountFilter">
SELECT ${unique_id}
SELECT sample_unique_id
FROM sample_derived sd
LEFT JOIN (<include refid="selectAllGenericAssays"/>) AS generic_assay_query
ON sd.sample_unique_id = generic_assay_query.sample_unique_id
LEFT JOIN (<include refid="selectAllGenericAssays"/>) AS generic_assay_query
ON
<choose>
<when test="'${type}' == 'sample'">
sd.sample_unique_id = generic_assay_query.sample_unique_id
</when>
<otherwise>
sd.patient_unique_id = generic_assay_query.patient_unique_id
</otherwise>
</choose>
<where>
datatype != 'LIMIT-VALUE'
<foreach item="dataFilterValue" collection="genericAssayDataFilter.values" open=" AND ((" separator=") OR (" close="))">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,12 @@
</select>

<select id="getGenericAssayDataCounts" resultMap="GenericAssayDataCountItemResultMap">
<bind name="profileType" value="genericAssayDataFilters[0].profileType"/>
<bind name="profileType" value="genericAssayDataFilters[0].profileType"/>
WITH generic_assay_query AS (
SELECT
entity_stable_id AS stableId,
value,
cast(count(*) AS INTEGER) AS count
cast(count(distinct patient_unique_id) AS INTEGER) AS count
FROM generic_assay_data_derived
<where>
<!-- Table creation in clickhouse.sql has ensured no NA values but extra caution is always appreciated -->
Expand Down Expand Up @@ -339,9 +339,24 @@
<!-- The NA count is calculated specially: it is the total sample count (or the total patient count for patient-level profiles) minus the non-NA count. Therefore
these 2 coalesces are here in case the non-NA subquery returns empty results and we still need to provide the properties needed to construct the target object -->
SELECT
coalesce((SELECT stableId FROM generic_assay_data_sum LIMIT 1), #{genericAssayDataFilters[0].stableId}) as stableId,
'NA' as value,
cast(((SELECT * FROM (<include refid="getTotalSampleCount"/>)) - coalesce((SELECT gad_count FROM generic_assay_data_sum LIMIT 1), 0)) as INTEGER) as count
coalesce((SELECT stableId FROM generic_assay_data_sum LIMIT 1), #{genericAssayDataFilters[0].stableId}) as stableId,
'NA' as value,
cast((
multiIf(
(
SELECT count() > 0
FROM genetic_profile
WHERE patient_level = 1
AND stable_id IN
<foreach item="studyId" collection="studyViewFilterHelper.studyViewFilter.studyIds" open="(" separator="," close=")">
concat(#{studyId}, '_', #{profileType})
</foreach>
),
(SELECT * FROM (<include refid="getTotalPatientCount"/>)),
(SELECT * FROM (<include refid="getTotalSampleCount"/>))
)
- coalesce((SELECT gad_count FROM generic_assay_data_sum LIMIT 1), 0))
as INTEGER) as count
</select>

<!-- for /mutation-data-counts/fetch (returns GenomicDataCountItem objects) mutation counts pie chart part -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ public void getSamplesFilteredByClinicalData() {
)
);
var filteredSamples5 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
// 4 acc_tcga + 4 study_genie_pub samples with "NA" AGE data or no AGE data
assertEquals(8, filteredSamples5.size());
// 4 acc_tcga + 7 study_genie_pub samples with "NA" AGE data or no AGE data
assertEquals(11, filteredSamples5.size());

// NA + UNKNOWN
studyViewFilter.setClinicalDataFilters(
Expand All @@ -147,8 +147,8 @@ public void getSamplesFilteredByClinicalData() {
)
);
var filteredSamples6 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
// 8 NA + 1 UNKNOWN
assertEquals(9, filteredSamples6.size());
// 11 NA + 1 UNKNOWN
assertEquals(12, filteredSamples6.size());
}

private DataFilterValue newDataFilterValue(Double start, Double end, String value) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.cbioportal.model.GenericAssayDataCount;
import org.cbioportal.model.GenericAssayDataCountItem;
import org.cbioportal.persistence.helper.StudyViewFilterHelper;
import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig;
import org.cbioportal.web.parameter.GenericAssayDataFilter;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.jdbc.AutoConfigureTestDatabase;
import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.annotation.DirtiesContext;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Integration tests for {@code StudyViewMapper.getGenericAssayDataCounts}.
 *
 * <p>Each test requests the counts of one generic assay entity within a single study and
 * compares the returned items with the expected per-value counts, ignoring element order.
 */
@RunWith(SpringRunner.class)
@Import(MyBatisConfig.class)
@DataJpaTest
@DirtiesContext
@AutoConfigureTestDatabase(replace= AutoConfigureTestDatabase.Replace.NONE)
@ContextConfiguration(initializers = AbstractTestcontainers.Initializer.class)
public class GenericAssayDataCountsTest extends AbstractTestcontainers {

    private static final String ACC_TCGA = "acc_tcga";
    private static final String STUDY_GENIE_PUB = "study_genie_pub";

    @Autowired
    private StudyViewMapper studyViewMapper;

    /** Counts for entity {@code 1p_status} of profile type {@code armlevel_cna} in {@code acc_tcga}. */
    @Test
    public void getSampleCategoricalGenericAssayDataCounts() {
        GenericAssayDataCountItem expectedItem = new GenericAssayDataCountItem(
            "1p_status",
            List.of(
                new GenericAssayDataCount("Loss", 1),
                new GenericAssayDataCount("Gain", 1),
                new GenericAssayDataCount("Unchanged", 1),
                new GenericAssayDataCount("NA", 1)
            )
        );

        List<GenericAssayDataCountItem> fetched = fetchCounts(ACC_TCGA, "1p_status", "armlevel_cna");

        assertSameCounts(fetched, List.of(expectedItem));
    }

    /** Counts for entity {@code DMETS_DX_ADRENAL} of profile type {@code distant_mets} in {@code study_genie_pub}. */
    @Test
    public void getPatientCategoricalGenericAssayDataCounts() {
        GenericAssayDataCountItem expectedItem = new GenericAssayDataCountItem(
            "DMETS_DX_ADRENAL",
            List.of(
                new GenericAssayDataCount("No", 9),
                new GenericAssayDataCount("Yes", 1),
                new GenericAssayDataCount("NA", 14)
            )
        );

        List<GenericAssayDataCountItem> fetched = fetchCounts(STUDY_GENIE_PUB, "DMETS_DX_ADRENAL", "distant_mets");

        assertSameCounts(fetched, List.of(expectedItem));
    }

    /**
     * Queries the mapper for the counts of a single generic assay entity in a single study.
     *
     * @param studyId     the only study id placed on the study view filter
     * @param stableId    stable id of the generic assay entity
     * @param profileType profile type of the generic assay entity
     * @return the count items returned by the mapper
     */
    private List<GenericAssayDataCountItem> fetchCounts(String studyId, String stableId, String profileType) {
        StudyViewFilter filter = new StudyViewFilter();
        filter.setStudyIds(List.of(studyId));

        List<GenericAssayDataFilter> assayFilters = List.of(new GenericAssayDataFilter(stableId, profileType));
        return studyViewMapper.getGenericAssayDataCounts(
            StudyViewFilterHelper.build(filter, null, null),
            assayFilters
        );
    }

    /** Field-by-field comparison of two count item lists that ignores the order of collection elements. */
    private static void assertSameCounts(List<GenericAssayDataCountItem> actual,
                                         List<GenericAssayDataCountItem> expected) {
        assertThat(actual)
            .usingRecursiveComparison()
            .ignoringCollectionOrder()
            .isEqualTo(expected);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
@DirtiesContext
@AutoConfigureTestDatabase(replace= AutoConfigureTestDatabase.Replace.NONE)
@ContextConfiguration(initializers = AbstractTestcontainers.Initializer.class)
public class MutationCountsTest extends AbstractTestcontainers {
public class MutationDataCountsTest extends AbstractTestcontainers {

private static final String STUDY_TCGA_PUB = "study_tcga_pub";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ public void getMutationCounts() {
assertEquals(2, findClinicaDataCount(mutationsCounts, "4"));
assertEquals(4, findClinicaDataCount(mutationsCounts, "2"));
assertEquals(2, findClinicaDataCount(mutationsCounts, "1"));
// 1 empty string + 1 'NAN' + 12 samples with no data
assertEquals(14, findClinicaDataCount(mutationsCounts, "NA"));
// 1 empty string + 1 'NAN' + 15 samples with no data
assertEquals(17, findClinicaDataCount(mutationsCounts, "NA"));
}

@Test
Expand Down
Loading

0 comments on commit b72ddaa

Please sign in to comment.