cBioPortal · alisman · Dec 3, 2024 · Nov 27, 2024 · Dec 2, 2024
diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java
@@ -20,6 +20,7 @@
 import org.cbioportal.persistence.enums.DataSource;
 import org.cbioportal.persistence.helper.AlterationFilterHelper;
 import org.cbioportal.persistence.helper.StudyViewFilterHelper;
+import org.cbioportal.service.util.StudyViewColumnarServiceUtil;
 import org.cbioportal.web.parameter.ClinicalDataType;
 import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
 import org.cbioportal.web.parameter.GenericAssayDataFilter;
@@ -28,7 +29,6 @@
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Repository;
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumMap;
 import java.util.HashMap;
@@ -90,27 +90,7 @@ public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterContext
     @Override
     public List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext) {
         var sampleCounts = mapper.getMolecularProfileSampleCounts(createStudyViewFilterHelper(studyViewFilterContext));
-        Map<String, List<GenomicDataCount>> countsPerType = sampleCounts.stream()
-            .collect((Collectors.groupingBy(GenomicDataCount::getValue)));
-
-        // different cancer studies combined into one cohort will have separate molecular profiles
-        // of a given type (e.g. mutation).  We need to merge the counts for these
-        // different profiles based on the type and choose a label
-        // this code just picks the first label, which assumes that the labels will match
-        // across studies. 
-        List<GenomicDataCount> mergedCounts = new ArrayList<>();
-        for (Map.Entry<String,List<GenomicDataCount>> entry : countsPerType.entrySet()) {
-            var dc = new GenomicDataCount();
-            dc.setValue(entry.getKey());
-            // here just snatch the label of the first profile
-            dc.setLabel(entry.getValue().get(0).getLabel());
-            Integer sum = entry.getValue().stream()
-                .map(x -> x.getCount())
-                .collect(Collectors.summingInt(Integer::intValue));
-            dc.setCount(sum);
-            mergedCounts.add(dc);
-        }
-        return mergedCounts;
+        return StudyViewColumnarServiceUtil.mergeGenomicDataCounts(sampleCounts);
 
     }
 

diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java
@@ -214,7 +214,7 @@ public List<CaseListDataCount> getCaseListDataCounts(StudyViewFilter studyViewFi
         // the study view merges case lists by type across studies
         // type is determined by the suffix of case list name (after study name)
         var caseListDataCountsPerStudy = studyViewRepository.getCaseListDataCountsPerStudy(createContext(studyViewFilter));
-        return mergeCaseListCounts(caseListDataCountsPerStudy);
+        return StudyViewColumnarServiceUtil.mergeCaseListCounts(caseListDataCountsPerStudy);
     }
 
     @Cacheable(
@@ -294,85 +294,11 @@ private List<ClinicalDataCountItem> generateDataCountItemsFromDataCounts(List<Cl
             .entrySet().parallelStream().map(e -> {
                 ClinicalDataCountItem item = new ClinicalDataCountItem();
                 item.setAttributeId(e.getKey());
-                item.setCounts(normalizeDataCounts(e.getValue()));
+                item.setCounts(StudyViewColumnarServiceUtil.normalizeDataCounts(e.getValue()));
                 return item;
             }).toList();
     }
 
-    /**
-     * Normalizes data counts by merging attribute values in a case-insensitive way.
-     * For example attribute values "TRUE", "True", and 'true' will be merged into a single aggregated count.
-     * This method assumes that all the counts in the given dataCounts list has the same attributeId.
-     * 
-     * @param dataCounts list of data counts for a single attribute
-     * 
-     * @return normalized list of data counts
-     */
-    private List<ClinicalDataCount> normalizeDataCounts(List<ClinicalDataCount> dataCounts) {
-        Collection<ClinicalDataCount> normalizedDataCounts = dataCounts
-            .stream()
-            .collect(
-                Collectors.groupingBy(
-                    c -> c.getValue().toLowerCase(),
-                    Collectors.reducing(new ClinicalDataCount(), (count1, count2) -> {
-                        // assuming attribute ids are the same for all data counts, just pick the first one
-                        String attributeId = 
-                            count1.getAttributeId() != null
-                                ? count1.getAttributeId() 
-                                : count2.getAttributeId();
-
-                        // pick the value in a deterministic way by prioritizing lower case over upper case.
-                        // for example, 'True' will be picked in case of 2 different values like 'TRUE', and 'True',
-                        // and 'true' will be picked in case of 3 different values like 'TRUE', 'True', and 'true'
-                        String value = count1.getValue() != null 
-                            ? count1.getValue()
-                            : count2.getValue();
-                        if (count1.getValue() != null && count2.getValue() != null) {
-                            value = count1.getValue().compareTo(count2.getValue()) > 0 
-                                ? count1.getValue()
-                                : count2.getValue();
-                        }
-
-                        // aggregate counts for the merged values 
-                        Integer count = (count1.getCount() != null ? count1.getCount(): 0) +
-                            (count2.getCount() != null ? count2.getCount(): 0);
-
-                        ClinicalDataCount aggregated = new ClinicalDataCount();
-                        aggregated.setAttributeId(attributeId);
-                        aggregated.setValue(value);
-                        aggregated.setCount(count);
-                        return aggregated;
-                    })
-                )
-            )
-            .values();
-
-        return new ArrayList<>(normalizedDataCounts);
-    }
-
-    public static List<CaseListDataCount> mergeCaseListCounts(List<CaseListDataCount> counts) {
-        Map<String, List<CaseListDataCount>> countsPerListType = counts.stream()
-            .collect((Collectors.groupingBy(CaseListDataCount::getValue)));
-
-        // different cancer studies combined into one cohort will have separate case lists
-        // of a given type (e.g. rppa).  We need to merge the counts for these
-        // different lists based on the type and choose a label
-        // this code just picks the first label, which assumes that the labels will match for a give type
-        List<CaseListDataCount> mergedCounts = new ArrayList<>();
-        for (Map.Entry<String,List<CaseListDataCount>> entry : countsPerListType.entrySet()) {
-            var dc = new CaseListDataCount();
-            dc.setValue(entry.getKey());
-            // here just snatch the label of the first profile
-            dc.setLabel(entry.getValue().get(0).getLabel());
-            Integer sum = entry.getValue().stream()
-                .map(x -> x.getCount())
-                .collect(Collectors.summingInt(Integer::intValue));
-            dc.setCount(sum);
-            mergedCounts.add(dc);
-        }
-        return mergedCounts;
-    }
-
 
 
 }
diff --git a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java
@@ -1,9 +1,13 @@
 package org.cbioportal.service.util;
 
+import org.cbioportal.model.CaseListDataCount;
 import org.cbioportal.model.ClinicalAttribute;
 import org.cbioportal.model.ClinicalDataCount;
 import org.cbioportal.model.ClinicalDataCountItem;
+import org.cbioportal.model.GenomicDataCount;
+
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -58,6 +62,98 @@ public static List<ClinicalDataCountItem> addClinicalDataCountsForMissingAttribu
 
         return result;
     }
+
+    /**
+     * Merges molecular profile sample counts by profile type (the count's value), e.g. mutation.
+     * Counts are summed per type; the first label in each group is kept, assuming labels match across studies.
+     */
+    public static List<GenomicDataCount> mergeGenomicDataCounts(List<GenomicDataCount> sampleCounts) {
+        Map<String, List<GenomicDataCount>> profilesByType = sampleCounts.stream()
+            .collect(Collectors.groupingBy(GenomicDataCount::getValue));
+        List<GenomicDataCount> merged = new ArrayList<>();
+        profilesByType.forEach((type, profiles) -> {
+            var total = new GenomicDataCount();
+            total.setValue(type);
+            total.setLabel(profiles.get(0).getLabel());
+            total.setCount(profiles.stream().mapToInt(GenomicDataCount::getCount).sum());
+            merged.add(total);
+        });
+        return merged;
+    }
+
+    /**
+     * Merges per-study case list counts into a single count per case list type (e.g. rppa), since studies
+     * combined into one cohort each have their own case list of a given type. Counts are summed per type and
+     * the label of the first list in each group is kept, which assumes that labels match for a given type.
+     */
+    public static List<CaseListDataCount> mergeCaseListCounts(List<CaseListDataCount> counts) {
+        Map<String, List<CaseListDataCount>> listsByType = counts.stream()
+            .collect(Collectors.groupingBy(CaseListDataCount::getValue));
+
+        List<CaseListDataCount> merged = new ArrayList<>();
+        for (var group : listsByType.entrySet()) {
+            List<CaseListDataCount> sameTypeLists = group.getValue();
+            var total = new CaseListDataCount();
+            total.setValue(group.getKey());
+            total.setLabel(sameTypeLists.get(0).getLabel());
+            total.setCount(sameTypeLists.stream()
+                .map(CaseListDataCount::getCount)
+                .collect(Collectors.summingInt(Integer::intValue)));
+            merged.add(total);
+        }
+        return merged;
+    }
+
+    /**
+     * Normalizes data counts by merging attribute values in a case-insensitive way.
+     * For example attribute values "TRUE", "True", and 'true' will be merged into a single aggregated count.
+     * This method assumes that all the counts in the given dataCounts list have the same attributeId.
+     * Values are case-folded with {@code Locale.ROOT}, so grouping does not depend on the JVM default locale.
+     *
+     * @param dataCounts list of data counts for a single attribute; each value must be non-null
+     * @return normalized list of data counts, one per distinct case-insensitive value, in no guaranteed order
+     */
+    public static List<ClinicalDataCount> normalizeDataCounts(List<ClinicalDataCount> dataCounts) {
+        Collection<ClinicalDataCount> normalizedDataCounts = dataCounts
+            .stream()
+            .collect(
+                Collectors.groupingBy(
+                    c -> c.getValue().toLowerCase(java.util.Locale.ROOT), // locale-independent grouping key
+                    Collectors.reducing(new ClinicalDataCount(), (count1, count2) -> {
+                        // assuming attribute ids are the same for all data counts, just pick the first one
+                        String attributeId =
+                            count1.getAttributeId() != null
+                                ? count1.getAttributeId()
+                                : count2.getAttributeId();
+
+                        // pick the value in a deterministic way by prioritizing lower case over upper case.
+                        // for example, 'True' will be picked in case of 2 different values like 'TRUE', and 'True',
+                        // and 'true' will be picked in case of 3 different values like 'TRUE', 'True', and 'true'
+                        String value = count1.getValue() != null
+                            ? count1.getValue()
+                            : count2.getValue();
+                        if (count1.getValue() != null && count2.getValue() != null) {
+                            value = count1.getValue().compareTo(count2.getValue()) > 0
+                                ? count1.getValue()
+                                : count2.getValue();
+                        }
+
+                        // aggregate counts for the merged values
+                        Integer count = (count1.getCount() != null ? count1.getCount(): 0) +
+                            (count2.getCount() != null ? count2.getCount(): 0);
+
+                        ClinicalDataCount aggregated = new ClinicalDataCount();
+                        aggregated.setAttributeId(attributeId);
+                        aggregated.setValue(value);
+                        aggregated.setCount(count);
+                        return aggregated;
+                    })
+                )
+            )
+            .values();
+
+        return new ArrayList<>(normalizedDataCounts);
+    }
 
 
 }
diff --git a/...java/org/cbioportal/persistence/mybatisclickhouse/StudyViewCaseListSamplesCountsTest.java b/...java/org/cbioportal/persistence/mybatisclickhouse/StudyViewCaseListSamplesCountsTest.java
@@ -3,7 +3,7 @@
 import org.cbioportal.persistence.helper.StudyViewFilterHelper;
 import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig;
 
-import org.cbioportal.service.impl.StudyViewColumnarServiceImpl;
+import org.cbioportal.service.util.StudyViewColumnarServiceUtil;
 import org.cbioportal.web.parameter.StudyViewFilter;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -102,7 +102,7 @@ public void getMolecularProfileCountsAcrossStudies() {
 
         var unMergedCounts =  studyViewMapper.getCaseListDataCountsPerStudy(StudyViewFilterHelper.build(studyViewFilter, null, null) );
 
-        var caseListCountsMerged = StudyViewColumnarServiceImpl.mergeCaseListCounts(
+        var caseListCountsMerged = StudyViewColumnarServiceUtil.mergeCaseListCounts(
             unMergedCounts
         );