Merge pull request #25 from sa501428/pin5
update landscape assessment
sa501428 authored Sep 26, 2022
2 parents a62d9c7 + 4508c7c commit da11fb4
Showing 37 changed files with 987 additions and 780 deletions.
24 changes: 11 additions & 13 deletions src/cli/Main.java
@@ -5,9 +5,7 @@

public class Main {

public static final String VERSION_NUM = "0.35.2";
public static final int DEFAULT_RESOLUTION = 5000;
public static final int DEFAULT_CUTOFF = 500;
public static final String VERSION_NUM = "0.50.0";
public static boolean printVerboseComments = false;

public static void printGeneralUsageAndExit(int exitCode) {
@@ -19,8 +17,8 @@ public static void printGeneralUsageAndExit(int exitCode) {
System.out.println("Commands:");
for (String usage : new String[]{Flags.usage, Pinpoint.usage, Cleaner.usage, APA.usage, ATA.usage, Recap.usage,
Sieve.usage, HotSpot.usage, Fusion.usage, Sift.usage, NormHack.usage, SimplePeak.usage, SimpleMax.usage,
GenerateBedpe.usage}) {
System.out.println(usage);
GenerateBedpe.usage, Split.usage}) {
System.out.println("\t" + usage);
}

System.out.println("Exit code " + exitCode);
@@ -45,15 +43,15 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti

String command = args[0].toLowerCase();
if(command.equals("flags")){
Flags.run(args, parser.getResolutionOption(Main.DEFAULT_RESOLUTION), parser.getCutoffOption(), parser.getNormalizationStringOption());
Flags.run(args, parser);
} else if (command.equals("enhance") || command.equals("amplifi") || command.equals("amplify")) {
Enhance.run(args, parser.getResolutionOption(Main.DEFAULT_RESOLUTION), parser.getNpyOption());
Enhance.run(args, parser);
} else if (command.equals("pinpoint")) {
Pinpoint.run(args, parser);
} else if (command.startsWith("clean")) {
Cleaner.run(args);
Cleaner.run(args, parser);
} else if (command.startsWith("prob")) {
Probability.run(args, parser.getResolutionOption(Main.DEFAULT_RESOLUTION), parser.getLogOption());
Probability.run(args, parser);
} else if (command.startsWith("apa")) {
APA apa = new APA(args, parser);
apa.run();
@@ -62,16 +60,16 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti
ata.run();
} else if (command.startsWith("recap") || command.startsWith("compile")) {
new Recap(args, parser);
} else if (command.startsWith("split") || command.startsWith("join")) {
new SplitOrJoin(command, args);
} else if (command.startsWith("sieve")) {
new Sieve(args, parser, command);
} else if (command.startsWith("hotspot")) {
HotSpot.run(args, parser);
} else if (command.startsWith("sift")) {
new Sift(args, parser);
} else if (command.startsWith("fuse") || command.startsWith("fusion")) {
new Fusion(args, command);
} else if (command.startsWith("fuse") || command.startsWith("fusion") || command.startsWith("join") || command.startsWith("union")) {
Fusion.run(args, command);
} else if (command.startsWith("split")) {
Split.run(args, command);
} else if (command.startsWith("seer")) {
Seer.run(args, parser);
} else if (command.startsWith("hack")) {
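The dispatch changes above hand each subcommand the full CommandLineParser instead of pre-extracted option values, so each command picks its own defaults. A minimal sketch of that pattern with a hypothetical subcommand (the class name and defaults below are illustrative, not part of this commit):

import cli.clt.CommandLineParser;

public class ExampleCommand {
    // Hypothetical subcommand following the new run(args, parser) convention.
    public static void run(String[] args, CommandLineParser parser) {
        if (args.length != 4) {
            cli.Main.printGeneralUsageAndExit(5);
        }
        int resolution = parser.getResolutionOption(5000);   // 5000 was the old Main.DEFAULT_RESOLUTION
        String norm = parser.getNormalizationStringOption(); // may be null when -k is not given
        System.out.println("example at " + resolution + " bp, norm = " + norm);
    }
}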
21 changes: 15 additions & 6 deletions src/cli/clt/APA.java
@@ -27,8 +27,8 @@
import cli.utils.apa.APADataExporter;
import cli.utils.apa.APAUtils;
import cli.utils.flags.RegionConfiguration;
import cli.utils.flags.Utils;
import cli.utils.general.HiCUtils;
import cli.utils.general.Utils;
import javastraw.feature2D.Feature2D;
import javastraw.feature2D.Feature2DList;
import javastraw.feature2D.Feature2DParser;
@@ -89,18 +89,22 @@ public APA(String[] args, CommandLineParser parser) {
loopListPath = args[2];
outputDirectory = HiCFileTools.createValidDirectory(args[3]);

useAgNorm = parser.getAggregateNormalization();
String possibleNorm = parser.getNormalizationStringOption();
useAgNorm = parser.getAggregateNormalization() || isAgNorm(possibleNorm);
if (useAgNorm) {
norm = NormalizationHandler.NONE;
} else {
String possibleNorm = parser.getNormalizationStringOption();
try {
norm = ds.getNormalizationHandler().getNormTypeFromString(possibleNorm);
} catch (Exception e) {
norm = NormalizationPicker.getFirstValidNormInThisOrder(ds, new String[]{possibleNorm, "SCALE", "KR", "NONE"});
}
}

System.out.println("Using normalization: " + norm.getLabel());
if (useAgNorm) {
System.out.println("Will apply aggregate normalization.");
}
window = parser.getWindowSizeOption(10);
minPeakDist = parser.getMinDistVal(2 * window);
maxPeakDist = parser.getMaxDistVal(Integer.MAX_VALUE);
@@ -118,6 +122,11 @@ public APA(String[] args, CommandLineParser parser) {
}
}

private boolean isAgNorm(String norm) {
String normLower = norm.toLowerCase();
return normLower.contains("ag") && normLower.contains("norm");
}

private void printUsageAndExit() {
System.out.println("apa [--min-dist minval] [--max-dist max_val] [--window window] [-r resolution]" +
" [-k NONE/VC/VC_SQRT/KR] [--corner-width corner_width] [--include-inter include_inter_chr] [--ag-norm]" +
@@ -190,10 +199,10 @@ public void run() {
try {
for (Feature2D loop : loops) {

Utils.addLocalizedData(output, zd, loop, matrixWidthL, resolution, window, norm);
int binXStart = (int) ((loop.getMidPt1() / resolution) - window);
int binYStart = (int) ((loop.getMidPt2() / resolution) - window);
Utils.addLocalBoundedRegion(output, zd, binXStart, binYStart, matrixWidthL, norm);
if (useAgNorm) {
int binXStart = (int) ((loop.getMidPt1() / resolution) - window);
int binYStart = (int) ((loop.getMidPt2() / resolution) - window);
APAUtils.addLocalRowSums(rowSum, vector1, binXStart);
APAUtils.addLocalRowSums(colSum, vector2, binYStart);
}
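With the new isAgNorm shortcut, a -k value that names aggregate normalization (for example AG_NORM) now switches APA to aggregate normalization just like --ag-norm. A standalone sketch of that substring test (the null guard is added for this sketch only; the private helper in APA assumes a non-null string):

public class AgNormCheck {
    // Mirrors the substring test in APA.isAgNorm.
    static boolean looksLikeAgNorm(String norm) {
        if (norm == null) return false; // guard added for the sketch
        String lower = norm.toLowerCase();
        return lower.contains("ag") && lower.contains("norm");
    }

    public static void main(String[] args) {
        System.out.println(looksLikeAgNorm("AG_NORM")); // true
        System.out.println(looksLikeAgNorm("SCALE"));   // false
    }
}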
112 changes: 66 additions & 46 deletions src/cli/clt/Cleaner.java
@@ -1,15 +1,17 @@
package cli.clt;

import cli.Main;
import cli.utils.clean.LoopTools;
import cli.utils.clean.OracleScorer;
import cli.utils.flags.RegionConfiguration;
import cli.utils.general.HiCUtils;
import cli.utils.general.VectorCleaner;
import javastraw.feature2D.Feature2D;
import javastraw.feature2D.Feature2DList;
import javastraw.feature2D.Feature2DParser;
import javastraw.reader.Dataset;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.basics.ChromosomeHandler;
import javastraw.reader.basics.ChromosomeTools;
import javastraw.reader.type.HiCZoom;
import javastraw.reader.type.NormalizationType;
import javastraw.tools.HiCFileTools;
@@ -22,33 +24,47 @@

public class Cleaner {

private static final int MIN_LOOP_SIZE = 25000;
public static String usage = "clean <input.hic> <loops.bedpe> <output.bedpe>";
public static String usage = "clean <input.hic> <loops.bedpe> <output.bedpe>\n" +
"clean [--threshold float] <genomeID> <loops.bedpe> <output.bedpe>";

public static void run(String[] args) {
public static void run(String[] args, CommandLineParser parser) {
if (args.length != 4) {
Main.printGeneralUsageAndExit(5);
}

Dataset dataset = HiCFileTools.extractDatasetForCLT(args[1], false, true, true);
String bedpeFile = args[2];
String outFile = args[3];
double threshold = parser.getThresholdOption(0.5);
Dataset dataset = null;
ChromosomeHandler handler;
if (args[1].endsWith(".hic")) {
dataset = HiCFileTools.extractDatasetForCLT(args[1], false, true, true);
handler = dataset.getChromosomeHandler();
} else {
handler = ChromosomeTools.loadChromosomes(args[1]);
}

ChromosomeHandler handler = dataset.getChromosomeHandler();
Feature2DList loopList = Feature2DParser.loadFeatures(bedpeFile, handler,
true, null, false);
String[] bedpeFiles = args[2].split(",");
String[] outFiles = args[3].split(",");

System.out.println("Number of loops: " + loopList.getNumTotalFeatures());
if (bedpeFiles.length != outFiles.length) {
System.err.println("Number of input and output entries don't match");
System.exit(92);
}

Feature2DList cleanList = cleanupLoops(dataset, loopList, handler);
cleanList.exportFeatureList(new File(outFile), false, Feature2DList.ListFormat.FINAL);
for (int z = 0; z < bedpeFiles.length; z++) {
Feature2DList loopList = LoopTools.loadFilteredBedpe(bedpeFiles[z], handler, true);
Feature2DList cleanList;
if (dataset != null) {
cleanList = cleanupLoops(dataset, loopList, handler);
} else {
cleanList = OracleScorer.filter(loopList, threshold);
}
cleanList.exportFeatureList(new File(outFiles[z]), false, Feature2DList.ListFormat.NA);
}
}

private static Feature2DList cleanupLoops(final Dataset dataset, Feature2DList loopList, ChromosomeHandler handler) {

int resolution = 1000;
HiCZoom zoom = new HiCZoom(resolution);

Set<HiCZoom> resolutions = getResolutions(loopList);
NormalizationType vcNorm = dataset.getNormalizationHandler().getNormTypeFromString("VC");

Map<Integer, RegionConfiguration> chromosomePairs = new ConcurrentHashMap<>();
@@ -71,22 +87,12 @@ private static Feature2DList cleanupLoops(final Dataset dataset, Feature2DList l
if (loops != null && loops.size() > 0) {
Set<Feature2D> goodLoops = new HashSet<>();

double[] vector1b = dataset.getNormalizationVector(chr1.getIndex(), zoom, vcNorm).getData().getValues().get(0);
VectorCleaner.inPlaceClean(vector1b);

double[] vector2b = vector1b;
if (chr1.getIndex() != chr2.getIndex()) {
vector2b = dataset.getNormalizationVector(chr2.getIndex(), zoom, vcNorm).getData().getValues().get(0);
VectorCleaner.inPlaceClean(vector2b);
}

Map<Integer, double[]> vectorMap = loadVectors(dataset, chr1, vcNorm, resolutions);
try {
for (Feature2D loop : loops) {
if (passesMinLoopSize(loop)) {
if (normHasHighValPixel(loop.getStart1(), loop.getEnd1(), resolution, vector1b)
&& normHasHighValPixel(loop.getStart2(), loop.getEnd2(), resolution, vector2b)) {
goodLoops.add(loop);
}
if (normIsOk(loop.getMidPt1(), (int) loop.getWidth1(), vectorMap)
&& normIsOk(loop.getMidPt2(), (int) loop.getWidth2(), vectorMap)) {
goodLoops.add(loop);
}
}
} catch (Exception e) {
@@ -97,34 +103,48 @@ && normHasHighValPixel(loop.getStart2(), loop.getEnd2(), resolution, vector2b))
goodLoopsList.addByKey(Feature2DList.getKey(chr1, chr2), new ArrayList<>(goodLoops));
}
}
//System.out.print(((int) Math.floor((100.0 * currentProgressStatus.incrementAndGet()) / maxProgressStatus.get())) + "% ");
threadPair = currChromPair.getAndIncrement();
}
});

return goodLoopsList;
}

public static boolean passesMinLoopSize(Feature2D loop) {
return dist(loop) > MIN_LOOP_SIZE;
private static Map<Integer, double[]> loadVectors(Dataset dataset, Chromosome chrom, NormalizationType vcNorm,
Set<HiCZoom> zooms) {
Map<Integer, double[]> vectorMap = new HashMap<>();
for (HiCZoom zoom : zooms) {
double[] vector = dataset.getNormalizationVector(chrom.getIndex(), zoom,
vcNorm).getData().getValues().get(0);
VectorCleaner.inPlaceClean(vector);
vectorMap.put(zoom.getBinSize(), vector);
}
return vectorMap;
}

public static long dist(Feature2D loop) {
return (Math.min(Math.abs(loop.getEnd1() - loop.getStart2()),
Math.abs(loop.getMidPt1() - loop.getMidPt2())) / MIN_LOOP_SIZE) * MIN_LOOP_SIZE;
private static Set<HiCZoom> getResolutions(Feature2DList loopList) {
Set<HiCZoom> zooms = new HashSet<>();
Set<Integer> resolutions = getResolutionSet(loopList);
for (Integer res : resolutions) {
zooms.add(new HiCZoom(res));
}
return zooms;
}

private static boolean normHasHighValPixel(long start, long end, int resolution, double[] vector) {

int x0 = (int) (start / resolution);
int xF = (int) (end / resolution) + 1;

for (int k = x0; k < xF + 1; k++) {
if (vector[k] > 1 && vector[k] >= vector[k - 1] && vector[k] >= vector[k + 1]) {
return true;
private static Set<Integer> getResolutionSet(Feature2DList loopList) {
Set<Integer> resolutions = new HashSet<>();
loopList.processLists((s, list) -> {
for (Feature2D feature2D : list) {
resolutions.add((int) feature2D.getWidth1());
resolutions.add((int) feature2D.getWidth2());
}
}
});
return resolutions;
}

return false;
private static boolean normIsOk(long pos, int resolution, Map<Integer, double[]> vMap) {
double[] vector = vMap.get(resolution);
int x = (int) (pos / resolution);
return vector[x - 1] > 0 && vector[x] > 0 && vector[x + 1] > 0; // verify neighbors also ok
}
}
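cleanupLoops now infers its working resolutions from the loop widths and keeps a loop only when the cleaned VC vector is positive at each anchor bin and at both immediate neighbors. A self-contained sketch of that neighbor test, assuming a vector in which unusable bins have already been zeroed out (the values below are made up for illustration):

public class NormNeighborCheck {
    // Same idea as Cleaner.normIsOk: the bin holding the anchor midpoint
    // and both adjacent bins must carry a positive normalization value.
    static boolean normIsOk(long pos, int resolution, double[] vector) {
        int x = (int) (pos / resolution);
        return vector[x - 1] > 0 && vector[x] > 0 && vector[x + 1] > 0;
    }

    public static void main(String[] args) {
        double[] vc = {1.2, 0.9, 0.0, 1.1, 1.3, 1.0}; // 0.0 marks a bin the vector cleaner zeroed out
        int resolution = 10000;
        System.out.println(normIsOk(45000, resolution, vc)); // bin 4, neighbors 3 and 5 positive -> true
        System.out.println(normIsOk(15000, resolution, vc)); // bin 1, neighbor bin 2 is zero -> false
    }
}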
43 changes: 10 additions & 33 deletions src/cli/clt/CommandLineParser.java
@@ -24,7 +24,6 @@

package cli.clt;

import cli.Main;
import jargs.gnu.CmdLineParser;
import javastraw.reader.Dataset;
import javastraw.reader.type.NormalizationHandler;
@@ -47,24 +46,15 @@ public class CommandLineParser extends CmdLineParser {
private final Option seedOption = addIntegerOption("seed");
private final Option normalizationTypeOption = addStringOption('k', "norm");
private final Option cutoffOption = addIntegerOption("cutoff");
private final Option minOption = addDoubleOption("min");
private final Option maxOption = addDoubleOption("max");
private final Option threadsOption = addIntegerOption("threads");
private final Option percentileOption = addIntegerOption("percentile");
private final Option windowOption = addIntegerOption("window");
private final Option minDisValOption = addIntegerOption("min-dist");
private final Option maxDistValOption = addIntegerOption("max-dist");
private final Option cornerRegionDimensionsOption = addIntegerOption("corner-width");
private final Option interChromosomalOption = addBooleanOption("include-inter");
private final Option onlyOneOption = addBooleanOption("only-one");
private final Option aggregateNormalization = addBooleanOption("ag-norm");
private final Option isLoopAnalysis = addBooleanOption("loop");

public CommandLineParser() {
}

/*
* convert Options to Objects or Primitives
*/
private final Option thresholdOption = addDoubleOption("threshold");

private boolean optionToBoolean(Option option) {
Object opt = getOptionValue(option);
@@ -86,10 +76,6 @@ private String optionToString(Option option) {
return opt == null ? null : opt.toString();
}

/*
* Actual parameters
*/

public boolean getHelpOption() {
return optionToBoolean(helpOption);
}
@@ -110,16 +96,12 @@ public boolean getLogOption() {
return optionToBoolean(logOption);
}

public int getCutoffOption() {
return optionToInteger(cutoffOption, Main.DEFAULT_CUTOFF);
public int getCutoffOption(int defaultValue) {
return optionToInteger(cutoffOption, defaultValue);
}

public double getMinOption(double defaultVal) {
return optionToDouble(minOption, defaultVal);
}

public double getMaxOption(double defaultVal) {
return optionToDouble(maxOption, defaultVal);
public double getThresholdOption(double defaultVal) {
return optionToDouble(thresholdOption, defaultVal);
}

public int getResolutionOption(int defaultVal) {
@@ -138,7 +120,6 @@ public String getNormalizationStringOption() {
return optionToString(normalizationTypeOption);
}


public boolean getAggregateNormalization() {
return optionToBoolean(aggregateNormalization);
}
@@ -163,14 +144,6 @@ public boolean getIncludeInterChromosomal() {
return optionToBoolean(interChromosomalOption);
}

public int getCornerRegionDimensionOption(int val) {
return optionToInteger(cornerRegionDimensionsOption, val);
}

public int getNumThreads(int val) {
return optionToInteger(threadsOption, val);
}

public int getPercentileOption(int val) {
return optionToInteger(percentileOption, val);
}
@@ -183,4 +156,8 @@ public NormalizationType getNormOrDefaultScale(Dataset ds) {
return NormalizationHandler.SCALE;
}
}

public boolean getOnlyOneOption() {
return optionToBoolean(onlyOneOption);
}
}
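These parser changes pair with the dispatch refactor in Main.java: each command receives the parser and reads its own options, such as the new --threshold consumed by clean. A sketch of the wiring, assuming the standard jargs parse/getRemainingArgs flow that Main relies on (argument values are illustrative):

import cli.clt.Cleaner;
import cli.clt.CommandLineParser;

public class CleanInvocationSketch {
    public static void main(String[] argv) throws Exception {
        // Flags are placed before the positional arguments so a plain jargs parse picks them up.
        String[] cmd = {"--threshold", "0.7", "clean", "hg38", "loops.bedpe", "cleaned.bedpe"};
        CommandLineParser parser = new CommandLineParser();
        parser.parse(cmd);                          // throws on unknown or malformed options
        String[] args = parser.getRemainingArgs();  // {"clean", "hg38", "loops.bedpe", "cleaned.bedpe"}
        Cleaner.run(args, parser);                  // reads the threshold via getThresholdOption(0.5)
    }
}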