Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/origin/master' into lrGS_batchView
Browse files Browse the repository at this point in the history
  • Loading branch information
leonschuetz committed Oct 8, 2024
2 parents d6b6925 + c2512c3 commit a78c7ce
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 21 deletions.
16 changes: 16 additions & 0 deletions bin/GSvar_filters.ini
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,22 @@ Variant quality qual=10 depth=0 mapq=40 strand_bias=20 allele_balance=40 min_occ
Variant type HIGH=frameshift_variant,splice_acceptor_variant,splice_donor_variant,start_lost,start_retained_variant,stop_gained,stop_lost MODERATE=inframe_deletion,inframe_insertion,missense_variant LOW=splice_region_variant MODIFIER=
SNVs only invert=no disabled

#dominant relaxed lrGS (add phenotype region)
Allele frequency max_af=1
Allele frequency (sub-populations) max_af=1
Count NGSD max_count=5 ignore_genotype=no mosaic_as_het=no
Impact impact=HIGH,MODERATE,LOW
Filter columns entries=low_conf_region action=REMOVE disabled
Splice effect SpliceAi=0.5 MaxEntScan=HIGH splice_site_only=yes action=FILTER
Predicted pathogenic min=2 action=FILTER skip_high_impact=no cutoff_phylop=1.6 cutoff_cadd=20 cutoff_revel=0.9 cutoff_alphamissense=0.56 disabled
Annotated pathogenic sources=ClinVar,HGMD also_likely_pathogenic=yes action=KEEP
Allele frequency max_af=3
Filter columns entries=low_mappability,mosaic action=REMOVE
Classification NGSD classes=1,2 action=REMOVE
Classification NGSD classes=4,5 action=KEEP
Variant quality qual=10 depth=0 mapq=40 strand_bias=20 allele_balance=40 min_occurences=1 min_af=0.2 max_af=1
SNVs only invert=no disabled

#---

#somatic
Expand Down
39 changes: 37 additions & 2 deletions src/ExtractMethylationData/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ class ConcreteTool
addOutfile("out", "Output BED file containing combined methylation info of provided loci. If unset, writes to STDOUT.", true);
addInfile("ref", "Reference genome FASTA file. If unset 'reference_genome' from the 'settings.ini' file is used.", true, false);
addFlag("add_methylation_types", "Also report 5mC (m) and 5hmC (h) entries as separate columns");
addFlag("skip_invalid_sites", "Skip invalid CpG sites instead of aborting.");

//changelog
changeLog(2024, 6, 26, "Initial commit.");
changeLog(2024, 7, 18, "Added option to add separate columns for 5mC/5hmC.");
changeLog(2024, 9, 27, "Added option to ignore invalid CpG sites.");
}

virtual void main()
Expand All @@ -42,6 +44,7 @@ class ConcreteTool
QString output_file_path = getOutfile("out").toUtf8();
QString ref_file = getInfile("ref");
bool add_methylation_types = getFlag("add_methylation_types");
bool skip_invalid_sites = getFlag("skip_invalid_sites");
if (ref_file=="") ref_file = Settings::string("reference_genome", true);
if (ref_file=="") THROW(CommandLineParsingException, "Reference genome FASTA unset in both command-line and settings.ini file!");

Expand Down Expand Up @@ -77,15 +80,47 @@ class ConcreteTool
BedLine bed_line = loci[i];

//validate length (CpG only):
if (bed_line.length() != 2) THROW(ArgumentException, "A CpG site has to be 2 bp long! " + bed_line.toString(true));
if (bed_line.length() != 2)
{
if (skip_invalid_sites)
{
qDebug() << "A CpG site has to be 2 bp long! " + bed_line.toString(true);
continue;
}
else THROW(ArgumentException, "A CpG site has to be 2 bp long! " + bed_line.toString(true));
}
QByteArray strand = bed_line.annotations().at(0).trimmed();
if (!((strand == "+") || (strand == "-"))) THROW(ArgumentException, "Strand has to be '+' or '-'! " + bed_line.toString(true));

//get position of C/G
int pos = ((strand == "+")?bed_line.start():bed_line.end());
QByteArray mod_base = ((strand == "+")?"C":"G");
//validate C/G position (C on '+' strand, G on '-' strand)
if (ref_idx.seq(bed_line.chr(), pos, 1, true) != mod_base) THROW(ArgumentException, "Invalid " + mod_base + " position (is actually " + ref_idx.seq(bed_line.chr(), pos, pos, true) + ") (" + QByteArray::number(pos) + " for CpG site)! " + bed_line.toString(true));
try
{
if (ref_idx.seq(bed_line.chr(), pos, 1, true) != mod_base)
{
if (skip_invalid_sites)
{
qDebug() << "Invalid " + mod_base + " position (is actually " + ref_idx.seq(bed_line.chr(), pos, 1, true) + " for CpG site)! " + bed_line.toString(true);
continue;
}
else THROW(ArgumentException, "Invalid " + mod_base + " position (is actually " + ref_idx.seq(bed_line.chr(), pos, 1, true) + " for CpG site)! " + bed_line.toString(true));
}

}
catch (Exception& e)
{
if (skip_invalid_sites)
{
QTextStream(stderr) << e.message();
continue;
}
else
{
THROW(ArgumentException, e.message());
}
}

//get entries from methylation file:
QByteArrayList matches = methylation_idx.getMatchingLines(bed_line.chr(), pos, pos, false);
Expand Down
37 changes: 18 additions & 19 deletions src/NGSDExportStudyGHGA/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,10 @@ class ConcreteTool
obj.insert("analysis_method", "ANAM_" + ps_data.pseudonym);
obj.insert("title", "ANA_" + ps_data.pseudonym);
obj.insert("description", data.analysis_description);
obj.insert("type", data.analysis_type); //"cfDNA"
obj.insert("type", data.analysis_type); //e.g. "cfDNA"
//optional
//obj.insert("ega_accession", QJsonValue());

//TODO: read from folder
QStringList input_files;
input_files << "FASTQ_R1_" + ps_data.pseudonym;
input_files << "FASTQ_R2_" + ps_data.pseudonym;
Expand All @@ -243,12 +242,12 @@ class ConcreteTool
{
QJsonObject obj;
obj.insert("name", "ANAM_" + ps_data.pseudonym);
obj.insert("description", data.analysis_description); //TODO
obj.insert("type", data.analysis_type); //TODO
obj.insert("workflow_name", "megSAP"); //TODO
obj.insert("description", data.analysis_description);
obj.insert("type", data.analysis_type);
obj.insert("workflow_name", "megSAP");
obj.insert("workflow_version", data.workflow_version);
obj.insert("workflow_repository", "https://github.com/imgag/megSAP"); //TODO
obj.insert("workflow_doi", "megSAP_doi"); //TODO
obj.insert("workflow_repository", "https://github.com/imgag/megSAP");
obj.insert("workflow_doi", "megSAP_doi");
//optional:
//obj.insert("workflow_tasks", "Pipeline?");
//obj.insert("parameters", QJsonArray());
Expand All @@ -261,7 +260,7 @@ class ConcreteTool
parent.insert("analysis_methods", array);
}

void addAnalysesMethodSupportingFiles(QJsonObject& parent, const CommonData& data) //TODO Leon: fix warning about unused parameter 'data'
void addAnalysesMethodSupportingFiles(QJsonObject& parent)
{
QJsonArray array;

Expand All @@ -273,7 +272,7 @@ class ConcreteTool
{
QJsonObject obj;
obj.insert("email", data.dac_email);
obj.insert("institute", data.dac_organization); //TODO
obj.insert("institute", data.dac_organization);
//optional:
//obj.insert("ega_accession", QJsonObject());
obj.insert("alias", data.dac_email);
Expand Down Expand Up @@ -374,7 +373,7 @@ class ConcreteTool
//optional:
//obj.insert("sequencing_center", QJsonValue());
//obj.insert("sequencing_read_length", QJsonValue());
obj.insert("sequencing_layout", "PE"); //TODO
obj.insert("sequencing_layout", "PE");
//optional:
//obj.insert("target_coverage", QJsonValue());
QString fc_id = db.getValue("SELECT fcid FROM sequencing_run WHERE name='" + ps_data.ps_info.run_name + "'").toString();
Expand Down Expand Up @@ -492,7 +491,7 @@ class ConcreteTool
//obj.insert("author", QJsonObject());
//obj.insert("year", QJsonObject());
//obj.insert("journal", QJsonObject());
obj.insert("doi", data.publication_doi); //TODO
obj.insert("doi", data.publication_doi);
//optional
//obj.insert("xref", QJsonObject());
obj.insert("alias", "PUB_" + data.study_name);
Expand Down Expand Up @@ -554,13 +553,13 @@ class ConcreteTool
obj.insert("name", "SAM_" + ps_data.pseudonym);
obj.insert("type", sampleTypeToSampleType(ps_data.s_info.type, ps_data.s_info.is_ffpe));
//optional:
//obj.insert("biological_replicate", QJsonValue()); //TODO
obj.insert("description", "sample that was sequenced"); //TODO
//obj.insert("biological_replicate", QJsonValue());
obj.insert("description", "sample that was sequenced");
//optional:
//obj.insert("storage", QJsonValue());
//obj.insert("disease_or_healthy", QJsonValue()); //TODO
obj.insert("case_control_status", "UNKNOWN"); //TODO
//obj.insert("ega_accession", QJsonValue()); //TODO
//obj.insert("disease_or_healthy", QJsonValue());
obj.insert("case_control_status", "UNKNOWN");
//obj.insert("ega_accession", QJsonValue());
//obj.insert("xref", QJsonValue());
//obj.insert("biospecimen_name", QJsonValue());
//obj.insert("biospecimen_type", QJsonValue());
Expand All @@ -584,9 +583,9 @@ class ConcreteTool
QJsonObject obj;
obj.insert("title", data.study_name);
obj.insert("description", data.study_description);
obj.insert("types", QJsonArray::fromStringList(data.study_types)); //TODO
obj.insert("types", QJsonArray::fromStringList(data.study_types));
//optional:
//obj.insert("ega_accession", QJsonValue());//TODO
//obj.insert("ega_accession", QJsonValue());
obj.insert("affiliations", QJsonArray::fromStringList(data.study_affilitions));
//optional:
//obj.insert("attributes", QJsonArray());
Expand Down Expand Up @@ -667,7 +666,7 @@ class ConcreteTool
QJsonObject root;
addAnalyses(root, data);
addAnalysesMethods(root, data);
addAnalysesMethodSupportingFiles(root, data);
addAnalysesMethodSupportingFiles(root);
addDataAccessCommittee(root, data);
addDataAccessPolicy(root, data);
addDatasets(root, data);
Expand Down

0 comments on commit a78c7ce

Please sign in to comment.