Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add igdtools program #15

Merged
merged 1 commit into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ project(picovcf)

option(ENABLE_VCF_GZ "Enable support for .vcf.gz (via ZLIB)" OFF)
option(FUZZING_SUPPORT "Build for fuzzing; only abort on true failures, not invalid input data" OFF)
option(BUILD_EXAMPLES "Build example tools" ON)
option(BUILD_EXAMPLES "Build example tools" OFF)
option(BUILD_IGDTOOLS "Build igdtools (for converting and processing IGD files)" ON)

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
Expand Down Expand Up @@ -67,18 +68,17 @@ if(${BUILD_EXAMPLES})
# vcfpp utility
add_executable(vcfpp examples/vcfpp.cpp)
target_link_libraries(vcfpp ${LIBRARIES_TO_LINK})
# vcfconv utility
add_executable(vcfconv examples/vcfconv.cpp)
target_link_libraries(vcfconv ${LIBRARIES_TO_LINK})
# igdpp utility
add_executable(igdpp examples/igdpp.cpp)
target_link_libraries(igdpp ${LIBRARIES_TO_LINK})
# igdcp utility
add_executable(igdcp examples/igdcp.cpp)
target_link_libraries(igdcp ${LIBRARIES_TO_LINK})
if (${ENABLE_VCF_GZ})
# gzcat utility
add_executable(gzcat examples/gzcat.cpp)
target_link_libraries(gzcat ${LIBRARIES_TO_LINK})
endif()
endif()

# igdtools utility: only built when the BUILD_IGDTOOLS option is enabled.
if(${BUILD_IGDTOOLS})
add_executable(igdtools igdtools/igdtools.cpp)
# Link igdtools against whatever is listed in LIBRARIES_TO_LINK.
target_link_libraries(igdtools ${LIBRARIES_TO_LINK})
endif()
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ make

To convert from a `.vcf` or `.vcf.gz` file to `.igd`, run:
```
./vcfconv <vcf filename> <output IGD filename>
./igdtools <vcf filename> -o <output IGD filename>
```

To view basic statistics for an IGD file, use `igdpp`. Some commands to try are `./igdpp stats <igd file>` or `./igdpp range_stats <igd file>`.
Run `./igdtools --help` to see the full list of options. Here are some common tasks you might want to perform, besides VCF conversion:
* Pipe allele frequencies to a file: `./igdtools <input IGD> -a > allele.freq.tsv`
* View variant/sample statistics and header info: `./igdtools <input IGD> --stats --info`
* To restrict to variants in a base-pair range (e.g., 10000-20000), add the argument `--range 10000-20000`
* To restrict to variants with frequencies >=0.01: `--frange 0.01-1.0`
* Copy from one IGD to another: `./igdtools <input IGD> -o <output IGD>`
* Only include variants within a given base-pair range and frequency range: `./igdtools <input IGD> -o <output IGD> --range 100000-500000 --frange 0.01-0.5`

Finally, to run the unit tests:
```
Expand Down Expand Up @@ -78,7 +84,7 @@ Converting the `.vcf.gz` to `.bgen` (via qctool) took 23 minutes, but converting

* Clone [picovcf](https://github.com/aprilweilab/picovcf) and follow the instructions in this README to build the example tools for that library.
* If you want to be able to convert `.vcf.gz` (compressed VCF) to IGD, make sure you build with `-DENABLE_VCF_GZ=ON`
* One of the built tools will be `vcfconv`, which converts from VCF to IGD. Run `vcfconv <vcf file> <igd file>` to convert your data to IGD.
* Use `igdtools` to convert and process files
* Do one of the following:
* If your project is C++, copy [picovcf.hpp](https://github.com/aprilweilab/picovcf/blob/main/picovcf.hpp) into your project, `#include` it somewhere and then use according to the [documentation](https://picovcf.readthedocs.io/en/latest/)
* If your project is Python, clone [pyigd](https://github.com/aprilweilab/pyigd/) and install it per the [README instructions](https://github.com/aprilweilab/pyigd/blob/main/README.md).
52 changes: 0 additions & 52 deletions examples/igdcp.cpp

This file was deleted.

46 changes: 2 additions & 44 deletions examples/igdpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* Usage:
* igdpp <command> <file>
*
* where command is one of ["freq", "individuals", "stats", "sites", "range_stats", "variants"]
* where command is one of ["stats", "range_stats"]
*/
#include <iostream>
#include <cmath>
Expand All @@ -14,14 +14,6 @@

using namespace picovcf;

// Writes a single allele index to `out`, followed by one space.
// An index equal to MISSING_VALUE is written as "?" instead of a number.
inline void emitAllele(VariantT alleleIndex, std::ostream& out) {
    const bool isKnown = !(alleleIndex == MISSING_VALUE);
    if (isKnown) {
        out << alleleIndex << " ";
        return;
    }
    out << "? ";
}

int main(int argc, char *argv[]) {
std::cout << std::fixed << std::setprecision(4);
if (argc < 3) {
Expand All @@ -44,28 +36,6 @@ int main(int argc, char *argv[]) {
std::cout << " Genome range: " << igd.getPosition(0)
<< "-" << igd.getPosition(igd.numVariants()-1) << std::endl;
std::cout << " Has individual IDs? " << (igd.getIndividualIds().empty() ? "No" : "Yes") << std::endl;
} else if (command == "sites") {
size_t lastPosition = std::numeric_limits<size_t>::max();
size_t sites = 0;
for (size_t i = 0; i < igd.numVariants(); i++) {
bool isMissing = false;
auto pos = igd.getPosition(i, isMissing);
if (pos != lastPosition) {
sites++;
lastPosition = pos;
}
}
std::cout << "Unique sites: " << sites << std::endl;
} else if (command == "individuals") {
std::vector<std::string> individualIds = igd.getIndividualIds();
for (size_t i = 0; i < individualIds.size(); i++) {
std::cout << i << ": " << individualIds[i] << std::endl;
}
} else if (command == "variants") {
std::vector<std::string> variantIds = igd.getVariantIds();
for (size_t i = 0; i < variantIds.size(); i++) {
std::cout << i << ": " << variantIds[i] << std::endl;
}
} else if (command == "range_stats") {
std::cout << "Stats for " << filename << std::endl;
bool _ignore = false;
Expand Down Expand Up @@ -128,18 +98,6 @@ int main(int argc, char *argv[]) {

std::cout << " Variants with missing data: " << missingRows << std::endl;
std::cout << " Total missing alleles: " << missingAlleles << std::endl;


} else if (command == "freq") {
static constexpr char SEP = '\t';
std::cout << "POSITION" << SEP << "REF" << SEP << "ALT" << SEP << "ALT COUNT" << SEP << "TOTAL" << std::endl;
for (size_t i = 0; i < igd.numVariants(); i++) {
bool isMissing = false;
auto pos = igd.getPosition(i, isMissing);
auto sampleList = igd.getSamplesWithAlt(i);
std::cout << pos << SEP << igd.getRefAllele(i) << SEP
<< igd.getAltAllele(i) << SEP << sampleList.size() << SEP << igd.numSamples() << std::endl;
}
}
}
return 0;
}
34 changes: 0 additions & 34 deletions examples/vcfconv.cpp

This file was deleted.

Loading
Loading