Skip to content

Commit

Permalink
tidyup
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Sep 30, 2024
1 parent 6a9c10d commit f71560e
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 80 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ IOMP5 = 0

########################### end ###########################

VERSION=0.4.5
VERSION=0.4.6
# detect OS architecture and add flags
Platform := $(shell uname -s)

Expand Down
58 changes: 58 additions & 0 deletions src/Common.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
/*******************************************************************************
* @file https://github.com/Zilong-Li/PCAone/src/Common.hpp
* @author Zilong Li
* Copyright (C) 2022-2024. Use of this code is governed by the LICENSE file.
******************************************************************************/
#ifndef PCAone_Common_H
#define PCAone_Common_H

#include <Eigen/Dense>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>
#include <algorithm>
#include <iterator>
#include <numeric>

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

// Folds a frequency-like value: yields (1 - a) when a > 0.5, otherwise a.
// Every use of the argument is parenthesized so that expression arguments
// such as MAF(x + y) expand correctly. The argument is evaluated more than
// once, so do not pass expressions with side effects.
#define MAF(a) ((a) > 0.5 ? (1 - (a)) : (a))

Expand Down Expand Up @@ -34,5 +46,51 @@ inline UMapIntInt vector2map(const Int1D& v) {
return m;
}

// Returns the indices of `v` arranged so that the referenced elements are in
// ascending order (compared with operator<). `v` itself is not modified.
// Uses std::sort, so the relative order of equal elements is unspecified.
template <typename T>
inline std::vector<size_t> sortidx(const std::vector<T>& v) {
    std::vector<size_t> order(v.size());
    // Start from the identity permutation 0, 1, ..., n-1.
    std::iota(order.begin(), order.end(), 0);
    // Compare two positions by the values they refer to in `v`.
    const auto byValue = [&v](size_t lhs, size_t rhs) {
        return v[lhs] < v[rhs];
    };
    std::sort(order.begin(), order.end(), byValue);
    return order;
}

// A whole line of text that can be extracted from a stream with operator>>,
// which makes it usable with e.g. std::istream_iterator<Line>.
struct Line {
    std::string data;  // text of the most recently extracted line

    // Implicit read-only access to the stored text as a std::string.
    operator const std::string&() const { return data; }

    // Reads the next line from `in` (via std::getline) into `target.data`.
    friend std::istream& operator>>(std::istream& in, Line& target) {
        std::getline(in, target.data);
        return in;
    }
};

// Plain aggregate of per-SNP vectors. Judging by the name and the field
// comments it carries the SNP/window information used for LD calculations;
// NOTE(review): confirm exact semantics against the code that fills it.
struct SNPld {
std::vector<int> pos; // position of each SNP
std::vector<int> end_pos; // 0-based index of the last SNP position
std::vector<std::string> chr; // chromosome ("chr") sequences
Double1D af; // allele frequency
std::vector<int> ws; // the SNP index, i.e. the index of the lead SNP (presumably "window start")
std::vector<int> we; // the number of SNPs (including the lead SNP) in a window
};

// Bundles the state used for streaming zstd decompression of a file: the
// input FILE handle, a zstd decompression context and scratch string buffers.
//
// The destructor releases the decompression context and closes `fin`.
// NOTE(review): the struct is still copy-constructible; a copy would release
// the same context and FILE twice, so instances should not be copied.
struct ZstdBuffer {
    ZstdBuffer() {
        // Pre-size the scratch buffers to zstd's recommended stream sizes.
        buffInTmp.reserve(buffInSize);
        buffOutTmp.reserve(buffOutSize);
    }
    ~ZstdBuffer() {
        ZSTD_freeDCtx(dctx);
        // `fin` stays nullptr until a caller opens a file; fclose(nullptr) is
        // undefined behaviour, so only close a handle that was actually set.
        if (fin != nullptr && fclose(fin)) {
            perror("fclose error");
            exit(1);
        }
    }
    FILE* fin = nullptr;                               // input file, assigned by the user of this struct
    size_t const buffInSize = ZSTD_DStreamInSize();    // recommended input chunk size
    size_t const buffOutSize = ZSTD_DStreamOutSize();  // recommended output chunk size
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();         // decompression context, freed in the destructor
    size_t lastRet = 1;  // presumably the last zstd return code — TODO confirm with the reader code
    std::string buffCur = "";
    std::string buffLine, buffInTmp, buffOutTmp;
};

#endif // PCAone_Common_H
11 changes: 4 additions & 7 deletions src/Data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

#include "Data.hpp"

#include <string>
#include <vector>

#include "Eigen/src/Core/util/Meta.h"
#include "LD.hpp"
#include "Utils.hpp"
Expand Down Expand Up @@ -215,8 +212,8 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U,
"ld-stats=0: calculate the ancestry adjusted LD matrix!");
}
std::ofstream ofs(params.fileout + ".residuals", std::ios::binary);
const uint ibyte = 4;
const uint magic = ibyte * 2;
const uint64 ibyte = 4;
const uint64 magic = ibyte * 2;
uint64 bytes_per_snp = nsamples * ibyte;
ofs.write((char *)&nsnps, ibyte);
ofs.write((char *)&nsamples, ibyte);
Expand All @@ -229,7 +226,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U,
for (Eigen::Index ib = 0; ib < G.cols(); ib++) {
fg = G.col(ib).cast<float>();
if (params.perm) {
idx = magic + perm.indices()[ib] * bytes_per_snp;
idx = magic + (uint64)perm.indices()[ib] * bytes_per_snp;
ofs.seekp(idx, std::ios_base::beg);
}
ofs.write((char *)fg.data(), bytes_per_snp);
Expand All @@ -246,7 +243,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U,
for (Eigen::Index ib = 0; ib <= stop[b] - start[b]; ib++, i++) {
fg = G.col(ib).cast<float>();
if (params.perm) {
idx = magic + perm.indices()[i] * bytes_per_snp;
idx = magic + (uint64)perm.indices()[i] * bytes_per_snp;
ofs.seekp(idx, std::ios_base::beg);
}
ofs.write((char *)fg.data(), bytes_per_snp);
Expand Down
15 changes: 0 additions & 15 deletions src/FileBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,6 @@

using namespace std;

// Reports whether the file at `filename` begins with a zstd frame.
// Returns false when the file cannot be opened or holds fewer than 4 bytes;
// otherwise the first 4 bytes are checked with ZSTD_isFrame.
bool isZstdCompressed(const char *filename) {
    FILE *fp = fopen(filename, "rb");
    if (fp == nullptr) return false;

    char header[4];
    // Only the leading 4 bytes are needed; the file is closed right after.
    const bool gotHeader = fread(header, 1, 4, fp) == 4;
    fclose(fp);

    return gotHeader && ZSTD_isFrame(header, 4) != 0;
}

void FileBin::check_file_offset_first_var() {
setlocale(LC_ALL, "C");
Expand Down
3 changes: 0 additions & 3 deletions src/FileBinary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
#define FILEBINARY_H_

#include "Data.hpp"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

bool isZstdCompressed(const char *filename);

class FileBin : public Data {
public:
Expand Down
2 changes: 0 additions & 2 deletions src/FilePlink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

#include "FilePlink.hpp"

#include <string>

using namespace std;

void FileBed::check_file_offset_first_var() {
Expand Down
4 changes: 0 additions & 4 deletions src/LD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

#include <zlib.h>

#include <algorithm>
#include <cstring>
#include <string>

#include "Cmd.hpp"
#include "Data.hpp"
#include "Utils.hpp"
Expand Down
1 change: 1 addition & 0 deletions src/Timer.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef TIMER_H_
#define TIMER_H_

#include <clocale>
#include <chrono>
#include <iomanip> // put_time
#include <sstream>
Expand Down
16 changes: 16 additions & 0 deletions src/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,19 @@ void make_plink2_eigenvec_file(int K, std::string fout, const std::string& fin,
ofs << tokens[0] + "\t" + tokens[1] + "\t" << line2 << std::endl;
}
}

// Reports whether the file at `filename` begins with a zstd frame.
// Returns false when the file cannot be opened or holds fewer than 4 bytes;
// otherwise the first 4 bytes are checked with ZSTD_isFrame.
bool isZstdCompressed(const char *filename) {
    FILE *fp = fopen(filename, "rb");
    if (fp == nullptr) return false;

    char header[4];
    // Only the leading 4 bytes are needed; the file is closed right after.
    const bool gotHeader = fread(header, 1, 4, fp) == 4;
    fclose(fp);

    return gotHeader && ZSTD_isFrame(header, 4) != 0;
}
49 changes: 1 addition & 48 deletions src/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,18 @@

#include <sys/utsname.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <clocale>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include <stdexcept>

#include "Common.hpp"
#include "Logger.hpp"
#include "Timer.hpp"
#include "zstd.h"

// MAKE SOME TOOLS FULLY ACCESSIBLE THROUGHOUT THE SOFTWARE
#ifdef _DECLARE_TOOLBOX_HERE
Expand All @@ -30,32 +25,6 @@ extern Timer tick;
extern Logger cao;
#endif

// Returns the indices of `v` arranged so that the referenced elements are in
// ascending order (compared with operator<). `v` itself is not modified.
// Uses std::sort, so the relative order of equal elements is unspecified.
template <typename T>
inline std::vector<size_t> sortidx(const std::vector<T>& v) {
    std::vector<size_t> order(v.size());
    // Start from the identity permutation 0, 1, ..., n-1.
    std::iota(order.begin(), order.end(), 0);
    // Compare two positions by the values they refer to in `v`.
    const auto byValue = [&v](size_t lhs, size_t rhs) {
        return v[lhs] < v[rhs];
    };
    std::sort(order.begin(), order.end(), byValue);
    return order;
}

// A whole line of text that can be extracted from a stream with operator>>,
// which makes it usable with e.g. std::istream_iterator<Line>.
struct Line {
    std::string data;  // text of the most recently extracted line

    // Implicit read-only access to the stored text as a std::string.
    operator const std::string&() const { return data; }

    // Reads the next line from `in` (via std::getline) into `target.data`.
    friend std::istream& operator>>(std::istream& in, Line& target) {
        std::getline(in, target.data);
        return in;
    }
};

// Plain aggregate of per-SNP vectors. Judging by the name and the field
// comments it carries the SNP/window information used for LD calculations;
// NOTE(review): confirm exact semantics against the code that fills it.
struct SNPld {
std::vector<int> pos; // position of each SNP
std::vector<int> end_pos; // 0-based index of the last SNP position
std::vector<std::string> chr; // chromosome ("chr") sequences
Double1D af; // allele frequency
std::vector<int> ws; // the SNP index, i.e. the index of the lead SNP (presumably "window start")
std::vector<int> we; // the number of SNPs (including the lead SNP) in a window
};

std::string get_machine();

void fcloseOrDie(FILE* file);
Expand Down Expand Up @@ -91,22 +60,6 @@ std::vector<std::string> split_string(const std::string& s,
void make_plink2_eigenvec_file(int K, std::string fout, const std::string& fin,
const std::string& fam);

// Bundles the state used for streaming zstd decompression of a file: the
// input FILE handle, a zstd decompression context and scratch string buffers.
struct ZstdBuffer {
ZstdBuffer() {
// Pre-size the scratch buffers to zstd's recommended stream sizes.
buffInTmp.reserve(buffInSize);
buffOutTmp.reserve(buffOutSize);
}
// Frees the decompression context and closes the input file.
// NOTE(review): `fin` defaults to nullptr; verify that fcloseOrDie tolerates
// a null handle when no file was ever opened.
~ZstdBuffer() {
ZSTD_freeDCtx(dctx);
fcloseOrDie(fin);
}
FILE* fin = nullptr; // input file, assigned by the user of this struct
size_t const buffInSize = ZSTD_DStreamInSize(); // recommended input chunk size
size_t const buffOutSize = ZSTD_DStreamOutSize(); // recommended output chunk size
ZSTD_DCtx* const dctx = ZSTD_createDCtx(); // decompression context, freed in the destructor
size_t lastRet = 1; // presumably the last zstd return code — TODO confirm
std::string buffCur = "";
std::string buffLine, buffInTmp, buffOutTmp;
};
bool isZstdCompressed(const char *filename);

#endif // PCAONE_UTILES_

0 comments on commit f71560e

Please sign in to comment.