Skip to content

Commit

Permalink
fixed fastq reading error
Browse files (browse the repository at this point in the history)
  • Loading branch information
lmdu committed Feb 28, 2024
1 parent ce4ffc8 commit ae5d004
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 43 deletions.
21 changes: 12 additions & 9 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@ jobs:
os: [windows-2019, ubuntu-20.04, macos-11]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Set up QEMU
if: runner.os == 'Linux'
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
with:
platforms: all

- name: Build wheels
uses: pypa/cibuildwheel@v2.11.4
uses: pypa/cibuildwheel@v2.16.5
env:
CIBW_SKIP: pp*
CIBW_BUILD_VERBOSITY: 0
Expand All @@ -40,16 +40,17 @@ jobs:

- name: Upload wheels to artifact
uses: actions/upload-artifact@v3
name: artifact-${{ matrix.os }}
with:
path: ./wheelhouse/*.whl

build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
name: Install Python
with:
python-version: "3.10"
Expand All @@ -60,18 +61,20 @@ jobs:
- name: Display dist
run: ls -l dist

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: artifact-source
path: dist/*.tar.gz

upload_pypi:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: artifact
pattern: artifact-*
merge-multiple: true
path: dist

- run: ls -l dist/
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========

Version 2.1.0 (2024-02-28)
--------------------------

- Added support for Python 3.12
- Fixed an error in FASTA sequence composition (only printable characters are reported)
- Fixed an error when reading FASTQ records continuously during iteration

Version 2.0.2 (2023-11-25)
--------------------------

Expand Down
3 changes: 2 additions & 1 deletion src/fasta.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,7 @@ PyObject *pyfastx_fasta_gc_skew(pyfastx_Fasta *self, void* closure) {
}

PyObject *pyfastx_fasta_composition(pyfastx_Fasta *self, void* closure) {
int i;
int l;
int ret;
const char *sql;
Expand Down Expand Up @@ -1085,7 +1086,7 @@ PyObject *pyfastx_fasta_composition(pyfastx_Fasta *self, void* closure) {
ret = sqlite3_step(stmt);
);

if (n > 0 && l != 13) {
if (n > 0 && l >= 32 && l < 127) {
b = Py_BuildValue("C", l);
c = Py_BuildValue("n", n);
PyDict_SetItem(d, b, c);
Expand Down
2 changes: 1 addition & 1 deletion src/fastq.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ PyObject *pyfastx_fastq_iter(pyfastx_Fastq *self) {
self->middle->iterating = 1;

if (!self->middle->cache_buff) {
self->middle->cache_buff = (char *)malloc(1048576);
self->middle->cache_buff = (char *)malloc(CACHE_SIZE);
}
self->middle->cache_soff = 0;
self->middle->cache_eoff = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/fastq.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "util.h"
#include "sqlite3.h"

#define CACHE_SIZE 1048576

typedef struct {
PyObject_HEAD

Expand Down
2 changes: 1 addition & 1 deletion src/index.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void pyfastx_create_index(pyfastx_Index *self){
avglen REAL, --average seq length \n \
medlen REAL, --median seq length \n \
n50 INTEGER, --N50 seq length \n \
l50 INTEGER --N50 seq count \n \
l50 INTEGER --L50 seq count \n \
); \
CREATE TABLE comp ( \
ID INTEGER PRIMARY KEY, --comp identifier\n \
Expand Down
64 changes: 38 additions & 26 deletions src/read.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,18 @@ void pyfastx_read_continue_reader(pyfastx_Read *self) {
Py_ssize_t read_len;
Py_ssize_t cache_len;
Py_ssize_t offset;
Py_ssize_t offset1;

//read raw string offset
offset = self->seq_offset - self->desc_len - 1;
offset1 = offset;

//read raw string length
residue_len = self->qual_offset + self->read_len - offset + 2;
residue_len = self->qual_offset + self->read_len - offset + 1;
read_len = 0;
cache_len = 0;

self->raw = (char *)malloc(residue_len + 1);
self->raw = (char *)malloc(residue_len + 2);

if (offset < self->middle->cache_soff) {
pyfastx_read_random_reader(self, self->raw, offset, residue_len);
Expand All @@ -76,10 +78,15 @@ void pyfastx_read_continue_reader(pyfastx_Read *self) {
memcpy(self->raw+read_len, self->middle->cache_buff+slice_offset, cache_len);
read_len += cache_len;
residue_len -= cache_len;
offset += cache_len;
} else {
self->middle->cache_soff = self->middle->cache_eoff;
gzread(self->middle->gzfd, self->middle->cache_buff, 1048576);
gzread(self->middle->gzfd, self->middle->cache_buff, CACHE_SIZE);
self->middle->cache_eoff = gztell(self->middle->gzfd);

if (self->middle->cache_soff == self->middle->cache_eoff) {
break;
}
}
}
}
Expand All @@ -92,20 +99,21 @@ void pyfastx_read_continue_reader(pyfastx_Read *self) {
self->desc[self->desc_len] = '\0';
}

if (self->raw[read_len-2] == '\n') {
self->raw[read_len-1] = '\0';
} else if (self->raw[read_len-2] == '\r' && self->raw[read_len-1] == '\n') {
if (self->raw[read_len-1] == '\n') {
self->raw[read_len] = '\0';
} else if (self->raw[read_len-1] == '\r') {
self->raw[read_len] = '\n';
self->raw[read_len+1] = '\0';
} else {
self->raw[read_len-2] = '\0';
self->raw[read_len] = '\0';
}

self->seq = (char *)malloc(self->read_len + 1);
memcpy(self->seq, self->raw + self->seq_offset - offset, self->read_len);
memcpy(self->seq, self->raw + self->seq_offset - offset1, self->read_len);
self->seq[self->read_len] = '\0';

self->qual = (char *)malloc(self->read_len + 1);
memcpy(self->qual, self->raw + self->qual_offset - offset, self->read_len);
memcpy(self->qual, self->raw + self->qual_offset - offset1, self->read_len);
self->qual[self->read_len] = '\0';
}

Expand Down Expand Up @@ -202,31 +210,35 @@ PyObject* pyfastx_read_antisense(pyfastx_Read *self, void* closure) {
PyObject* pyfastx_read_description(pyfastx_Read *self, void* closure) {
Py_ssize_t new_offset;

if (self->middle->iterating) {
pyfastx_read_continue_reader(self);
} else if (!self->desc) {
new_offset = self->seq_offset - self->desc_len - 1;
self->desc = (char *)malloc(self->desc_len + 1);
if (!self->desc) {
if (self->middle->iterating) {
pyfastx_read_continue_reader(self);
} else {
new_offset = self->seq_offset - self->desc_len - 1;
self->desc = (char *)malloc(self->desc_len + 1);

pyfastx_read_random_reader(self, self->desc, new_offset, self->desc_len);
pyfastx_read_random_reader(self, self->desc, new_offset, self->desc_len);

if (self->desc[self->desc_len-1] == '\r') {
self->desc[self->desc_len-1] = '\0';
} else {
self->desc[self->desc_len] = '\0';
if (self->desc[self->desc_len-1] == '\r') {
self->desc[self->desc_len-1] = '\0';
} else {
self->desc[self->desc_len] = '\0';
}
}
}

return Py_BuildValue("s", self->desc);
}

PyObject* pyfastx_read_qual(pyfastx_Read *self, void* closure) {
if (self->middle->iterating) {
pyfastx_read_continue_reader(self);
} else if (!self->qual) {
self->qual = (char *)malloc(self->read_len + 1);
pyfastx_read_random_reader(self, self->qual, self->qual_offset, self->read_len);
self->qual[self->read_len] = '\0';
if (!self->qual) {
if (self->middle->iterating) {
pyfastx_read_continue_reader(self);
} else {
self->qual = (char *)malloc(self->read_len + 1);
pyfastx_read_random_reader(self, self->qual, self->qual_offset, self->read_len);
self->qual[self->read_len] = '\0';
}
}

return Py_BuildValue("s", self->qual);
Expand All @@ -250,7 +262,7 @@ PyObject* pyfastx_read_quali(pyfastx_Read *self, void* closure) {
phred = self->middle->phred ? self->middle->phred : 33;

quals = PyList_New(0);
for (i = 0; i < self->read_len; i++) {
for (i = 0; i < self->read_len; ++i) {
q = Py_BuildValue("i", self->qual[i] - phred);
PyList_Append(quals, q);
Py_DECREF(q);
Expand Down
8 changes: 4 additions & 4 deletions src/sequence.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,14 +734,14 @@ PyObject *pyfastx_sequence_composition(pyfastx_Sequence *self, void* closure) {
d = PyDict_New();

if (ret == SQLITE_ROW && self->start == 1 && self->end == self->seq_len) {
for (i = 1; i < 128; ++i) {
while (ret == SQLITE_ROW) {
PYFASTX_SQLITE_CALL(
l = sqlite3_column_int(stmt, 2);
n = sqlite3_column_int64(stmt, 3);
ret = sqlite3_step(stmt);
);

if (n > 0 && l != 13) {
if (n > 0 && l >= 32 && l < 127) {
b = Py_BuildValue("C", l);
c = Py_BuildValue("n", n);
PyDict_SetItem(d, b, c);
Expand All @@ -756,10 +756,10 @@ PyObject *pyfastx_sequence_composition(pyfastx_Sequence *self, void* closure) {
++seq_comp[(unsigned char)seq[i]];
}

for (l = 0; l < 128; ++l) {
for (l = 32; l < 127; ++l) {
n = seq_comp[l];

if (n > 0 && l != 13) {
if (n > 0) {
b = Py_BuildValue("C", l);
c = Py_BuildValue("i", n);
PyDict_SetItem(d, b, c);
Expand Down
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#define PYFASTX_VERSION "2.0.2"
#define PYFASTX_VERSION "2.1.0"
#define ZRAN_VERSION "1.7.0"

0 comments on commit ae5d004

Please sign in to comment.