Skip to content

Commit

Permalink
Add hdf5 support to ssu and register fp32 methods in the Python API (#…
Browse files Browse the repository at this point in the history
…108)

* Add write_mat_hdf5

* Add hdf5 compression

* Ad write_mat_fp32 and add --format to su.cpp

* Create dedicaed compress functions

* Update documentation

* Add fp32 methods

* Minor mormatting tweaks
  • Loading branch information
sfiligoi authored Jun 18, 2020
1 parent 8ab40f0 commit d58cbbf
Show file tree
Hide file tree
Showing 5 changed files with 549 additions and 15 deletions.
164 changes: 164 additions & 0 deletions sucpp/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,170 @@ IOStatus write_mat(const char* output_filename, mat_t* result) {
return write_okay;
}

herr_t write_hdf5_string(hid_t output_file_id,const char *dname, const char *str)
{
// this is the convoluted way to store a string
// Will use the FORTRAN forma, so we do not depend on null termination
hid_t filetype_id = H5Tcopy (H5T_FORTRAN_S1);
H5Tset_size(filetype_id, strlen(str));
hid_t memtype_id = H5Tcopy (H5T_C_S1);
H5Tset_size(memtype_id, strlen(str)+1);

hsize_t dims[1] = {1};
hid_t dataspace_id = H5Screate_simple (1, dims, NULL);

hid_t dataset_id = H5Dcreate(output_file_id,dname, filetype_id, dataspace_id, H5P_DEFAULT, H5P_DEFAULT,
H5P_DEFAULT);
herr_t status = H5Dwrite(dataset_id, memtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, str);

H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Tclose(memtype_id);
H5Tclose(filetype_id);

return status;
}

// Internal: Make sure TReal and real_id match
template<class TReal>
IOStatus write_mat_hdf5_D(const char* output_filename, mat_t* result,hid_t real_id, unsigned int compress_level) {
/* Create a new file using default properties. */
hid_t output_file_id = H5Fcreate(output_filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
if (output_file_id<0) return open_error;

// simple header
if (write_hdf5_string(output_file_id,"format","BDSM")<0) {
H5Fclose (output_file_id);
return open_error;
}
if (write_hdf5_string(output_file_id,"version","2020.06")<0) {
H5Fclose (output_file_id);
return open_error;
}

// save the ids
{
hsize_t dims[1];
dims[0] = result->n_samples;
hid_t dataspace_id = H5Screate_simple(1, dims, NULL);

// this is the convoluted way to store an array of strings
hid_t datatype_id = H5Tcopy(H5T_C_S1);
H5Tset_size(datatype_id,H5T_VARIABLE);

hid_t dcpl_id = H5Pcreate (H5P_DATASET_CREATE);
if (H5Pset_deflate(dcpl_id, compress_level)<0) return open_error; // just abort on error

hsize_t chunks[1];
chunks[0] = result->n_samples;

if (H5Pset_chunk (dcpl_id, 1, chunks)) return open_error; // just abort on error

hid_t dataset_id = H5Dcreate1(output_file_id, "order", datatype_id, dataspace_id, dcpl_id);

herr_t status = H5Dwrite(dataset_id, datatype_id, H5S_ALL, H5S_ALL,
H5P_DEFAULT, result->sample_ids);

H5Dclose(dataset_id);
H5Tclose(datatype_id);
H5Sclose(dataspace_id);
H5Pclose(dcpl_id);

// check status after cleanup, for simplicity
if (status<0) {
H5Fclose (output_file_id);
return open_error;
}
}

// save the matrix
{
const uint64_t n_samples = result->n_samples;
TReal *buf2d = (TReal*) malloc(n_samples*n_samples*sizeof(TReal));
if (buf2d==NULL) {
H5Fclose (output_file_id);
return open_error; // we don't have a better error code
}

// but first compute the values to save
{
const uint64_t comb_N = su::comb_2(n_samples);
for(uint64_t i = 0; i < n_samples; i++) {
for(uint64_t j = 0; j < n_samples; j++) {
TReal v;
if(i < j) { // upper triangle
const uint64_t comb_N_minus = su::comb_2(n_samples - i);
v = result->condensed_form[comb_N - comb_N_minus + (j - i - 1)];
} else if (i > j) { // lower triangle
const uint64_t comb_N_minus = su::comb_2(n_samples - j);
v = result->condensed_form[comb_N - comb_N_minus + (i - j - 1)];
} else {
v = 0.0;
}
buf2d[i*n_samples+j] = v;
}
}
}

hsize_t dims[2];
dims[0] = result->n_samples;
dims[1] = result->n_samples;
hid_t dataspace_id = H5Screate_simple(2, dims, NULL);

hid_t dcpl_id = H5Pcreate (H5P_DATASET_CREATE);
if (H5Pset_deflate(dcpl_id, compress_level)<0) return open_error; // just abort on error

// shoot for a 0.75M chunk size at double, to fit in default cache
hsize_t chunks[2];
chunks[0] = 1;
chunks[1] = 96*1024;
if ( chunks[1]>(result->n_samples) ) {
chunks[1] = result->n_samples;
chunks[0] = 96*1024/chunks[1];
if ( chunks[0]>(result->n_samples) ) {
chunks[0] = result->n_samples;
}
}

if (H5Pset_chunk (dcpl_id, 2, chunks)) return open_error; // just abort on error

hid_t dataset_id = H5Dcreate2(output_file_id, "matrix",real_id, dataspace_id,
H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
herr_t status = H5Dwrite(dataset_id, real_id, H5S_ALL, H5S_ALL, H5P_DEFAULT,
buf2d);

H5Pclose(dcpl_id);
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
free(buf2d);

// check status after cleanup, for simplicity
if (status<0) {
H5Fclose (output_file_id);
return open_error;
}
}

H5Fclose (output_file_id);
return write_okay;
}

IOStatus write_mat_hdf5(const char* output_filename, mat_t* result) {
return write_mat_hdf5_D<double>(output_filename,result,H5T_IEEE_F64LE,0);
}

IOStatus write_mat_hdf5_fp32(const char* output_filename, mat_t* result) {
return write_mat_hdf5_D<float>(output_filename,result,H5T_IEEE_F32LE,0);
}

IOStatus write_mat_hdf5_compressed(const char* output_filename, mat_t* result, unsigned int compress_level) {
return write_mat_hdf5_D<double>(output_filename,result,H5T_IEEE_F64LE,compress_level);
}

IOStatus write_mat_hdf5_fp32_compressed(const char* output_filename, mat_t* result, unsigned int compress_level) {
return write_mat_hdf5_D<float>(output_filename,result,H5T_IEEE_F32LE,compress_level);
}

IOStatus write_vec(const char* output_filename, r_vec* result) {
std::ofstream output;
output.open(output_filename);
Expand Down
46 changes: 46 additions & 0 deletions sucpp/api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,52 @@ EXTERN ComputeStatus faith_pd_one_off(const char* biom_filename, const char* tre
*/
EXTERN IOStatus write_mat(const char* filename, mat_t* result);

/* Write a matrix object using hdf5 format
*
* filename <const char*> the file to write into
* result <mat_t*> the results object
*
* The following error codes are returned:
*
* write_okay : no problems
*/
EXTERN IOStatus write_mat_hdf5(const char* filename, mat_t* result);

/* Write a matrix object using hdf5 format, using fp32 precision
*
* filename <const char*> the file to write into
* result <mat_t*> the results object
*
* The following error codes are returned:
*
* write_okay : no problems
*/
EXTERN IOStatus write_mat_hdf5_fp32(const char* filename, mat_t* result);

/* Write a matrix object using hdf5 format
*
* filename <const char*> the file to write into
* result <mat_t*> the results object
* compress_level - 0=no compression, 1-9 higher is slower
*
* The following error codes are returned:
*
* write_okay : no problems
*/
EXTERN IOStatus write_mat_hdf5_compressed(const char* filename, mat_t* result, unsigned int compress_level);

/* Write a matrix object using hdf5 format, using fp32 precision
*
* filename <const char*> the file to write into
* result <mat_t*> the results object
* compress_level - 0=no compression, 1-9 higher is slower
*
* The following error codes are returned:
*
* write_okay : no problems
*/
EXTERN IOStatus write_mat_hdf5_fp32_compressed(const char* filename, mat_t* result, unsigned int compress_level);


/* Write a series
*
Expand Down
Loading

0 comments on commit d58cbbf

Please sign in to comment.