Skip to content

Commit

Permalink
specify data type as float when reading CSV files into data frames
Browse files Browse the repository at this point in the history
  • Loading branch information
msrocka committed Aug 4, 2017
1 parent 1eac9f2 commit 81462b7
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 3 deletions.
6 changes: 4 additions & 2 deletions iomb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid
import logging as log

import numpy
import pandas as pd


Expand Down Expand Up @@ -49,7 +50,8 @@ def each_csv_row(csv_file: str, func, skip_header=False, encoding='utf-8'):
def read_csv_data_frame(csv_file, keys_to_lower=True) -> pd.DataFrame:
""" Loads a pandas DataFrame from the given CSV file. """
log.info('read data frame from %s', csv_file)
df = pd.read_csv(csv_file, index_col=0, header=0)
df = pd.read_csv(csv_file, index_col=0, header=0, dtype=numpy.float64,
converters={0: str})
df.fillna(0.0, inplace=True)

def strip(x: str):
Expand All @@ -66,4 +68,4 @@ def csv_val(row: list, idx: int, default=None):
if idx >= len(row):
return default
else:
return row[idx]
return row[idx]
25 changes: 24 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import iomb.util as util
import tempfile
import unittest
import uuid

import iomb.util as util
import numpy


class TestUtil(unittest.TestCase):

Expand All @@ -10,5 +13,25 @@ def test_make_uuid(self):
actual = util.make_uuid("Flow", None, "a", 1, "B")
self.assertEqual(expected, actual)

def test_read_csv_as_data_frame(self):
    """Check that integer-looking CSV cells are loaded as numpy.float64.

    Writes a small CSV table (header row C1..C3, index rows R1..R2) to a
    temporary file, loads it through util.read_csv_data_frame and verifies
    both the value and the exact type of two cells. The frame is addressed
    with lower-case labels because the loader is called with its default
    keys_to_lower=True.
    """
    import os  # local import: only needed for the temp-file cleanup below

    data = ',C1,C2,C3\nR1,1,2,3\nR2,4,5,6\n'
    # delete=False: the file is closed before it is re-opened by name in
    # read_csv_data_frame, so it must survive close(); we remove it ourselves.
    temp = tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False,
                                       prefix='iomb_tests_')
    # Registered right after creation so the file is removed even when an
    # assertion (or the write) fails.
    self.addCleanup(os.remove, temp.name)
    with temp:
        temp.write(data)

    data_frame = util.read_csv_data_frame(temp.name)

    for column, row, expected in (('c1', 'r1', 1.0), ('c3', 'r2', 6.0)):
        value = data_frame[column][row]
        # The tolerance must be passed as `delta`; the third positional
        # argument of assertAlmostEqual is `places` (a number of decimals).
        self.assertAlmostEqual(expected, value, delta=1e-16)
        # Exact type check (not isinstance): the cell must be a float64.
        self.assertIs(type(value), numpy.float64)


# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
unittest.main()

0 comments on commit 81462b7

Please sign in to comment.