vincentlaucsb · vincentlaucsb · Mar 26, 2019 · Mar 25, 2019 · Mar 25, 2019 · Mar 25, 2019
diff --git a/src/data_type.cpp b/src/data_type.cpp
@@ -20,6 +20,10 @@ namespace csv::internals {
     };
     #endif
 
+    const long double _INT_MAX = static_cast<long double>(std::numeric_limits<int>::max()); // ceilings compared against the parsed magnitude
+    const long double _LONG_MAX = static_cast<long double>(std::numeric_limits<long int>::max());
+    const long double _LONG_LONG_MAX = static_cast<long double>(std::numeric_limits<long long int>::max());
+
     DataType data_type(std::string_view in, long double* const out) {
         /** Distinguishes numeric from other text values. Used by various
         *  type casting functions, like csv_parser::CSVReader::read_row()
@@ -43,7 +47,8 @@ namespace csv::internals {
         bool prob_float = false;
 
         unsigned places_after_decimal = 0;
-        long double num_buff = 0;
+        long double integral_part = 0,
+            decimal_part = 0;
 
         for (size_t i = 0, ilen = in.size(); i < ilen; i++) {
             const char& current = in[i];
@@ -66,18 +71,16 @@ namespace csv::internals {
                     // Ex: '510-123-4567'
                     return CSV_STRING;
                 }
-                else {
-                    neg_allowed = false;
-                }
+
+                neg_allowed = false;
                 break;
             case '.':
                 if (!dot_allowed) {
                     return CSV_STRING;
                 }
-                else {
-                    dot_allowed = false;
-                    prob_float = true;
-                }
+
+                dot_allowed = false;
+                prob_float = true;
                 break;
             default:
                 if (isdigit(current)) {
@@ -91,15 +94,12 @@ namespace csv::internals {
 
                     // Build current number
                     unsigned digit = current - '0';
-                    if (num_buff == 0) {
-                        num_buff = digit;
-                    }
-                    else if (prob_float) {
-                        num_buff += (long double)digit / pow(10.0, ++places_after_decimal);
+                    if (prob_float) {
+                        places_after_decimal++;
+                        decimal_part = (decimal_part * 10) + digit;
                     }
                     else {
-                        num_buff *= 10;
-                        num_buff += digit;
+                        integral_part = (integral_part * 10) + digit;
                     }
                 }
                 else {
@@ -110,29 +110,24 @@ namespace csv::internals {
 
         // No non-numeric/non-whitespace characters found
         if (has_digit) {
-            if (!neg_allowed) num_buff *= -1;
-            if (out) *out = num_buff;
+            long double number = integral_part + decimal_part * pow(10, -(double)places_after_decimal);
+            if (out) *out = neg_allowed ? number : -number;
 
             if (prob_float)
                 return CSV_DOUBLE;
-            else {
-                long double log10_num_buff;
-                if (!neg_allowed) log10_num_buff = log10(-num_buff);
-                else log10_num_buff = log10(num_buff);
 
-                if (log10_num_buff < log10(std::numeric_limits<int>::max()))
-                    return CSV_INT;
-                else if (log10_num_buff < log10(std::numeric_limits<long int>::max()))
-                    return CSV_LONG_INT;
-                else if (log10_num_buff < log10(std::numeric_limits<long long int>::max()))
-                    return CSV_LONG_LONG_INT;
-                else // Conversion to long long will cause an overflow
-                    return CSV_DOUBLE;
-            }
-        }
-        else {
-            // Just whitespace
-            return CSV_NULL;
+            // We can assume number is always positive
+            if (number < _INT_MAX)
+                return CSV_INT;
+            else if (number < _LONG_MAX)
+                return CSV_LONG_INT;
+            else if (number < _LONG_LONG_MAX)
+                return CSV_LONG_LONG_INT;
+            else // Conversion to long long will cause an overflow
+                return CSV_DOUBLE;
         }
+
+        // Just whitespace
+        return CSV_NULL;
     }
 }
diff --git a/tests/test_data_type.cpp b/tests/test_data_type.cpp
@@ -53,4 +53,42 @@ TEST_CASE( "Recognize Floats Properly", "[dtype_float]" ) {
 
     REQUIRE(data_type(e, &out) == CSV_DOUBLE);
     REQUIRE(is_equal(out, 2.71828));
+}
+
+TEST_CASE("Integer Overflow", "[int_overflow]") {
+    // INT_MAX + 1 no longer fits in an int, so data_type() must report the next wider integer type.
+    const long long int_max = std::numeric_limits<int>::max();
+    const long long long_max = std::numeric_limits<long int>::max();
+
+    const std::string s = std::to_string(int_max + 1);
+    long double out = 0;
+
+    if (int_max == long_max) {
+        // long is no wider than int on this platform, so only long long can hold the value
+        REQUIRE(data_type(s, &out) == CSV_LONG_LONG_INT);
+    }
+    else {
+        REQUIRE(data_type(s, &out) == CSV_LONG_INT);
+    }
+
+    REQUIRE(out == int_max + 1);
+}
+
+TEST_CASE( "Recognize Sub-Unit Double Values", "[regression_double]" ) {
+    // "0.15" has a zero integral part but must still be classified (and parsed) as a double
+    long double parsed;
+    REQUIRE(data_type(std::string("0.15"), &parsed) == CSV_DOUBLE);
+    REQUIRE(is_equal(parsed, 0.15));
+}
+
+TEST_CASE( "Recognize Double Values", "[regression_double2]" ) {
+    // Round-trip 0.00, 0.01, ..., 2.00 through std::to_string() and data_type().
+    // An integer counter avoids the rounding drift of repeatedly adding 0.01 to a double.
+    for (int hundredths = 0; hundredths <= 200; ++hundredths) {
+        const double expected = hundredths / 100.0;
+        const std::string s = std::to_string(expected);
+        long double out = 0;
+        REQUIRE(data_type(s, &out) == CSV_DOUBLE);
+        REQUIRE(is_equal(out, expected));
+    }
 }
diff --git a/tests/test_read_csv.cpp b/tests/test_read_csv.cpp
@@ -291,8 +291,9 @@ TEST_CASE("Test read_row() CSVField - Memory", "[read_row_csvf2]") {
     // Fourth Row
     rows.pop_front();
     row = rows.front();
+    double big_num_csv = row[0].get<double>();
     REQUIRE(row[0].type() == CSV_DOUBLE); // Overflow
-    REQUIRE(internals::is_equal(row[0].get<double>(), big_num));
+    REQUIRE(internals::is_equal(big_num_csv, big_num));
 }
 
 TEST_CASE("Test read_row() CSVField - Power Status", "[read_row_csvf3]") {