Skip to content

Commit

Permalink
#81 Fixed date calculation for dates after the years 4000 and 8000 (#83)
Browse files Browse the repository at this point in the history
See details in `SasDateFormat.sasLeapDaysFix`.
  • Loading branch information
xantorohara authored Jan 5, 2021
1 parent 70f5877 commit e059517
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 7 deletions.
101 changes: 101 additions & 0 deletions src/main/java/com/epam/parso/impl/SasDateFormat.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package com.epam.parso.impl;

import static com.epam.parso.impl.SasFileConstants.SECONDS_IN_DAY;

/**
 * Utilities for working with SAS dates.
 * <p>
 * SAS supports a wide family of date formats, so everything that is specific to SAS dates
 * is kept in this separate class.
 * More about SAS dates:
 * <ul>
 * <li>https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm</li>
 * <li>https://v8doc.sas.com/sashtml/lgref/z0197923.htm</li>
 * <li>https://v8doc.sas.com/sashtml/ets/chap2/sect7.htm</li>
 * </ul>
 */
final class SasDateFormat {
    /**
     * SAS datetime value of {@code 01MAR4000:00:00:00}, in seconds since 1960-01-01.
     * This is the first moment after {@code 29FEB4000}, the first leap day that is
     * missing from the SAS calendar.
     */
    private static final double SAS_SECONDS_29FEB4000 = 64381305600D;

    /**
     * SAS datetime value of {@code 01MAR8000:00:00:00}, in seconds since 1960-01-01.
     * This is the first moment after {@code 29FEB8000}, the second leap day that is
     * missing from the SAS calendar.
     */
    private static final double SAS_SECONDS_29FEB8000 = 190609027200D;

    /**
     * Utility class, not meant to be instantiated.
     */
    private SasDateFormat() {
    }

    /**
     * Puts back the leap days that SAS leaves out of its calendar.
     * <p>
     * SAS drops a leap day every 4000 years, which affects two days in its supported range:
     * {@code 29FEB4000} and {@code 29FEB8000}.
     * The idea goes back to John Herschel: https://en.wikipedia.org/wiki/John_Herschel
     * <p>
     * Discussions about this peculiarity of SAS dates:
     * <ul>
     * <li>https://blogs.sas.com/content/sasdummy/2010/04/05/in-the-year-9999/</li>
     * <li>https://communities.sas.com/t5/SAS-Programming/Leap-Years-divisible-by-4000/td-p/663467</li>
     * </ul>
     * <p>
     * Datetime constants evaluated by SAS ({@code dtime = '...'dt; put dtime;}) and what SAS reports:
     * <pre>{@code
     * '28FEB4000:00:00:00'dt   64381219200
     * '29FEB4000:00:00:00'dt   ERROR: Invalid date/time/datetime constant '29FEB4000:00:00:00'dt.
     * '01MAR4000:00:00:00'dt   64381305600
     * '31DEC4000:00:00:00'dt   64407657600
     * '28FEB8000:00:00:00'dt   190608940800
     * '29FEB8000:00:00:00'dt   ERROR: Invalid date/time/datetime constant '29FEB8000:00:00:00'dt.
     * '01MAR8000:00:00:00'dt   190609027200
     * '31DEC8000:00:00:00'dt   190635379200
     * '31DEC9999:00:00:00'dt   253717660800
     * }</pre>
     * So SAS rejects the leap days of the years 4000 and 8000 and its calendar has no such days at all.
     * <p>
     * Other tools do accept these leap days, for example:
     * <ul>
     * <li>Java: {@code LocalDateTime.of(4000, 2, 29, 0, 0).toEpochSecond(ZoneOffset.UTC)}
     * outputs 64065686400</li>
     * <li>JavaScript: {@code Date.parse('4000-02-29')} outputs 64065686400000</li>
     * <li>GNU/date: {@code date --utc --date '4000-02-29' +%s} outputs 64065686400</li>
     * </ul>
     * Therefore a SAS value has to be shifted forward by one day for every skipped leap day
     * that lies before it in order to be interpreted correctly outside of SAS.
     *
     * @param sasSeconds SAS date representation in seconds since 1960-01-01
     * @return seconds with restored leap days
     */
    public static double sasLeapDaysFix(double sasSeconds) {
        // The Y8K threshold is greater than the Y4K one, so passing it implies passing Y4K as well.
        final boolean pastY4kLeapDay = sasSeconds >= SAS_SECONDS_29FEB4000;
        final boolean pastY8kLeapDay = sasSeconds >= SAS_SECONDS_29FEB8000;

        double restored = sasSeconds;
        if (pastY8kLeapDay) {
            restored += SECONDS_IN_DAY; //restore Y8K leap day
        }
        if (pastY4kLeapDay) {
            restored += SECONDS_IN_DAY; //restore Y4K leap day
        }
        return restored;
    }
}
8 changes: 6 additions & 2 deletions src/main/java/com/epam/parso/impl/SasFileConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -1077,12 +1077,16 @@ public interface SasFileConstants {
*/
int START_DATES_DAYS_DIFFERENCE = DAYS_IN_YEAR * 10 + 3;

/**
* The number of seconds in a day.
*/
int SECONDS_IN_DAY = SECONDS_IN_MINUTE * MINUTES_IN_HOUR * HOURS_IN_DAY;

/**
* The difference in seconds between 01/01/1960 (the dates starting point in SAS) and 01/01/1970 (the dates starting
* point in Java).
*/
int START_DATES_SECONDS_DIFFERENCE = SECONDS_IN_MINUTE * MINUTES_IN_HOUR * HOURS_IN_DAY
* START_DATES_DAYS_DIFFERENCE;
int START_DATES_SECONDS_DIFFERENCE = SECONDS_IN_DAY * START_DATES_DAYS_DIFFERENCE;

/**
* The offset to the pointer for the bitwise representation of deleted records in MIX pages in x64.
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/com/epam/parso/impl/SasFileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -1069,8 +1069,12 @@ private String bytesToString(byte[] bytes, int offset, int length)
*/
private Date bytesToDateTime(byte[] bytes) {
double doubleSeconds = bytesToDouble(bytes);
return Double.isNaN(doubleSeconds) ? null : new Date((long) ((doubleSeconds - START_DATES_SECONDS_DIFFERENCE)
* MILLISECONDS_IN_SECONDS));
if (Double.isNaN(doubleSeconds)) {
return null;
} else {
double seconds = SasDateFormat.sasLeapDaysFix(doubleSeconds) - START_DATES_SECONDS_DIFFERENCE;
return new Date((long) (seconds * MILLISECONDS_IN_SECONDS));
}
}

/**
Expand All @@ -1083,8 +1087,12 @@ private Date bytesToDateTime(byte[] bytes) {
*/
private Date bytesToDate(byte[] bytes) {
double doubleDays = bytesToDouble(bytes);
return Double.isNaN(doubleDays) ? null : new Date((long) ((doubleDays - START_DATES_DAYS_DIFFERENCE)
* SECONDS_IN_MINUTE * MINUTES_IN_HOUR * HOURS_IN_DAY * MILLISECONDS_IN_SECONDS));
if (Double.isNaN(doubleDays)) {
return null;
} else {
double seconds = SasDateFormat.sasLeapDaysFix(doubleDays * SECONDS_IN_DAY) - START_DATES_SECONDS_DIFFERENCE;
return new Date((long) (seconds * MILLISECONDS_IN_SECONDS));
}
}

/**
Expand Down
31 changes: 30 additions & 1 deletion src/test/java/com/epam/parso/BugsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.Date;
import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

public class BugsTest {


@Test
public void testOOM() throws Exception {
try (InputStream is = this.getClass().getResourceAsStream("/bugs/mixed_data_one.sas7bdat.oom")) {
Expand Down Expand Up @@ -216,4 +218,31 @@ public void testInfinityLoopUnbufferedIssue58() throws Exception {
assertThat(sasFileReader.getSasFileProperties().getRowCount()).isEqualTo(0);
}
}

/**
 * Builds a {@link Date} that corresponds to midnight (00:00) UTC of the given calendar day.
 *
 * @param year  calendar year
 * @param month month of the year
 * @param day   day of the month
 * @return the start of the given day in UTC as a {@code Date}
 */
private static Date dateOf(int year, int month, int day) {
    LocalDateTime midnight = LocalDateTime.of(year, month, day, 0, 0);
    return Date.from(midnight.atOffset(ZoneOffset.UTC).toInstant());
}

/**
 * Issue 81: reads the {@code /bugs/81-dates.sas7bdat} resource and verifies that the value
 * in the column with index 1 of each of its 10 rows is parsed to the expected UTC date,
 * including dates after the years 4000 and 8000.
 */
@Test
public void testLeapDayFixIssue81() throws Exception {
    // Expected value of column 1, row by row.
    Date[] expectedDates = {
            dateOf(9999, 12, 31),
            dateOf(2049, 12, 31),
            dateOf(2099, 12, 31),
            dateOf(4000, 2, 28),
            dateOf(4000, 3, 1),
            dateOf(4000, 12, 31),
            dateOf(8000, 2, 28),
            dateOf(8000, 3, 1),
            dateOf(8000, 12, 31),
            dateOf(8001, 2, 21),
    };

    try (InputStream input = this.getClass().getResourceAsStream("/bugs/81-dates.sas7bdat")) {
        SasFileReader reader = new SasFileReaderImpl(input);
        Object[][] rows = reader.readAll();

        assertThat(rows.length).isEqualTo(expectedDates.length);
        for (int row = 0; row < expectedDates.length; row++) {
            assertThat(rows[row][1]).isEqualTo(expectedDates[row]);
        }
    }
}
}
Binary file added src/test/resources/bugs/81-dates.sas7bdat
Binary file not shown.
15 changes: 15 additions & 0 deletions src/test/resources/csv/dates_leap_days.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
d, dt
28Feb2000,28Feb2000:00:00:00.00
29Feb2000,29Feb2000:00:00:00.00
01Mar2000,01Mar2000:00:00:00.00
31Dec2000,31Dec2000:00:00:00.00
28Feb4000,28Feb4000:00:00:00.00
01Mar4000,01Mar4000:00:00:00.00
31Dec4000,31Dec4000:00:00:00.00
28Feb6000,28Feb6000:00:00:00.00
29Feb6000,29Feb6000:00:00:00.00
01Mar6000,01Mar6000:00:00:00.00
31Dec6000,31Dec6000:00:00:00.00
28Feb8000,28Feb8000:00:00:00.00
01Mar8000,01Mar8000:00:00:00.00
31Dec8000,31Dec8000:00:00:00.00
23 changes: 23 additions & 0 deletions src/test/resources/csv/dates_leap_days_meta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Number,Name,Type,Data length,Format,Label
1,d,Numeric,8,DATE9.,
2,dt,Numeric,8,DATETIME20.,
Bitness: x64
Compressed: null
Endianness: LITTLE_ENDIANNESS
Encoding: ISO-8859-1
Name: DATES_LEAP_DAYS
File type: DATA
File label: Leap days dataset
Date created: Fri Jan 01 13:53:59 MSK 2021
Date modified: Fri Jan 01 13:53:59 MSK 2021
SAS release: 9.0401M5
SAS server type: Linux
OS name: x86_64
OS type: 3.10.0-1160.2.1.
Header Length: 4096
Page Length: 4096
Page Count: 1
Row Length: 16
Row Count: 14
Mix Page Row Count: 124
Columns Count: 2
57 changes: 57 additions & 0 deletions src/test/resources/sas7bdat/dates_leap_days.sas
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
SAS program that generates a sas7bdat file with two columns: a date (d) and a datetime (dt).
Both columns contain values around leap days.
In terms of SAS, the years 4000 and 8000 don't have a leap day,
while the years 2000 and 6000 do have one.
All of these rows are necessary for the unit tests.
*/

options bufsize=4096 pagesize=15;

data dev.dates_leap_days(label='Leap days dataset');
format d date9.;
format dt datetime20.;

/* Year 2000: has a leap day, so 29FEB is included. */
d='28FEB2000'd;
dt='28FEB2000:00:00:00'dt;
output;
d='29FEB2000'd;
dt='29FEB2000:00:00:00'dt;
output;
d='01MAR2000'd;
dt='01MAR2000:00:00:00'dt;
output;
d='31DEC2000'd;
dt='31DEC2000:00:00:00'dt;
output;
/* Year 4000: no leap day in terms of SAS, so there is no 29FEB row. */
d='28FEB4000'd;
dt='28FEB4000:00:00:00'dt;
output;
d='01MAR4000'd;
dt='01MAR4000:00:00:00'dt;
output;
d='31DEC4000'd;
dt='31DEC4000:00:00:00'dt;
output;
/* Year 6000: has a leap day, so 29FEB is included. */
d='28FEB6000'd;
dt='28FEB6000:00:00:00'dt;
output;
d='29FEB6000'd;
dt='29FEB6000:00:00:00'dt;
output;
d='01MAR6000'd;
dt='01MAR6000:00:00:00'dt;
output;
d='31DEC6000'd;
dt='31DEC6000:00:00:00'dt;
output;
/* Year 8000: no leap day in terms of SAS, so there is no 29FEB row. */
d='28FEB8000'd;
dt='28FEB8000:00:00:00'dt;
output;
d='01MAR8000'd;
dt='01MAR8000:00:00:00'dt;
output;
d='31DEC8000'd;
dt='31DEC8000:00:00:00'dt;
output;
run;
Binary file not shown.

0 comments on commit e059517

Please sign in to comment.