From 21929ddb6c6061c2d7ded113768bf720eac0b99d Mon Sep 17 00:00:00 2001 From: Cole MacKenzie Date: Thu, 18 May 2023 16:36:31 -0700 Subject: [PATCH] test case for #1374 Test table `issue_1374` was created by hand to have 2 data files where only one file has the `min_values` for the statistics in the `checkpoint.parquet` file set to null in order to trigger the bug. There is no other significance to the table other than to demonstrate issue #1374. ``` internal error: entered unreachable code thread 'test_issue_1374' panicked at 'internal error: entered unreachable code', /Users/cole/.cargo/registry/src/index.crates.io-6f17d22bba15001f/datafusion-common-24.0.0/src/scalar.rs:2472:26 ``` --- .../_delta_log/00000000000000000000.json | 3 ++ .../00000000000000000001.checkpoint.parquet | Bin 0 -> 20622 bytes .../_delta_log/00000000000000000001.json | 3 ++ .../issue_1374/_delta_log/_last_checkpoint | 1 + ...4008-82df-e98efdcdd47d-c000.snappy.parquet | Bin 0 -> 1021 bytes ...4008-82df-e98efdcdd49c-c000.snappy.parquet | Bin 0 -> 1021 bytes rust/tests/datafusion_test.rs | 34 ++++++++++-------- 7 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 rust/tests/data/issue_1374/_delta_log/00000000000000000000.json create mode 100644 rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet create mode 100644 rust/tests/data/issue_1374/_delta_log/00000000000000000001.json create mode 100644 rust/tests/data/issue_1374/_delta_log/_last_checkpoint create mode 100644 rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet create mode 100644 rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet diff --git a/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json b/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..493d40ebfb --- /dev/null +++ 
b/rust/tests/data/issue_1374/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} +{"metaData":{"id":"d5ad9276-c21f-474e-bfa8-996099dce265","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"temperature\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date"],"createdTime":1684886484991,"configuration":{}}} +{"commitInfo":{"timestamp":1684886484992,"operation":"CREATE TABLE","operationParameters":{"mode":"ErrorIfExists","metadata":"{\"configuration\":{},\"created_time\":1684886484991,\"description\":null,\"format\":{\"options\":{},\"provider\":\"parquet\"},\"id\":\"d5ad9276-c21f-474e-bfa8-996099dce265\",\"name\":null,\"partition_columns\":[\"date\"],\"schema\":{\"fields\":[{\"metadata\":{},\"name\":\"timestamp\",\"nullable\":true,\"type\":\"timestamp\"},{\"metadata\":{},\"name\":\"temperature\",\"nullable\":true,\"type\":\"integer\"},{\"metadata\":{},\"name\":\"date\",\"nullable\":true,\"type\":\"string\"}],\"type\":\"struct\"}}","protocol":"{\"minReaderVersion\":1,\"minWriterVersion\":1}","location":"file:///Users/cole/github.com/cmackenzie1/delta-rs/rust/tests/data/issue_1374"},"clientVersion":"delta-rs.0.11.0"}} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea7b775bb5e53a1d4efaba6034710526b54792b3 GIT binary patch literal 20622 zcmeG^Yiv_zme;|A6xa?$y;n|~wlpo5R3VAuJcv`;?RA_ufh4s}V#n@uTR(2%Bz6*P z=gBf!RWoe1Vo}xYcG)gF%OZXtG^#3u`lG548dW2NMyskq2qAwOJ6c*-DxPM6AO< 
z6&zQ_Z?0_N%C}V3RHWo=#2v{-%0}ZnCsYXM%iupTD^@&o>>v}4=X&u6WCDL};kRhK zDzsT)AdfWbL?k5>as5KS_*>L1j}cErTP94}fPRW%|G(d|(leJS*OO zrb-8*w}kQ<4Yb}%uHHZ*LQSqx$ zHy`_7#3m<(M@Oc?-2(+rjLY-y%tVsY@;Yb{?ywLRmtGLByr2dqa|C^!e?AWzak~Y$ zEgju@LP`jKF8o~l`bF{Viz-5a7gWvI06;-p`=@0POkSq4K^d~C4VX=wZ#D?NO%r_i z@MJm_$yQ9ICuc_Ea+s0xc0!wve2y4TPL3Rl3O1=1Hy$bDG8Go&; z7xfad+I_Gc=z(5y*d0xE_Le$Fa|3#qIy*jfvLkA59FDctMV$85x@Nn*t**@xAFh*~ zZSrtD7LPYOV|6i@S)Z8z2IlJ(n_4s2UGYL-L9?FbpVlN<)oQKT_Xw!Btqo{l+Eu+j z&RN!Je!XU0Bp$a;MY0Lzxyywk<>DoWc%|V7#v7=UA>REj`cZiDJb|^QLLIYdEtp7t z_c+H{c}V;DZ`6c?QkhVwxPMF5-0s+=p5DZpxV%%-)9lC>o@86O=UjZqBD z$rNtBDt`VdkeCk+NzCsh87b#Vj-8>cH5Sf>{!)_)&sA>c$}34(HiEuKIuenI5hj@~ zp-a5?+HSA}Wfide%X0WnweT|l2+nHLDZd<>OydEt(GF+1(TM;CZg_>`l6DSk$I}`d z1`S8L_O^pCJu+8tdbRF02JH5Bdt>FyAW-h9-v)@;;AZH6{1GaUQ4CrT&MkdvRW2xlzf z=|S?CqahWAA=N;s<5MWrP{(W)Y6~XnnxLc_ZKw&G$?o3)eaOi1jk;?lPrxn#-hWHH z9){4a!qQs;g!cU2T#dcMr<&z^>yc7(5(`%d>X=Py0i|>>K2Nvb3aO;DCQ_f!0{IhK z05zOUufHvxc^hcXhmRK*`MpH5;SdGL^XUZ(!+sn6s&q2FRnBwE(YeWx7Z9Zj!h(42 zUGbB5;Yg*!1(_P&N|q(;L?N!2EE$c<$WqO63d-_sRJ<51EXyKT!PpG43>T3l_S>xZ z3lHu2in3JcEGWy%vUo-YS?X|6mik++EE(d0`;6N`k4Pl1Q>zOL%U@WOr{&b-jI1AA@si=hlz3=L_0Z-ZP#Bt`JT(fmr{ZvliPwsF zq@vGK!~%jow_iN5pMi#$!EeSJ06HzH5k);ADWGx>N%ca#cqP4?zQL6Q;Awd-D^JAb zc-LrB?o+;d5jhCzYKDQzjin{AsUK>Qv%}Nkoor#DJ;Ohv-E^pYvP40Tn+m@r7s;siKFV`hb z1FaRxxyq+3m2lq2KiE-Gr~?o`F5j^pA5PWnht1W#AsL#6VRGn|>?@JlU6RtMA0#TZ zFA}R-PS@*zm)+_!Wd&*rb(ciWqqtYCT;pwR;R|dOlNBrx_eQ;LG&GIF>ZKkGt4+hO zZrBRAAI~jHrR|4!Ct}!}T(eSk$Z&YBG@hQ-l_jaVb|3_`P|t=c08}dVlwxS&FopxP zFwB9Kze-r85Kc)xtioL1xJu2Z0hih=eR%ECSXo|=Vn&sanqR-jzo9fQyu`n&G%s)0CY;@&O*sAvKcGxF zTdOtC*YS3x_e#CiyyW0NT>UYDC<8RcqdRjqg$>obTk%l`HK(`ZX#}5Aq92?@~}F zD5Mee{#zQf>u>YJ%G`7B@&ToJH>&ksmbKm^39a|;nAW_V;*-j<6H^)==k{yOD`~!7 znQ%3uH4jf~6Hd+WyOjy=&1uaW^IG%d0oH^M#P>S7H%HRx$=P>Ur4PT6Ybmp7AISMF zkPUAyvq3rFcCNPyH6LH81kc!EETJ>@o9c(u0V_Z-mVzM0yuu5pl`QYw4G3GTHY&p4 z%NTC87(;;uYt3a7u&}ImCtIlBIPz9p&DpS&LZt&aVd61KSDg#3DKF#ahrtjn^Ec^1_x@8>GYhBx#lj21*MH4_YT?BD)&fKn 
zt6LV%)6)9~`D#u)US43O(Yw+$2^K!<1~RA#`)te-R3yq z51B+2g_ z`Za8vP51^j?&ES!yt+@@xrb5aB793~@$Em5OFJ1T6}Z6#ms*}8+$U5Ye`MjiAczi` zQ5E|XME}!)R)QjY^lxZy*8m2D&85(?Fbp)Bt4xH`1m!-1a{DdKY9C#>h;W`z9{)#j z6kq+7hH|-b3jJV6QG|V{UVYhzaG7vlXShF#XklwP+{U9IVK1$eL-+&XI`%Q?)3w;L ze5z;Jf_$#NBtv-bI4Z|QzzuoeZ-$rUQ^j47&y~w8grkIO@t=s#rNnZ4YRB$^e6GG+ zLim($e-5~T&uil);m+gpLCYS5uL%1R!+v!_Gr@;jwIEz2G-r;IxAnDg@yeE&Y-$UCib# zEqJ>@BpB)sgfj%>Is@|MbRHLa?qV)A5H8Tdzxjj+|IZmsZru-BPas?;h$k4tb91YJ zXz;-1q5)xvV1C759$UzRSu7lwD+7f0Pv9=yVqk6@SP4uHJ^JT=;WzgcH($YTn0wTdlZv4T==a6U|d%F-UAcOhMQYTR0gJrH+ zq-YxF;(+AtYXeeiz21g~IeeFpW_(_Zx$Qt-n-f5HVW&2~vrD5F_``7*N~t@Xh;|2(IJ3z+1oV*TRC*+rPs1k#yaNpl zBwklKJTxw$j3iGsDF>X6P;jm>OiS^{We)}83kXx%JAm{MsTzL2hi0-}LjF5KNRQVW z1^k3h!|x6f6k@HPnlzvIYejm@^tz1nI*4A}PnS+#&zJ+~p-tB4kp?wY?>!`g|dszHWUyJe079gDvCXpx+Y-&Lsze0g$^t5%+-J!84n` zpDx#kzMmN!pPhu6uJO>2KjCvE646u~=x}FH-w29}^b_WEc91^pT>$wx5j=a?PWtgB z>Y)rNk)TC2kO=k%5Q>Id1+wDiA>p-KELa1PFAH2KT&9C!-P(L~cnhWR$u?Du14G;B$jy94KGXK%i z81>A#qfLS7IMUNMR)8PyulOMA27YFdpIYL-AU|p3XP#E#k793y_>o3x^Zn&sO+J!K z26h_&2KgZsBGKOs`W_4{#64YE+63k=vZ0}bvd^e0u1SP%_TQcTqXY=IhLJ|VJ#LU+ zZ6D4rEZ>C|*gsMNT27oxz1WU>0@;`+Fb}re7)uR;{R97If9mRBNNFkj0E;6M*qO>| z$^1ovEe8H1Uu`je(eDO&^ZW(GWl4%o1hRvvz*w|t&v*#ALZ>;tn(~1V(z{|lfUc;J|Lc>ciC@|e zjbEcb-B0jwYg}kwcGMB+P1@g_{W8SIei|qJ3d5SvVE<$&*fN3f!%h4E3=9qIaY+fp z@9sl+5+&5noDBVX8fU_uWGWKuhq#oSi6+tB3B9zhC_JgB0qi#y9^3E!DH&`NA30yX zru-#><1C&){P1I-hKKw~A`B_iep*lVpvli2q<6AoN>a#ac8nl@3q|Se=qpGsg6a+@ z!44s&H%kpG(7TK1rTt38x?Ovi?Gn9q+0!ibdLfm~@kj2EPVYYAkMw^W=^3Q?)S0ti zlCJ^mi;S3tP5U(!O~&UT)+S>~opziyRurDAZ+Uool8{&B!6PAeyXK=!{+W=&iGc<3 zk1kB}_$bkDNj z2EYl3nO{@9=|Oxe#TyEJp8lC=3gpp5c>>ur=;H{k_n`jj`Dfpo^iH-#`qfDEy7Twb zXNZ?x(m&d-Fq}ysH^RJ*=r^91d|)r}VB<9TbkKhQ%J7pU5pib+i*F-nFhE3x*$*>y zoWPPM=!fQ5EENF%92gH^j_GsZ@p>8kaAk0ul&-sfECP8`D9~?@rIOQO=8kCGEN^4H z?1>oyQ&X-2{^wBtR5lwXgt;h<<2ZAqjg2>wx#zxrYv-6j( zKTOGJ5eNX55CD=|B#3R)R~O{j{)8XnAA|oIXRB2HeUQ3zW}EVllx%BitGCzNx&Pk5 KabEbB2>%~%Cankn literal 0 HcmV?d00001 diff --git 
a/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..74ce0bf390 --- /dev/null +++ b/rust/tests/data/issue_1374/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":null,\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}} +{"add":{"path":"date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet","size":1021,"partitionValues":{"date":"2023-05-24"},"modificationTime":1684886485017,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":8},\"maxValues\":{\"timestamp\":\"2023-05-24T00:01:25.014Z\",\"temperature\":90},\"nullCount\":{\"temperature\":0,\"timestamp\":0}}","tags":null}} +{"commitInfo":{"timestamp":1685483647338,"clientVersion":"delta-rs.0.11.0"}} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/_delta_log/_last_checkpoint b/rust/tests/data/issue_1374/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..1c0d1f36c5 --- /dev/null +++ b/rust/tests/data/issue_1374/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"parts":null,"size":20622,"version":1} \ No newline at end of file diff --git a/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd47d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cc6a8f345f02fc845d77164ff0f20b125baf8527 GIT binary patch literal 1021 
zcmb_by>HV{5I;Lka72Sb<$KtYg)+3BLP;S=p^7SA`jxh6Nur`A9Y7(qXr-72$88pn zP^S(EF;$%qVrFJ!K!}NvKcMP=z|O#(ozNh4D<}Ql-Mf$9yE`XUS)WD?KPcF(pn)(B zP@Q;n#ml?D>+gSYQ``1}YR|mab z4P3A5`mJE!cf)Q!E8>gd9BR(Tm~D=b6h>?S>$+;4uY=6tju0#{2#0T55y!x7i(>_S zN&w3ihZ;U+lX@wZ$6CI>*MHJ$@3>wIHJX1?9a-e0QF$|NXL%ILr_gBMAPmd4vd)oe z|JQFIek4$c(>Tt8bE?^w-I+LJy7vj30&x}-2JOHcllA(;&6#Ly^PPH|rBIV3;X!&; z%D$X?CRf-+TFMfKNosVLa+ytX4K>}^EIk_drCi%!(k9=-BsOZxZB|sj7V`F^(BHUR z;IN$Q(^0N>o;FIIwzrtuZme|pu|+PDc{$&Y5{W15w;Wb+5zE5rIUnDf{N57lNdAe^ zd{S;Nb%L$Ndak)S9g=`)bzA<6eK)w}_c8~r-`m}Lp1E6?%1!0qKK-Vv^vC}LP21BX literal 0 HcmV?d00001 diff --git a/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet b/rust/tests/data/issue_1374/date=2023-05-24/part-00000-e2b01fc6-a906-4008-82df-e98efdcdd49c-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cc6a8f345f02fc845d77164ff0f20b125baf8527 GIT binary patch literal 1021 zcmb_by>HV{5I;Lka72Sb<$KtYg)+3BLP;S=p^7SA`jxh6Nur`A9Y7(qXr-72$88pn zP^S(EF;$%qVrFJ!K!}NvKcMP=z|O#(ozNh4D<}Ql-Mf$9yE`XUS)WD?KPcF(pn)(B zP@Q;n#ml?D>+gSYQ``1}YR|mab z4P3A5`mJE!cf)Q!E8>gd9BR(Tm~D=b6h>?S>$+;4uY=6tju0#{2#0T55y!x7i(>_S zN&w3ihZ;U+lX@wZ$6CI>*MHJ$@3>wIHJX1?9a-e0QF$|NXL%ILr_gBMAPmd4vd)oe z|JQFIek4$c(>Tt8bE?^w-I+LJy7vj30&x}-2JOHcllA(;&6#Ly^PPH|rBIV3;X!&; z%D$X?CRf-+TFMfKNosVLa+ytX4K>}^EIk_drCi%!(k9=-BsOZxZB|sj7V`F^(BHUR z;IN$Q(^0N>o;FIIwzrtuZme|pu|+PDc{$&Y5{W15w;Wb+5zE5rIUnDf{N57lNdAe^ zd{S;Nb%L$Ndak)S9g=`)bzA<6eK)w}_c8~r-`m}Lp1E6?%1!0qKK-Vv^vC}LP21BX literal 0 HcmV?d00001 diff --git a/rust/tests/datafusion_test.rs b/rust/tests/datafusion_test.rs index 19bc0b6e90..8ce726199d 100644 --- a/rust/tests/datafusion_test.rs +++ b/rust/tests/datafusion_test.rs @@ -9,7 +9,6 @@ use arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, TimeUnit, }; use arrow::record_batch::RecordBatch; -use common::datafusion::context_with_delta_table_factory; use datafusion::assert_batches_sorted_eq; use 
datafusion::datasource::TableProvider; use datafusion::execution::context::{SessionContext, SessionState, TaskContext}; @@ -25,6 +24,7 @@ use datafusion_proto::bytes::{ }; use url::Url; +use common::datafusion::context_with_delta_table_factory; use deltalake::action::SaveMode; use deltalake::delta_datafusion::{DeltaPhysicalCodec, DeltaScan}; use deltalake::operations::create::CreateBuilder; @@ -723,7 +723,7 @@ async fn test_files_scanned() -> Result<()> { // assert_eq!(metrics.num_scanned_files(), 1); // Check pruning for null partitions. Since there are no record count statistics pruning cannot be done - // let e = col("k").is_not_null(); + // let e = col("k").is_not_null(); // let metrics = get_scan_metrics(&table, &state, &[e]).await?; // assert_eq!(metrics.num_scanned_files(), 2); @@ -842,31 +842,35 @@ async fn test_issue_1292_datafusion_sql_projection() -> Result<()> { } #[tokio::test] -async fn test_issue_1291_datafusion_sql_partitioned_data() -> Result<()> { +async fn test_issue_1374() -> Result<()> { + env_logger::init(); + let ctx = SessionContext::new(); - let table = deltalake::open_table("./tests/data/http_requests") + let table = deltalake::open_table("./tests/data/issue_1374") .await .unwrap(); - ctx.register_table("http_requests", Arc::new(table))?; + ctx.register_table("t", Arc::new(table))?; let batches = ctx .sql( - "SELECT \"ClientRequestURI\", date FROM http_requests WHERE date > '2023-04-13' LIMIT 5", + r#"SELECT * + FROM t + WHERE timestamp BETWEEN '2023-05-24T00:00:00.000Z' AND '2023-05-25T00:00:00.000Z' + LIMIT 5 + "#, ) .await? 
.collect() .await?; let expected = vec![ - "+------------------+------------+", - "| ClientRequestURI | date |", - "+------------------+------------+", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "| / | 2023-04-14 |", - "+------------------+------------+", + "+---------------------+-------------+------------+", + "| timestamp | temperature | date |", + "+---------------------+-------------+------------+", + "| 2023-05-17T17:00:00 | 20 | 2023-05-17 |", + "| 2023-05-18T18:00:00 | 20 | 2023-05-18 |", + "| 2023-05-19T19:00:00 | 20 | 2023-05-19 |", + "+---------------------+-------------+------------+", ]; assert_batches_sorted_eq!(&expected, &batches);