diff --git a/Cargo.lock b/Cargo.lock index fc69750b3..779466162 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -71,6 +71,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -498,9 +504,9 @@ dependencies = [ [[package]] name = "async-once-cell" -version = "0.3.1" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72faff1fdc615a0199d7bf71e6f389af54d46a66e9beb5d76c39e48eda93ecce" +checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb" [[package]] name = "async-process" @@ -615,17 +621,6 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -745,10 +740,12 @@ checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" [[package]] name = "bigdecimal" -version = "0.3.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6773ddc0eafc0e509fb60e48dff7f450f8e674a0686ae8605e8d9901bd5eefa" +checksum = "454bca3db10617b88b566f205ed190aedb0e0e6dd4cad61d3988a72e8c5594cb" dependencies = [ + "autocfg", + "libm", "num-bigint 0.4.3", "num-integer", "num-traits", @@ -766,9 +763,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.64.0" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags 1.3.2", "cexpr", @@ -776,12 +773,13 @@ dependencies = [ "lazy_static", "lazycell", "peeking_take_while", + "prettyplease 0.2.12", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 1.0.109", + "syn 2.0.27", ] [[package]] @@ -1041,18 +1039,6 @@ dependencies = [ "libloading", ] -[[package]] -name = "clap" -version = "3.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" -dependencies = [ - "bitflags 1.3.2", - "clap_lex 0.2.4", - "indexmap 1.9.3", - "textwrap", -] - [[package]] name = "clap" version = "4.3.19" @@ -1072,7 +1058,7 @@ checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" dependencies = [ "anstream", "anstyle", - "clap_lex 0.5.0", + "clap_lex", "strsim", ] @@ -1088,15 +1074,6 @@ dependencies = [ "syn 2.0.27", ] -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - [[package]] name = "clap_lex" version = "0.5.0" @@ -1266,20 +1243,20 @@ dependencies = [ [[package]] name = "criterion" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", - "clap 3.2.25", + "clap", "criterion-plot", "futures", - "itertools", - "lazy_static", + "is-terminal", + "itertools 0.10.5", "num-traits", + "once_cell", "oorandom", "regex", "serde", @@ -1297,7 +1274,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -1626,9 +1603,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" [[package]] name = "fallible-iterator" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fallible-streaming-iterator" @@ -1994,20 +1971,15 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.13.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ "ahash 0.8.3", + "allocator-api2", "serde", ] -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" - [[package]] name = "heapless" version = "0.7.16" @@ -2037,15 +2009,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.3.2" @@ -2297,7 +2260,7 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi 0.3.2", + "hermit-abi", "libc", "windows-sys 0.48.0", ] @@ -2314,7 +2277,7 @@ 
version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi 0.3.2", + "hermit-abi", "rustix 0.38.4", "windows-sys 0.48.0", ] @@ -2328,6 +2291,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -2363,20 +2335,21 @@ dependencies = [ [[package]] name = "lalrpop" -version = "0.19.12" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a1cbf952127589f2851ab2046af368fd20645491bb4b376f04b7f94d7a9837b" +checksum = "da4081d44f4611b66c6dd725e6de3169f9f63905421e8626fcb86b6a898998b8" dependencies = [ "ascii-canvas", "bit-set", "diff", "ena", "is-terminal", - "itertools", + "itertools 0.10.5", "lalrpop-util", "petgraph", + "pico-args", "regex", - "regex-syntax 0.6.29", + "regex-syntax 0.7.4", "string_cache", "term", "tiny-keccak", @@ -2385,9 +2358,9 @@ dependencies = [ [[package]] name = "lalrpop-util" -version = "0.19.12" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3c48237b9604c5a4702de6b824e02006c3214327564636aef27c1028a8fa0ed" +checksum = "3f35c735096c0293d313e8f2a641627472b83d01b937177fe76e5e2708d31e0d" dependencies = [ "regex", ] @@ -2512,9 +2485,9 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "librocksdb-sys" -version = "0.8.3+7.4.4" +version = "0.11.0+8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b255ff04123fcc176162f56ed0c9cd42d8f357cf55b3fabeb60f7413741b3" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" dependencies = [ "bindgen", "bzip2-sys", @@ -2522,6 
+2495,7 @@ dependencies = [ "glob", "libc", "libz-sys", + "lz4-sys", ] [[package]] @@ -2825,7 +2799,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.2", + "hermit-abi", "libc", ] @@ -2851,7 +2825,7 @@ dependencies = [ "futures", "humantime", "hyper", - "itertools", + "itertools 0.10.5", "parking_lot 0.12.1", "percent-encoding", "quick-xml", @@ -2936,9 +2910,9 @@ dependencies = [ [[package]] name = "opentelemetry" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6c3d7288a106c0a363e4b0e8d308058d56902adefb16f4936f417ffef086e" +checksum = "5f4b8347cc26099d3aeee044065ecc3ae11469796b4d65d065a23a584ed92a6f" dependencies = [ "opentelemetry_api", "opentelemetry_sdk", @@ -2946,9 +2920,9 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1c928609d087790fc936a1067bdc310ae702bdf3b090c3f281b713622c8bbde" +checksum = "8af72d59a4484654ea8eb183fea5ae4eb6a41d7ac3e3bae5f4d2a282a3a7d3ca" dependencies = [ "async-trait", "futures", @@ -2964,39 +2938,38 @@ dependencies = [ [[package]] name = "opentelemetry-proto" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61a2f56df5574508dd86aaca016c917489e589ece4141df1b5e349af8d66c28" +checksum = "045f8eea8c0fa19f7d48e7bc3128a39c2e5c533d5c61298c548dfefc1064474c" dependencies = [ "futures", "futures-util", "opentelemetry", "prost", "tonic 0.8.3", - "tonic-build 0.8.4", ] [[package]] name = "opentelemetry_api" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c24f96e21e7acc813c7a8394ee94978929db2bcc46cf6b5014fc612bf7760c22" +checksum = 
"ed41783a5bf567688eb38372f2b7a8530f5a607a4b49d38dd7573236c23ca7e2" dependencies = [ "fnv", "futures-channel", "futures-util", "indexmap 1.9.3", - "js-sys", "once_cell", "pin-project-lite", "thiserror", + "urlencoding", ] [[package]] name = "opentelemetry_sdk" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ca41c4933371b61c2a2f214bf16931499af4ec90543604ec828f7a625c09113" +checksum = "8b3a2a91fdbfdd4d212c0dcc2ab540de2c2bcbbd90be17de7a7daf8822d010c1" dependencies = [ "async-trait", "crossbeam-channel", @@ -3023,12 +2996,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "os_str_bytes" -version = "6.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" - [[package]] name = "overload" version = "0.1.1" @@ -3286,6 +3253,12 @@ dependencies = [ "uncased", ] +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + [[package]] name = "pin-project" version = "1.1.2" @@ -3371,7 +3344,7 @@ checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ "anstyle", "difflib", - "itertools", + "itertools 0.10.5", "predicates-core", ] @@ -3401,6 +3374,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prettyplease" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" +dependencies = [ + "proc-macro2", + "syn 2.0.27", +] + [[package]] name = "prettytable-rs" version = "0.10.0" @@ -3468,12 +3451,12 @@ checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck 0.4.1", - "itertools", + "itertools 0.10.5", "lazy_static", "log", "multimap", "petgraph", - "prettyplease", + 
"prettyplease 0.1.25", "prost", "prost-types", "regex", @@ -3489,7 +3472,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 1.0.109", @@ -3835,9 +3818,9 @@ checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" [[package]] name = "rocksdb" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9562ea1d70c0cc63a34a22d977753b50cca91cc6b6527750463bd5dd8697bc" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" dependencies = [ "libc", "librocksdb-sys", @@ -4063,6 +4046,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96426c9936fd7a0124915f9185ea1d20aa9445cc9821142f0a73bc9207a2e186" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -4231,12 +4223,12 @@ dependencies = [ "anyhow", "arrow", "chrono", - "clap 4.3.19", + "clap", "decorum", "derive_more", "enum-map", "error-stack", - "itertools", + "itertools 0.11.0", "prost", "prost-build", "prost-types", @@ -4251,7 +4243,7 @@ dependencies = [ "thiserror", "tokio", "tonic 0.9.2", - "tonic-build 0.9.2", + "tonic-build", "uuid 1.4.1", ] @@ -4275,7 +4267,7 @@ dependencies = [ "error-stack", "half 2.3.1", "insta", - "itertools", + "itertools 0.11.0", "num", "proptest", "serde", @@ -4298,13 +4290,13 @@ name = "sparrow-catalog" version = "0.10.0" dependencies = [ "arrow", - "clap 4.3.19", + "clap", "derive_more", "error-stack", "fallible-iterator", "futures", - "hashbrown 0.13.2", - "itertools", + "hashbrown 0.14.0", + "itertools 0.11.0", "logos", "parquet", "prettytable-rs", @@ -4338,7 +4330,7 @@ dependencies = [ "arrow", "bit-set", "chrono", - "clap 4.3.19", + "clap", 
"codespan-reporting", "const_format", "decorum", @@ -4347,9 +4339,9 @@ dependencies = [ "egg", "enum-map", "error-stack", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "insta", - "itertools", + "itertools 0.11.0", "lalrpop", "lalrpop-util", "logos", @@ -4390,7 +4382,7 @@ dependencies = [ "chrono", "decorum", "futures", - "itertools", + "itertools 0.11.0", "num", "owning_ref", "parquet", @@ -4435,10 +4427,10 @@ dependencies = [ "arrow-string", "derive_more", "error-stack", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "index_vec", "inventory", - "itertools", + "itertools 0.11.0", "num", "serde_json", "sparrow-arrow", @@ -4463,8 +4455,8 @@ dependencies = [ "derive_more", "erased-serde", "error-stack", - "hashbrown 0.13.2", - "itertools", + "hashbrown 0.14.0", + "itertools 0.11.0", "lz4-sys", "num", "owning_ref", @@ -4495,7 +4487,7 @@ dependencies = [ "bitvec", "chrono", "chronoutil", - "itertools", + "itertools 0.11.0", "num", "proptest", "smallvec", @@ -4514,7 +4506,7 @@ dependencies = [ "assert_cmd", "async-stream", "chrono", - "clap 4.3.19", + "clap", "dashmap", "data-encoding", "derive_more", @@ -4522,11 +4514,11 @@ dependencies = [ "fallible-iterator", "filetime", "futures", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "hex", "indoc", "insta", - "itertools", + "itertools 0.11.0", "opentelemetry", "opentelemetry-otlp", "parquet", @@ -4601,8 +4593,8 @@ dependencies = [ "anyhow", "arrow", "enum-map", - "hashbrown 0.13.2", - "itertools", + "hashbrown 0.14.0", + "itertools 0.11.0", "parse-display", "serde_yaml", "sparrow-api", @@ -4624,10 +4616,10 @@ dependencies = [ "error-stack", "fallible-iterator", "futures", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "insta", "inventory", - "itertools", + "itertools 0.11.0", "once_cell", "pin-project", "prost", @@ -4637,7 +4629,7 @@ dependencies = [ "tempfile", "tokio", "tokio-stream", - "tonic-build 0.9.2", + "tonic-build", "tracing", ] @@ -4645,14 +4637,14 @@ dependencies = [ name = "sparrow-qfr-tool" version = "0.10.0" 
dependencies = [ - "clap 4.3.19", + "clap", "cpu-time", "derive_more", "error-stack", "fallible-iterator", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "insta", - "itertools", + "itertools 0.11.0", "serde", "serde_json", "serde_yaml", @@ -4682,7 +4674,7 @@ dependencies = [ "bitvec", "bytes", "chrono", - "clap 4.3.19", + "clap", "criterion", "dashmap", "data-encoding", @@ -4694,10 +4686,10 @@ dependencies = [ "futures", "futures-lite", "half 2.3.1", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "insta", "inventory", - "itertools", + "itertools 0.11.0", "lz4", "num-traits", "object_store", @@ -4744,7 +4736,7 @@ dependencies = [ "derive_more", "error-stack", "index_vec", - "itertools", + "itertools 0.11.0", "loom", "serde", "sparrow-arrow", @@ -4764,9 +4756,9 @@ dependencies = [ "bitvec", "codespan-reporting", "decorum", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "insta", - "itertools", + "itertools 0.11.0", "lalrpop", "lalrpop-util", "logos", @@ -4789,7 +4781,7 @@ dependencies = [ "arrow-select", "derive_more", "error-stack", - "itertools", + "itertools 0.11.0", "parquet", "serde", "serde_json", @@ -4807,7 +4799,7 @@ dependencies = [ "arrow-schema", "derive_more", "error-stack", - "itertools", + "itertools 0.11.0", "parking_lot 0.12.1", "sparrow-arrow", "sparrow-expressions", @@ -5083,12 +5075,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" version = "1.0.44" @@ -5260,11 +5246,36 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542" +dependencies = [ + 
"serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.19.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ + "indexmap 2.0.0", "serde", + "serde_spanned", + "toml_datetime", + "winnow", ] [[package]] @@ -5327,26 +5338,13 @@ dependencies = [ "tracing", ] -[[package]] -name = "tonic-build" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" -dependencies = [ - "prettyplease", - "proc-macro2", - "prost-build", - "quote", - "syn 1.0.109", -] - [[package]] name = "tonic-build" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07" dependencies = [ - "prettyplease", + "prettyplease 0.1.25", "proc-macro2", "prost-build", "quote", @@ -5477,9 +5475,9 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ebb87a95ea13271332df069020513ab70bdb5637ca42d6e492dc3bbbad48de" +checksum = "00a39dcf9bfc1742fa4d6215253b33a6e474be78275884c216fc2a06267b3600" dependencies = [ "once_cell", "opentelemetry", @@ -5711,6 +5709,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name 
= "utf8parse" version = "0.2.1" @@ -6104,6 +6108,15 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +[[package]] +name = "winnow" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25b5872fa2e10bd067ae946f927e726d7d603eaeb6e02fa6a350e0722d2b8c11" +dependencies = [ + "memchr", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/Cargo.toml b/Cargo.toml index 7549a5771..fda6ee33b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,12 +23,12 @@ arrow-ord = { version = "43.0.0" } arrow-schema = { version = "43.0.0", features = ["serde"] } arrow-select = { version = "43.0.0" } arrow-string = { version = "43.0.0" } -async-once-cell = "0.3.1" +async-once-cell = "0.5.3" async-stream = "0.3.4" async-trait = "0.1.68" avro-rs = "0.13.0" avro-schema = "0.3.0" -bigdecimal = { version = "0.3.1", features = ["serde"] } +bigdecimal = { version = "0.4.1", features = ["serde"] } bincode = "1.3.3" bit-set = "0.5.3" bitvec = { version = "1.0.1", features = ["serde"] } @@ -40,7 +40,7 @@ codespan-reporting = "0.11.1" const_format = "0.2.30" core_affinity = "0.8.0" cpu-time = "1.0.0" -criterion = { version = "0.4.0", default-features = false, features = [ +criterion = { version = "0.5.1", default-features = false, features = [ "async_tokio", ] } dashmap = "5.4.0" @@ -53,19 +53,19 @@ enum-as-inner = "0.6.0" enum-map = "2.5.0" erased-serde = "0.3.25" error-stack = { version = "0.3.1", features = ["anyhow", "spantrace"] } -fallible-iterator = "0.2.0" +fallible-iterator = "0.3.0" futures = "0.3.27" futures-lite = "1.12.0" half = { version = "2.2.1", features = ["serde"] } -hashbrown = { version = "0.13.2", features = ["serde"] } +hashbrown = { version = "0.14.0", features = ["serde"] } hex = "0.4.3" index_vec = { version = "0.1.3", features = ["serde"] } indoc = "1.0.9" insta = { version = "1.29.0", features = ["ron", 
"yaml", "json"] } inventory = "0.3.8" -itertools = "0.10.5" -lalrpop = "0.19.9" -lalrpop-util = "0.19.9" +itertools = "0.11.0" +lalrpop = "0.20.0" +lalrpop-util = "0.20.0" logos = "0.12.1" lz4 = "1.24.0" lz4-sys = "1.9.4" @@ -73,8 +73,8 @@ num = "0.4.0" num-traits = "0.2.15" object_store = { version = "0.6.1", features = ["aws", "gcp"] } once_cell = "1.17.1" -opentelemetry = { version = "0.18.0", features = ["rt-tokio"] } -opentelemetry-otlp = "0.11.0" +opentelemetry = { version = "0.19.0", features = ["rt-tokio"] } +opentelemetry-otlp = "0.12.0" owning_ref = "0.4.1" parking_lot = { version = "0.12.1" } parquet = { version = "43.0.0", features = ["async"] } @@ -122,14 +122,14 @@ tokio = { version = "1.27.0", features = [ ] } tokio-stream = { version = "0.1.12", features = ["fs"] } tokio-util = { version = "0.7.7", features = ["io"] } -toml = "0.5.11" +toml = "0.7.6" tonic = "0.9.2" tonic-build = { version = "0.9.2", features = ["prost"] } tonic-health = "0.9.2" tonic-reflection = "0.9.2" tracing = "0.1.37" tracing-error = "0.2.0" -tracing-opentelemetry = "0.18.0" +tracing-opentelemetry = "0.19.0" tracing-serde = "0.1.3" tracing-subscriber = { version = "0.3.17", features = [ "env-filter", @@ -143,7 +143,7 @@ uuid = { version = "1.3.0", features = ["v4"] } # This disables compression algorithms that cause issues during linking due to # https://github.com/rust-rocksdb/rust-rocksdb/issues/514 default-features = false -version = "0.19.0" +version = "0.21.0" features = ["lz4"] [profile.release] diff --git a/crates/sparrow-catalog/catalog/add.toml b/crates/sparrow-catalog/catalog/add.toml index cd3017caa..2a40d249b 100644 --- a/crates/sparrow-catalog/catalog/add.toml +++ b/crates/sparrow-catalog/catalog/add.toml @@ -1,8 +1,8 @@ -name = 'add' -signature = 'add(a: number, b: number) -> number' -operator = 'a + b' -short_doc = 'Returns the sum of two numbers.' 
-long_doc = ''' +name = "add" +signature = "add(a: number, b: number) -> number" +operator = "a + b" +short_doc = "Returns the sum of two numbers." +long_doc = """ This is the function used for the binary operation `a + b`. ### Parameters @@ -16,28 +16,28 @@ following the [numeric type coercion rules](docs:data-model#numeric-type-coercio Returns a numeric column of the promoted numeric type compatible with both `a` and `b`. The result contains `null` if `a` or `b` was null at that row. Otherwise the row contains the sum of `a` and `b`. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Addition' -description = ''' +name = "Addition" +description = """ In this example, `a` is an integer column (defaulting to `i64`) and `b` is a floating point column (defaulting to `f64`). The result is a floating point column, achieved by implicitly converting `a` to `f64`. -''' -expression = 'Input.a + Input.b' -input_csv = ''' +""" +expression = "Input.a + Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5,1.2 2021-01-02T00:00:00.000000000Z,A,6.3,0.4 2021-03-01T00:00:00.000000000Z,B,,3.7 2021-04-10T00:00:00.000000000Z,A,13, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.0,1.2,6.2 2021-01-02T00:00:00.000000000,A,6.3,0.4,6.7 2021-03-01T00:00:00.000000000,B,,3.7, 2021-04-10T00:00:00.000000000,A,13.0,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/add_time.toml b/crates/sparrow-catalog/catalog/add_time.toml index 7f28c7fd0..c04a4a6e4 100644 --- a/crates/sparrow-catalog/catalog/add_time.toml +++ b/crates/sparrow-catalog/catalog/add_time.toml @@ -1,7 +1,7 @@ -name = 'add_time' -signature = 'add_time(delta: timedelta, time: timestamp_ns) -> timestamp_ns' -short_doc = 'Adds a `timedelta` (duration or interval) to a time.' 
-long_doc = ''' +name = "add_time" +signature = "add_time(delta: timedelta, time: timestamp_ns) -> timestamp_ns" +short_doc = "Adds a `timedelta` (duration or interval) to a time." +long_doc = """ ### Parameters * delta: The time delta to add to the timestamp. See other [time functions](#time-functions) for how to create `timedelta`s. @@ -11,17 +11,17 @@ long_doc = ''' Returns a time column with each row containing the value of `time` for that row plus the given `delta`. If either the `delta` or `time` are `null` then the result is `null` in that row. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Adding a fixed number of days' -description = ''' +name = "Adding a fixed number of days" +description = """ This example uses [`days`](#days) to create a fixed `interval_days` to add to a given date. -''' -expression = 'Input.time | add_time(days(3))' -input_csv = ''' +""" +expression = "Input.time | add_time(days(3))" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -29,8 +29,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,1996-03-24T00:00:00.000000000 1996-04-21T00:00:00.000000000,Ryan,1996-04-24T00:00:00.000000000 @@ -38,4 +38,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,1996-06-24T00:00:00.000000000 1996-07-21T00:00:00.000000000,Ben,1996-07-24T00:00:00.000000000 1996-08-21T00:00:00.000000000,Ben,1996-08-24T00:00:00.000000000 -''' +""" diff --git a/crates/sparrow-catalog/catalog/ceil.toml b/crates/sparrow-catalog/catalog/ceil.toml index a885de75c..e5cd0830d 100644 --- a/crates/sparrow-catalog/catalog/ceil.toml +++ b/crates/sparrow-catalog/catalog/ceil.toml @@ -1,7 +1,7 @@ -name = 'ceil' -signature = 'ceil(n: number) -> number' -short_doc = 'Rounds the number up to the next largest integer.' 
-long_doc = ''' +name = "ceil" +signature = "ceil(n: number) -> number" +short_doc = "Rounds the number up to the next largest integer." +long_doc = """ See also [`round`](#round) and [`floor`](#floor). ### Parameters @@ -14,23 +14,23 @@ Note: This method may be applied to any numeric type. For anything other than Returns a numeric column of the same type as `n`. The result contains `null` if `n` was null at that position. Otherwise, it contains the result of rounding `n` up to the next largest integer. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Ceil' -expression = 'Input.a | ceil()' -input_csv = ''' +name = "Ceil" +expression = "Input.a | ceil()" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, 2021-01-02T00:00:00.000000000Z,B,-2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,6.0 2021-01-01T00:00:00.000000000,A,6.3,7.0 2021-01-02T00:00:00.000000000,B,, 2021-01-02T00:00:00.000000000,B,-2.3,-2.0 -''' +""" diff --git a/crates/sparrow-catalog/catalog/clamp.toml b/crates/sparrow-catalog/catalog/clamp.toml index 8a480af6d..ff07a0f23 100644 --- a/crates/sparrow-catalog/catalog/clamp.toml +++ b/crates/sparrow-catalog/catalog/clamp.toml @@ -1,7 +1,7 @@ -name = 'clamp' -signature = 'clamp(value: number, min: number = null, max: number = null) -> number' -short_doc = 'Returns `value` clamped between the bounds `min` and `max`.' -long_doc = ''' +name = "clamp" +signature = "clamp(value: number, min: number = null, max: number = null) -> number" +short_doc = "Returns `value` clamped between the bounds `min` and `max`." +long_doc = """ ### Parameters * `value`: The value to be clamped. * `min`: The minimum bound. If `null`, no minimum bound will be applied. 
@@ -20,48 +20,48 @@ it contains `value` if `value` is between `min` and `max`, `min` if `value` is less than `min`, `max` if `value` is greater than `max`, and `null` if `value` is `null` or `min > max`. If `min` or `max` are null than no clamping on that side will be performed. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Clamp With Min and Max' -description = ''' +name = "Clamp With Min and Max" +description = """ This example shows the use of `clamp` with both a `min` and `max` value provided. -''' -expression = 'Input.a | clamp(min = 0.5, max = 9.5)' -input_csv = ''' +""" +expression = "Input.a | clamp(min = 0.5, max = 9.5)" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-01T00:00:00.000000000Z,B, 2021-01-01T00:00:00.000000000Z,A, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,5.7 2021-01-01T00:00:00.000000000,A,6.3,6.3 2021-01-01T00:00:00.000000000,B,, 2021-01-01T00:00:00.000000000,A,, -''' +""" [[examples]] -name = 'Clamp with Min' -description = ''' +name = "Clamp with Min" +description = """ This example shows the use of clamp with just a minimum bound. 
-''' -expression = 'Input.a | clamp(min = 0.5)' -input_csv = ''' +""" +expression = "Input.a | clamp(min = 0.5)" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-01T00:00:00.000000000Z,B, 2021-01-01T00:00:00.000000000Z,A, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,5.7 2021-01-01T00:00:00.000000000,A,6.3,6.3 2021-01-01T00:00:00.000000000,B,, 2021-01-01T00:00:00.000000000,A,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/coalesce.toml b/crates/sparrow-catalog/catalog/coalesce.toml index 3e92ffbdd..dd2148eb0 100644 --- a/crates/sparrow-catalog/catalog/coalesce.toml +++ b/crates/sparrow-catalog/catalog/coalesce.toml @@ -1,7 +1,7 @@ -name = 'coalesce' -signature = 'coalesce(values+: any) -> any' -short_doc = 'Return first non-`null` value or `null` if all values are `null`.' -long_doc = ''' +name = "coalesce" +signature = "coalesce(values+: any) -> any" +short_doc = "Return first non-`null` value or `null` if all values are `null`." +long_doc = """ ### Parameters * values: One or more values to be coalesced. Note that all of the values must be promotable to the same type. @@ -12,11 +12,11 @@ If all values are `null`, then returns `null`. The type of the result is the minimum type that all of the `values` were [promotable](docs:data-model#type-promotion-rules) to. -''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -description = ''' +description = """ In this example we use `coalesce` to apply multiple conditions, almost like a `switch` statement. Each case uses [`if`](#if) to only pass through the cases where the condition is met. @@ -24,8 +24,8 @@ the cases where the condition is met. One thing to be aware of when using `coalesce` like this is that the first non-`null` is taken. Which means that even if a condition is met, if the corresponding value was `null`, it would move on to other conditions. 
-''' -expression = ''' +""" +expression = """ coalesce( # Tax exempt items Input.value | if(Input.tax_category == 'exempt'), @@ -34,18 +34,18 @@ coalesce( # Normal tax (10%) items Input.value * 1.1 ) -''' -input_csv = ''' +""" +input_csv = """ time,key,value,tax_category 2020-01-01T00:00:00.000000000Z,Ben,10.00,exempt 2020-01-02T00:00:00.000000000Z,Ben,12.00, 2020-01-02T01:00:00.000000000Z,Ryan,13.00,flat 2020-01-02T01:00:00.000000000Z,Ryan,,exempt -''' -output_csv = ''' +""" +output_csv = """ time,key,value,tax_category,result 2020-01-01T00:00:00.000000000,Ben,10.0,exempt,10.0 2020-01-02T00:00:00.000000000,Ben,12.0,,13.200000000000001 2020-01-02T01:00:00.000000000,Ryan,13.0,flat,14.0 2020-01-02T01:00:00.000000000,Ryan,,exempt, -''' +""" diff --git a/crates/sparrow-catalog/catalog/count.toml b/crates/sparrow-catalog/catalog/count.toml index 2ce3b0ed2..7cb4f637f 100644 --- a/crates/sparrow-catalog/catalog/count.toml +++ b/crates/sparrow-catalog/catalog/count.toml @@ -1,7 +1,7 @@ -name = 'count' -signature = 'count(input: any, window: window = null) -> u32' -short_doc = 'Counts each new, non-`null` value in the input.' -long_doc = ''' +name = "count" +signature = "count(input: any, window: window = null) -> u32" +short_doc = "Counts each new, non-`null` value in the input." +long_doc = """ ### Parameters * input: The input to be counted. * window: The window to aggregate within, as described in @@ -13,13 +13,13 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the count of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `0` if there have been no such inputs. 
-''' -tags = ['aggregation'] +""" +tags = ["aggregation"] [[examples]] -name = 'Count' -expression = 'count(Input.value)' -input_csv = ''' +name = "Count" +expression = "count(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-02T00:00:00.000000000Z,Ryan, @@ -27,8 +27,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-05T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,1 2021-01-02T00:00:00.000000000,Ryan,,0 @@ -36,4 +36,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,2 2021-01-04T00:00:00.000000000,Ben,,2 2021-01-05T00:00:00.000000000,Ryan,2.3,2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/count_if.toml b/crates/sparrow-catalog/catalog/count_if.toml index 32be3bca3..6d6d7f29a 100644 --- a/crates/sparrow-catalog/catalog/count_if.toml +++ b/crates/sparrow-catalog/catalog/count_if.toml @@ -1,7 +1,7 @@ -name = 'count_if' -signature = 'count_if(input: any, window: window = null) -> u32' -short_doc = 'Counts each `true` value across in input.' -long_doc = ''' +name = "count_if" +signature = "count_if(input: any, window: window = null) -> u32" +short_doc = "Counts each `true` value across in input." +long_doc = """ ### Parameters * input: The input to be counted. * window: The window to aggregate within, as described in @@ -13,13 +13,13 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the count of new rows containing `true` in `input` up to and including the input row for the given entity. Returns `0` if there have been no such inputs. 
-''' -tags = ['aggregation'] +""" +tags = ["aggregation"] [[examples]] -name = 'Count If' -expression = 'count_if(Input.value)' -input_csv = ''' +name = "Count If" +expression = "count_if(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,false 2021-01-02T00:00:00.000000000Z,Ryan,true @@ -27,8 +27,8 @@ time,key,value 2021-01-04T00:00:00.000000000Z,Ben,true 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-05T00:00:00.000000000Z,Ryan,false -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,false,0 2021-01-02T00:00:00.000000000,Ryan,true,1 @@ -36,4 +36,4 @@ time,key,value,result 2021-01-04T00:00:00.000000000,Ben,true,1 2021-01-04T00:00:00.000000000,Ben,,1 2021-01-05T00:00:00.000000000,Ryan,false,2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/daily.toml b/crates/sparrow-catalog/catalog/daily.toml index 59bff8654..16ff0efe6 100644 --- a/crates/sparrow-catalog/catalog/daily.toml +++ b/crates/sparrow-catalog/catalog/daily.toml @@ -1,7 +1,7 @@ -name = 'daily' -signature = 'daily() -> bool' -short_doc = 'A periodic function that produces a `true` value at the start of each calendar day (UTC).' -long_doc = ''' +name = "daily" +signature = "daily() -> bool" +short_doc = "A periodic function that produces a `true` value at the start of each calendar day (UTC)." +long_doc = """ This function is often used in aggregations to produce windows or as a predicate column. @@ -9,21 +9,21 @@ as a predicate column. Returns a boolean column with each row containing a `true` value at the start of the day, corresponding to time 00:00:00Z, and `null` at all other times. -''' -tags = ['tick'] +""" +tags = ["tick"] [[examples]] -name = 'Daily Aggregated Window' -description = ''' +name = "Daily Aggregated Window" +description = """ In this example, the `daily()` function is used as an argument to the [`since](#since) window function. The result is a windowed aggregation that resets daily. 
-''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, daily_sum: sum(Input.n, window = since(daily())) } | extend({time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T04:00:00-00:00,Ben,1 1996-12-19T05:00:00-00:00,Ryan,2 @@ -31,8 +31,8 @@ time,key,n 1996-12-20T22:00:00-00:00,Ben,4 1996-12-21T03:00:00-00:00,Ryan,5 1996-12-21T07:00:00-00:00,Ben,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,daily_sum 1996-12-19T04:00:00.000000000,Ben,1,1 1996-12-19T05:00:00.000000000,Ryan,2,2 @@ -44,18 +44,18 @@ time,key,n,daily_sum 1996-12-21T00:00:00.000000000,Ben,,7 1996-12-21T03:00:00.000000000,Ryan,5,5 1996-12-21T07:00:00.000000000,Ben,6,6 -''' +""" [[examples]] -name = 'Filter Daily' -description = ''' +name = "Filter Daily" +description = """ In this example, the `daily()` function is used as an argument to the [`when`](#when) function, which filters input. The output includes the last input row before a [`tick`](#tick) occurs. 
-''' -full_expression = 'Input | last() | when(daily())' -input_csv = ''' +""" +full_expression = "Input | last() | when(daily())" +input_csv = """ time,key,n 1996-12-19T04:00:00-00:00,Ben,1 1996-12-19T05:00:00-00:00,Ryan,2 @@ -63,11 +63,11 @@ time,key,n 1996-12-20T22:00:00-00:00,Ben,4 1996-12-21T03:00:00-00:00,Ryan,5 1996-12-21T07:00:00-00:00,Ben,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,n 1996-12-19T05:00:00.000000000,Ryan,2 1996-12-19T04:00:00.000000000,Ben,1 1996-12-19T05:00:00.000000000,Ryan,2 1996-12-20T22:00:00.000000000,Ben,4 -''' +""" diff --git a/crates/sparrow-catalog/catalog/day_of_month.toml b/crates/sparrow-catalog/catalog/day_of_month.toml index a8061ef4e..6edb721cc 100644 --- a/crates/sparrow-catalog/catalog/day_of_month.toml +++ b/crates/sparrow-catalog/catalog/day_of_month.toml @@ -1,7 +1,7 @@ -name = 'day_of_month' -signature = 'day_of_month(time: timestamp_ns) -> u32' -short_doc = 'Return the day-of-month for the given time, starting with 1.' -long_doc = ''' +name = "day_of_month" +signature = "day_of_month(time: timestamp_ns) -> u32" +short_doc = "Return the day-of-month for the given time, starting with 1." +long_doc = """ ### Parameters * time: The timestamp to return the day-of-month for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the day-of-month for each input `time`. Returns `null` for rows where `time` is `null`. The first day of the month is `1`. The result will be in the range 1 to 31 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Day of Month' -expression = 'day_of_month(Input.time)' -input_csv = ''' +name = "Day of Month" +expression = "day_of_month(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,21 1996-04-21T00:00:00.000000000,Ryan,21 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,21 1996-07-21T00:00:00.000000000,Ben,21 1996-08-21T00:00:00.000000000,Ben,21 -''' +""" diff --git a/crates/sparrow-catalog/catalog/day_of_month0.toml b/crates/sparrow-catalog/catalog/day_of_month0.toml index 64ef0057e..2d4798a5d 100644 --- a/crates/sparrow-catalog/catalog/day_of_month0.toml +++ b/crates/sparrow-catalog/catalog/day_of_month0.toml @@ -1,7 +1,7 @@ -name = 'day_of_month0' -signature = 'day_of_month0(time: timestamp_ns) -> u32' -short_doc = 'Return the day-of-month for the given time, starting with 0.' -long_doc = ''' +name = "day_of_month0" +signature = "day_of_month0(time: timestamp_ns) -> u32" +short_doc = "Return the day-of-month for the given time, starting with 0." +long_doc = """ ### Parameters * time: The timestamp to return the day-of-month for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the day-of-month for each input `time`. Returns `null` for rows where `time` is `null`. The first day of the month is `0`. The result will be in the range 0 to 30 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Day of Month (Zero Based)' -expression = 'day_of_month0(Input.time)' -input_csv = ''' +name = "Day of Month (Zero Based)" +expression = "day_of_month0(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,20 1996-04-21T00:00:00.000000000,Ryan,20 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,20 1996-07-21T00:00:00.000000000,Ben,20 1996-08-21T00:00:00.000000000,Ben,20 -''' +""" diff --git a/crates/sparrow-catalog/catalog/day_of_year.toml b/crates/sparrow-catalog/catalog/day_of_year.toml index 6e99d26e6..008ef879e 100644 --- a/crates/sparrow-catalog/catalog/day_of_year.toml +++ b/crates/sparrow-catalog/catalog/day_of_year.toml @@ -1,7 +1,7 @@ -name = 'day_of_year' -signature = 'day_of_year(time: timestamp_ns) -> u32' -short_doc = 'Return the day-of-year for the given time, starting with 1.' -long_doc = ''' +name = "day_of_year" +signature = "day_of_year(time: timestamp_ns) -> u32" +short_doc = "Return the day-of-year for the given time, starting with 1." +long_doc = """ ### Parameters * time: The timestamp to return the day-of-year for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the day-of-year for each input `time`. Returns `null` for rows where `time` is `null`. The first day of the month is `1`. The result will be in the range 1 to 366 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Day Of Year' -expression = 'day_of_year(Input.time)' -input_csv = ''' +name = "Day Of Year" +expression = "day_of_year(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,81 1996-04-21T00:00:00.000000000,Ryan,112 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,173 1996-07-21T00:00:00.000000000,Ben,203 1996-08-21T00:00:00.000000000,Ben,234 -''' +""" diff --git a/crates/sparrow-catalog/catalog/day_of_year0.toml b/crates/sparrow-catalog/catalog/day_of_year0.toml index ab5dee097..f7a99d71d 100644 --- a/crates/sparrow-catalog/catalog/day_of_year0.toml +++ b/crates/sparrow-catalog/catalog/day_of_year0.toml @@ -1,7 +1,7 @@ -name = 'day_of_year0' -signature = 'day_of_year0(time: timestamp_ns) -> u32' -short_doc = 'Return the day-of-year for the given time, starting with 0.' -long_doc = ''' +name = "day_of_year0" +signature = "day_of_year0(time: timestamp_ns) -> u32" +short_doc = "Return the day-of-year for the given time, starting with 0." +long_doc = """ ### Parameters * time: The timestamp to return the day-of-year for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the day-of-year for each input `time`. Returns `null` for rows where `time` is `null`. The first day of the year is `0`. The result will be in the range 0 to 365 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Day of Year (Zero Based)' -expression = 'day_of_year0(Input.time)' -input_csv = ''' +name = "Day of Year (Zero Based)" +expression = "day_of_year0(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,80 1996-04-21T00:00:00.000000000,Ryan,111 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,172 1996-07-21T00:00:00.000000000,Ben,202 1996-08-21T00:00:00.000000000,Ben,233 -''' +""" diff --git a/crates/sparrow-catalog/catalog/days.toml b/crates/sparrow-catalog/catalog/days.toml index 3d2c40c99..2f6718007 100644 --- a/crates/sparrow-catalog/catalog/days.toml +++ b/crates/sparrow-catalog/catalog/days.toml @@ -1,7 +1,7 @@ -name = 'days' -signature = 'days(days: i64) -> interval_days' -short_doc = 'Produces an interval corresponding to the given number of calendar days.' -long_doc = ''' +name = "days" +signature = "days(days: i64) -> interval_days" +short_doc = "Produces an interval corresponding to the given number of calendar days." +long_doc = """ ### Parameters * days: The number of days to create the interval for. @@ -10,17 +10,17 @@ Returns an `interval_days` column with each row containing the value of `days` converted to an interval with the corresponding number of days. Rows where `days` is `null`, less than `0` or greater than `i32::MAX` will be `null`. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Adding a variable number of days' -description = ''' +name = "Adding a variable number of days" +description = """ This example uses [`add_time`](#add-time) to add the created interval to the `time` column. 
-''' -expression = 'Input.time | add_time(days(Input.n))' -input_csv = ''' +""" +expression = "Input.time | add_time(days(Input.n))" +input_csv = """ time,key,n 1996-03-21T00:00:00-00:00,Ben,1 1996-04-21T00:00:00-00:00,Ryan,2 @@ -28,8 +28,8 @@ time,key,n 1996-06-21T00:00:00-00:00,Ryan, 1996-07-21T00:00:00-00:00,Ben,2 1996-08-21T00:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-03-21T00:00:00.000000000,Ben,1,1996-03-22T00:00:00.000000000 1996-04-21T00:00:00.000000000,Ryan,2,1996-04-23T00:00:00.000000000 @@ -37,4 +37,4 @@ time,key,n,result 1996-06-21T00:00:00.000000000,Ryan,, 1996-07-21T00:00:00.000000000,Ben,2,1996-07-23T00:00:00.000000000 1996-08-21T00:00:00.000000000,Ben,1,1996-08-22T00:00:00.000000000 -''' +""" diff --git a/crates/sparrow-catalog/catalog/days_between.toml b/crates/sparrow-catalog/catalog/days_between.toml index 95955ff25..1771ef47e 100644 --- a/crates/sparrow-catalog/catalog/days_between.toml +++ b/crates/sparrow-catalog/catalog/days_between.toml @@ -1,7 +1,7 @@ -name = 'days_between' -signature = 'days_between(t1: timestamp_ns, t2: timestamp_ns) -> interval_days' -short_doc = 'Returns the number of days between the first and second timestamp.' -long_doc = ''' +name = "days_between" +signature = "days_between(t1: timestamp_ns, t2: timestamp_ns) -> interval_days" +short_doc = "Returns the number of days between the first and second timestamp." +long_doc = """ ### Parameters * t1: The first timestamp * t2: The second timestamp @@ -14,17 +14,17 @@ rounded towards zero. In rows where `t1` or `t2` are `null`, the result will be `null`. If `t1` is before `t2`, the result will be positive. If `t1` is after `t2` the result will be negative. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Days Between' -description = ''' +name = "Days Between" +description = """ Note that the expression uses `as i32` to convert the `interval_days` to the integer number of days. This discards the units. 
-''' -expression = 'days_between(Input.time, Input.date) as i32' -input_csv = ''' +""" +expression = "days_between(Input.time, Input.date) as i32" +input_csv = """ time,key,date 1996-03-21T00:00:00-00:00,Ben,1996-08-19T00:00:00-00:00 1996-04-21T00:00:00-00:00,Ryan,1995-07-20T00:00:00-00:00 @@ -32,8 +32,8 @@ time,key,date 1996-06-21T00:00:00-00:00,Ryan,1996-06-19T05:00:00-00:00 1996-07-21T00:00:00-00:00,Ben, 1996-08-21T00:00:00-00:00,Ben,1996-08-22T00:00:00-00:00 -''' -output_csv = ''' +""" +output_csv = """ time,key,date,result 1996-03-21T00:00:00.000000000,Ben,1996-08-19T00:00:00.000000000,151 1996-04-21T00:00:00.000000000,Ryan,1995-07-20T00:00:00.000000000,-276 @@ -41,4 +41,4 @@ time,key,date,result 1996-06-21T00:00:00.000000000,Ryan,1996-06-19T05:00:00.000000000,-1 1996-07-21T00:00:00.000000000,Ben,, 1996-08-21T00:00:00.000000000,Ben,1996-08-22T00:00:00.000000000,1 -''' +""" diff --git a/crates/sparrow-catalog/catalog/div.toml b/crates/sparrow-catalog/catalog/div.toml index 78a99217e..54202a509 100644 --- a/crates/sparrow-catalog/catalog/div.toml +++ b/crates/sparrow-catalog/catalog/div.toml @@ -1,8 +1,8 @@ -name = 'div' -signature = 'div(a: number, b: number) -> number' -operator = 'a / b' -short_doc = 'Returns the division of two numbers.' -long_doc = ''' +name = "div" +signature = "div(a: number, b: number) -> number" +operator = "a / b" +short_doc = "Returns the division of two numbers." +long_doc = """ This is the function used for the binary operation `a / b`. ### Parameters @@ -16,25 +16,25 @@ following the [numeric type coercion rules](docs:data-model#numeric-type-coercio Returns a numeric column of the promoted numeric type compatible with both `a` and `b`. The result contains `null` if `a` or `b` was null at that row, or if `b` was `0`. Otherwise the row contains the resulting of dividing `a` by `b`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Division' -expression = 'Input.a / Input.b' -input_csv = ''' +name = "Division" +expression = "Input.a / Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-02T00:00:00.000000000Z,A,6.3,0.4 2021-01-03T00:00:00.000000000Z,B,,3.7 2021-01-03T00:00:00.000000000Z,A,13.2, 2021-01-04T00:00:00.000000000Z,A,12.2,0 -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,4.75 2021-01-02T00:00:00.000000000,A,6.3,0.4,15.749999999999998 2021-01-03T00:00:00.000000000,B,,3.7, 2021-01-03T00:00:00.000000000,A,13.2,, 2021-01-04T00:00:00.000000000,A,12.2,0.0, -''' +""" diff --git a/crates/sparrow-catalog/catalog/else.toml b/crates/sparrow-catalog/catalog/else.toml index f7446519c..465680f55 100644 --- a/crates/sparrow-catalog/catalog/else.toml +++ b/crates/sparrow-catalog/catalog/else.toml @@ -1,7 +1,7 @@ -name = 'else' -signature = 'else(default: any, value: any) -> any' -short_doc = 'Return the `value` if it is non-`null`, `default` otherwise.' -long_doc = ''' +name = "else" +signature = "else(default: any, value: any) -> any" +short_doc = "Return the `value` if it is non-`null`, `default` otherwise." +long_doc = """ ### Parameters * default: The result to use if `value` is `null`. * value: The result to prefer if it is non-`null`. @@ -14,52 +14,52 @@ it with the pipe syntax to provide default values, as in For each row, returns `value` if it is non-`null` in that row, or `default` if `value is `null`. -''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -name = 'Choosing between two values' -description = ''' +name = "Choosing between two values" +description = """ In this example the result is `Input.a` if it is non-`null`, and `Input.b` otherwise. This may be combined with [`if`](#if) to conditionaly `null` out cases to implement various logical operations. 
When chaining multiple conditionals, it may be better to use [`coalesce`](#coalesce). -''' -expression = 'Input.a | else(Input.b)' -input_csv = ''' +""" +expression = "Input.a | else(Input.b)" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,X,57.8,63 2021-01-02T00:00:00.000000000Z,Y,,86.3 2021-01-03T00:00:00.000000000Z,X,6873, 2021-01-04T00:00:00.000000000Z,X,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,X,57.8,63.0,57.8 2021-01-02T00:00:00.000000000,Y,,86.3,86.3 2021-01-03T00:00:00.000000000,X,6873.0,,6873.0 2021-01-04T00:00:00.000000000,X,,, -''' +""" [[examples]] -name = 'Providing a default value' -description = ''' +name = "Providing a default value" +description = """ This example shows how to use `else` to provide a default value for a possibly `null` value. -''' -expression = 'Input.a | else(42.0)' -input_csv = ''' +""" +expression = "Input.a | else(42.0)" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,X,57.8,63 2021-01-02T00:00:00.000000000Z,Y,,86.3 2021-01-03T00:00:00.000000000Z,X,6873, 2021-01-04T00:00:00.000000000Z,X,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,X,57.8,63.0,57.8 2021-01-02T00:00:00.000000000,Y,,86.3,42.0 2021-01-03T00:00:00.000000000,X,6873.0,,6873.0 2021-01-04T00:00:00.000000000,X,,,42.0 -''' +""" diff --git a/crates/sparrow-catalog/catalog/eq.toml b/crates/sparrow-catalog/catalog/eq.toml index e0fd8b129..2834054a7 100644 --- a/crates/sparrow-catalog/catalog/eq.toml +++ b/crates/sparrow-catalog/catalog/eq.toml @@ -1,8 +1,8 @@ -name = 'eq' -signature = 'eq(a: any, b: any) -> bool' -operator = 'a == b' -short_doc = 'Return `true` if `a` is equal to `b`.' -long_doc = ''' +name = "eq" +signature = "eq(a: any, b: any) -> bool" +operator = "a == b" +short_doc = "Return `true` if `a` is equal to `b`." +long_doc = """ This is the function used for the binary comparison `a == b`. 
### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if they are equal and `false` if they are not equal. -''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Equals' -expression = 'Input.a == Input.b' -input_csv = ''' +name = "Equals" +expression = "Input.a == Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,false 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,false 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/exp.toml b/crates/sparrow-catalog/catalog/exp.toml index 2f1dad94a..7f5b6b66b 100644 --- a/crates/sparrow-catalog/catalog/exp.toml +++ b/crates/sparrow-catalog/catalog/exp.toml @@ -1,7 +1,7 @@ -name = 'exp' -signature = 'exp(power: f64) -> f64' -short_doc = 'Returns `e^power`.' -long_doc = ''' +name = "exp" +signature = "exp(power: f64) -> f64" +short_doc = "Returns `e^power`." +long_doc = """ ### Parameters * power: The power to raise `e` to. @@ -12,21 +12,21 @@ Other numbers will be implicitly promoted. Returns a column of `f64` values. Each row contains `null` if `power` is `null`. Otherwise, the row contains the value `e ^ power`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Exponential' -expression = 'exp(Input.a)' -input_csv = ''' +name = "Exponential" +expression = "exp(Input.a)" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-02T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,298.8674009670603 2021-01-02T00:00:00.000000000,A,6.3,544.571910125929 2021-01-02T00:00:00.000000000,B,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/extend.toml b/crates/sparrow-catalog/catalog/extend.toml index 474749d4a..ebdce3e59 100644 --- a/crates/sparrow-catalog/catalog/extend.toml +++ b/crates/sparrow-catalog/catalog/extend.toml @@ -1,7 +1,7 @@ -name = 'extend' -signature = 'extend(new, old) -> extended' -short_doc = 'Extends a record with fields from another.' -long_doc = ''' +name = "extend" +signature = "extend(new, old) -> extended" +short_doc = "Extends a record with fields from another." +long_doc = """ ### Parameters * new: The record column containing the new fields. * old: The record column containing the old fields. @@ -13,25 +13,25 @@ a way to add fields to the `old` record. Returns a column containing the combined record fields from both `old` and `new`. If either `old` or `new` are `null` then the fields from the given record are `null`. If a field exists in both `old` and `new`, the value from `new` is preferred. 
-''' -tags = ['record'] +""" +tags = ["record"] [[examples]] -name = 'Record Extension' -full_expression = ''' +name = "Record Extension" +full_expression = """ extend(Input, { sum: Input.a + Input.b, five: 5 }) -''' -input_csv = ''' +""" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5,1.2 2021-01-02T00:00:00.000000000Z,A,6.3,0.4 2021-03-01T00:00:00.000000000Z,B,,3.7 2021-04-10T00:00:00.000000000Z,A,13, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,sum,five 2021-01-01T00:00:00.000000000,A,5.0,1.2,6.2,5 2021-01-02T00:00:00.000000000,A,6.3,0.4,6.7,5 2021-03-01T00:00:00.000000000,B,,3.7,,5 2021-04-10T00:00:00.000000000,A,13.0,,,5 -''' +""" diff --git a/crates/sparrow-catalog/catalog/first.toml b/crates/sparrow-catalog/catalog/first.toml index ed1351bb0..96916ee8f 100644 --- a/crates/sparrow-catalog/catalog/first.toml +++ b/crates/sparrow-catalog/catalog/first.toml @@ -1,7 +1,7 @@ -name = 'first' -signature = 'first(input: any, window: window = null) -> any' -short_doc = 'Computes the first value present across the input.' -long_doc = ''' +name = "first" +signature = "first(input: any, window: window = null) -> any" +short_doc = "Computes the first value present across the input." +long_doc = """ ### Parameters * input: The input to be considered. * window: The window to aggregate within, as described in @@ -15,13 +15,13 @@ up to and including the current row. Returns `null` until there has been at least one such input. NOTE: The first value is inclusive of any values at the current time. 
-''' -tags = ['aggregation'] +""" +tags = ["aggregation"] [[examples]] -name = 'First' -expression = 'first(Input.value)' -input_csv = ''' +name = "First" +expression = "first(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-02T00:00:00.000000000Z,Ryan, @@ -29,8 +29,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-02T00:00:00.000000000,Ryan,, @@ -38,4 +38,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,50.7 2021-01-03T00:00:00.000000000,Ben,,50.7 2021-01-04T00:00:00.000000000,Ryan,2.3,67.2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/floor.toml b/crates/sparrow-catalog/catalog/floor.toml index 1f78e5e0f..f9a14ebe6 100644 --- a/crates/sparrow-catalog/catalog/floor.toml +++ b/crates/sparrow-catalog/catalog/floor.toml @@ -1,7 +1,7 @@ -name = 'floor' -signature = 'floor(n: number) -> number' -short_doc = 'Rounds the number down to the next smallest integer.' -long_doc = ''' +name = "floor" +signature = "floor(n: number) -> number" +short_doc = "Rounds the number down to the next smallest integer." +long_doc = """ See also [`round`](#round) and [`ceil`](#ceil). ### Parameters @@ -14,23 +14,23 @@ Note: This method may be applied to any numeric type. For anything other than Returns a numeric column of the same type as `n`. The result contains `null` if `n` was null at that row. Otherwise, it contains the result of rounding `n` down to the next smallest integer. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Floor' -expression = 'Input.a | floor()' -input_csv = ''' +name = "Floor" +expression = "Input.a | floor()" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, 2021-01-02T00:00:00.000000000Z,B,-2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,5.0 2021-01-01T00:00:00.000000000,A,6.3,6.0 2021-01-02T00:00:00.000000000,B,, 2021-01-02T00:00:00.000000000,B,-2.3,-3.0 -''' +""" diff --git a/crates/sparrow-catalog/catalog/gt.toml b/crates/sparrow-catalog/catalog/gt.toml index 5449d9427..e8825de2f 100644 --- a/crates/sparrow-catalog/catalog/gt.toml +++ b/crates/sparrow-catalog/catalog/gt.toml @@ -1,8 +1,8 @@ -name = 'gt' -signature = 'gt(a: ordered, b: ordered) -> bool' -operator = 'a > b' -short_doc = 'Return `true` if `a` is greater than `b`.' -long_doc = ''' +name = "gt" +signature = "gt(a: ordered, b: ordered) -> bool" +operator = "a > b" +short_doc = "Return `true` if `a` is greater than `b`." +long_doc = """ This is the function used for the binary comparison `a > b`. ### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if `a` is greater than `b`, and `false` if `a` is less than or equal to `b`. 
-''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Greater Than' -expression = 'Input.a > Input.b' -input_csv = ''' +name = "Greater Than" +expression = "Input.a > Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,true 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,false 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/gte.toml b/crates/sparrow-catalog/catalog/gte.toml index 4b971e98c..100f874f7 100644 --- a/crates/sparrow-catalog/catalog/gte.toml +++ b/crates/sparrow-catalog/catalog/gte.toml @@ -1,8 +1,8 @@ -name = 'gte' -signature = 'gte(a: ordered, b: ordered) -> bool' -operator = 'a >= b' -short_doc = 'Return `true` if `a` is greater than or equal to `b`.' -long_doc = ''' +name = "gte" +signature = "gte(a: ordered, b: ordered) -> bool" +operator = "a >= b" +short_doc = "Return `true` if `a` is greater than or equal to `b`." +long_doc = """ This is the function used for the binary comparison `a >= b`. ### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if `a` is greater than or equal to `b`, and `false` if `a` is less than `b`. 
-''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Greater Than or Equal To' -expression = 'Input.a >= Input.b' -input_csv = ''' +name = "Greater Than or Equal To" +expression = "Input.a >= Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,true 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,false 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/hash.toml b/crates/sparrow-catalog/catalog/hash.toml index 76c7006f4..94a583230 100644 --- a/crates/sparrow-catalog/catalog/hash.toml +++ b/crates/sparrow-catalog/catalog/hash.toml @@ -1,7 +1,7 @@ -name = 'hash' -signature = 'hash(input: key) -> u64' -short_doc = 'Returns the hash of the `input`.' -long_doc = ''' +name = "hash" +signature = "hash(input: key) -> u64" +short_doc = "Returns the hash of the `input`." +long_doc = """ ### Parameters * input: The argument to hash. @@ -11,13 +11,13 @@ Returns a `u64` column which contains the hash of the `input`. Note: Unlike many functions which return `null` if any of their arguments are `null`, `hash` will never return `null`. 
-''' -tags = ['misc'] +""" +tags = ["misc"] [[examples]] -name = 'String Hash' -expression = 'hash(Input.value)' -input_csv = ''' +name = "String Hash" +expression = "hash(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,hello 2021-01-01T00:00:00.000000000Z,Ryan, @@ -25,8 +25,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,hi 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,earth -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,hello,1472103086483932002 2021-01-01T00:00:00.000000000,Ryan,,5663277146615294718 @@ -34,12 +34,12 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,hi,2460612554838835252 2021-01-04T00:00:00.000000000,Ben,,5663277146615294718 2021-01-04T00:00:00.000000000,Ryan,earth,14489671231712828724 -''' +""" [[examples]] -name = 'Integer Hash' -expression = 'hash(Input.value)' -input_csv = ''' +name = "Integer Hash" +expression = "hash(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,5 2021-01-01T00:00:00.000000000Z,Ryan,8 @@ -47,8 +47,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,8 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,9 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,5,16461383214845928621 2021-01-01T00:00:00.000000000,Ryan,8,6794973171266502674 @@ -56,4 +56,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,8,6794973171266502674 2021-01-04T00:00:00.000000000,Ben,,0 2021-01-04T00:00:00.000000000,Ryan,9,15653042715643359010 -''' +""" diff --git a/crates/sparrow-catalog/catalog/hourly.toml b/crates/sparrow-catalog/catalog/hourly.toml index d0c19fc75..2df1012ba 100644 --- a/crates/sparrow-catalog/catalog/hourly.toml +++ b/crates/sparrow-catalog/catalog/hourly.toml @@ -1,28 +1,28 @@ -name = 'hourly' -signature = 'hourly() -> bool' -short_doc = 'A periodic function that produces a `true` value at 
the start of each hour.' -long_doc = ''' +name = "hourly" +signature = "hourly() -> bool" +short_doc = "A periodic function that produces a `true` value at the start of each hour." +long_doc = """ This function is often used in aggregations to produce windows or as a predicate column. ### Results Returns a boolean column with each row containing a `true` value at the start of the hour, and `null` at all other times. -''' -tags = ['tick'] +""" +tags = ["tick"] [[examples]] -name = 'Hourly Aggregated Window' -description = ''' +name = "Hourly Aggregated Window" +description = """ In this example, the `hourly()` function is used as an argument to the [`since](#since) function, which produces a window. The result is a windowed aggregation that resets hourly. -''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, hourly_sum: sum(Input.n, window = since(hourly())) } | extend({time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -30,8 +30,8 @@ time,key,n 1996-12-19T17:01:00-00:00,Ben,9 1996-12-19T17:01:00-00:00,Ryan,8 1996-12-19T18:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,hourly_sum 1996-12-19T16:00:57.000000000,Ben,2,2 1996-12-19T16:00:58.000000000,Ryan,3,3 @@ -43,18 +43,18 @@ time,key,n,hourly_sum 1996-12-19T18:00:00.000000000,Ben,1,16 1996-12-19T18:00:00.000000000,Ryan,,8 1996-12-19T18:00:00.000000000,Ben,,16 -''' +""" [[examples]] -name = 'Filter Hourly' -description = ''' +name = "Filter Hourly" +description = """ In this example, the `hourly()` function is used as an argument to the [`when`](#when) function, which filters input. The output includes the last input row before a [`tick`](#tick) occurs. 
-''' -full_expression = 'Input | last() | when(hourly())' -input_csv = ''' +""" +full_expression = "Input | last() | when(hourly())" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -62,11 +62,11 @@ time,key,n 1996-12-19T17:01:00-00:00,Ben,9 1996-12-19T17:01:00-00:00,Ryan,8 1996-12-19T18:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n 1996-12-19T16:00:58.000000000,Ryan,3 1996-12-19T16:00:57.000000000,Ben,2 1996-12-19T17:01:00.000000000,Ryan,8 1996-12-19T18:00:00.000000000,Ben,1 -''' +""" diff --git a/crates/sparrow-catalog/catalog/if.toml b/crates/sparrow-catalog/catalog/if.toml index 652123aef..cb0bfca12 100644 --- a/crates/sparrow-catalog/catalog/if.toml +++ b/crates/sparrow-catalog/catalog/if.toml @@ -1,8 +1,8 @@ -name = 'if' -signature = 'if(condition: bool, value: any) -> any' -short_doc = 'Return the `value` if `condition` is `true`, `null` otherwise.' -long_doc = ''' -`if` "nulls out" the `value` if `condition` is `false`. +name = "if" +signature = "if(condition: bool, value: any) -> any" +short_doc = "Return the `value` if `condition` is `true`, `null` otherwise." +long_doc = """ +`if` \"nulls out\" the `value` if `condition` is `false`. It is equivalent to `null_if(!condition, value)`. See also [`null_if`](#null_if). @@ -12,30 +12,30 @@ See also [`null_if`](#null_if). * value: The value to return if `condition` is `true`. Note: The order of arguments is chosen to allow use with the pipe operation. -Specifically, `value | if(condition)` may be used to conditionally "null-out" +Specifically, `value | if(condition)` may be used to conditionally \"null-out\" the value on the left-hand side. ### Results For each row, return the `value` if `condition` is `true`. Returns `null` if the `condition` is `false` or `null`. 
-''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -expression = 'Input.value | if(Input.condition)' -input_csv = ''' +expression = "Input.value | if(Input.condition)" +input_csv = """ time,key,value,condition 2021-01-01T00:00:00.000000000Z,A,57.8,false 2021-01-02T00:00:00.000000000Z,B,58.7,true 2021-01-03T00:00:00.000000000Z,A,,true 2021-01-04T00:00:00.000000000Z,A,876, 2021-01-05T00:00:00.000000000Z,A,786.0, -''' -output_csv = ''' +""" +output_csv = """ time,key,value,condition,result 2021-01-01T00:00:00.000000000,A,57.8,false, 2021-01-02T00:00:00.000000000,B,58.7,true,58.7 2021-01-03T00:00:00.000000000,A,,true, 2021-01-04T00:00:00.000000000,A,876.0,, 2021-01-05T00:00:00.000000000,A,786.0,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/is_valid.toml b/crates/sparrow-catalog/catalog/is_valid.toml index 0572e03d4..1901e545c 100644 --- a/crates/sparrow-catalog/catalog/is_valid.toml +++ b/crates/sparrow-catalog/catalog/is_valid.toml @@ -1,7 +1,7 @@ -name = 'is_valid' -signature = 'is_valid(input: any) -> bool' -short_doc = 'Returns `true` if `input` is non-`null`.' -long_doc = ''' +name = "is_valid" +signature = "is_valid(input: any) -> bool" +short_doc = "Returns `true` if `input` is non-`null`." +long_doc = """ ### Parameters * input: The input to test for `null`. @@ -12,13 +12,13 @@ Returns a `bool` column that is `true` if the `input` is Note: Unlike many functions which return `null` if any of their arguments are `null`, `is_valid` will never return `null`. 
-''' -tags = ['misc'] +""" +tags = ["misc"] [[examples]] -name = 'Is Valid' -expression = 'is_valid(Input.value)' -input_csv = ''' +name = "Is Valid" +expression = "is_valid(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,5 2021-01-01T00:00:00.000000000Z,Ryan, @@ -26,8 +26,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,3 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,5,true 2021-01-01T00:00:00.000000000,Ryan,,false @@ -35,4 +35,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,3,true 2021-01-04T00:00:00.000000000,Ben,,false 2021-01-04T00:00:00.000000000,Ryan,2,true -''' +""" diff --git a/crates/sparrow-catalog/catalog/json.toml b/crates/sparrow-catalog/catalog/json.toml index 0524898cd..ff76581af 100644 --- a/crates/sparrow-catalog/catalog/json.toml +++ b/crates/sparrow-catalog/catalog/json.toml @@ -1,12 +1,12 @@ -name = 'json' -signature = 'json(s: string) -> json' -short_doc = 'Creates a JSON object from a string.' -experimental = ''' +name = "json" +signature = "json(s: string) -> json" +short_doc = "Creates a JSON object from a string." +experimental = """ `json` is experimental functionality. You should expect the behavior to potentially change in the future. Certain functionality, such as nested types, are not yet supported. -''' -long_doc = ''' +""" +long_doc = """ This functions converts a JSON string into a JSON object. Fields of the JSON object can be accessed as strings and cast into other types. @@ -15,29 +15,29 @@ the JSON object can be accessed as strings and cast into other types. ### Results Returns a JSON object. 
-''' -tags = ['string'] +""" +tags = ["string"] [[examples]] -name = 'JSON field access' -expression = 'json(Input.json_string).a' -input_csv = ''' +name = "JSON field access" +expression = "json(Input.json_string).a" +input_csv = """ time,key,json_string -2021-01-01T00:00:00.000000000Z,Ben,"{""a"": 10}" -2021-01-02T00:00:00.000000000Z,Ryan,"{""a"": 2}" -2021-01-03T00:00:00.000000000Z,Ryan,"{""b"": 10}" -2021-01-04T00:00:00.000000000Z,Ben,"{""a"": 4}" -2021-01-05T00:00:00.000000000Z,Ben,"{""c"": 12}" -2021-01-06T00:00:00.000000000Z,Jordan,"{""a"": 0}" -2021-01-07T00:00:00.000000000Z,Ryan,"{""a"": 8}" -''' -output_csv = ''' +2021-01-01T00:00:00.000000000Z,Ben,\"{\"\"a\"\": 10}\" +2021-01-02T00:00:00.000000000Z,Ryan,\"{\"\"a\"\": 2}\" +2021-01-03T00:00:00.000000000Z,Ryan,\"{\"\"b\"\": 10}\" +2021-01-04T00:00:00.000000000Z,Ben,\"{\"\"a\"\": 4}\" +2021-01-05T00:00:00.000000000Z,Ben,\"{\"\"c\"\": 12}\" +2021-01-06T00:00:00.000000000Z,Jordan,\"{\"\"a\"\": 0}\" +2021-01-07T00:00:00.000000000Z,Ryan,\"{\"\"a\"\": 8}\" +""" +output_csv = """ time,key,json_string,result -2021-01-01T00:00:00.000000000,Ben,"{""a"": 10}",10 -2021-01-02T00:00:00.000000000,Ryan,"{""a"": 2}",2 -2021-01-03T00:00:00.000000000,Ryan,"{""b"": 10}", -2021-01-04T00:00:00.000000000,Ben,"{""a"": 4}",4 -2021-01-05T00:00:00.000000000,Ben,"{""c"": 12}", -2021-01-06T00:00:00.000000000,Jordan,"{""a"": 0}",0 -2021-01-07T00:00:00.000000000,Ryan,"{""a"": 8}",8 -''' +2021-01-01T00:00:00.000000000,Ben,\"{\"\"a\"\": 10}\",10 +2021-01-02T00:00:00.000000000,Ryan,\"{\"\"a\"\": 2}\",2 +2021-01-03T00:00:00.000000000,Ryan,\"{\"\"b\"\": 10}\", +2021-01-04T00:00:00.000000000,Ben,\"{\"\"a\"\": 4}\",4 +2021-01-05T00:00:00.000000000,Ben,\"{\"\"c\"\": 12}\", +2021-01-06T00:00:00.000000000,Jordan,\"{\"\"a\"\": 0}\",0 +2021-01-07T00:00:00.000000000,Ryan,\"{\"\"a\"\": 8}\",8 +""" diff --git a/crates/sparrow-catalog/catalog/lag.toml b/crates/sparrow-catalog/catalog/lag.toml index 5c357d2cd..475c768e2 100644 --- 
a/crates/sparrow-catalog/catalog/lag.toml +++ b/crates/sparrow-catalog/catalog/lag.toml @@ -1,7 +1,7 @@ -name = 'lag' -signature = 'lag(const n: i64, input: ordered) -> ordered' -short_doc = 'Returns a lagging value of `e`.' -long_doc = ''' +name = "lag" +signature = "lag(const n: i64, input: ordered) -> ordered" +short_doc = "Returns a lagging value of `input`." +long_doc = """ ### Parameters * n: The amount of lag to retrieve. For instance, `n = 1` is the previous non-`null` value, `n = 2` is the non-`null` value before that, etc. @@ -9,13 +9,13 @@ long_doc = ''' ### Results Returns a new column with the same type as `input`, but with each row containing the value of `input` from `n` rows earlier (counting only non-`null` rows for the current entity). -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Lag for Previous Value' -expression = 'lag(1, Input.n)' -input_csv = ''' +name = "Lag for Previous Value" +expression = "lag(1, Input.n)" +input_csv = """ time,key,n 1996-03-21T00:00:00-00:00,Ben,1 1996-04-21T00:00:00-00:00,Ryan,2 @@ -23,8 +23,8 @@ time,key,n 1996-06-21T00:00:00-00:00,Ryan,4 1996-07-21T00:00:00-00:00,Ben,5 1996-08-21T00:00:00-00:00,Ben,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-03-21T00:00:00.000000000,Ben,1, 1996-04-21T00:00:00.000000000,Ryan,2, @@ -32,14 +32,14 @@ time,key,n,result 1996-06-21T00:00:00.000000000,Ryan,4,3 1996-07-21T00:00:00.000000000,Ben,5,1 1996-08-21T00:00:00.000000000,Ben,6,5 -''' +""" [[examples]] -name = 'Lag for Average Change' -description = ''' +name = "Lag for Average Change" +description = """ This example uses `lag` to compute the average difference between values of `n`. -''' -full_expression = ''' +""" +full_expression = """ # Will always be non-`null` after the first non-`null` `Input.n`. 
let prev_value = Input.n | lag(1) @@ -50,8 +50,8 @@ in difference, mean_difference: mean(difference), } | extend({ time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-03-21T00:00:00-00:00,Ben,1 1996-04-21T00:00:00-00:00,Ryan,2 @@ -59,8 +59,8 @@ time,key,n 1996-06-21T00:00:00-00:00,Ryan,4 1996-07-21T00:00:00-00:00,Ben,5 1996-08-21T00:00:00-00:00,Ben,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,difference,mean_difference 1996-03-21T00:00:00.000000000,Ben,, 1996-04-21T00:00:00.000000000,Ryan,, @@ -68,4 +68,4 @@ time,key,difference,mean_difference 1996-06-21T00:00:00.000000000,Ryan,2,2.0 1996-07-21T00:00:00.000000000,Ben,4,4.0 1996-08-21T00:00:00.000000000,Ben,1,2.5 -''' +""" diff --git a/crates/sparrow-catalog/catalog/last.toml b/crates/sparrow-catalog/catalog/last.toml index 1ff376567..3095fa352 100644 --- a/crates/sparrow-catalog/catalog/last.toml +++ b/crates/sparrow-catalog/catalog/last.toml @@ -1,7 +1,7 @@ -name = 'last' -signature = 'last(input: any, window: window = null) -> any' -short_doc = 'Computes the last value present across the input.' -long_doc = ''' +name = "last" +signature = "last(input: any, window: window = null) -> any" +short_doc = "Computes the last value present across the input." +long_doc = """ ### Parameters * input: The input to be considered. * window: The window to aggregate within, as described in @@ -18,17 +18,17 @@ NOTE: The last value is inclusive of any values at the current time. This means that if the current row is new and non-`null`, the result will be the same of the input. If the input is not new or `null`, this will be the previous value that was new and non-`null`. -''' -tags = ['aggregation'] +""" +tags = ["aggregation"] [[examples]] -name = 'Last' -description = ''' +name = "Last" +description = """ As shown in the example, the last aggregation is useful for extrapolating missing results from the most recent present result. 
-''' -expression = 'last(Input.value)' -input_csv = ''' +""" +expression = "last(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-02T00:00:00.000000000Z,Ryan, @@ -36,8 +36,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-02T00:00:00.000000000,Ryan,, @@ -45,4 +45,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,1.2 2021-01-03T00:00:00.000000000,Ben,,1.2 2021-01-04T00:00:00.000000000,Ryan,2.3,2.3 -''' +""" diff --git a/crates/sparrow-catalog/catalog/len.toml b/crates/sparrow-catalog/catalog/len.toml index 128235336..48d4ed750 100644 --- a/crates/sparrow-catalog/catalog/len.toml +++ b/crates/sparrow-catalog/catalog/len.toml @@ -1,7 +1,7 @@ -name = 'len' -signature = 'len(s: string) -> i32' -short_doc = 'Returns the length of the string `s`.' -long_doc = ''' +name = "len" +signature = "len(s: string) -> i32" +short_doc = "Returns the length of the string `s`." +long_doc = """ ### Parameters * s: The string to compute the length of. @@ -9,13 +9,13 @@ long_doc = ''' Returns an `i32` column with each row containing the length of the string `s` in that row. Returns `0` for the empty string and `null` if `s` is `null`. 
-''' -tags = ['string'] +""" +tags = ["string"] [[examples]] -name = 'String Length' -expression = 'Input.value | len()' -input_csv = ''' +name = "String Length" +expression = "Input.value | len()" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,Hello World 2021-01-02T00:00:00.000000000Z,Ryan,'' @@ -23,8 +23,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,Hello 2021-01-03T00:00:00.000000000Z,Ben,'' 2021-01-04T00:00:00.000000000Z,Ryan,hi -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,Hello World,11 2021-01-02T00:00:00.000000000,Ryan,'',2 @@ -32,4 +32,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,Hello,5 2021-01-03T00:00:00.000000000,Ben,'',2 2021-01-04T00:00:00.000000000,Ryan,hi,2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/logical_and.toml b/crates/sparrow-catalog/catalog/logical_and.toml index bff1cc0af..85f385287 100644 --- a/crates/sparrow-catalog/catalog/logical_and.toml +++ b/crates/sparrow-catalog/catalog/logical_and.toml @@ -1,8 +1,8 @@ -name = 'logical_and' -signature = 'logical_and(a: bool, b: bool) -> bool' -operator = 'a and b' -short_doc = 'Returns the logical conjunction (AND) of two booleans.' -long_doc = ''' +name = "logical_and" +signature = "logical_and(a: bool, b: bool) -> bool" +operator = "a and b" +short_doc = "Returns the logical conjunction (AND) of two booleans." +long_doc = """ This is the function used for the binary operation `a and b`. ### Parameters @@ -13,13 +13,13 @@ This is the function used for the binary operation `a and b`. * Returns `true` if `a` and `b` are both `true`. * Returns `false` if `a` or `b` are `false`. * Returns `null` if `a` or `b` are `null`. 
-''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -name = 'Logical And' -expression = 'Input.a and Input.b' -input_csv = ''' +name = "Logical And" +expression = "Input.a and Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,true,false 2021-01-02T00:00:00.000000000Z,B,true,true @@ -29,8 +29,8 @@ time,key,a,b 2021-02-01T00:00:00.000000000Z,B,true, 2021-02-02T00:00:00.000000000Z,A,,false 2021-03-01T00:00:00.000000000Z,B,false, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,true,false,false 2021-01-02T00:00:00.000000000,B,true,true,true @@ -40,4 +40,4 @@ time,key,a,b,result 2021-02-01T00:00:00.000000000,B,true,, 2021-02-02T00:00:00.000000000,A,,false,false 2021-03-01T00:00:00.000000000,B,false,,false -''' +""" diff --git a/crates/sparrow-catalog/catalog/logical_or.toml b/crates/sparrow-catalog/catalog/logical_or.toml index 3845b8ecf..30b7be5b2 100644 --- a/crates/sparrow-catalog/catalog/logical_or.toml +++ b/crates/sparrow-catalog/catalog/logical_or.toml @@ -1,8 +1,8 @@ -name = 'logical_or' -signature = 'logical_or(a: bool, b: bool) -> bool' -operator = 'a or b' -short_doc = 'Returns the logical disjunction (OR) of two booleans.' -long_doc = ''' +name = "logical_or" +signature = "logical_or(a: bool, b: bool) -> bool" +operator = "a or b" +short_doc = "Returns the logical disjunction (OR) of two booleans." +long_doc = """ This is the function used for the binary operation `a or b`. ### Parameters @@ -13,13 +13,13 @@ This is the function used for the binary operation `a or b`. * Returns `true` if `a` or `b` are `true`. * Returns `false` if `a` and `b` are both `false`. * Returns `null` if `a` or `b` are `null`. 
-''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -name = 'Logical Or' -expression = 'Input.a or Input.b' -input_csv = ''' +name = "Logical Or" +expression = "Input.a or Input.b" +input_csv = """ time,subsort,key,a,b 2021-01-01T00:00:00.000000000Z,0,A,true,false 2021-01-02T00:00:00.000000000Z,0,B,true,true @@ -29,8 +29,8 @@ time,subsort,key,a,b 2021-02-01T00:00:00.000000000Z,0,B,true, 2021-02-02T00:00:00.000000000Z,0,A,,false 2021-03-01T00:00:00.000000000Z,0,B,false, -''' -output_csv = ''' +""" +output_csv = """ time,subsort,key,a,b,result 2021-01-01T00:00:00.000000000,0,A,true,false,true 2021-01-02T00:00:00.000000000,0,B,true,true,true @@ -40,4 +40,4 @@ time,subsort,key,a,b,result 2021-02-01T00:00:00.000000000,0,B,true,,true 2021-02-02T00:00:00.000000000,0,A,,false, 2021-03-01T00:00:00.000000000,0,B,false,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/lookup.toml b/crates/sparrow-catalog/catalog/lookup.toml index 8a8dcb8e7..eb9e404b7 100644 --- a/crates/sparrow-catalog/catalog/lookup.toml +++ b/crates/sparrow-catalog/catalog/lookup.toml @@ -1,7 +1,7 @@ -name = 'lookup' -signature = 'lookup(key: key, value: any) -> any' -short_doc = 'Looks up the value for a foreign key.' -long_doc = ''' +name = "lookup" +signature = "lookup(key: key, value: any) -> any" +short_doc = "Looks up the value for a foreign key." +long_doc = """ Performs a lookup join between the `key` and the computed `value` from a foreign entity. ### Parameters @@ -14,16 +14,16 @@ Performs a lookup join between the `key` and the computed `value` from a foreign ### Results For each row with a non-`null` key, returns the value at that time from the `value` computed for the entity identified by the `key`. Yields `null` if the `key` is `null` or if there is no foreign value computed for that key at the corresponding time. 
-''' -tags = ['grouping'] +""" +tags = ["grouping"] [[examples]] -name = 'Lookup' -description = ''' +name = "Lookup" +description = """ This example operates on customer reviews. It augments each review with the average rating the customer has given and the average rating the product has received, up to that point in time. -''' -full_expression = ''' +""" +full_expression = """ # This is the average review a product has received (keyed by products) let average_review_by_product = ProductReviewsByProduct.stars | mean() @@ -41,39 +41,39 @@ in average_customer_review, average_product_review, } | extend({ time: time_of($input)} ) -''' -output_csv = ''' +""" +output_csv = """ time,key,average_customer_review,average_product_review 2021-01-01T00:00:00.000000000,krabby_patty,3.0,3.0 2021-01-02T00:00:00.000000000,coral_bits,3.5,4.0 2021-03-01T00:00:00.000000000,krabby_patty,5.0,4.0 2021-04-10T00:00:00.000000000,krabby_patty,2.6666666666666665,3.0 -''' +""" [[examples.tables]] -name = 'ProductReviewsByProduct' -uuid = 'dd440605-4cee-431b-b208-360ec00a2192' -time_column_name = 'time' -group_column_name = 'product_id' -grouping = 'products' -input_csv = ''' +name = "ProductReviewsByProduct" +uuid = "dd440605-4cee-431b-b208-360ec00a2192" +time_column_name = "time" +group_column_name = "product_id" +grouping = "products" +input_csv = """ time,customer_id,product_id,stars 2021-01-01T00:00:00.000000000Z,Patrick,krabby_patty,3 2021-01-02T00:00:00.000000000Z,Patrick,coral_bits,4 2021-03-01T00:00:00.000000000Z,Squidward,krabby_patty,5 2021-04-10T00:00:00.000000000Z,Patrick,krabby_patty,1 -''' +""" [[examples.tables]] -name = 'ProductReviewsByCustomer' -uuid = '7ababffe-a104-4f8b-8288-20d9ce8fb162' -time_column_name = 'time' -group_column_name = 'customer_id' -grouping = 'customers' -input_csv = ''' +name = "ProductReviewsByCustomer" +uuid = "7ababffe-a104-4f8b-8288-20d9ce8fb162" +time_column_name = "time" +group_column_name = "customer_id" +grouping = "customers" +input_csv = """ 
time,customer_id,product_id,stars 2021-01-01T00:00:00.000000000Z,Patrick,krabby_patty,3 2021-01-02T00:00:00.000000000Z,Patrick,coral_bits,4 2021-03-01T00:00:00.000000000Z,Squidward,krabby_patty,5 2021-04-10T00:00:00.000000000Z,Patrick,krabby_patty,1 -''' +""" diff --git a/crates/sparrow-catalog/catalog/lower.toml b/crates/sparrow-catalog/catalog/lower.toml index b5397104f..eda6c9373 100644 --- a/crates/sparrow-catalog/catalog/lower.toml +++ b/crates/sparrow-catalog/catalog/lower.toml @@ -1,7 +1,7 @@ -name = 'lower' -signature = 'lower(s: string) -> string' -short_doc = 'Converts the string to lower case.' -long_doc = ''' +name = "lower" +signature = "lower(s: string) -> string" +short_doc = "Converts the string to lower case." +long_doc = """ ### Parameters * s: The string to convert to lower case. @@ -10,13 +10,13 @@ long_doc = ''' Returns a `string` column with each row containing the string `s` from that row converted to all lower case. The row contains `null` if `s` is `null` in that row. 
-''' -tags = ['string'] +""" +tags = ["string"] [[examples]] -name = 'Lower Case' -expression = 'Input.value | lower()' -input_csv = ''' +name = "Lower Case" +expression = "Input.value | lower()" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,Hello World 2021-01-02T00:00:00.000000000Z,Ryan, @@ -24,8 +24,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,Hello 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,hi -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,Hello World,hello world 2021-01-02T00:00:00.000000000,Ryan,, @@ -33,4 +33,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,Hello,hello 2021-01-03T00:00:00.000000000,Ben,, 2021-01-04T00:00:00.000000000,Ryan,hi,hi -''' +""" diff --git a/crates/sparrow-catalog/catalog/lt.toml b/crates/sparrow-catalog/catalog/lt.toml index a46cf7b06..cedf92b1d 100644 --- a/crates/sparrow-catalog/catalog/lt.toml +++ b/crates/sparrow-catalog/catalog/lt.toml @@ -1,8 +1,8 @@ -name = 'lt' -signature = 'lt(a: ordered, b: ordered) -> bool' -operator = 'a < b' -short_doc = 'Return `true` if `a` is less than `b`.' -long_doc = ''' +name = "lt" +signature = "lt(a: ordered, b: ordered) -> bool" +operator = "a < b" +short_doc = "Return `true` if `a` is less than `b`." +long_doc = """ This is the function used for the binary comparison `a < b`. ### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if `a` is less than `b` and `false` if `a` is greater than or equal to `b`. 
-''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Less Than' -expression = 'Input.a < Input.b' -input_csv = ''' +name = "Less Than" +expression = "Input.a < Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,false 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,true 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/lte.toml b/crates/sparrow-catalog/catalog/lte.toml index 486fc8e06..bf22a0288 100644 --- a/crates/sparrow-catalog/catalog/lte.toml +++ b/crates/sparrow-catalog/catalog/lte.toml @@ -1,8 +1,8 @@ -name = 'lte' -signature = 'lte(a: ordered, b: ordered) -> bool' -operator = 'a < b' -short_doc = 'Return `true` if `a` is less than or equal to `b`.' -long_doc = ''' +name = "lte" +signature = "lte(a: ordered, b: ordered) -> bool" +operator = "a <= b" +short_doc = "Return `true` if `a` is less than or equal to `b`." +long_doc = """ This is the function used for the binary comparison `a <= b`. ### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if `a` is less than or equal to `b`, and `false` if `a` is greater than `b`. 
-''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Less Than or Equal To' -expression = 'Input.a <= Input.b' -input_csv = ''' +name = "Less Than or Equal To" +expression = "Input.a <= Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,false 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,true 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/max.toml b/crates/sparrow-catalog/catalog/max.toml index 7b8834a8b..196a4f07a 100644 --- a/crates/sparrow-catalog/catalog/max.toml +++ b/crates/sparrow-catalog/catalog/max.toml @@ -1,7 +1,7 @@ -name = 'max' -signature = 'max(input: ordered, window: window = null) -> ordered' -short_doc = 'Computes the maximum of values across the input.' -long_doc = ''' +name = "max" +signature = "max(input: ordered, window: window = null) -> ordered" +short_doc = "Computes the maximum of values across the input." +long_doc = """ This is an aggregation that computes the maximum across multiple rows. See [`zip_max`](#zip-max) to take the maximum of two values from each row. @@ -17,16 +17,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the maximum of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least one such input. 
-''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Maximum' -expression = 'max(Input.value)' -input_csv = ''' +name = "Maximum" +expression = "max(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -34,8 +34,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-01T00:00:00.000000000,Ryan,, @@ -43,4 +43,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,50.7 2021-01-04T00:00:00.000000000,Ben,,50.7 2021-01-04T00:00:00.000000000,Ryan,2.3,67.2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/mean.toml b/crates/sparrow-catalog/catalog/mean.toml index a56bb2538..f8214227b 100644 --- a/crates/sparrow-catalog/catalog/mean.toml +++ b/crates/sparrow-catalog/catalog/mean.toml @@ -1,7 +1,7 @@ -name = 'mean' -signature = 'mean(input: number, window: window = null) -> f64' -short_doc = 'Computes the arithmetic mean of values across the input.' -long_doc = ''' +name = "mean" +signature = "mean(input: number, window: window = null) -> f64" +short_doc = "Computes the arithmetic mean of values across the input." +long_doc = """ ### Parameters * input: The input to compute the mean of. * window: The window to aggregate within, as described in @@ -13,16 +13,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the mean of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least one such input. 
-''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Mean' -expression = 'mean(Input.value)' -input_csv = ''' +name = "Mean" +expression = "mean(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -30,8 +30,8 @@ time,key,value 2021-01-02T00:00:00.000000000Z,Ben,1.2 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-03T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-01T00:00:00.000000000,Ryan,, @@ -39,4 +39,4 @@ time,key,value,result 2021-01-02T00:00:00.000000000,Ben,1.2,25.950000000000003 2021-01-03T00:00:00.000000000,Ben,,25.950000000000003 2021-01-03T00:00:00.000000000,Ryan,2.3,34.75 -''' +""" diff --git a/crates/sparrow-catalog/catalog/min.toml b/crates/sparrow-catalog/catalog/min.toml index 18fa458d9..ed527b1e1 100644 --- a/crates/sparrow-catalog/catalog/min.toml +++ b/crates/sparrow-catalog/catalog/min.toml @@ -1,7 +1,7 @@ -name = 'min' -signature = 'min(input: ordered, window: window = null) -> ordered' -short_doc = 'Computes the minimum of values across the input.' -long_doc = ''' +name = "min" +signature = "min(input: ordered, window: window = null) -> ordered" +short_doc = "Computes the minimum of values across the input." +long_doc = """ This is an aggregation that computes the minimum across multiple rows. See [`zip_min`](#zip-min) to take the minimum of two values from each row. @@ -17,16 +17,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the minimum of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least one such input. 
-''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Minimum' -expression = 'min(Input.value)' -input_csv = ''' +name = "Minimum" +expression = "min(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -34,8 +34,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-01T00:00:00.000000000,Ryan,, @@ -43,4 +43,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,1.2 2021-01-04T00:00:00.000000000,Ben,,1.2 2021-01-04T00:00:00.000000000,Ryan,2.3,2.3 -''' +""" diff --git a/crates/sparrow-catalog/catalog/minutely.toml b/crates/sparrow-catalog/catalog/minutely.toml index e5d5c1f64..fd4f930b7 100644 --- a/crates/sparrow-catalog/catalog/minutely.toml +++ b/crates/sparrow-catalog/catalog/minutely.toml @@ -1,28 +1,28 @@ -name = 'minutely' -signature = 'minutely() -> bool' -short_doc = 'A periodic function that produces a `true` value at the start of each minutely.' -long_doc = ''' +name = "minutely" +signature = "minutely() -> bool" +short_doc = "A periodic function that produces a `true` value at the start of each minutely." +long_doc = """ This function is often used in aggregations to produce windows or as a predicate column. ### Results Returns a boolean column with each row containing a `true` value at the start of each minute, and `null` at all other times. -''' -tags = ['tick'] +""" +tags = ["tick"] [[examples]] -name = 'Minutely Aggregated Window' -description = ''' +name = "Minutely Aggregated Window" +description = """ In this example, the `minutely()` function is used as an argument to the [`since](#since) function, which produces a window. The result is a windowed aggregation that resets minutely. 
-''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, hourly_sum: sum(Input.n, window = since(minutely())) } | extend({time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -30,8 +30,8 @@ time,key,n 1996-12-19T16:02:00-00:00,Ben,9 1996-12-19T16:02:00-00:00,Ryan,8 1996-12-19T16:03:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,hourly_sum 1996-12-19T16:00:57.000000000,Ben,2,2 1996-12-19T16:00:58.000000000,Ryan,3,3 @@ -45,18 +45,18 @@ time,key,n,hourly_sum 1996-12-19T16:03:00.000000000,Ben,1,1 1996-12-19T16:03:00.000000000,Ryan,, 1996-12-19T16:03:00.000000000,Ben,,1 -''' +""" [[examples]] -name = 'Filter Minutely' -description = ''' +name = "Filter Minutely" +description = """ In this example, the `minutely()` function is used as an argument to the [`when`](#when) function, which filters input. The output includes the last input row before a [`tick`](#tick) occurs. 
-''' -full_expression = 'Input | last() | when(minutely())' -input_csv = ''' +""" +full_expression = "Input | last() | when(minutely())" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -64,8 +64,8 @@ time,key,n 1996-12-19T16:02:00-00:00,Ben,9 1996-12-19T16:02:00-00:00,Ryan,8 1996-12-19T16:03:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n 1996-12-19T16:00:58.000000000,Ryan,3 1996-12-19T16:00:57.000000000,Ben,2 @@ -73,4 +73,4 @@ time,key,n 1996-12-19T16:02:00.000000000,Ben,9 1996-12-19T16:02:00.000000000,Ryan,8 1996-12-19T16:03:00.000000000,Ben,1 -''' +""" diff --git a/crates/sparrow-catalog/catalog/month_of_year.toml b/crates/sparrow-catalog/catalog/month_of_year.toml index f02bc1a7c..470bf4b9a 100644 --- a/crates/sparrow-catalog/catalog/month_of_year.toml +++ b/crates/sparrow-catalog/catalog/month_of_year.toml @@ -1,7 +1,7 @@ -name = 'month_of_year' -signature = 'month_of_year(time: timestamp_ns) -> u32' -short_doc = 'Return the month-of-year for the given time, starting with 1.' -long_doc = ''' +name = "month_of_year" +signature = "month_of_year(time: timestamp_ns) -> u32" +short_doc = "Return the month-of-year for the given time, starting with 1." +long_doc = """ ### Parameters * time: The timestamp to return the month-of-year for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the month-of-year for each input `time`. Returns `null` for rows where `time` is `null`. January is `1`. The result will be in the range 1 to 12 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Month of Year' -expression = 'month_of_year(Input.time)' -input_csv = ''' +name = "Month of Year" +expression = "month_of_year(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,3 1996-04-21T00:00:00.000000000,Ryan,4 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,6 1996-07-21T00:00:00.000000000,Ben,7 1996-08-21T00:00:00.000000000,Ben,8 -''' +""" diff --git a/crates/sparrow-catalog/catalog/month_of_year0.toml b/crates/sparrow-catalog/catalog/month_of_year0.toml index 66dd82b0a..cd71b9fab 100644 --- a/crates/sparrow-catalog/catalog/month_of_year0.toml +++ b/crates/sparrow-catalog/catalog/month_of_year0.toml @@ -1,7 +1,7 @@ -name = 'month_of_year0' -signature = 'month_of_year0(time: timestamp_ns) -> u32' -short_doc = 'Return the month-of-year for the given time, starting with 0.' -long_doc = ''' +name = "month_of_year0" +signature = "month_of_year0(time: timestamp_ns) -> u32" +short_doc = "Return the month-of-year for the given time, starting with 0." +long_doc = """ ### Parameters * time: The timestamp to return the day-of-month for. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `u32` column containing the month-of-year for each input `time`. Returns `null` for rows where `time` is `null`. January is `1`. The result will be in the range 0 to 11 (inclusive). 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Month of Year (Zero Based)' -expression = 'month_of_year0(Input.time)' -input_csv = ''' +name = "Month of Year (Zero Based)" +expression = "month_of_year0(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -23,8 +23,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,2 1996-04-21T00:00:00.000000000,Ryan,3 @@ -32,4 +32,4 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,5 1996-07-21T00:00:00.000000000,Ben,6 1996-08-21T00:00:00.000000000,Ben,7 -''' +""" diff --git a/crates/sparrow-catalog/catalog/monthly.toml b/crates/sparrow-catalog/catalog/monthly.toml index debb6dd3f..be87ca753 100644 --- a/crates/sparrow-catalog/catalog/monthly.toml +++ b/crates/sparrow-catalog/catalog/monthly.toml @@ -1,28 +1,28 @@ -name = 'monthly' -signature = 'monthly() -> bool' -short_doc = 'A periodic function that produces a `true` value at the start of each calendar month (UTC).' -long_doc = ''' +name = "monthly" +signature = "monthly() -> bool" +short_doc = "A periodic function that produces a `true` value at the start of each calendar month (UTC)." +long_doc = """ This function is often used in aggregations to produce windows or as a predicate column. ### Results Returns a boolean column with each row containing a `true` value at the start of each calendar month, and `null` at all other times. -''' -tags = ['tick'] +""" +tags = ["tick"] [[examples]] -name = 'Monthly Aggregated Window' -description = ''' +name = "Monthly Aggregated Window" +description = """ In this example, the `monthly()` function is used as an argument to the [`since](#since) function, which produces a window. The result is a windowed aggregation that resets at the start of each calendar month. 
-''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, monthly_sum: sum(Input.n, window = since(monthly())) } | extend({time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-02-19T16:00:00-00:00,Ben,2 1996-02-19T16:00:00-00:00,Ryan,3 @@ -30,8 +30,8 @@ time,key,n 1996-04-20T16:01:00-00:00,Ben,9 1996-04-21T16:00:00-00:00,Ryan,8 1996-05-21T16:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,monthly_sum 1996-02-19T16:00:00.000000000,Ben,2,2 1996-02-19T16:00:00.000000000,Ryan,3,3 @@ -45,18 +45,18 @@ time,key,n,monthly_sum 1996-05-01T00:00:00.000000000,Ryan,,8 1996-05-01T00:00:00.000000000,Ben,,15 1996-05-21T16:00:00.000000000,Ben,1,1 -''' +""" [[examples]] -name = 'Filter Monthly' -description = ''' +name = "Filter Monthly" +description = """ In this example, the `monthly()` function is used as an argument to the [`when`](#when) function, which filters input. The output includes the last input row before a [`tick`](#tick) occurs. 
-''' -full_expression = 'Input | last() | when(monthly())' -input_csv = ''' +""" +full_expression = "Input | last() | when(monthly())" +input_csv = """ time,key,n 1996-02-19T16:00:00-00:00,Ben,2 1996-02-19T16:00:00-00:00,Ryan,3 @@ -64,8 +64,8 @@ time,key,n 1996-04-20T16:01:00-00:00,Ben,9 1996-04-21T16:00:00-00:00,Ryan,8 1996-05-21T16:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n 1996-02-19T16:00:00.000000000,Ryan,3 1996-02-19T16:00:00.000000000,Ben,2 @@ -73,4 +73,4 @@ time,key,n 1996-02-19T16:00:00.000000000,Ben,2 1996-04-21T16:00:00.000000000,Ryan,8 1996-04-20T16:01:00.000000000,Ben,9 -''' +""" diff --git a/crates/sparrow-catalog/catalog/months.toml b/crates/sparrow-catalog/catalog/months.toml index 78aa8df58..8e1a2a4a6 100644 --- a/crates/sparrow-catalog/catalog/months.toml +++ b/crates/sparrow-catalog/catalog/months.toml @@ -1,7 +1,7 @@ -name = 'months' -signature = 'months(months: i64) -> interval_months' -short_doc = 'Produces an interval corresponding to the given number of calendar months.' -long_doc = ''' +name = "months" +signature = "months(months: i64) -> interval_months" +short_doc = "Produces an interval corresponding to the given number of calendar months." +long_doc = """ ### Parameters * months: The number of calendar months to create the interval for. @@ -10,16 +10,16 @@ Returns an `interval_months` column with each row containing the value of `months` converted to an interval with the corresponding number of calendar months. Rows where `months` is `null`, less than `i32::MIN` or greater than `i32::MAX` will be `null`. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -description = ''' +description = """ This example uses [`add_time`](#add-time) to add the created interval to the `time` column. 
-''' -expression = 'Input.time | add_time(months(Input.n))' -input_csv = ''' +""" +expression = "Input.time | add_time(months(Input.n))" +input_csv = """ time,key,n 1996-03-21T00:00:00-00:00,Ben,1 1996-04-21T00:00:00-00:00,Ryan,2 @@ -27,8 +27,8 @@ time,key,n 1996-06-21T00:00:00-00:00,Ryan, 1996-07-21T00:00:00-00:00,Ben,2 1996-08-21T00:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-03-21T00:00:00.000000000,Ben,1,1996-04-21T00:00:00.000000000 1996-04-21T00:00:00.000000000,Ryan,2,1996-06-21T00:00:00.000000000 @@ -36,4 +36,4 @@ time,key,n,result 1996-06-21T00:00:00.000000000,Ryan,, 1996-07-21T00:00:00.000000000,Ben,2,1996-09-21T00:00:00.000000000 1996-08-21T00:00:00.000000000,Ben,1,1996-09-21T00:00:00.000000000 -''' +""" diff --git a/crates/sparrow-catalog/catalog/months_between.toml b/crates/sparrow-catalog/catalog/months_between.toml index 4c548d3c7..480e0a245 100644 --- a/crates/sparrow-catalog/catalog/months_between.toml +++ b/crates/sparrow-catalog/catalog/months_between.toml @@ -1,7 +1,7 @@ -name = 'months_between' -signature = 'months_between(t1: timestamp_ns, t2: timestamp_ns) -> interval_months' -short_doc = 'Returns the number of months between the first and second timestamp.' -long_doc = ''' +name = "months_between" +signature = "months_between(t1: timestamp_ns, t2: timestamp_ns) -> interval_months" +short_doc = "Returns the number of months between the first and second timestamp." +long_doc = """ ### Parameters * t1: The first timestamp * t2: The second timestamp @@ -13,17 +13,17 @@ of calendar months between the two timestamps. In rows where `t1` or `t2` are `null`, the result will be `null`. If `t1` is before `t2`, the result will be positive. If `t1` is after `t2` the result will be negative. 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Months Between' -description = ''' +name = "Months Between" +description = """ Note that the expression uses `as i32` to convert the `interval_months` to the integer number of months. This discards the units. -''' -expression = 'months_between(Input.time, Input.date) as i32' -input_csv = ''' +""" +expression = "months_between(Input.time, Input.date) as i32" +input_csv = """ time,key,date 1996-03-21T00:00:00-00:00,Ben,1996-08-19T00:00:00-00:00 1996-04-21T00:00:00-00:00,Ryan,1995-07-20T00:00:00-00:00 @@ -31,8 +31,8 @@ time,key,date 1996-06-21T00:00:00-00:00,Ryan,1996-08-19T05:00:00-00:00 1996-07-21T00:00:00-00:00,Ben, 1996-08-21T00:00:00-00:00,Ben,1996-08-22T00:00:00-00:00 -''' -output_csv = ''' +""" +output_csv = """ time,key,date,result 1996-03-21T00:00:00.000000000,Ben,1996-08-19T00:00:00.000000000,5 1996-04-21T00:00:00.000000000,Ryan,1995-07-20T00:00:00.000000000,-9 @@ -40,4 +40,4 @@ time,key,date,result 1996-06-21T00:00:00.000000000,Ryan,1996-08-19T05:00:00.000000000,2 1996-07-21T00:00:00.000000000,Ben,, 1996-08-21T00:00:00.000000000,Ben,1996-08-22T00:00:00.000000000,0 -''' +""" diff --git a/crates/sparrow-catalog/catalog/mul.toml b/crates/sparrow-catalog/catalog/mul.toml index a965d2dda..d10a74fc1 100644 --- a/crates/sparrow-catalog/catalog/mul.toml +++ b/crates/sparrow-catalog/catalog/mul.toml @@ -1,8 +1,8 @@ -name = 'mul' -signature = 'mul(a: number, b: number) -> number' -operator = 'a * b' -short_doc = 'Returns the product of two numbers.' -long_doc = ''' +name = "mul" +signature = "mul(a: number, b: number) -> number" +operator = "a * b" +short_doc = "Returns the product of two numbers." +long_doc = """ This is the function used for the binary operation `a * b`. ### Parameters @@ -16,23 +16,23 @@ following the [numeric type coercion rules](docs:data-model#numeric-type-coercio Returns a numeric column of the promoted numeric type compatible with both `a` and `b`. 
The result contains `null` if `a` or `b` was null at that row. Otherwise the row contains the product of `a` and `b`. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Multiplication' -expression = 'Input.a * Input.b' -input_csv = ''' +name = "Multiplication" +expression = "Input.a * Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-01T00:00:00.000000000Z,A,6.3,0.4 2021-01-01T00:00:00.000000000Z,B,,3.7 2021-01-01T00:00:00.000000000Z,A,13.2, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,6.84 2021-01-01T00:00:00.000000000,A,6.3,0.4,2.52 2021-01-01T00:00:00.000000000,B,,3.7, 2021-01-01T00:00:00.000000000,A,13.2,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/neg.toml b/crates/sparrow-catalog/catalog/neg.toml index 7c336d268..cbbcab034 100644 --- a/crates/sparrow-catalog/catalog/neg.toml +++ b/crates/sparrow-catalog/catalog/neg.toml @@ -1,8 +1,8 @@ -name = 'neg' -signature = 'neg(n: signed) -> signed' -operator = '-n' -short_doc = 'Returns the negation of `n`.' -long_doc = ''' +name = "neg" +signature = "neg(n: signed) -> signed" +operator = "-n" +short_doc = "Returns the negation of `n`." +long_doc = """ This is the function used for the unary operation `-n`. ### Parameters @@ -17,25 +17,25 @@ signed integer type. If it is `u64` it is promoted to `f64`. ### Results For each row in the input, returns `null` if `n` is `null`. Otherwise, returns the negation of `n`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Negation' -expression = '-Input.a' -input_csv = ''' +name = "Negation" +expression = "-Input.a" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, 2021-01-02T00:00:00.000000000Z,B,-2.2 2021-01-03T00:00:00.000000000Z,B,0 -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,-5.7 2021-01-01T00:00:00.000000000,A,6.3,-6.3 2021-01-02T00:00:00.000000000,B,, 2021-01-02T00:00:00.000000000,B,-2.2,2.2 2021-01-03T00:00:00.000000000,B,0.0,0.0 -''' +""" diff --git a/crates/sparrow-catalog/catalog/neq.toml b/crates/sparrow-catalog/catalog/neq.toml index 074640630..399484378 100644 --- a/crates/sparrow-catalog/catalog/neq.toml +++ b/crates/sparrow-catalog/catalog/neq.toml @@ -1,8 +1,8 @@ -name = 'neq' -signature = 'neq(a: any, b: any) -> bool' -operator = 'a != b' -short_doc = 'Return `true` if `a` is not equal to `b`.' -long_doc = ''' +name = "neq" +signature = "neq(a: any, b: any) -> bool" +operator = "a != b" +short_doc = "Return `true` if `a` is not equal to `b`." +long_doc = """ This is the function used for the binary comparison `a != b`. ### Parameters @@ -17,13 +17,13 @@ they may be promoted to a compatible numeric type following the Returns a `bool` column indicating the results. For each row, it contains `null` if `a` or `b` are `null`, `true` if they are not equal and `false` if they are equal. 
-''' -tags = ['comparison'] +""" +tags = ["comparison"] [[examples]] -name = 'Not Equals' -expression = 'Input.a != Input.b' -input_csv = ''' +name = "Not Equals" +expression = "Input.a != Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,Ben,50.7,6.0 2021-01-02T00:00:00.000000000Z,Ryan,,70 @@ -32,8 +32,8 @@ time,key,a,b 2021-01-05T00:00:00.000000000Z,Ben,65, 2021-01-06T00:00:00.000000000Z,Jordan,2.3,68.7 2021-01-07T00:00:00.000000000Z,Ryan,, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,Ben,50.7,6.0,true 2021-01-02T00:00:00.000000000,Ryan,,70.0, @@ -42,4 +42,4 @@ time,key,a,b,result 2021-01-05T00:00:00.000000000,Ben,65.0,, 2021-01-06T00:00:00.000000000,Jordan,2.3,68.7,true 2021-01-07T00:00:00.000000000,Ryan,,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/not.toml b/crates/sparrow-catalog/catalog/not.toml index 7d6a16672..6a537fd63 100644 --- a/crates/sparrow-catalog/catalog/not.toml +++ b/crates/sparrow-catalog/catalog/not.toml @@ -1,8 +1,8 @@ -name = 'not' -signature = 'not(input: bool) -> bool' -operator = '!input' -short_doc = 'Returns the logical negation of a boolean.' -long_doc = ''' +name = "not" +signature = "not(input: bool) -> bool" +operator = "!input" +short_doc = "Returns the logical negation of a boolean." +long_doc = """ This is the function used for the unary operation `!input`. ### Parameters @@ -11,12 +11,12 @@ This is the function used for the unary operation `!input`. ### Results For each row, return `true` if `input` is `false`, `false` if `input` is `true` and `null` if `input` is `null`. 
-''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -expression = '!Input.a' -input_csv = ''' +expression = "!Input.a" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,true,false 2021-01-02T00:00:00.000000000Z,B,true,true @@ -25,8 +25,8 @@ time,key,a,b 2021-02-01T00:00:00.000000000Z,A,,true 2021-02-02T00:00:00.000000000Z,B,true, 2021-03-01T00:00:00.000000000Z,A,,false -2021-03-03T00:00:00.000000000Z,B,false,''' -output_csv = ''' +2021-03-03T00:00:00.000000000Z,B,false,""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,true,false,false 2021-01-02T00:00:00.000000000,B,true,true,false @@ -36,4 +36,4 @@ time,key,a,b,result 2021-02-02T00:00:00.000000000,B,true,,false 2021-03-01T00:00:00.000000000,A,,false, 2021-03-03T00:00:00.000000000,B,false,,true -''' +""" diff --git a/crates/sparrow-catalog/catalog/null_if.toml b/crates/sparrow-catalog/catalog/null_if.toml index 6a5e75004..87ddd808c 100644 --- a/crates/sparrow-catalog/catalog/null_if.toml +++ b/crates/sparrow-catalog/catalog/null_if.toml @@ -1,8 +1,8 @@ -name = 'null_if' -signature = 'null_if(condition: bool, value: any) -> any' -short_doc = 'Return the `value` if `condition` is `false`, `null` otherwise.' -long_doc = ''' -`null_if` "nulls out" the `value` if `condition` is `true`. +name = "null_if" +signature = "null_if(condition: bool, value: any) -> any" +short_doc = "Return the `value` if `condition` is `false`, `null` otherwise." +long_doc = """ +`null_if` \"nulls out\" the `value` if `condition` is `true`. It is equivalent to `if(!condition, value)`](#if). See also [`if`](#if). @@ -12,31 +12,31 @@ See also [`if`](#if). * value: The value to return if `condition` is `false`. Note: The order of arguments is chosen to allow use with the pipe operation. -Specifically, `value | null_if(condition)` may be used to conditionally "null-out" +Specifically, `value | null_if(condition)` may be used to conditionally \"null-out\" the value on the left-hand side. 
### Results For each row, return the `value` if `condition` is `false`. Returns `null` if the `condition` is `true` or `null`. -''' -tags = ['logical'] +""" +tags = ["logical"] [[examples]] -name = 'Null If' -expression = 'Input.value | null_if(Input.condition)' -input_csv = ''' +name = "Null If" +expression = "Input.value | null_if(Input.condition)" +input_csv = """ time,key,value,condition 2021-01-01T00:00:00.000000000Z,A,57.8,false 2021-01-02T00:00:00.000000000Z,B,58.7,true 2021-01-03T00:00:00.000000000Z,A,,true 2021-01-04T00:00:00.000000000Z,A,876, 2021-01-05T00:00:00.000000000Z,A,786.0, -''' -output_csv = ''' +""" +output_csv = """ time,key,value,condition,result 2021-01-01T00:00:00.000000000,A,57.8,false,57.8 2021-01-02T00:00:00.000000000,B,58.7,true, 2021-01-03T00:00:00.000000000,A,,true, 2021-01-04T00:00:00.000000000,A,876.0,, 2021-01-05T00:00:00.000000000,A,786.0,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/powf.toml b/crates/sparrow-catalog/catalog/powf.toml index 9dfacbb8a..d23532355 100644 --- a/crates/sparrow-catalog/catalog/powf.toml +++ b/crates/sparrow-catalog/catalog/powf.toml @@ -1,7 +1,7 @@ -name = 'powf' -signature = 'powf(base: f64, power: f64) -> f64' -short_doc = 'Returns `base^power`.' -long_doc = ''' +name = "powf" +signature = "powf(base: f64, power: f64) -> f64" +short_doc = "Returns `base^power`." +long_doc = """ ### Parameters * base: The base to raise to the given power. * power: The power to raise the base to. @@ -13,23 +13,23 @@ Other numbers will be implicitly promoted. Returns a column of `f64` values. Each row contains `null` if `base` or `power` are `null`. Otherwise, the row contains the value `base ^ power`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Power' -expression = 'powf(Input.a, Input.b)' -input_csv = ''' +name = "Power" +expression = "powf(Input.a, Input.b)" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-01T00:00:00.000000000Z,A,6.3,0.4 2021-01-02T00:00:00.000000000Z,B,,3.7 2021-01-03T00:00:00.000000000Z,A,13.2, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,8.073276500106656 2021-01-01T00:00:00.000000000,A,6.3,0.4,2.0880275269924504 2021-01-02T00:00:00.000000000,B,,3.7, 2021-01-03T00:00:00.000000000,A,13.2,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/remove_fields.toml b/crates/sparrow-catalog/catalog/remove_fields.toml index 0940c7f60..8744fdd58 100644 --- a/crates/sparrow-catalog/catalog/remove_fields.toml +++ b/crates/sparrow-catalog/catalog/remove_fields.toml @@ -1,7 +1,7 @@ -name = 'remove_fields' -signature = 'remove_fields(record, fields: string+) -> record' -short_doc = 'Remove fields from a record.' -long_doc = ''' +name = "remove_fields" +signature = "remove_fields(record, fields: string+) -> record" +short_doc = "Remove fields from a record." +long_doc = """ Note: If more fields are being removed than retained, you can use [`select_fields`](#select_fields). @@ -17,25 +17,25 @@ syntax you must be explicit, as in the example. Returns a column containing the fields in `record` not listed in `fields`, with the corresponding values from `record`. The result is `null` in rows where `record` is `null`. 
-''' -tags = ['record'] +""" +tags = ["record"] [[examples]] -name = 'Record Field Filtering' -full_expression = ''' +name = "Record Field Filtering" +full_expression = """ Input | remove_fields($input, 'c') -''' -input_csv = ''' +""" +input_csv = """ time,key,a,b,c 2021-01-01T00:00:00.000000000Z,A,5,1.2,true 2021-01-02T00:00:00.000000000Z,A,6.3,0.4,false 2021-03-01T00:00:00.000000000Z,B,,3.7,true 2021-04-10T00:00:00.000000000Z,A,13,,true -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000,A,5.0,1.2 2021-01-02T00:00:00.000000000,A,6.3,0.4 2021-03-01T00:00:00.000000000,B,,3.7 2021-04-10T00:00:00.000000000,A,13.0, -''' +""" diff --git a/crates/sparrow-catalog/catalog/round.toml b/crates/sparrow-catalog/catalog/round.toml index 5d9572cd2..7de9eb038 100644 --- a/crates/sparrow-catalog/catalog/round.toml +++ b/crates/sparrow-catalog/catalog/round.toml @@ -1,7 +1,7 @@ -name = 'round' -signature = 'round(n: number) -> number' -short_doc = 'Rounds the number to the nearest integer.' -long_doc = ''' +name = "round" +signature = "round(n: number) -> number" +short_doc = "Rounds the number to the nearest integer." +long_doc = """ See also [`ceil`](#ceil) and [`floor`](#floor). ### Parameters @@ -17,21 +17,21 @@ Otherwise, it contains the result of rounding `n` to the nearest integer. Numbers half-way between two integers are rounded away from `0`. For example, `0.5` rounds to `1.0` and `-0.5` rounds to `-1.0`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Round' -expression = 'Input.a | round()' -input_csv = ''' +name = "Round" +expression = "Input.a | round()" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,6.0 2021-01-01T00:00:00.000000000,A,6.3,6.0 2021-01-02T00:00:00.000000000,B,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/seconds.toml b/crates/sparrow-catalog/catalog/seconds.toml index 77d2a1c68..f7486f276 100644 --- a/crates/sparrow-catalog/catalog/seconds.toml +++ b/crates/sparrow-catalog/catalog/seconds.toml @@ -1,7 +1,7 @@ -name = 'seconds' -signature = 'seconds(seconds: i64) -> duration_s' -short_doc = 'Produces a duration corresponding to the given number of seconds.' -long_doc = ''' +name = "seconds" +signature = "seconds(seconds: i64) -> duration_s" +short_doc = "Produces a duration corresponding to the given number of seconds." +long_doc = """ ### Parameters * seconds: The number of seconds to create the duration for. @@ -9,16 +9,16 @@ long_doc = ''' Returns a `duration_s` column with each row containing the value of `seconds` converted to the corresponding duration. Rows where `seconds` is `null` will be `null`. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -description = ''' +description = """ This example uses [`add_time`](#add-time) to add the created duration to the `time` column. 
-''' -expression = 'Input.time | add_time(seconds(Input.n))' -input_csv = ''' +""" +expression = "Input.time | add_time(seconds(Input.n))" +input_csv = """ time,key,n 1996-03-21T00:00:00-00:00,Ben,1 1996-04-21T00:00:00-00:00,Ryan,2 @@ -26,8 +26,8 @@ time,key,n 1996-06-21T00:00:00-00:00,Ryan, 1996-07-21T00:00:00-00:00,Ben,2 1996-08-21T00:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-03-21T00:00:00.000000000,Ben,1,1996-03-21T00:00:01.000000000 1996-04-21T00:00:00.000000000,Ryan,2,1996-04-21T00:00:02.000000000 @@ -35,4 +35,4 @@ time,key,n,result 1996-06-21T00:00:00.000000000,Ryan,, 1996-07-21T00:00:00.000000000,Ben,2,1996-07-21T00:00:02.000000000 1996-08-21T00:00:00.000000000,Ben,1,1996-08-21T00:00:01.000000000 -''' +""" diff --git a/crates/sparrow-catalog/catalog/seconds_between.toml b/crates/sparrow-catalog/catalog/seconds_between.toml index 04e1e0fb5..89cc08ce7 100644 --- a/crates/sparrow-catalog/catalog/seconds_between.toml +++ b/crates/sparrow-catalog/catalog/seconds_between.toml @@ -1,7 +1,7 @@ -name = 'seconds_between' -signature = 'seconds_between(t1: timestamp_ns, t2: timestamp_ns) -> duration_s' -short_doc = 'Returns the number of seconds between the first and second timestamp.' -long_doc = ''' +name = "seconds_between" +signature = "seconds_between(t1: timestamp_ns, t2: timestamp_ns) -> duration_s" +short_doc = "Returns the number of seconds between the first and second timestamp." +long_doc = """ ### Parameters * t1: The first timestamp * t2: The second timestamp @@ -13,17 +13,17 @@ between the two timestamps. In rows where `t1` or `t2` are `null`, the result will be `null`. If `t1` is before `t2`, the result will be positive. If `t1` is after `t2`, the result will be negative. 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Seconds Between' -description = ''' +name = "Seconds Between" +description = """ Note that the expression uses `as i64` to convert the `duration_s` to the integer number of seconds. This discards the units. -''' -expression = 'seconds_between(Input.time, Input.date) as i64' -input_csv = ''' +""" +expression = "seconds_between(Input.time, Input.date) as i64" +input_csv = """ time,key,date 1996-03-21T00:00:00-00:00,Ben,1996-08-19T00:00:00-00:00 1996-04-21T00:00:00-00:00,Ryan,1995-07-20T00:00:00-00:00 @@ -31,8 +31,8 @@ time,key,date 1996-06-21T00:00:00-00:00,Ryan,1996-08-19T05:00:00-00:00 1996-07-21T00:00:00-00:00,Ben, 1996-08-21T00:00:00-00:00,Ben,1996-08-22T00:00:00-00:00 -''' -output_csv = ''' +""" +output_csv = """ time,key,date,result 1996-03-21T00:00:00.000000000,Ben,1996-08-19T00:00:00.000000000,13046400 1996-04-21T00:00:00.000000000,Ryan,1995-07-20T00:00:00.000000000,-23846400 @@ -40,4 +40,4 @@ time,key,date,result 1996-06-21T00:00:00.000000000,Ryan,1996-08-19T05:00:00.000000000,5115600 1996-07-21T00:00:00.000000000,Ben,, 1996-08-21T00:00:00.000000000,Ben,1996-08-22T00:00:00.000000000,86400 -''' +""" diff --git a/crates/sparrow-catalog/catalog/select_fields.toml b/crates/sparrow-catalog/catalog/select_fields.toml index 3611a57c8..3eb160f67 100644 --- a/crates/sparrow-catalog/catalog/select_fields.toml +++ b/crates/sparrow-catalog/catalog/select_fields.toml @@ -1,7 +1,7 @@ -name = 'select_fields' -signature = 'select_fields(record, fields: string+) -> record' -short_doc = 'Limits fields in a record to a given set.' -long_doc = ''' +name = "select_fields" +signature = "select_fields(record, fields: string+) -> record" +short_doc = "Limits fields in a record to a given set." +long_doc = """ Note: If more fields are being selected than removed, you can use [`remove_fields`](#remove_fields). @@ -17,25 +17,25 @@ syntax you must be explicit, as in the example. 
Returns a column containing the record fields listed in `fields` with the corresponding values from `record`. The result is `null` in rows where `record` is `null`. -''' -tags = ['record'] +""" +tags = ["record"] [[examples]] -name = 'Record Field Selection' -full_expression = ''' +name = "Record Field Selection" +full_expression = """ Input | select_fields($input, 'key', 'a', 'b') -''' -input_csv = ''' +""" +input_csv = """ time,key,a,b,c 2021-01-01T00:00:00.000000000Z,A,5,1.2,true 2021-01-02T00:00:00.000000000Z,A,6.3,0.4,false 2021-03-01T00:00:00.000000000Z,B,,3.7,true 2021-04-10T00:00:00.000000000Z,A,13,,true -''' -output_csv = ''' +""" +output_csv = """ key,a,b A,5.0,1.2 A,6.3,0.4 B,,3.7 A,13.0, -''' +""" diff --git a/crates/sparrow-catalog/catalog/shift_by.toml b/crates/sparrow-catalog/catalog/shift_by.toml index 178b18f5b..74da858b2 100644 --- a/crates/sparrow-catalog/catalog/shift_by.toml +++ b/crates/sparrow-catalog/catalog/shift_by.toml @@ -1,7 +1,7 @@ -name = 'shift_by' -signature = 'shift_by(delta: timedelta, value: any) -> any' -short_doc = 'Produces the current `value` shifted forward by the given `delta`.' -long_doc = ''' +name = "shift_by" +signature = "shift_by(delta: timedelta, value: any) -> any" +short_doc = "Produces the current `value` shifted forward by the given `delta`." +long_doc = """ ### Parameters * delta: The time delta to shift the value by. See other [time functions](#time-functions) for how to create `timedelta`s. @@ -15,16 +15,16 @@ If multiple values for the same entity key are shifted to the same time, all of them will be emitted in the order they originally appeared. New `subsort` IDs will be assigned to each row. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Shift By' -description = ''' +name = "Shift By" +description = """ This example uses `shift_by` to shift values from `Input` forward by 1 month. 
-''' -full_expression = 'Input | shift_by(months(1))' -input_csv = ''' +""" +full_expression = "Input | shift_by(months(1))" +input_csv = """ time,key,date,n 1996-03-21T00:00:00-00:00,Ben,1996-08-19T00:00:00-00:00,1 1996-04-21T00:00:00-00:00,Ryan,1996-07-20T00:00:00-00:00,2 @@ -32,8 +32,8 @@ time,key,date,n 1996-06-21T00:00:00-00:00,Ryan,1996-05-22T00:00:00-00:00,4 1996-07-21T00:00:00-00:00,Ben,1996-07-22T00:00:00-00:00,5 1996-08-21T00:00:00-00:00,Ben,1996-08-22T00:00:00-00:00,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,date,n 1996-03-21T00:00:00.000000000,Ben,1996-08-19T00:00:00.000000000,1 1996-04-21T00:00:00.000000000,Ryan,1996-07-20T00:00:00.000000000,2 @@ -41,4 +41,4 @@ time,key,date,n 1996-06-21T00:00:00.000000000,Ryan,1996-05-22T00:00:00.000000000,4 1996-07-21T00:00:00.000000000,Ben,1996-07-22T00:00:00.000000000,5 1996-08-21T00:00:00.000000000,Ben,1996-08-22T00:00:00.000000000,6 -''' +""" diff --git a/crates/sparrow-catalog/catalog/shift_to.toml b/crates/sparrow-catalog/catalog/shift_to.toml index 6a9c06d83..2605400c0 100644 --- a/crates/sparrow-catalog/catalog/shift_to.toml +++ b/crates/sparrow-catalog/catalog/shift_to.toml @@ -1,7 +1,7 @@ -name = 'shift_to' -signature = 'shift_to(time: timestamp_ns, value: any) -> any' -short_doc = 'Produces the current `value` shifted forward to the given `time`.' -long_doc = ''' +name = "shift_to" +signature = "shift_to(time: timestamp_ns, value: any) -> any" +short_doc = "Produces the current `value` shifted forward to the given `time`." +long_doc = """ ### Parameters * time: Column containing the times to shift values to. * value: The values to be shifted. @@ -15,21 +15,21 @@ If multiple values for the same entity key are shifted to the same time, all of them will be emitted in the order they originally appeared. New `subsort` IDs will be assigned to each row. 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Shift To' -description = ''' +name = "Shift To" +description = """ This example uses `shift_to` to shift values from `Input` forward to the `date` field. The order of rows (shown in field `n`) changes based on the order of `date`. Since the row containing `n = 4` has a `date` less than the `time`, it is dropped. The rows with `n = 3` and `n = 5` had the same `date`. We see that they have both been shifted to the same time, and the original order preserved within that time. -''' -full_expression = 'Input | shift_to(Input.date)' -input_csv = ''' +""" +full_expression = "Input | shift_to(Input.date)" +input_csv = """ time,key,date,n 1996-03-21T00:00:00-00:00,Ben,1996-08-19T00:00:00-00:00,1 1996-04-21T00:00:00-00:00,Ryan,1996-07-20T00:00:00-00:00,2 @@ -37,12 +37,12 @@ time,key,date,n 1996-06-21T00:00:00-00:00,Ryan,1996-05-22T00:00:00-00:00,4 1996-07-21T00:00:00-00:00,Ben,1996-07-22T00:00:00-00:00,5 1996-08-21T00:00:00-00:00,Ben,1996-08-22T00:00:00-00:00,6 -''' -output_csv = ''' +""" +output_csv = """ time,key,date,n 1996-04-21T00:00:00.000000000,Ryan,1996-07-20T00:00:00.000000000,2 1996-05-21T00:00:00.000000000,Ryan,1996-07-22T00:00:00.000000000,3 1996-07-21T00:00:00.000000000,Ben,1996-07-22T00:00:00.000000000,5 1996-03-21T00:00:00.000000000,Ben,1996-08-19T00:00:00.000000000,1 1996-08-21T00:00:00.000000000,Ben,1996-08-22T00:00:00.000000000,6 -''' +""" diff --git a/crates/sparrow-catalog/catalog/shift_until.toml b/crates/sparrow-catalog/catalog/shift_until.toml index b9aac565c..5efc473ba 100644 --- a/crates/sparrow-catalog/catalog/shift_until.toml +++ b/crates/sparrow-catalog/catalog/shift_until.toml @@ -1,7 +1,7 @@ -name = 'shift_until' -signature = 'shift_until(predicate: bool, value: any) -> any' -short_doc = 'Produces the `value` shifted forward to the time the `predicate` is true.' 
-long_doc = ''' +name = "shift_until" +signature = "shift_until(predicate: bool, value: any) -> any" +short_doc = "Produces the `value` shifted forward to the time the `predicate` is true." +long_doc = """ ### Parameters * predicate: The predicate to determine whether to emit shifted rows. * value: The value to shift until the `predicate` is true. @@ -14,19 +14,19 @@ If multiple values for the same entity are shifted to the same time, all of them New `subsort` IDs will be assigned to each row. A value may be produced at the same time it occurs if the `predicate` evaluates to true at that time. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Shift Until' -description = ''' +name = "Shift Until" +description = """ This examples uses `shift_until` to shift values from `Input` forward until the condition is true. We see that the rows are output in the original order (seen by looking at the `n` column). Rows where the `condition` is `true` cause rows to be output at that time, including any preceding (but not yet output) rows. Also note that the final row (with `n = 7`) has not yet been output, since the condition has not been `true` after it (yet). 
-''' -full_expression = 'Input | shift_until(Input.condition)' -input_csv = ''' +""" +full_expression = "Input | shift_until(Input.condition)" +input_csv = """ time,key,condition,n 1996-03-21T00:00:00-00:00,Ben,true,1 1996-04-21T00:00:00-00:00,Ryan,false,2 @@ -35,8 +35,8 @@ time,key,condition,n 1996-07-21T00:00:00-00:00,Ben,,5 1996-08-21T00:00:00-00:00,Ben,true,6 1996-06-21T00:00:00-00:00,Ryan,false,7 -''' -output_csv = ''' +""" +output_csv = """ time,key,condition,n 1996-03-21T00:00:00.000000000,Ben,true,1 1996-04-21T00:00:00.000000000,Ryan,false,2 @@ -44,4 +44,4 @@ time,key,condition,n 1996-06-21T00:00:00.000000000,Ryan,true,4 1996-07-21T00:00:00.000000000,Ben,,5 1996-08-21T00:00:00.000000000,Ben,true,6 -''' +""" diff --git a/crates/sparrow-catalog/catalog/since.toml b/crates/sparrow-catalog/catalog/since.toml index 49d556de5..58570dbbf 100644 --- a/crates/sparrow-catalog/catalog/since.toml +++ b/crates/sparrow-catalog/catalog/since.toml @@ -1,7 +1,7 @@ -name = 'since' -signature = 'since(condition: bool) -> window' -short_doc = 'Configures a windowed aggregation.' -long_doc = ''' +name = "since" +signature = "since(condition: bool) -> window" +short_doc = "Configures a windowed aggregation." +long_doc = """ Configures aggregations to window since the last time the `condition` was `true`. @@ -11,23 +11,23 @@ Configures aggregations to window since the last time the ### Results Returns a window behavior that can be used with an [aggregation](#aggregation-functions) to configure windowed aggregations. -''' -tags = ['window'] +""" +tags = ["window"] [[examples]] -name = 'Hourly Count' -description = ''' +name = "Hourly Count" +description = """ Produces the count since the start of the hour. NOTE: The time and key are not available on the rows created by the ticks. The expression here uses `extend`, `time_of` and `first` to compute the `time` and `key` columns for all rows. 
-''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, result: count(Input, window = since(hourly())) } # Compute time and key for all rows, even the ticks. | extend({ time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -35,8 +35,8 @@ time,key,n 1996-12-19T17:03:00-00:00,Ben,9 1996-12-19T17:01:00-00:00,Ryan,8 1996-12-19T18:01:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-12-19T16:00:57.000000000,Ben,2,1 1996-12-19T16:00:58.000000000,Ryan,3,1 @@ -48,12 +48,12 @@ time,key,n,result 1996-12-19T18:00:00.000000000,Ryan,,1 1996-12-19T18:00:00.000000000,Ben,,1 1996-12-19T18:01:00.000000000,Ben,1,1 -''' +""" [[examples]] -name = 'Count Since Predicate' -expression = 'count(Input, window = since(Input.n > 5))' -input_csv = ''' +name = "Count Since Predicate" +expression = "count(Input, window = since(Input.n > 5))" +input_csv = """ time,key,n 1996-12-19T16:00:57-00:00,Ben,2 1996-12-19T16:00:58-00:00,Ryan,3 @@ -61,8 +61,8 @@ time,key,n 1996-12-19T17:03:00-00:00,Ben,9 1996-12-19T17:01:00-00:00,Ryan,8 1996-12-19T18:01:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,result 1996-12-19T16:00:57.000000000,Ben,2,1 1996-12-19T16:00:58.000000000,Ryan,3,1 @@ -70,4 +70,4 @@ time,key,n,result 1996-12-19T17:01:00.000000000,Ryan,8,2 1996-12-19T17:03:00.000000000,Ben,9,1 1996-12-19T18:01:00.000000000,Ben,1,1 -''' +""" diff --git a/crates/sparrow-catalog/catalog/sliding.toml b/crates/sparrow-catalog/catalog/sliding.toml index bb5465880..1b9327af7 100644 --- a/crates/sparrow-catalog/catalog/sliding.toml +++ b/crates/sparrow-catalog/catalog/sliding.toml @@ -1,7 +1,7 @@ -name = 'sliding' -signature = 'sliding(const duration: i64, condition: bool) -> window' -short_doc = 'Configures sliding windowed aggregations.' 
-long_doc = ''' +name = "sliding" +signature = "sliding(const duration: i64, condition: bool) -> window" +short_doc = "Configures sliding windowed aggregations." +long_doc = """ Configures aggregations to slide over a window of inputs, where the width of the window is determined by the number of times (`duration`) the `condition` is `true`. @@ -19,31 +19,31 @@ and 9:00 PM. ### Results Returns a window behavior that can be used with an [aggregation](#aggregation-functions) to configure windowed aggregations. -''' -tags = ['window'] +""" +tags = ["window"] [[examples]] -name = 'Sliding Over 2 Days' -description = ''' +name = "Sliding Over 2 Days" +description = """ Produces the sum of `Input.n` over a window of 2 days. NOTE: The time and key are not available on the rows created by the ticks. The expression here uses `extend`, `time_of` and `first` to compute the `time` and `key` columns for all rows. -''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, result: sum(Input.n, window = sliding(2, daily())) } # Compute time and key for all rows, even the ticks. | extend({ time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T00:00:00-00:00,Ben,1 1996-12-19T00:00:00-00:00,Ryan,2 1996-12-20T00:00:00-00:00,Ben,3 1996-12-20T01:00:00-00:00,Ben,4 1996-12-21T00:00:00-00:00,Ryan,5 -1996-12-21T00:00:00-00:00,Ben,6''' -output_csv = ''' +1996-12-21T00:00:00-00:00,Ben,6""" +output_csv = """ time,key,n,result 1996-12-19T00:00:00.000000000,Ben,1,1 1996-12-19T00:00:00.000000000,Ryan,2,2 @@ -57,24 +57,24 @@ time,key,n,result 1996-12-21T00:00:00.000000000,Ben,6,13 1996-12-21T00:00:00.000000000,Ryan,,5 1996-12-21T00:00:00.000000000,Ben,,13 -''' +""" [[examples]] -name = 'Sliding Over 3 Events' -description = ''' +name = "Sliding Over 3 Events" +description = """ In this example, the `condition` evaluates to `true` when the input is valid, meaning the width of the window is 3 `Input` rows. 
-''' -expression = 'mean(Input.n, window = sliding(3, is_valid(Input)))' -input_csv = ''' +""" +expression = "mean(Input.n, window = sliding(3, is_valid(Input)))" +input_csv = """ time,subsort,key,n 1996-12-19T00:00:00-00:00,0,Ben,1 1996-12-19T00:00:00-00:00,0,Ryan,2 1996-12-20T00:00:00-00:00,0,Ben,3 1996-12-20T01:00:00-00:00,0,Ben,4 1996-12-21T00:00:00-00:00,0,Ryan,5 -1996-12-21T00:00:00-00:00,0,Ben,6''' -output_csv = ''' +1996-12-21T00:00:00-00:00,0,Ben,6""" +output_csv = """ time,subsort,key,n,result 1996-12-19T00:00:00.000000000,0,Ben,1,1.0 1996-12-19T00:00:00.000000000,0,Ryan,2,2.0 @@ -82,4 +82,4 @@ time,subsort,key,n,result 1996-12-20T01:00:00.000000000,0,Ben,4,2.6666666666666665 1996-12-21T00:00:00.000000000,0,Ryan,5,3.5 1996-12-21T00:00:00.000000000,0,Ben,6,4.333333333333333 -''' +""" diff --git a/crates/sparrow-catalog/catalog/sqrt.toml b/crates/sparrow-catalog/catalog/sqrt.toml index 0352874d7..e03f0ccfd 100644 --- a/crates/sparrow-catalog/catalog/sqrt.toml +++ b/crates/sparrow-catalog/catalog/sqrt.toml @@ -1,7 +1,7 @@ -name = 'sqrt' -signature = 'sqrt(a: number) -> f64' -short_doc = 'Returns the square root of `a`.' -long_doc = ''' +name = "sqrt" +signature = "sqrt(a: number) -> f64" +short_doc = "Returns the square root of `a`." +long_doc = """ ### Parameters * a: The number to take the square root of. @@ -9,21 +9,21 @@ long_doc = ''' Returns a column of type `f64`. The result contains `null` if `a` was null at that row. Otherwise the row contains the square root of `a`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Square Root' -expression = 'sqrt(Input.a)' -input_csv = ''' +name = "Square Root" +expression = "sqrt(Input.a)" +input_csv = """ time,key,a 2021-01-01T00:00:00.000000000Z,A,5.7 2021-01-01T00:00:00.000000000Z,A,6.3 2021-01-02T00:00:00.000000000Z,B, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,result 2021-01-01T00:00:00.000000000,A,5.7,2.3874672772626644 2021-01-01T00:00:00.000000000,A,6.3,2.5099800796022267 2021-01-02T00:00:00.000000000,B,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/stddev.toml b/crates/sparrow-catalog/catalog/stddev.toml index f2077f6fd..32c58f363 100644 --- a/crates/sparrow-catalog/catalog/stddev.toml +++ b/crates/sparrow-catalog/catalog/stddev.toml @@ -1,7 +1,7 @@ -name = 'stddev' -signature = 'stddev(input: number, window: window = null) -> f64' -short_doc = 'Computes the sample standard deviation of values across the input.' -long_doc = ''' +name = "stddev" +signature = "stddev(input: number, window: window = null) -> f64" +short_doc = "Computes the sample standard deviation of values across the input." +long_doc = """ Computes the sample standard deviation, which is the square root of the [sample variance](#variance). @@ -16,16 +16,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the mean of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least two such inputs. 
-''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Standard Deviation' -expression = 'stddev(Input.value)' -input_csv = ''' +name = "Standard Deviation" +expression = "stddev(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -33,8 +33,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7, 2021-01-01T00:00:00.000000000,Ryan,, @@ -42,4 +42,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,24.750000000000004 2021-01-04T00:00:00.000000000,Ben,,24.750000000000004 2021-01-04T00:00:00.000000000,Ryan,2.3,32.45 -''' +""" diff --git a/crates/sparrow-catalog/catalog/sub.toml b/crates/sparrow-catalog/catalog/sub.toml index 176abf958..c60f03d16 100644 --- a/crates/sparrow-catalog/catalog/sub.toml +++ b/crates/sparrow-catalog/catalog/sub.toml @@ -1,8 +1,8 @@ -name = 'sub' -signature = 'sub(a: number, b: number) -> number' -operator = 'a - b' -short_doc = 'Returns the difference of two numbers.' -long_doc = ''' +name = "sub" +signature = "sub(a: number, b: number) -> number" +operator = "a - b" +short_doc = "Returns the difference of two numbers." +long_doc = """ This is the function used for the binary operation `a - b`. ### Parameters @@ -16,25 +16,25 @@ following the [numeric type coercion rules](docs:data-model#numeric-type-coercio Returns a numeric column of the promoted numeric type compatible with both `a` and `b`. The result contains `null` if `a` or `b` was null at that row. Otherwise the row contains the difference of `a` and `b`. 
-''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Subtraction' -expression = 'Input.a - Input.b' -input_csv = ''' +name = "Subtraction" +expression = "Input.a - Input.b" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-02T00:00:00.000000000Z,A,6.3,0.4 2021-01-03T00:00:00.000000000Z,B,,3.7 2021-01-03T00:00:00.000000000Z,A,13.2, 2021-01-04T00:00:00.000000000Z,A,12.2,0 -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,4.5 2021-01-02T00:00:00.000000000,A,6.3,0.4,5.8999999999999995 2021-01-03T00:00:00.000000000,B,,3.7, 2021-01-03T00:00:00.000000000,A,13.2,, 2021-01-04T00:00:00.000000000,A,12.2,0.0,12.2 -''' +""" diff --git a/crates/sparrow-catalog/catalog/substring.toml b/crates/sparrow-catalog/catalog/substring.toml index e84908d35..de2cb5b10 100644 --- a/crates/sparrow-catalog/catalog/substring.toml +++ b/crates/sparrow-catalog/catalog/substring.toml @@ -1,7 +1,7 @@ -name = 'substring' -signature = 'substring(s: string, start: i64 = null, end: i64 = null) -> string' -short_doc = 'Takes a substring of the input between start and end indices.' -long_doc = ''' +name = "substring" +signature = "substring(s: string, start: i64 = null, end: i64 = null) -> string" +short_doc = "Takes a substring of the input between start and end indices." +long_doc = """ ### Parameters * s: The string to take a substring of. * start: The inclusive index to start at. `null` indicates the beginning of the @@ -15,12 +15,12 @@ starting at `start` (inclusive) up to but not including the `end`. If `s` is `null`, returns `null`. If `end > start` an empty string is returned. -''' -tags = ['string'] +""" +tags = ["string"] [[examples]] -name = 'Substring Suffix' -description = ''' +name = "Substring Suffix" +description = """ This example shows using the `substring` function to extract the last 3 characters of a string. 
Note that if the string is shorter than 3 characters the empty string is returned. @@ -28,9 +28,9 @@ is shorter than 3 characters the empty string is returned. Specifically, `-3` is interpreted as `len(s) - 3`, which produces a negative number for shorter strings, and is thus less than the start of the string (`0`). -''' -expression = 'Input.value | substring(start = -3)' -input_csv = ''' +""" +expression = "Input.value | substring(start = -3)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,Hello World 2021-01-02T00:00:00.000000000Z,Ryan, @@ -38,8 +38,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,Hello 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,hi -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,Hello World,rld 2021-01-02T00:00:00.000000000,Ryan,, @@ -47,13 +47,13 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,Hello,llo 2021-01-03T00:00:00.000000000,Ben,, 2021-01-04T00:00:00.000000000,Ryan,hi, -''' +""" [[examples]] -name = 'Substring' -description = '' -expression = 'Input.value | substring(start = 3, end = -3)' -input_csv = ''' +name = "Substring" +description = "" +expression = "Input.value | substring(start = 3, end = -3)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,Hello World 2021-01-02T00:00:00.000000000Z,Ryan, @@ -61,8 +61,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,Hello 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,hi -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,Hello World,lo Wo 2021-01-02T00:00:00.000000000,Ryan,, @@ -70,4 +70,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,Hello, 2021-01-03T00:00:00.000000000,Ben,, 2021-01-04T00:00:00.000000000,Ryan,hi, -''' +""" diff --git a/crates/sparrow-catalog/catalog/sum.toml b/crates/sparrow-catalog/catalog/sum.toml index 8ebe392c4..fe9fd0a56 100644 --- 
a/crates/sparrow-catalog/catalog/sum.toml +++ b/crates/sparrow-catalog/catalog/sum.toml @@ -1,7 +1,7 @@ -name = 'sum' -signature = 'sum(input: number, window: window = null) -> number' -short_doc = 'Computes the sum of values across the input.' -long_doc = ''' +name = "sum" +signature = "sum(input: number, window: window = null) -> number" +short_doc = "Computes the sum of values across the input." +long_doc = """ ### Parameters * input: The input to compute the sum of. * window: The window to aggregate within, as described in @@ -13,16 +13,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the minimum of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least one such input. -''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Sum' -expression = 'sum(Input.value)' -input_csv = ''' +name = "Sum" +expression = "sum(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -30,8 +30,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7,50.7 2021-01-01T00:00:00.000000000,Ryan,, @@ -39,4 +39,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,51.900000000000006 2021-01-04T00:00:00.000000000,Ben,,51.900000000000006 2021-01-04T00:00:00.000000000,Ryan,2.3,69.5 -''' +""" diff --git a/crates/sparrow-catalog/catalog/time_of.toml b/crates/sparrow-catalog/catalog/time_of.toml index 0fa939bf5..0222b335a 100644 --- a/crates/sparrow-catalog/catalog/time_of.toml +++ b/crates/sparrow-catalog/catalog/time_of.toml @@ -1,20 +1,20 @@ -name = 'time_of' -signature = 'time_of(input: any) -> timestamp_ns' -short_doc = 'Returns the timestamp of 
rows in `input`.' -long_doc = ''' +name = "time_of" +signature = "time_of(input: any) -> timestamp_ns" +short_doc = "Returns the timestamp of rows in `input`." +long_doc = """ ### Parameters * input: The column to retrieve timestamps for. It may be of any type (including records). ### Results Returns a `timestamp_ns` column containing the timestamp of each row in the `input`. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Time Of Record Column' -expression = 'time_of(Input)' -input_csv = ''' +name = "Time Of Record Column" +expression = "time_of(Input)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1996-04-21T00:00:00-00:00,Ryan @@ -22,8 +22,8 @@ time,key 1996-06-21T00:00:00-00:00,Ryan 1996-07-21T00:00:00-00:00,Ben 1996-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,1996-03-21T00:00:00.000000000 1996-04-21T00:00:00.000000000,Ryan,1996-04-21T00:00:00.000000000 @@ -31,12 +31,12 @@ time,key,result 1996-06-21T00:00:00.000000000,Ryan,1996-06-21T00:00:00.000000000 1996-07-21T00:00:00.000000000,Ben,1996-07-21T00:00:00.000000000 1996-08-21T00:00:00.000000000,Ben,1996-08-21T00:00:00.000000000 -''' +""" [[examples]] -name = 'Time Of Integer Column' -expression = 'time_of(Input.integer)' -input_csv = ''' +name = "Time Of Integer Column" +expression = "time_of(Input.integer)" +input_csv = """ time,key,integer 1996-03-21T00:00:00-00:00,Ben,8 1996-04-21T00:00:00-00:00,Ryan,12 @@ -44,8 +44,8 @@ time,key,integer 1996-06-21T00:00:00-00:00,Ryan,37 1996-07-21T00:00:00-00:00,Ben, 1996-08-21T00:00:00-00:00,Ben,24 -''' -output_csv = ''' +""" +output_csv = """ time,key,integer,result 1996-03-21T00:00:00.000000000,Ben,8,1996-03-21T00:00:00.000000000 1996-04-21T00:00:00.000000000,Ryan,12,1996-04-21T00:00:00.000000000 @@ -53,4 +53,4 @@ time,key,integer,result 1996-06-21T00:00:00.000000000,Ryan,37,1996-06-21T00:00:00.000000000 1996-07-21T00:00:00.000000000,Ben,,1996-07-21T00:00:00.000000000 
1996-08-21T00:00:00.000000000,Ben,24,1996-08-21T00:00:00.000000000 -''' +""" diff --git a/crates/sparrow-catalog/catalog/upper.toml b/crates/sparrow-catalog/catalog/upper.toml index 6c04b9b92..47de18956 100644 --- a/crates/sparrow-catalog/catalog/upper.toml +++ b/crates/sparrow-catalog/catalog/upper.toml @@ -1,7 +1,7 @@ -name = 'upper' -signature = 'upper(s: string) -> string' -short_doc = 'Converts the string to upper case.' -long_doc = ''' +name = "upper" +signature = "upper(s: string) -> string" +short_doc = "Converts the string to upper case." +long_doc = """ ### Parameters * s: The string to convert to upper case. @@ -9,13 +9,13 @@ long_doc = ''' Returns a `string` column with each row containing the string `s` from that row converted to all upper case. The row contains `null` if `s` is `null` in that row. -''' -tags = ['string'] +""" +tags = ["string"] [[examples]] -name = 'Upper Case' -expression = 'Input.value | upper()' -input_csv = ''' +name = "Upper Case" +expression = "Input.value | upper()" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,Hello World 2021-01-02T00:00:00.000000000Z,Ryan, @@ -23,8 +23,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,Hello 2021-01-03T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,hi -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,Hello World,HELLO WORLD 2021-01-02T00:00:00.000000000,Ryan,, @@ -32,4 +32,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,Hello,HELLO 2021-01-03T00:00:00.000000000,Ben,, 2021-01-04T00:00:00.000000000,Ryan,hi,HI -''' +""" diff --git a/crates/sparrow-catalog/catalog/variance.toml b/crates/sparrow-catalog/catalog/variance.toml index 2c11bf98a..7077e7ef1 100644 --- a/crates/sparrow-catalog/catalog/variance.toml +++ b/crates/sparrow-catalog/catalog/variance.toml @@ -1,7 +1,7 @@ -name = 'variance' -signature = 'variance(input: number, window: window = null) -> f64' -short_doc = 'Computes the sample 
variance of values across the input.' -long_doc = ''' +name = "variance" +signature = "variance(input: number, window: window = null) -> f64" +short_doc = "Computes the sample variance of values across the input." +long_doc = """ Computes the sample variance. This divides by the number of values minus 1, rather the number of values (which would be the population variance). @@ -16,16 +16,16 @@ See [window functions](#window-functions) for how to specify the aggregation win For each input row, return the mean of new, non-`null` rows in `input` up to and including the input row for the given entity. Returns `null` until there has been at least two such inputs. -''' +""" tags = [ - 'aggregation', - 'math', + "aggregation", + "math", ] [[examples]] -name = 'Variance' -expression = 'variance(Input.value)' -input_csv = ''' +name = "Variance" +expression = "variance(Input.value)" +input_csv = """ time,key,value 2021-01-01T00:00:00.000000000Z,Ben,50.7 2021-01-01T00:00:00.000000000Z,Ryan, @@ -33,8 +33,8 @@ time,key,value 2021-01-03T00:00:00.000000000Z,Ben,1.2 2021-01-04T00:00:00.000000000Z,Ben, 2021-01-04T00:00:00.000000000Z,Ryan,2.3 -''' -output_csv = ''' +""" +output_csv = """ time,key,value,result 2021-01-01T00:00:00.000000000,Ben,50.7, 2021-01-01T00:00:00.000000000,Ryan,, @@ -42,4 +42,4 @@ time,key,value,result 2021-01-03T00:00:00.000000000,Ben,1.2,612.5625000000001 2021-01-04T00:00:00.000000000,Ben,,612.5625000000001 2021-01-04T00:00:00.000000000,Ryan,2.3,1053.0025000000003 -''' +""" diff --git a/crates/sparrow-catalog/catalog/when.toml b/crates/sparrow-catalog/catalog/when.toml index 9022ba56d..478f6b10f 100644 --- a/crates/sparrow-catalog/catalog/when.toml +++ b/crates/sparrow-catalog/catalog/when.toml @@ -1,9 +1,9 @@ -name = 'when' -signature = 'when(condition: bool, value: any) -> any' -short_doc = 'Produces the current `value` when the `condition` evaluates to `true`.' 
-long_doc = ''' +name = "when" +signature = "when(condition: bool, value: any) -> any" +short_doc = "Produces the current `value` when the `condition` evaluates to `true`." +long_doc = """ Performs filtering of rows. -Unlike [`if`](#if) which just "nulls" out a value in the current row, this removes the row entirely. +Unlike [`if`](#if) which just \"nulls\" out a value in the current row, this removes the row entirely. ### Parameters * condition: Determines whether to include a given row. @@ -21,13 +21,13 @@ then this returns the latest result of the aggregation when `condition` is `true`. If the `value` is not continuous (eg., taken directly from events) then this returns the current `value` when the `condition` is `true`. -''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'When' -full_expression = 'Input | when(Input.condition)' -input_csv = ''' +name = "When" +full_expression = "Input | when(Input.condition)" +input_csv = """ time,key,condition 1996-03-21T00:00:00-00:00,Ben,true 1996-04-21T00:00:00-00:00,Ryan,true @@ -35,11 +35,11 @@ time,key,condition 1996-06-21T00:00:00-00:00,Ryan,true 1996-07-21T00:00:00-00:00,Ben,false 1996-08-21T00:00:00-00:00,Ben,true -''' -output_csv = ''' +""" +output_csv = """ time,key,condition 1996-03-21T00:00:00.000000000,Ben,true 1996-04-21T00:00:00.000000000,Ryan,true 1996-06-21T00:00:00.000000000,Ryan,true 1996-08-21T00:00:00.000000000,Ben,true -''' +""" diff --git a/crates/sparrow-catalog/catalog/with_key.toml b/crates/sparrow-catalog/catalog/with_key.toml index 857e87d5a..11f0e034c 100644 --- a/crates/sparrow-catalog/catalog/with_key.toml +++ b/crates/sparrow-catalog/catalog/with_key.toml @@ -1,12 +1,12 @@ -name = 'with_key' -signature = 'with_key(key: key, value: any, const grouping: string = null) -> any' -short_doc = 'Changes the grouping of the input `value`.' 
-experimental = ''' +name = "with_key" +signature = "with_key(key: key, value: any, const grouping: string = null) -> any" +short_doc = "Changes the grouping of the input `value`." +experimental = """ `with_key` is experimental functionality. You should expect the behavior to potentially change in the future. There may be issues when using this if multiple rows are assigned the same key. -''' -long_doc = ''' +""" +long_doc = """ ### Parameters * key: The new key to use for the grouping. * value: The value to be re-grouped. @@ -18,12 +18,12 @@ long_doc = ''' Returns a column containing the non-`null` rows of `value`. Each row occurs at the same time as in `value`. The results have been re-keyed based on the value of `key` to be part of the named `grouping`. -''' -tags = ['grouping'] +""" +tags = ["grouping"] [[examples]] -name = 'Changing Keys' -description = ''' +name = "Changing Keys" +description = """ This example starts with input grouped by the `key` column. We wish to instead compute aggregates grouped by the `other_key` column. We do this by using the `with_key` function to change the grouping. @@ -31,25 +31,25 @@ We use `other_key` as the name of the grouping so that this table is compatible After we have regrouped we compute the `sum`, which we see is grouped by the `other_key`. The `extend` function is used so that we can add fields to the regrouped record. 
-''' -full_expression = ''' +""" +full_expression = """ Input | with_key($input.other_key, grouping = 'other_key') | extend($input, { sum_n_by_other_key: sum($input.n) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,other_key,n 2021-01-01T00:00:00.000000000Z,A,X,5 2021-01-02T00:00:00.000000000Z,A,Y,8 2021-03-01T00:00:00.000000000Z,B,X,9 2021-04-10T00:00:00.000000000Z,A,X, 2021-04-11T00:00:00.000000000Z,A,,9 -''' -output_csv = ''' +""" +output_csv = """ time,key,other_key,n,sum_n_by_other_key 2021-01-01T00:00:00.000000000,A,X,5,5 2021-01-02T00:00:00.000000000,A,Y,8,8 2021-03-01T00:00:00.000000000,B,X,9,14 2021-04-10T00:00:00.000000000,A,X,,14 2021-04-11T00:00:00.000000000,A,,9,9 -''' +""" diff --git a/crates/sparrow-catalog/catalog/year.toml b/crates/sparrow-catalog/catalog/year.toml index 92a073608..09424f8b1 100644 --- a/crates/sparrow-catalog/catalog/year.toml +++ b/crates/sparrow-catalog/catalog/year.toml @@ -1,20 +1,20 @@ -name = 'year' -signature = 'year(time: timestamp_ns) -> i32' -short_doc = 'Return the year of the given timestamp.' -long_doc = ''' +name = "year" +signature = "year(time: timestamp_ns) -> i32" +short_doc = "Return the year of the given timestamp." +long_doc = """ ### Parameters * time: The timestamp to return the year for. ### Results Returns an `i32` column containing the year for each input `time`. Returns `null` for rows where `time` is `null`. 
-''' -tags = ['time'] +""" +tags = ["time"] [[examples]] -name = 'Year' -expression = 'year(Input.time)' -input_csv = ''' +name = "Year" +expression = "year(Input.time)" +input_csv = """ time,key 1996-03-21T00:00:00-00:00,Ben 1997-04-21T00:00:00-00:00,Ryan @@ -22,8 +22,8 @@ time,key 2000-06-21T00:00:00-00:00,Ryan 2021-07-21T00:00:00-00:00,Ben 2022-08-21T00:00:00-00:00,Ben -''' -output_csv = ''' +""" +output_csv = """ time,key,result 1996-03-21T00:00:00.000000000,Ben,1996 1997-04-21T00:00:00.000000000,Ryan,1997 @@ -31,4 +31,4 @@ time,key,result 2000-06-21T00:00:00.000000000,Ryan,2000 2021-07-21T00:00:00.000000000,Ben,2021 2022-08-21T00:00:00.000000000,Ben,2022 -''' +""" diff --git a/crates/sparrow-catalog/catalog/yearly.toml b/crates/sparrow-catalog/catalog/yearly.toml index f5215842f..b6f786917 100644 --- a/crates/sparrow-catalog/catalog/yearly.toml +++ b/crates/sparrow-catalog/catalog/yearly.toml @@ -1,29 +1,29 @@ -name = 'yearly' -signature = 'yearly() -> bool' -short_doc = 'A periodic function that produces a `true` value at the start of each calendar year (UTC).' -long_doc = ''' +name = "yearly" +signature = "yearly() -> bool" +short_doc = "A periodic function that produces a `true` value at the start of each calendar year (UTC)." +long_doc = """ This function is often used in aggregations to produce windows or as a predicate column. ### Results Returns a boolean column with each row containing a `true` value at the start of each calendary yea rand `null` at all other times. -''' -tags = ['tick'] +""" +tags = ["tick"] [[examples]] -name = 'Yearly Aggregated Window' -description = ''' +name = "Yearly Aggregated Window" +description = """ In this example, the `yearly()` function is used as an argument to the [`since](#since) function, which produces a window. The result is a windowed aggregation that resets at the start of each calendar year (UTC). 
-''' -full_expression = ''' +""" +full_expression = """ { n: Input.n, yearly_sum: sum(Input.n, window = since(yearly())) } | extend({time: time_of($input), key: first(Input.key) }) -''' -input_csv = ''' +""" +input_csv = """ time,key,n 1996-12-19T16:00:00-00:00,Ben,2 1996-12-19T16:00:00-00:00,Ryan,3 @@ -31,8 +31,8 @@ time,key,n 1997-12-20T16:01:00-00:00,Ben,9 1997-12-21T16:00:00-00:00,Ryan,8 1998-12-21T16:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n,yearly_sum 1996-12-19T16:00:00.000000000,Ben,2,2 1996-12-19T16:00:00.000000000,Ryan,3,3 @@ -44,18 +44,18 @@ time,key,n,yearly_sum 1998-01-01T00:00:00.000000000,Ryan,,8 1998-01-01T00:00:00.000000000,Ben,,15 1998-12-21T16:00:00.000000000,Ben,1,1 -''' +""" [[examples]] -name = 'Filter Yearly' -description = ''' +name = "Filter Yearly" +description = """ In this example, the `yearly()` function is used as an argument to the [`when`](#when) function, which filters input. The output includes the last input row before a [`tick`](#tick) occurs. -''' -full_expression = 'Input | last() | when(yearly())' -input_csv = ''' +""" +full_expression = "Input | last() | when(yearly())" +input_csv = """ time,key,n 1996-12-19T16:00:00-00:00,Ben,2 1996-12-19T16:00:00-00:00,Ryan,3 @@ -63,11 +63,11 @@ time,key,n 1997-12-20T16:01:00-00:00,Ben,9 1997-12-21T16:00:00-00:00,Ryan,8 1998-12-21T16:00:00-00:00,Ben,1 -''' -output_csv = ''' +""" +output_csv = """ time,key,n 1996-12-19T16:00:00.000000000,Ryan,3 1996-12-19T16:00:00.000000000,Ben,2 1997-12-21T16:00:00.000000000,Ryan,8 1997-12-20T16:01:00.000000000,Ben,9 -''' +""" diff --git a/crates/sparrow-catalog/catalog/zip_max.toml b/crates/sparrow-catalog/catalog/zip_max.toml index d1cdaf8ce..fcff49ae8 100644 --- a/crates/sparrow-catalog/catalog/zip_max.toml +++ b/crates/sparrow-catalog/catalog/zip_max.toml @@ -1,7 +1,7 @@ -name = 'zip_max' -signature = 'zip_max(a: ordered, b: ordered) -> ordered' -short_doc = 'Returns the maximum of two values.' 
-long_doc = ''' +name = "zip_max" +signature = "zip_max(a: ordered, b: ordered) -> ordered" +short_doc = "Returns the maximum of two values." +long_doc = """ This returns the maximum of two values. See the aggregation [`max`](#max) for the maximum of values in a column up to and including the current row. @@ -16,23 +16,23 @@ Returns a numeric column of the promoted type. Each row contains the value from `a` if `a` is greater than `b`, otherwise it contains `b`. Specifically, if `a` or `b` is `NaN` then `b` will be returned. If `a` or `b` are `null`, then `b` will be returned. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Zip Max' -expression = 'zip_max(Input.a, Input.b)' -input_csv = ''' +name = "Zip Max" +expression = "zip_max(Input.a, Input.b)" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-01T00:00:00.000000000Z,A,6.3,0.4 2021-01-02T00:00:00.000000000Z,B,,3.7 2021-01-03T00:00:00.000000000Z,A,13.2, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,5.7 2021-01-01T00:00:00.000000000,A,6.3,0.4,6.3 2021-01-02T00:00:00.000000000,B,,3.7, 2021-01-03T00:00:00.000000000,A,13.2,, -''' +""" diff --git a/crates/sparrow-catalog/catalog/zip_min.toml b/crates/sparrow-catalog/catalog/zip_min.toml index a768ffcfe..a012985e8 100644 --- a/crates/sparrow-catalog/catalog/zip_min.toml +++ b/crates/sparrow-catalog/catalog/zip_min.toml @@ -1,7 +1,7 @@ -name = 'zip_min' -signature = 'zip_min(a: ordered, b: ordered) -> ordered' -short_doc = 'Returns the minimum of two values.' -long_doc = ''' +name = "zip_min" +signature = "zip_min(a: ordered, b: ordered) -> ordered" +short_doc = "Returns the minimum of two values." +long_doc = """ This returns the minimum of two values. See the aggregation [`min`](#min) for the minimum of values in a column up to and including the current row. @@ -16,23 +16,23 @@ Returns a numeric column of the promoted type. 
Each row contains the value from `a` if `a` is less than `b`, otherwise it contains `b`. Specifically, if `a` or `b` is `NaN` then `b` will be returned. If `a` or `b` are `null`, then `b` will be returned. -''' -tags = ['math'] +""" +tags = ["math"] [[examples]] -name = 'Zip Min' -expression = 'zip_min(Input.a, Input.b)' -input_csv = ''' +name = "Zip Min" +expression = "zip_min(Input.a, Input.b)" +input_csv = """ time,key,a,b 2021-01-01T00:00:00.000000000Z,A,5.7,1.2 2021-01-01T00:00:00.000000000Z,A,6.3,0.4 2021-01-02T00:00:00.000000000Z,B,,3.7 2021-01-03T00:00:00.000000000Z,A,13.2, -''' -output_csv = ''' +""" +output_csv = """ time,key,a,b,result 2021-01-01T00:00:00.000000000,A,5.7,1.2,1.2 2021-01-01T00:00:00.000000000,A,6.3,0.4,0.4 2021-01-02T00:00:00.000000000,B,,3.7, 2021-01-03T00:00:00.000000000,A,13.2,, -''' +""" diff --git a/crates/sparrow-compiler/src/frontend/parse_expr.rs b/crates/sparrow-compiler/src/frontend/parse_expr.rs index bb4466c91..ae100f1c4 100644 --- a/crates/sparrow-compiler/src/frontend/parse_expr.rs +++ b/crates/sparrow-compiler/src/frontend/parse_expr.rs @@ -27,7 +27,7 @@ fn parse_error_to_diagnostic( ParseError::InvalidToken { location } => DiagnosticCode::SyntaxError .builder() .with_label(Label::primary(part_id, location..location).with_message("Invalid token")), - ParseError::UnrecognizedEOF { location, expected } => { + ParseError::UnrecognizedEof { location, expected } => { let diagnostic = DiagnosticCode::SyntaxError.builder().with_label( Label::primary(part_id, location..location).with_message("Unexpected EOF"), ); diff --git a/crates/sparrow-main/tests/e2e/basic_error_tests.rs b/crates/sparrow-main/tests/e2e/basic_error_tests.rs index eb582281b..6c746e553 100644 --- a/crates/sparrow-main/tests/e2e/basic_error_tests.rs +++ b/crates/sparrow-main/tests/e2e/basic_error_tests.rs @@ -474,7 +474,7 @@ async fn test_parse_error_missing_parentheses() { - "1 | { n: Numbers.n" - " | ^ Unexpected EOF" - " |" - - " = Expected 
\")\",\",\",\"]\",\"in\",\"let\",\"}\"" + - " = Expected \",\",\"}\"" - "" - "" "###); @@ -497,7 +497,7 @@ async fn test_parse_error_unrecognized() { - "1 | limit x = 5 in { n: Numbers.n}" - " | ^ Invalid token 'x'" - " |" - - " = Expected \"!=\", \"(\", \")\", \"*\", \"+\", \",\", \"-\", \".\", \"/\", \":\", \"<\", \"<=\", \"<>\", \"=\", \"=\", \"==\", \">\", \">=\", \"[\", \"]\", \"and\", \"as\", \"in\", \"let\", \"or\", \"|\", \"}\"" + - " = Expected \"!=\", \"(\", \"*\", \"+\", \"-\", \".\", \"/\", \"<\", \"<=\", \"<>\", \"==\", \">\", \">=\", \"[\", \"and\", \"as\", \"or\", \"|\"" - "" - "" - severity: error