diff --git a/.gitignore b/.gitignore
index 3d4e96d..edc131e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,12 @@
 .fleet/
 *.iml
 
+# VS Code specific
+.bloop
+.metals
+metals.sbt
+.vscode
+
 # SBT specific
 .bsp/
 coverage.xml
diff --git a/core/src/main/scala/org/polars/scala/polars/api/io/Writeable.scala b/core/src/main/scala/org/polars/scala/polars/api/io/Writeable.scala
index a023317..2d396b1 100644
--- a/core/src/main/scala/org/polars/scala/polars/api/io/Writeable.scala
+++ b/core/src/main/scala/org/polars/scala/polars/api/io/Writeable.scala
@@ -155,4 +155,24 @@ class Writeable private[polars] (ptr: Long) {
       writeMode = _mode
     )
   }
+
+  /** Serializes the underlying DataFrame to a JSON string.
+   *
+   * @param pretty
+   *   pretty-print the column-oriented output; ignored when `rowOriented` is true
+   * @param rowOriented
+   *   emit an array of row objects keyed by column name instead of the column-oriented form
+   * @return
+   *   the JSON document
+   */
+  def toJsonString(pretty: Boolean, rowOriented: Boolean): String =
+    json(ptr, pretty, rowOriented)
+
+  /** Serializes the underlying DataFrame to JSON and returns the encoded bytes.
+   *
+   * Accepts the same options as [[toJsonString]].
+   */
+  def toJsonBytes(pretty: Boolean, rowOriented: Boolean): Array[Byte] =
+    jsonBytes(ptr, pretty, rowOriented)
+
 }
diff --git a/core/src/main/scala/org/polars/scala/polars/internal/jni/io/write.scala b/core/src/main/scala/org/polars/scala/polars/internal/jni/io/write.scala
index 90859ba..26a5407 100644
--- a/core/src/main/scala/org/polars/scala/polars/internal/jni/io/write.scala
+++ b/core/src/main/scala/org/polars/scala/polars/internal/jni/io/write.scala
@@ -30,4 +30,10 @@ private[polars] object write extends Natively {
     writeMode: String
   ): Unit
 
+  /** Serializes the DataFrame behind `ptr` to a JSON string. */
+  @native def json(ptr: Long, pretty: Boolean, rowOriented: Boolean): String
+
+  /** Serializes the DataFrame behind `ptr` to JSON bytes. */
+  @native def jsonBytes(ptr: Long, pretty: Boolean, rowOriented: Boolean): Array[Byte]
+
 }
diff --git a/examples/src/main/scala/examples/scala/io/Json.scala b/examples/src/main/scala/examples/scala/io/Json.scala
new file mode 100644
index 0000000..4adfee0
--- /dev/null
+++ b/examples/src/main/scala/examples/scala/io/Json.scala
@@ -0,0 +1,49 @@
+package examples.scala.io
+
+import java.nio.charset.StandardCharsets
+
+import org.polars.scala.polars.Polars
+import org.polars.scala.polars.api.DataFrame
+
+import examples.scala.utils.CommonUtils
+
+/** Polars supports exporting the contents of a [[DataFrame]] to JSON.
+ *
+ * It has 2 formats:
+ *   - a row-oriented format, which represents the frame as an array of objects whose keys are
+ *     the column names and whose values are the row's corresponding values.
+ *   - a column-oriented format, which represents the frame as an array of objects containing a
+ *     column name, type, and the array of column values.
+ *
+ * Only the column-oriented format may be pretty-printed; the `pretty` flag is ignored for
+ * row-oriented output. The row-oriented format is less efficient, but may be more convenient
+ * for downstream applications.
+ */
+object Json {
+
+  def main(args: Array[String]): Unit = {
+
+    val path = CommonUtils.getResource("/files/web-ds/data.csv")
+    val df: DataFrame = Polars.csv.scan(path).collect
+
+    println("Showing CSV file as a DataFrame to stdout.")
+    df.show()
+
+    println("Showing the DataFrame as column-oriented JSON to stdout.")
+    val colOriented = df.write().toJsonString(pretty = false, rowOriented = false)
+    println(colOriented)
+
+    println("Showing the DataFrame as pretty column-oriented JSON to stdout.")
+    val prettyColOriented = df.write().toJsonString(pretty = true, rowOriented = false)
+    println(prettyColOriented)
+
+    println("Showing the DataFrame as row-oriented JSON to stdout.")
+    val rowOriented = df.write().toJsonString(pretty = false, rowOriented = true)
+    println(rowOriented)
+
+    println("Showing the DataFrame as pretty column-oriented JSON bytes to stdout.")
+    val prettyColOrientedBytes = df.write().toJsonBytes(pretty = true, rowOriented = false)
+    println(new String(prettyColOrientedBytes, StandardCharsets.UTF_8))
+  }
+
+}
diff --git a/native/src/internal_jni/io/write/json.rs b/native/src/internal_jni/io/write/json.rs
new file mode 100644
index 0000000..733b281
--- /dev/null
+++ b/native/src/internal_jni/io/write/json.rs
@@ -0,0 +1,89 @@
+// JNI symbol names must mirror the Scala method names (e.g. `jsonBytes`).
+#![allow(non_snake_case)]
+
+use jni::objects::{JObject, JPrimitiveArray};
+use jni::sys::{_jobject, jboolean, jlong};
+use jni::JNIEnv;
+use jni_fn::jni_fn;
+use polars::prelude::*;
+
+use crate::j_data_frame::JDataFrame;
+
+/// JNI entry point for `write.json`: serializes the data frame behind `df_ptr` to a Java
+/// string.
+///
+/// `pretty` only affects column-oriented output; it is ignored when `row_oriented` is set.
+///
+/// # Panics
+///
+/// Panics if serialization fails, the output is not valid UTF-8, or the Java string cannot
+/// be created.
+#[jni_fn("org.polars.scala.polars.internal.jni.io.write$")]
+pub fn json(
+    env: JNIEnv,
+    _object: JObject,
+    df_ptr: jlong,
+    pretty: jboolean,
+    row_oriented: jboolean,
+) -> *mut _jobject {
+    let buf = json_bytes(df_ptr, pretty, row_oriented);
+    let rust_string = String::from_utf8(buf).expect("JSON output is not valid UTF-8");
+
+    env.new_string(rust_string)
+        .expect("Couldn't create Java string!")
+        .into_raw()
+}
+
+/// JNI entry point for `write.jsonBytes`: serializes the data frame behind `df_ptr` to a Java
+/// byte array.
+///
+/// `pretty` only affects column-oriented output; it is ignored when `row_oriented` is set.
+///
+/// # Panics
+///
+/// Panics if serialization fails or the Java byte array cannot be created.
+#[jni_fn("org.polars.scala.polars.internal.jni.io.write$")]
+pub fn jsonBytes<'a>(
+    env: JNIEnv<'a>,
+    _object: JObject,
+    df_ptr: jlong,
+    pretty: jboolean,
+    row_oriented: jboolean,
+) -> JPrimitiveArray<'a, i8> {
+    let buf = json_bytes(df_ptr, pretty, row_oriented);
+    env.byte_array_from_slice(&buf)
+        .expect("Couldn't create Java byte array!")
+}
+
+/// Serializes the data frame behind `df_ptr` into an in-memory JSON buffer.
+///
+/// When `row_oriented` is set the frame is written with `JsonWriter` in `JsonFormat::Json`
+/// and `pretty` is ignored; otherwise it is serialized through `serde_json`, pretty-printed
+/// when `pretty` is set.
+///
+/// # Panics
+///
+/// Panics if the frame cannot be formatted as JSON.
+fn json_bytes(df_ptr: jlong, pretty: jboolean, row_oriented: jboolean) -> Vec<u8> {
+    // SAFETY: relies on the caller passing the address of a live `JDataFrame`; it is only
+    // read through a shared reference here.
+    let j_df = unsafe { &*(df_ptr as *const JDataFrame) };
+    // Rechunk a clone so the frame behind the pointer is left untouched.
+    let mut data_frame = j_df.df.clone();
+    let df = data_frame.as_single_chunk_par();
+
+    let mut buf: Vec<u8> = Vec::new();
+    let result = match (pretty != 0, row_oriented != 0) {
+        (_, true) => JsonWriter::new(&mut buf)
+            .with_json_format(JsonFormat::Json)
+            .finish(df),
+        (true, false) => serde_json::to_writer_pretty(&mut buf, &*df)
+            .map_err(|e| polars_err!(ComputeError: "{e}")),
+        (false, false) => {
+            serde_json::to_writer(&mut buf, &*df).map_err(|e| polars_err!(ComputeError: "{e}"))
+        },
+    };
+    result.expect("Unable to format JSON");
+
+    buf
+}
diff --git a/native/src/internal_jni/io/write/mod.rs b/native/src/internal_jni/io/write/mod.rs
index 68d053c..7fbd6f9 100644
--- a/native/src/internal_jni/io/write/mod.rs
+++ b/native/src/internal_jni/io/write/mod.rs
@@ -1,3 +1,4 @@
 pub mod avro;
 pub mod ipc;
+pub mod json;
 pub mod parquet;