Skip to content

Commit

Permalink
Fix: Ray nightly now uses its own schema type (#351)
Browse files Browse the repository at this point in the history
* fix: ray data now uses its own schema type

Signed-off-by: Zhi Lin <zhi.lin@intel.com>

* upd

Signed-off-by: Zhi Lin <zhi.lin@intel.com>

---------

Signed-off-by: Zhi Lin <zhi.lin@intel.com>
  • Loading branch information
kira-lin authored Jun 13, 2023
1 parent 8d28396 commit b9e1b99
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions python/raydp/spark/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,13 +259,15 @@ def get_locations(blocks):
]

def ray_dataset_to_spark_dataframe(spark: sql.SparkSession,
arrow_schema: "pa.lib.Schema",
arrow_schema,
blocks: List[ObjectRef],
locations = None) -> DataFrame:
locations = get_locations(blocks)
if not isinstance(arrow_schema, pa.lib.Schema):
raise RuntimeError(f"Schema is {type(arrow_schema)}, required pyarrow.lib.Schema. \n" \
f"to_spark does not support converting non-arrow ray datasets.")
if hasattr(arrow_schema, "base_schema") and \
not isinstance(arrow_schema.base_schema, pa.lib.Schema):
raise RuntimeError(f"Schema is {type(arrow_schema)}, required pyarrow.lib.Schema. \n" \
f"to_spark does not support converting non-arrow ray datasets.")
schema = StructType()
for field in arrow_schema:
schema.add(field.name, from_arrow_type(field.type), nullable=field.nullable)
Expand Down

0 comments on commit b9e1b99

Please sign in to comment.