Fix: Ray nightly now uses its own schema type (#351)

* fix: Ray Data now uses its own schema type Signed-off-by: Zhi Lin <zhi.lin@intel.com> * upd Signed-off-by: Zhi Lin <zhi.lin@intel.com> --------- Signed-off-by: Zhi Lin <zhi.lin@intel.com>
oap-project · Jun 13, 2023 · b9e1b99 · b9e1b99
1 parent 8d28396
commit b9e1b99
Showing 1 changed file with 5 additions and 3 deletions.
diff --git a/python/raydp/spark/dataset.py b/python/raydp/spark/dataset.py
@@ -259,13 +259,15 @@ def get_locations(blocks):
     ]
 
 def ray_dataset_to_spark_dataframe(spark: sql.SparkSession,
-                                   arrow_schema: "pa.lib.Schema",
+                                   arrow_schema,
                                    blocks: List[ObjectRef],
                                    locations = None) -> DataFrame:
     locations = get_locations(blocks)
     if not isinstance(arrow_schema, pa.lib.Schema):
-        raise RuntimeError(f"Schema is {type(arrow_schema)}, required pyarrow.lib.Schema. \n" \
-                           f"to_spark does not support converting non-arrow ray datasets.")
+        if hasattr(arrow_schema, "base_schema") and \
+                not isinstance(arrow_schema.base_schema, pa.lib.Schema):
+            raise RuntimeError(f"Schema is {type(arrow_schema)}, required pyarrow.lib.Schema. \n" \
+                               f"to_spark does not support converting non-arrow ray datasets.")
     schema = StructType()
     for field in arrow_schema:
         schema.add(field.name, from_arrow_type(field.type), nullable=field.nullable)