Skip to content

Commit

Permalink
Refine single-value column to treat it as that single value (#12120)
Browse files Browse the repository at this point in the history
- First experiments with intersection types as refinements to our Table/Column
- Closes #12095
  • Loading branch information
radeusgd authored Jan 27, 2025
1 parent 7c9f3c1 commit a47ed0f
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,9 @@ type Dictionary key value
insert self key=(Missing_Argument.throw "key") value=(Missing_Argument.throw "value") no_warning:Boolean=False =
new_dict = self.insert_builtin key value
case key of
_ : Float ->
key_as_float : Float ->
if no_warning then new_dict else
Warning.attach (Floating_Point_Equality.Used_As_Dictionary_Key key) new_dict
Warning.attach (Floating_Point_Equality.Used_As_Dictionary_Key key_as_float) new_dict
_ -> new_dict

## GROUP Selections
Expand Down
31 changes: 17 additions & 14 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ from project.Errors import Conversion_Failure, Inexact_Type_Coercion, Invalid_Co
from project.Internal.Column_Format import all
from project.Internal.Java_Exports import make_string_builder
from project.Internal.Storage import enso_to_java, java_to_enso
from project.Internal.Type_Refinements.Single_Value_Column import refine_with_single_value

polyglot java import org.enso.base.Time_Utils
polyglot java import org.enso.table.data.column.operation.cast.CastProblemAggregator
Expand Down Expand Up @@ -87,7 +88,7 @@ type Column
example_from_vector =
Column.from_vector "My Column" [1, 2, 3, 4, 5]
from_vector : Text -> Vector -> Auto | Value_Type -> Column ! Invalid_Value_Type
from_vector (name : Text) (items : Vector) (value_type : Auto | Value_Type = Auto) =
from_vector (name : Text) (items : Vector) (value_type : Auto | Value_Type = Auto) -> Column =
## If the type does not accept date-time-like values, we can skip the
additional logic for polyglot conversions that would normally be used,
which is quite costly - so if we can guarantee it is unnecessary,
Expand Down Expand Up @@ -120,10 +121,12 @@ type Column
case needs_polyglot_conversion of
True -> Java_Column.fromItems name (enso_to_java_maybe items) expected_storage_type java_problem_aggregator
False -> Java_Column.fromItemsNoDateConversion name items expected_storage_type java_problem_aggregator
result = Column.from_java_column java_column . throw_on_warning Conversion_Failure
result.catch Conversion_Failure error->
if error.example_values.is_empty then result else
raise_invalid_value_type_error error.example_values.first
multi_result = Column.from_java_column java_column
result = Warning.throw_on_warning multi_result Conversion_Failure
if Meta.is_error result . not then result else
result.catch Conversion_Failure error->
if error.example_values.is_empty then result else
raise_invalid_value_type_error error.example_values.first

## PRIVATE
Creates a new column given a name and an internal Java storage.
Expand All @@ -135,9 +138,9 @@ type Column

## PRIVATE
Creates a new column given a Java Column object.
from_java_column : Java_Column -> Column
from_java_column java_column =
Column.Value java_column
from_java_column java_column:Java_Column -> Column =
column = Column.Value java_column
column |> refine_with_single_value

## PRIVATE
ADVANCED
Expand Down Expand Up @@ -1202,8 +1205,8 @@ type Column
storage_type = Storage.from_value_type_strict common_type
new_storage = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
case default of
Column.Value java_col ->
other_storage = java_col.getStorage
col : Column ->
other_storage = col.java_column.getStorage
storage.fillMissingFrom other_storage storage_type java_problem_aggregator
_ ->
storage.fillMissing default storage_type java_problem_aggregator
Expand Down Expand Up @@ -2699,9 +2702,9 @@ run_vectorized_binary_op column name operand new_name=Nothing fallback_fn=Nothin
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning problem_builder->
storage_type = resolve_storage_type expected_result_type
case operand of
Column.Value col2 ->
operand_column : Column ->
s1 = column.java_column.getStorage
s2 = col2.getStorage
s2 = operand_column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackZip name problem_builder fallback_fn s2 skip_nulls storage_type
Column.from_storage effective_new_name rs
Expand Down Expand Up @@ -2792,9 +2795,9 @@ run_vectorized_binary_op_with_fallback_problem_handling column name operand fall
_ -> fallback_fn problem_builder
storage_type = resolve_storage_type expected_result_type
case operand of
Column.Value col2 ->
operand_column : Column ->
s1 = column.java_column.getStorage
s2 = col2.getStorage
s2 = operand_column.java_column.getStorage
rs = Polyglot_Helpers.handle_polyglot_dataflow_errors <|
s1.vectorizedOrFallbackZip name problem_builder applied_fn s2 skip_nulls storage_type
Column.from_storage new_name rs
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
private

from Standard.Base import all

import project.Column.Column
import project.Value_Type.Value_Type
from project.Internal.Type_Refinements.Single_Value_Column_Extensions import all

## Narrows a column that holds exactly one non-`Nothing` value to an
   intersection of `Column` and the type of that value. Columns that do not
   qualify, or whose value type is not handled below, are returned unchanged.
refine_with_single_value (column : Column) =
    case is_single_value column of
        False -> column
        True -> case column.inferred_precise_value_type of
            Value_Type.Integer _ ->
                # The precise inferred type is also Integer for a Float (or Mixed) column holding an integral value, e.g. [2.0].
                # The stored value itself decides which refinement applies.
                case column.at 0 of
                    # A value that is really a float stays a float.
                    _ : Float -> (column : Column & Float)
                    # Anything else is treated as an integer.
                    _ -> (column : Column & Integer)
            Value_Type.Float _ -> (column : Column & Float)
            Value_Type.Char _ _ -> (column : Column & Text)
            Value_Type.Boolean -> (column : Column & Boolean)
            Value_Type.Date -> (column : Column & Date)
            Value_Type.Time -> (column : Column & Time_Of_Day)
            Value_Type.Date_Time True -> (column : Column & Date_Time)
            Value_Type.Decimal _ scale ->
                # A scale of zero is treated as an integer.
                if scale == 0 then (column : Column & Integer) else (column : Column & Decimal)
            # Remaining types (e.g. Mixed) get no refinement.
            _ -> column

## Checks whether the column has exactly one row and that row is not `Nothing`.
is_single_value column:Column -> Boolean =
    # The element is only inspected once the length is known to be 1.
    if column.length != 1 then False else
        column.at 0 . is_nothing . not
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
private

from Standard.Base import all

import project.Column.Column
from project.Internal.Type_Refinements.Single_Value_Column import is_single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as an `Integer`; a `Float`
   value is asserted to be integral and then truncated.
Integer.from (that : Column) -> Integer =
    Runtime.assert (is_single_value that)
    case that.at 0 of
        int_value : Integer -> int_value
        float_value : Float ->
            Runtime.assert (float_value % 1.0 == 0.0)
            float_value.truncate

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Float`.
Float.from (that : Column) -> Float =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Text`.
Text.from (that : Column) -> Text =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Boolean`.
Boolean.from (that : Column) -> Boolean =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Date`.
Date.from (that : Column) -> Date =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Time_Of_Day`.
Time_Of_Day.from (that : Column) -> Time_Of_Day =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Date_Time`.
Date_Time.from (that : Column) -> Date_Time =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value

## Internal conversion - must never be exported.
   Extracts the only value of a single-value column as a `Decimal`.
Decimal.from (that : Column) -> Decimal =
    Runtime.assert (is_single_value that)
    single_value = that.at 0
    single_value
6 changes: 3 additions & 3 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,13 @@ type Table
new : Vector (Vector | Column) -> Table
new columns =
invalid_input_shape =
Error.throw (Illegal_Argument.Error "Each column must be represented by a pair whose first element is the column name and the second element is a vector of elements that will constitute that column, or an existing column.")
Error.throw (Illegal_Argument.Error "Each column must be represented by a pair whose first element is the column name and the second element is a vector of elements that will constitute that column, or an existing column. Got: "+columns.to_text)
cols = columns.map on_problems=No_Wrap.Value c->
case c of
v : Vector ->
if v.length != 2 then invalid_input_shape else
Column.from_vector (v.at 0) (v.at 1) . java_column
Column.Value java_col -> java_col
col : Column -> col.java_column
_ -> invalid_input_shape
Panic.recover Illegal_Argument <|
if cols.is_empty then
Expand Down Expand Up @@ -2472,9 +2472,9 @@ type Table
unique.mark_used self.column_names

resolved = case value of
_ : Column -> value
_ : Text -> self.make_constant_column value
_ : Expression -> self.evaluate_expression value on_problems
_ : Column -> value
_ : Constant_Column -> self.make_constant_column value.value
_ : Simple_Expression -> value.evaluate self (set_mode==Set_Mode.Update && as=="") on_problems
_ -> Error.throw (Illegal_Argument.Error "Unsupported type for `Table.set`.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.EnsoMultiValue;
import org.enso.interpreter.runtime.data.Type;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.interpreter.runtime.library.dispatch.TypeOfNode;

final class AllOfTypesCheckNode extends AbstractTypeCheckNode {

@Children private AbstractTypeCheckNode[] checks;
@Child private TypesLibrary types;
@Child private TypeOfNode typeNode;
@Child private EnsoMultiValue.NewNode newNode;

AllOfTypesCheckNode(String name, AbstractTypeCheckNode[] checks) {
super(name);
this.checks = checks;
this.types = TypesLibrary.getFactory().createDispatched(checks.length);
this.typeNode = TypeOfNode.create();
this.newNode = EnsoMultiValue.NewNode.create();
}

Expand All @@ -46,7 +46,8 @@ Object executeCheckOrConversion(VirtualFrame frame, Object value, ExpressionNode
if (result == null) {
return null;
}
var t = types.getType(result);
var t = typeNode.findTypeOrNull(result);
assert t != null : "Value " + result + " doesn't have type!";
var ctx = EnsoContext.get(this);
if (ctx.getBuiltins().number().getInteger() == t) {
if (++integers > 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,19 @@ final class EnsoMultiType {
private final Type[] types;

/**
 * Wraps the given array of types. The array reference is stored as-is, without copying.
 *
 * @param types the types to hold; when assertions are enabled, every element is checked to be
 *     non-{@code null}
 */
private EnsoMultiType(Type[] types) {
  // Evaluated only when JVM assertions are enabled.
  assert checkNonNull(types);
  this.types = types;
}

/**
 * Tells whether the array is free of {@code null} elements.
 *
 * @param types the array to inspect
 * @return {@code false} as soon as a {@code null} element is found, {@code true} otherwise
 */
private static boolean checkNonNull(Type[] types) {
  for (int i = 0; i < types.length; i++) {
    if (types[i] == null) {
      return false;
    }
  }
  return true;
}

@CompilerDirectives.TruffleBoundary
static EnsoMultiType findOrCreateSlow(Type[] types, int from, int to) {
var mt = new EnsoMultiType(Arrays.copyOfRange(types, from, to));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,9 @@ Object invokeMember(
@Override
public String toString() {
var both = EnsoMultiType.AllTypesWith.getUncached().executeAllTypes(dispatch, extra, 0);
return Stream.of(both).map(t -> t.getName()).collect(Collectors.joining(" & "));
return Stream.of(both)
.map(t -> t != null ? t.getName() : "[?]")
.collect(Collectors.joining(" & "));
}

/** Casts {@link EnsoMultiValue} to requested type effectively. */
Expand Down
2 changes: 2 additions & 0 deletions test/Table_Tests/src/In_Memory/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import project.In_Memory.Fan_Out_Spec
import project.In_Memory.Integer_Overflow_Spec
import project.In_Memory.Lossy_Conversions_Spec
import project.In_Memory.Parse_To_Table_Spec
import project.In_Memory.Single_Value_Column_Spec
import project.In_Memory.Split_Tokenize_Spec
import project.In_Memory.Table_Spec
import project.In_Memory.Table_Xml_Spec
Expand All @@ -33,6 +34,7 @@ add_specs suite_builder =
Integer_Overflow_Spec.add_specs suite_builder
Lossy_Conversions_Spec.add_specs suite_builder
Parse_To_Table_Spec.add_specs suite_builder
Single_Value_Column_Spec.add_specs suite_builder
Split_Tokenize_Spec.add_specs suite_builder
Table_Conversion_Spec.add_specs suite_builder
Table_Date_Spec.add_specs suite_builder
Expand Down
Loading

0 comments on commit a47ed0f

Please sign in to comment.