pharo-ai · hernanmd · Dec 11, 2023 · Dec 13, 2023 · Dec 14, 2023 · Dec 14, 2023
diff --git a/src/AI-DataPartitioners-DataFrameTests/AIHashPartitionerDataFrameTest.class.st b/src/AI-DataPartitioners-DataFrameTests/AIHashPartitionerDataFrameTest.class.st
@@ -1,10 +1,11 @@
 Class {
-	#name : #AIHashPartitionerDataFrameTest,
-	#superclass : #TestCase,
-	#category : #'AI-DataPartitioners-DataFrameTests'
+	#name : 'AIHashPartitionerDataFrameTest',
+	#superclass : 'TestCase',
+	#category : 'AI-DataPartitioners-DataFrameTests',
+	#package : 'AI-DataPartitioners-DataFrameTests'
 }
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIHashPartitionerDataFrameTest >> testPartitionDataFrameWithCustomRowNames [
 
 	| dataFrame proportions subsets |

diff --git a/src/AI-DataPartitioners-DataFrameTests/AIRandomPartitionerDataFrameTest.class.st b/src/AI-DataPartitioners-DataFrameTests/AIRandomPartitionerDataFrameTest.class.st
@@ -1,10 +1,24 @@
 Class {
-	#name : #AIRandomPartitionerDataFrameTest,
-	#superclass : #TestCase,
-	#category : #'AI-DataPartitioners-DataFrameTests'
+	#name : 'AIRandomPartitionerDataFrameTest',
+	#superclass : 'TestCase',
+	#instVars : [
+		'df'
+	],
+	#category : 'AI-DataPartitioners-DataFrameTests',
+	#package : 'AI-DataPartitioners-DataFrameTests'
 }
 
-{ #category : #tests }
+{ #category : 'running' }
+AIRandomPartitionerDataFrameTest >> setUp [
+	"Build the fixture stored in df: three rows named A, B and C, with three named columns."
+	super setUp.
+	df := DataFrame withRows: #( #( 'Barcelona' 1.609 true ) #( 'Dubai' 2.789 true ) #( 'London' 8.788 false ) ).
+	df
+		rowNames: #( 'A' 'B' 'C' );
+		columnNames: #( 'City' 'Population' 'BeenThere' )
+]
+
+{ #category : 'tests' }
 AIRandomPartitionerDataFrameTest >> testPartitionDataFrameWithCustomRowNames [
 	| dataFrame sizes partitioner subsets |
 
@@ -30,7 +44,7 @@ AIRandomPartitionerDataFrameTest >> testPartitionDataFrameWithCustomRowNames [
 			self assert: (dataFrame row: rowName) equals: (eachSubset row: rowName) ] ].
 ]
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerDataFrameTest >> testPartitionDataSeriesWithCustomKeys [
 	| series sizes partitioner subsets |
 
@@ -54,7 +68,7 @@ AIRandomPartitionerDataFrameTest >> testPartitionDataSeriesWithCustomKeys [
 			self assert: key equals: (series keyAtValue: value) ] ].
 ]
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerDataFrameTest >> testPartitionDataSeriesWithDefaultKeys [
 	| series sizes partitioner subsets |
 
@@ -76,3 +90,38 @@ AIRandomPartitionerDataFrameTest >> testPartitionDataSeriesWithDefaultKeys [
 		eachSubset keysAndValuesDo: [ :key :value |
 			self assert: key equals: (series keyAtValue: value) ] ].
 ]
+
+{ #category : 'tests' }
+AIRandomPartitionerDataFrameTest >> testSplitTrainTestFromUsingTargetColumnWithProportionsShuffle [
+	"Split the fixture built in setUp with a fixed seed and compare the
+	result, as a whole, against a hand-built AIPartitionedDataSet."
+
+	| expected actual |
+
+	expected := AIPartitionedDataSet new.
+	expected xTrain: (DataFrame
+		withRows: #( #( 'Barcelona' 1.609 ) #( 'London' 8.788 ))
+		rowNames: #('A' 'C')
+		columnNames: #( 'City' 'Population' )).
+	expected xTest: (DataFrame
+		withRows: #( #( 'Dubai' 2.789 ))
+		rowNames: #('B')
+		columnNames: #( 'City' 'Population' )).
+	expected yTrain: (DataFrame
+		withRows: #( #( true ) #( false ))
+		rowNames: #('A' 'C')
+		columnNames: #( 'BeenThere' )).
+	expected yTest: (DataFrame
+		withRows: #( #( true ))
+		rowNames: #('B')
+		columnNames: #( 'BeenThere' )).
+
+	"A fixed seed is passed; the assertion below relies on the split being reproducible for it."
+	actual := AIRandomPartitioner new
+		splitTrainTestFrom: df
+		usingTargetColumn: #('BeenThere')
+		withProportions: #(0.7 0.3)
+		seed: 1.
+
+	self assert: actual equals: expected
+]
diff --git a/src/AI-DataPartitioners-DataFrameTests/package.st b/src/AI-DataPartitioners-DataFrameTests/package.st
@@ -1 +1 @@
-Package { #name : #'AI-DataPartitioners-DataFrameTests' }
+Package { #name : 'AI-DataPartitioners-DataFrameTests' }
diff --git a/src/AI-DataPartitioners-Tests/AIHashPartitionerTest.class.st b/src/AI-DataPartitioners-Tests/AIHashPartitionerTest.class.st
@@ -1,10 +1,11 @@
 Class {
-	#name : #AIHashPartitionerTest,
-	#superclass : #TestCase,
-	#category : #'AI-DataPartitioners-Tests'
+	#name : 'AIHashPartitionerTest',
+	#superclass : 'TestCase',
+	#category : 'AI-DataPartitioners-Tests',
+	#package : 'AI-DataPartitioners-Tests'
 }
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIHashPartitionerTest >> testSplitDataWithProportionsCase1 [
 
 	| data proportions expectedSizes subsets subsetsSizes |

diff --git a/src/AI-DataPartitioners-Tests/AIRandomPartitionerTest.class.st b/src/AI-DataPartitioners-Tests/AIRandomPartitionerTest.class.st
@@ -1,10 +1,15 @@
 Class {
-	#name : #AIRandomPartitionerTest,
-	#superclass : #TestCase,
-	#category : #'AI-DataPartitioners-Tests'
+	#name : 'AIRandomPartitionerTest',
+	#superclass : 'TestCase',
+	#instVars : [
+		'partitioner',
+		'df'
+	],
+	#category : 'AI-DataPartitioners-Tests',
+	#package : 'AI-DataPartitioners-Tests'
 }
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerTest >> testSplitDataWithProportionsCase1 [
 
 	| data proportions expectedSizes subsets subsetsSizes |
@@ -20,7 +25,7 @@ AIRandomPartitionerTest >> testSplitDataWithProportionsCase1 [
 	self assert: subsetsSizes equals: expectedSizes
 ]
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerTest >> testSplitDataWithProportionsCase2 [
 
 	| data proportions expectedSizes subsets subsetsSizes |
@@ -36,7 +41,7 @@ AIRandomPartitionerTest >> testSplitDataWithProportionsCase2 [
 	self assert: subsetsSizes equals: expectedSizes
 ]
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerTest >> testSplitDataWithSizesCase1 [
 
 	| data sizes subsets subsetsSizes |
@@ -51,7 +56,7 @@ AIRandomPartitionerTest >> testSplitDataWithSizesCase1 [
 	self assert: subsetsSizes equals: sizes
 ]
 
-{ #category : #tests }
+{ #category : 'tests' }
 AIRandomPartitionerTest >> testSplitDataWithSizesCase2 [
 
 	| data sizes subsets subsetsSizes |

diff --git a/src/AI-DataPartitioners-Tests/package.st b/src/AI-DataPartitioners-Tests/package.st
@@ -1 +1 @@
-Package { #name : #'AI-DataPartitioners-Tests' }
+Package { #name : 'AI-DataPartitioners-Tests' }
diff --git a/src/AI-DataPartitioners/AIHashPartitioner.class.st b/src/AI-DataPartitioners/AIHashPartitioner.class.st
@@ -30,18 +30,19 @@ subsets := AIHashPartitioner split: dataFrame withProportions: #(0.8 0.2).
 ```
 "
 Class {
-	#name : #AIHashPartitioner,
-	#superclass : #Object,
-	#category : #'AI-DataPartitioners'
+	#name : 'AIHashPartitioner',
+	#superclass : 'Object',
+	#category : 'AI-DataPartitioners',
+	#package : 'AI-DataPartitioners'
 }
 
-{ #category : #api }
+{ #category : 'api' }
 AIHashPartitioner class >> split: aCollection withProportions: aCollectionOfProportions [
 
 	^ self new split: aCollection withProportions: aCollectionOfProportions
 ]
 
-{ #category : #api }
+{ #category : 'api' }
 AIHashPartitioner >> split: aCollection withProportions: aCollectionOfProportions [
 
 	| cumulativeProportionsMap indexesMap |

diff --git a/src/AI-DataPartitioners/AIPartitionedDataSet.class.st b/src/AI-DataPartitioners/AIPartitionedDataSet.class.st
@@ -0,0 +1,101 @@
+"
+Encapsulates the result of a train-test split, holding four variables:
+
+# Attributes
+
+- xTrain `DataFrame`: Training features.
+- xTest `DataFrame`: Testing features.
+- yTrain `DataFrame` or `DataSeries`: Training target variable.
+- yTest `DataFrame` or `DataSeries`: Testing target variable.
+
+This class is designed to provide a structured representation of the result obtained from splitting a dataset into training and testing sets, making it convenient to manage and access the different subsets of data during machine learning workflows.
+Two instances compare equal (#=) when they have the same class and their four parts are pairwise equal; #hash is computed from the same four parts.
+"
+Class {
+	#name : 'AIPartitionedDataSet',
+	#superclass : 'Object',
+	#instVars : [
+		'xTrain',
+		'xTest',
+		'yTrain',
+		'yTest'
+	],
+	#category : 'AI-DataPartitioners',
+	#package : 'AI-DataPartitioners'
+}
+
+{ #category : 'comparing' }
+AIPartitionedDataSet >> = aPartitionedDataSet [
+	"Answer true when the argument is the receiver itself, or when it is
+	an instance of the same class whose four parts are pairwise equal to
+	the receiver's. Answer false otherwise."
+
+	self == aPartitionedDataSet ifTrue: [ ^ true ].
+	self class = aPartitionedDataSet class ifFalse: [ ^ false ].
+
+	"Parts are compared in a fixed order; the nested blocks stop at the
+	first mismatch, so later parts are not compared once one differs."
+
+	^ self xTest = aPartitionedDataSet xTest and: [
+		self yTest = aPartitionedDataSet yTest and: [
+			self xTrain = aPartitionedDataSet xTrain and: [
+				self yTrain = aPartitionedDataSet yTrain ] ] ]
+]
+
+{ #category : 'comparing' }
+AIPartitionedDataSet >> hash [
+	"Defined because #= is redefined: XOR the species hash with the hash of each of the four parts."
+
+	^ (((self species hash bitXor: self xTest hash)
+		bitXor: self xTrain hash)
+		bitXor: self yTest hash)
+		bitXor: self yTrain hash
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> xTest [
+	"Answer the testing features held by the receiver."
+	^ xTest
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> xTest: anObject [
+	"Store anObject as the testing features; the argument is kept as given, without any check."
+	xTest := anObject
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> xTrain [
+	"Answer the training features held by the receiver."
+	^ xTrain
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> xTrain: anObject [
+	"Store anObject as the training features; the argument is kept as given, without any check."
+	xTrain := anObject
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> yTest [
+	"Answer the testing target variable held by the receiver."
+	^ yTest
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> yTest: anObject [
+	"Store anObject as the testing target variable; the argument is kept as given, without any check."
+	yTest := anObject
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> yTrain [
+	"Answer the training target variable held by the receiver."
+	^ yTrain
+]
+
+{ #category : 'accessing' }
+AIPartitionedDataSet >> yTrain: anObject [
+	"Store anObject as the training target variable; the argument is kept as given, without any check."
+	yTrain := anObject
+]