Skip to content

Commit

Permalink
fix: add sliced string as a separate column (#71)
Browse files (browse the repository at this point in the history)
  • Loading branch information
premsrii authored Jan 25, 2023
1 parent b03d46a commit 743f617
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 59 deletions.
50 changes: 25 additions & 25 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 25 additions & 25 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -480,9 +480,9 @@ jsonschema==4.17.3 ; python_version >= "3.8" and python_version < "3.11" \
jupyter-client==7.4.9 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:214668aaea208195f4c13d28eb272ba79f945fc0cf3f11c7092c20b2ca1980e7 \
--hash=sha256:52be28e04171f07aed8f20e1616a5a552ab9fee9cbbe6c1896ae170c3880d392
jupyter-core==5.1.4 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:80d29fef4210ca8d30f25018a28d06dcc582bb1a82b848f2eab61a349a007af1 \
--hash=sha256:fe812ffffd0ead286327dc570888684183cef4070e57d9ebadc92f3bd9c9a05d
jupyter-core==5.1.5 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:83064d61bb2a9bc874e8184331c117b3778c2a7e1851f60cb00d273ceb3285ae \
--hash=sha256:8e54c48cde1e0c8345f64bcf9658b78044ddf02b273726cea9d9f59be4b02130
jupyterlab-widgets==3.0.5 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:a04a42e50231b355b7087e16a818f541e53589f7647144ea0344c4bf16f300e5 \
--hash=sha256:eeaecdeaf6c03afc960ddae201ced88d5979b4ca9c3891bcb8f6631af705f5ef
Expand Down Expand Up @@ -1146,28 +1146,28 @@ pyzmq==25.0.0 ; python_version >= "3.8" and python_version < "3.11" \
requests==2.28.2 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa \
--hash=sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf
scikit-learn==1.2.0 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:0834e4cec2a2e0d8978f39cb8fe1cad3be6c27a47927e1774bf5737ea65ec228 \
--hash=sha256:184a42842a4e698ffa4d849b6019de50a77a0aa24d26afa28fa49c9190bb144b \
--hash=sha256:1beaa631434d1f17a20b1eef5d842e58c195875d2bc11901a1a70b5fe544745b \
--hash=sha256:23a88883ca60c571a06278e4726b3b51b3709cfa4c93cacbf5568b22ba960899 \
--hash=sha256:25ba705ee1600ffc5df1dccd8fae129d7c6836e44ffcbb52d78536c9eaf8fcf9 \
--hash=sha256:40f3ff68c505cb9d1f3693397c73991875d609da905087e00e7b4477645ec67b \
--hash=sha256:4e1ea0bc1706da45589bcf2490cde6276490a1b88f9af208dbb396fdc3a0babf \
--hash=sha256:5546a8894a0616e92489ef995b39a0715829f3df96e801bb55cbf196be0d9649 \
--hash=sha256:680b65b3caee469541385d2ca5b03ff70408f6c618c583948312f0d2125df680 \
--hash=sha256:6b63ca2b0643d30fbf9d25d93017ed3fb8351f31175d82d104bfec60cba7bb87 \
--hash=sha256:83c772fa8c64776ad769fd764752c8452844307adcf10dee3adcc43988260f21 \
--hash=sha256:867023a044fdfe59e5014a7fec7a3086a8928f10b5dce9382eedf4135f6709a2 \
--hash=sha256:bc7073e025b62c1067cbfb76e69d08650c6b9d7a0e7afdfa20cb92d4afe516f6 \
--hash=sha256:ceb0008f345188aa236e49c973dc160b9ed504a3abd7b321a0ecabcb669be0bd \
--hash=sha256:d395730f26d8fc752321f1953ddf72647c892d8bed74fad4d7c816ec9b602dfa \
--hash=sha256:da29d2e379c396a63af5ed4b671ad2005cd690ac373a23bee5a0f66504e05272 \
--hash=sha256:de897720173b26842e21bed54362f5294e282422116b61cd931d4f5d870b9855 \
--hash=sha256:e9535e867281ae6987bb80620ba14cf1649e936bfe45f48727b978b7a2dbe835 \
--hash=sha256:f17420a8e3f40129aeb7e0f5ee35822d6178617007bb8f69521a2cefc20d5f00 \
--hash=sha256:fc0a72237f0c56780cf550df87201a702d3bdcbbb23c6ef7d54c19326fa23f19 \
--hash=sha256:fd3480c982b9e616b9f76ad8587804d3f4e91b4e2a6752e7dafb8a2e1f541098
scikit-learn==1.2.1 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:479aedd0abedbda6b8b4529145fe4cd8622f69f726a72cef8f75548a93eeb1e1 \
--hash=sha256:54731e2c2fbff40da6d76cbb9022ace5f44a4020a10bd5cd92107e86882bad15 \
--hash=sha256:5523e21ab2b4d52b2bd41bedd335dbe8f3c1b5f6dd7c9c001b2e17ec9818af8d \
--hash=sha256:559f66e12f93b34c8c85c0a5728c3b8af98f04eb12f2c9ee18ea3c82c3d2fad1 \
--hash=sha256:5a8111f3c7a314017ebf90d6feab861c11d1ca14f3dbafb39abcc31aa4c54ba6 \
--hash=sha256:5b2c5d9930ced2b7821ad936b9940706ccb5471d89b8a516bb641cec87257d1c \
--hash=sha256:61bb9c654b5d2e6cdd4b1c7e6048fc66270c1682bda1b0f7d2726fdae09010f4 \
--hash=sha256:70fa30d146b7e9d0c256e73e271b3e17f23123b7c4adcbde1a385031adf59090 \
--hash=sha256:a9abf17d177df54e529154f26acfd42930e19117d045e8a9a8e893ca82dd94ec \
--hash=sha256:bed9f75763bd392c094bf474c7ab75a01d68b15146ea7a20c0f9ff6fb3063dad \
--hash=sha256:c722f3446ad8c4f1a93b2399fe1a188635b94709a3f25e6f4d61efbe75fe8eaa \
--hash=sha256:c9285275a435d1f8f47bbe3500346ab9ead2499e0e090518404d318ea90d1c1c \
--hash=sha256:cba0c7c6bf1493f8ce670bab69f9317874826ee838988de377ae355abd4d74cf \
--hash=sha256:d00e46a2a7fce6e118ed0f4c6263785bf6c297a94ffd0cd7b32455043c508cc8 \
--hash=sha256:d8bcd303dd982494842a3f482f844d539484c6043b4eed896b43ea8e5f609a21 \
--hash=sha256:da0e2d50a8435ea8dc5cd21f1fc1a45d329bae03dcca92087ebed859d22d184e \
--hash=sha256:dbb7831b2308c67bb6dd83c5ea3cdaf8e8cafd2de4000b93d78bb689126bd2cf \
--hash=sha256:dc838b5a4057c55ba81b82316ea8bf443af445f96eb21500b0e40618017e0923 \
--hash=sha256:dcfab6a19b236194af88771d8e6e778a60c3339248ab0018696ebf2b7c8bed4b \
--hash=sha256:e0ee4d4d32c94e082344308528f7b3c9294b60ab19c84eb37a2d9c88bdffd9d1 \
--hash=sha256:fbf8a5c893c9b4b99bcc7ed8fb3e8500957a113f4101860386d06635520f7cfb
scipy==1.10.0 ; python_version >= "3.8" and python_version < "3.11" \
--hash=sha256:0490dc499fe23e4be35b8b6dd1e60a4a34f0c4adb30ac671e6332446b3cbbb5a \
--hash=sha256:0ab2a58064836632e2cec31ca197d3695c86b066bc4818052b3f5381bfd2a728 \
Expand Down
10 changes: 5 additions & 5 deletions src/sk_transformers/string_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,10 +347,10 @@ class StringSlicerTransformer(BaseTransformer):
transformer.fit_transform(X)
```
```
foo bar
0 ac jk
1 df mn
2 gi pq
foo bar foo_slice bar_slice
0 abc jkl ac jk
1 def mno df mn
2 ghi pqr gi pq
```
Args:
Expand Down Expand Up @@ -381,7 +381,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
X = check_ready_to_transform(self, X, [feature[0] for feature in self.features])

for feature, slice_args in self.features:
- X[feature] = [x[slice(*slice_args)] for x in X[feature]]
+ X[feature + "_slice"] = [x[slice(*slice_args)] for x in X[feature]]

return X

Expand Down
10 changes: 6 additions & 4 deletions tests/test_transformer/test_string_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,23 +128,23 @@ def test_string_slicer_transformer_in_pipeline(X_strings):

expected = pd.DataFrame(
{
"email": [
"email_slice": [
"test@",
"test1",
"test_",
"test_",
"ttt@t",
"test_",
],
"strings_1": [
"strings_1_slice": [
"a_string",
"another_",
"a_third_",
"a_fourth",
"a_fifth_",
"a_sixth_",
],
"strings_2": [
"strings_2_slice": [
"i_o__",
"i_nte",
"i hr",
Expand All @@ -156,7 +156,9 @@ def test_string_slicer_transformer_in_pipeline(X_strings):
)

assert pipeline.steps[0][0] == "stringslicertransformer"
- assert result.equals(expected)
+ assert result[["email_slice", "strings_1_slice", "strings_2_slice"]].equals(
+ expected
+ )


def test_string_splitter_transformer_in_pipeline(X_strings):
Expand Down

0 comments on commit 743f617

Please sign in to comment.