diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6f81de218539..005a850cdcb1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,6 +29,12 @@ repos: hooks: - id: nbstripout exclude: .+/rendered/.+ + - repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + additional_dependencies: + - tomli - repo: local hooks: - id: ruff diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 42a15e99ae5a..540da64a5cb8 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -46,5 +46,5 @@ members of the project's leadership. ## Attribution -Parts of this CoC are adapated from the [Dask code of +Parts of this CoC are adapted from the [Dask code of conduct](https://github.com/dask/governance/blob/main/code-of-conduct.md). diff --git a/ci/udf/CMakeLists.txt b/ci/udf/CMakeLists.txt index 33213e275be1..0bf5fae3f2ec 100644 --- a/ci/udf/CMakeLists.txt +++ b/ci/udf/CMakeLists.txt @@ -42,6 +42,6 @@ endfunction(COMPILE_TO_IR) add_library(udfsample SHARED udf-sample.cc) add_library(udasample SHARED uda-sample.cc) -# Custom targest to cross compile UDA/UDF to ir +# Custom targets to cross compile UDA/UDF to ir COMPILE_TO_IR(udf-sample.cc) COMPILE_TO_IR(uda-sample.cc) diff --git a/ci/udf/lib/udf.h b/ci/udf/lib/udf.h index cd07366f8103..04f4b2e00533 100644 --- a/ci/udf/lib/udf.h +++ b/ci/udf/lib/udf.h @@ -168,7 +168,7 @@ class FunctionContext { void Free(int64_t byte_size); /// Methods for maintaining state across UDF/UDA function calls. SetFunctionState() can - /// be used to store a pointer that can then be retreived via GetFunctionState(). If + /// be used to store a pointer that can then be retrieved via GetFunctionState(). If /// GetFunctionState() is called when no pointer is set, it will return /// NULL. SetFunctionState() does not take ownership of 'ptr'; it is up to the UDF/UDA /// to clean up any function state if necessary. 
@@ -599,7 +599,7 @@ struct StringVal : public AnyVal { struct DecimalVal : public impala_udf::AnyVal { /// Decimal data is stored as an unscaled integer value. For example, the decimal 1.00 - /// (precison 3, scale 2) is stored as 100. The byte size necessary to store the decimal + /// (precision 3, scale 2) is stored as 100. The byte size necessary to store the decimal /// depends on the precision, which determines which field of the union should be used to /// store and manipulate the unscaled value. // diff --git a/ci/udf/udf-sample.cc b/ci/udf/udf-sample.cc index 14f0003beb40..667b672dfb50 100644 --- a/ci/udf/udf-sample.cc +++ b/ci/udf/udf-sample.cc @@ -120,7 +120,7 @@ void ReturnConstantArgPrepare( } } -// Retreives and returns the shared state set in the prepare function +// Retrieves and returns the shared state set in the prepare function IntVal ReturnConstantArg(FunctionContext* context, const IntVal& const_val) { IntVal* state = reinterpret_cast<IntVal*>( context->GetFunctionState(FunctionContext::THREAD_LOCAL)); @@ -131,7 +131,7 @@ IntVal ReturnConstantArg(FunctionContext* context, const IntVal& const_val) { void ReturnConstantArgClose( FunctionContext* context, FunctionContext::FunctionStateScope scope) { if (scope == FunctionContext::THREAD_LOCAL) { - // Retreive and deallocate the shared state + // Retrieve and deallocate the shared state void* state = context->GetFunctionState(scope); context->Free(reinterpret_cast<uint8_t*>(state)); context->SetFunctionState(scope, NULL); diff --git a/docs/backends/Impala.md b/docs/backends/Impala.md index fd4bcbc7286c..87d557b841d7 100644 --- a/docs/backends/Impala.md +++ b/docs/backends/Impala.md @@ -262,7 +262,7 @@ below). ### Creating an empty table To create an empty table, you must declare an Ibis schema that will be -translated to the appopriate Impala schema and data types. +translated to the appropriate Impala schema and data types.
As Ibis types are simplified compared with Impala types, this may expand in the future to include a more fine-grained schema declaration. @@ -1338,7 +1338,7 @@ connection semantics are similar to the other access methods for working with secure clusters. Specifically, after authenticating yourself against Kerberos (e.g., by issuing -the appropriate `kinit` commmand), simply pass `auth_mechanism='GSSAPI'` or +the appropriate `kinit` command), simply pass `auth_mechanism='GSSAPI'` or `auth_mechanism='LDAP'` (and set `kerberos_service_name` if necessary along with `user` and `password` if necessary) to the `ibis.impala_connect(...)` method when instantiating an `ImpalaConnection`. @@ -1355,7 +1355,7 @@ when connecting to a Kerberized cluster. Because some Ibis commands create HDFS directories as well as new Impala databases and/or tables, your user will require the necessary privileges. -## Default Configuation Values for CDH Components +## Default Configuration Values for CDH Components Cloudera CDH ships with HDFS, Impala, Hive and many other components. Sometimes it's not obvious what default configuration values these tools are diff --git a/docs/blog/ffill-and-bfill-using-ibis.md b/docs/blog/ffill-and-bfill-using-ibis.md index 692aeef3cc7a..5e09fb5daa32 100644 --- a/docs/blog/ffill-and-bfill-using-ibis.md +++ b/docs/blog/ffill-and-bfill-using-ibis.md @@ -5,7 +5,7 @@ Suppose you have a table of data mapping events and dates to values, and that this data contains gaps in values. Suppose you want to forward fill these gaps such that, one-by-one, -if a value is null, it is replaced by the non-null value preceeding. +if a value is null, it is replaced by the non-null value preceding. For example, you might be measuring the total value of an account over time. 
Saving the same value until that value changes is an inefficient use of space, diff --git a/docs/blog/ibis_substrait_to_duckdb.md b/docs/blog/ibis_substrait_to_duckdb.md index 622b178502bc..db4aba618d39 100644 --- a/docs/blog/ibis_substrait_to_duckdb.md +++ b/docs/blog/ibis_substrait_to_duckdb.md @@ -7,7 +7,7 @@ of different analytical execution engines, most of which (but not all) speak some dialect of SQL. Today, Ibis accomplishes this with a lot of help from `sqlalchemy` and `sqlglot` -to handle differences in dialect, or we interact directly with avalable Python +to handle differences in dialect, or we interact directly with available Python bindings (for instance with the `pandas`, `datafusion`, and `polars` backends). Ibis goes to great lengths to generate sane and consistent SQL for those diff --git a/docs/blog/rendered/campaign-finance.ipynb b/docs/blog/rendered/campaign-finance.ipynb index 4130c5bdc0e8..3a5f05ddbf66 100644 --- a/docs/blog/rendered/campaign-finance.ipynb +++ b/docs/blog/rendered/campaign-finance.ipynb @@ -864,7 +864,7 @@ "metadata": {}, "source": [ "That worked well! There are 0 nulls in the resulting column, so we always were\n", - "able to determine the elction type." + "able to determine the election type." 
] }, { diff --git a/docs/how_to/ffill_bfill_w_window.md b/docs/how_to/ffill_bfill_w_window.md index 83865fc3d3dd..8dd717d911d5 100644 --- a/docs/how_to/ffill_bfill_w_window.md +++ b/docs/how_to/ffill_bfill_w_window.md @@ -1,7 +1,7 @@ # How to `ffill` and `bfill` using Window Functions If you have gaps in your data and need to fill them in using a simple forward fill -(given an order, null values are replaced by the value preceeding) or backward fill +(given an order, null values are replaced by the value preceding) or backward fill (given an order, null values are replaced by the value following), then you can do this in Ibis: === "`ffill`" diff --git a/docs/release_notes.md b/docs/release_notes.md index 90ef3f1d4797..e60ca35623b4 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -177,7 +177,7 @@ Release Notes * **common:** support `Callable` arguments and return types in `Validator.from_annotable()` ([ae57c36](https://github.com/ibis-project/ibis/commit/ae57c367da307880a2d067ffb1081fecd42a83bb)) * **common:** support positional only and keyword only arguments in annotations ([340dca1](https://github.com/ibis-project/ibis/commit/340dca1dd046d1ae1c8e42b92016296137021e59)) * **dask/pandas:** raise OperationNotDefinedError exc for not defined operations ([2833685](https://github.com/ibis-project/ibis/commit/2833685dff46239d09986d689498b59804e04d59)) -* **datafusion:** implement ops.Degress, ops.Radians ([7e61391](https://github.com/ibis-project/ibis/commit/7e61391819f17ce87f1f63123fd8862a18eb3098)) +* **datafusion:** implement ops.Degrees, ops.Radians ([7e61391](https://github.com/ibis-project/ibis/commit/7e61391819f17ce87f1f63123fd8862a18eb3098)) * **datafusion:** implement ops.Exp ([7cb3ade](https://github.com/ibis-project/ibis/commit/7cb3adef86a781ae422c8774a2da7a96313b22ea)) * **datafusion:** implement ops.Pi, ops.E ([5a74cb4](https://github.com/ibis-project/ibis/commit/5a74cb4e409593ded3572df6383fd273c17d44f3)) * **datafusion:** implement 
ops.RandomScalar ([5d1cd0f](https://github.com/ibis-project/ibis/commit/5d1cd0f0658f9f2efcb46949febb2d59028bb18d)) @@ -210,7 +210,7 @@ Release Notes * **examples:** add wowah_data data to examples ([bf9a7cc](https://github.com/ibis-project/ibis/commit/bf9a7cc390991e29cc0552197b4b5f9a4973ee1a)) * **examples:** enable progressbar and faster hashing ([4adfe29](https://github.com/ibis-project/ibis/commit/4adfe292307d042175b4221a33ebbfc3d53029e8)) * **impala:** implement ops.Clip ([279fd78](https://github.com/ibis-project/ibis/commit/279fd78e9eed00492f9c07037b148edada2e918c)) -* **impala:** implement ops.Radians, ops.Degress ([a794ace](https://github.com/ibis-project/ibis/commit/a794ace76daa781afec83db816d45a317d27e16b)) +* **impala:** implement ops.Radians, ops.Degrees ([a794ace](https://github.com/ibis-project/ibis/commit/a794ace76daa781afec83db816d45a317d27e16b)) * **impala:** implement ops.RandomScalar ([874f2ff](https://github.com/ibis-project/ibis/commit/874f2ffaed9ab200268b88c27b3d798cd430eaf1)) * **io:** add to_parquet, to_csv to backends ([fecca42](https://github.com/ibis-project/ibis/commit/fecca421ad405465c4c001afa4e2976c07164530)) * **ir:** add `ArrayFilter` operation ([e719d60](https://github.com/ibis-project/ibis/commit/e719d601cbd9cc86d181f28a3658146985c052dc)) @@ -339,7 +339,7 @@ Release Notes * **sqlalchemy:** use `sa.true` instead of Python literal ([8423eba](https://github.com/ibis-project/ibis/commit/8423eba4a46d13cbe00968a12fe3815ce28ecf8c)) * **sqlalchemy:** use indexed group by key references everywhere possible ([9f1ddd8](https://github.com/ibis-project/ibis/commit/9f1ddd8328a9c10155ff934b0fa157d58b63e4fe)) * **sql:** ensure that set operations generate valid sql in the presence of additional constructs such as sort keys ([3e2c364](https://github.com/ibis-project/ibis/commit/3e2c3648aba41b74eec54a31d14cc72e5bef3e31)) -* **sqlite:** explicite disallow array in literal 
([de73b37](https://github.com/ibis-project/ibis/commit/de73b37aeeab868fde3612cb993d16725cf3d19a)) +* **sqlite:** explicitly disallow array in literal ([de73b37](https://github.com/ibis-project/ibis/commit/de73b37aeeab868fde3612cb993d16725cf3d19a)) * **sqlite:** fix random scalar range ([26d0dde](https://github.com/ibis-project/ibis/commit/26d0ddeaeca096919de966cbf2b7907a54331051)) * support negative string indices ([f84a54d](https://github.com/ibis-project/ibis/commit/f84a54da3629da0b602ecf081db6a0ba07aa6192)) * **trino:** workaround broken dialect ([b502faf](https://github.com/ibis-project/ibis/commit/b502faf19bd0cf3b3b9b00e813f8f6a20d19a4ee)) @@ -371,7 +371,7 @@ Release Notes * **datatype:** clean up parsing rules ([c15fb5f](https://github.com/ibis-project/ibis/commit/c15fb5fc27c13cb679199bd1ed2aeca6ed8c697f)) * **datatype:** remove `Category` type and related APIs ([bb0ee78](https://github.com/ibis-project/ibis/commit/bb0ee786271dc21cb18a8561f523b4cee2549ce0)) * **datatype:** remove `StructType.pairs` property in favor of identical `fields` attribute ([6668122](https://github.com/ibis-project/ibis/commit/66681223a10ad421cfaa5b000dd2dffbf114ba42)) -* **datatypes:** move sqlalchemy datatypes to specfic backend ([d7b49eb](https://github.com/ibis-project/ibis/commit/d7b49eb2fcce396a40707c9ef4838fba44a4a477)) +* **datatypes:** move sqlalchemy datatypes to specific backend ([d7b49eb](https://github.com/ibis-project/ibis/commit/d7b49eb2fcce396a40707c9ef4838fba44a4a477)) * **datatypes:** remove `String` parent type from `JSON` type ([34f3898](https://github.com/ibis-project/ibis/commit/34f3898253c35ad81876cd222888cd3c3c8e1b0a)) * **datatype:** use a dictionary to store `StructType` fields rather than `names` and `types` tuples ([84455ac](https://github.com/ibis-project/ibis/commit/84455ac1c0c0ca6941f871a5d49b39864975febc)) * **datatype:** use lazy dispatch when inferring pandas Timedelta objects 
([e5280ea](https://github.com/ibis-project/ibis/commit/e5280ea14b100afb8ba04d7dcb449a6fc4ccf0ab)) @@ -574,7 +574,7 @@ Release Notes ### Refactors -* **bigquery:** explicite disallow INT64 in JS UDF ([fb33bf9](https://github.com/ibis-project/ibis/commit/fb33bf9b1d33f3f38010f983552330f084152b8b)) +* **bigquery:** explicitly disallow INT64 in JS UDF ([fb33bf9](https://github.com/ibis-project/ibis/commit/fb33bf9b1d33f3f38010f983552330f084152b8b)) * **datatype:** add custom sqlalchemy nested types for backend differentiation ([dec70f5](https://github.com/ibis-project/ibis/commit/dec70f53ebe45103f1392be5a5c738f163700c69)) * **datatype:** introduce to_sqla_type dispatching on dialect ([a8bbc00](https://github.com/ibis-project/ibis/commit/a8bbc0011f4c4a7519adaf841ff8cd5bf8de7a80)) * **datatypes:** remove Geography and Geometry types in favor of GeoSpatial ([d44978c](https://github.com/ibis-project/ibis/commit/d44978ca53d4261871173f277289e2e479073fcf)) @@ -641,7 +641,7 @@ Release Notes * **ir:** Removed `Node.output_type` property in favor of abstractmethod `Node.to_expr()` which now must be explicitly implemented * **ir:** `Expr(Op(Expr(Op(Expr(Op)))))` is now represented as `Expr(Op(Op(Op)))`, so code using ibis internals must be migrated * **pandas:** Use timezone conversion functions to compute the original machine localized value -* **common:** use `ibis.common.validators.{Patameter, Signature}` instead +* **common:** use `ibis.common.validators.{Parameter, Signature}` instead * **ir:** `ibis.expr.lineage.lineage()` is now removed * **ir:** removed `ir.DestructValue`, `ir.DestructScalar` and `ir.DestructColumn`, use `table.unpack()` instead * **ir:** removed `Node.root_tables()` method, use `ibis.expr.analysis.find_immediate_parent_tables()` instead @@ -765,7 +765,7 @@ Release Notes * support Table.fillna for SQL backends ([26d4cac](https://github.com/ibis-project/ibis/commit/26d4cacf93f41b66f1407e313e86ec9f0a48aa29)) * **trino:** add `bit_xor` aggregation 
([830acf4](https://github.com/ibis-project/ibis/commit/830acf4d896a5d3bfedd2549aada3aa2f274e59b)) * **trino:** add `EXTRACT`-based functionality ([6549657](https://github.com/ibis-project/ibis/commit/654965705d437ec7f90c0658d3f436a355414b71)) -* **trino:** add milisecond scale to *_trunc function ([3065248](https://github.com/ibis-project/ibis/commit/3065248be55a8d691d6e552ce0eb3c9634232c95)) +* **trino:** add millisecond scale to *_trunc function ([3065248](https://github.com/ibis-project/ibis/commit/3065248be55a8d691d6e552ce0eb3c9634232c95)) * **trino:** add some basic aggregation ops ([7ecf7ab](https://github.com/ibis-project/ibis/commit/7ecf7ab939b2f85615a7694240ba98782d678e5f)) * **trino:** extract milliseconds ([09517a5](https://github.com/ibis-project/ibis/commit/09517a5b8b21cfbad6b65c8a0f5c798a36ff212c)) * **trino:** implement `approx_median` ([1cba8bd](https://github.com/ibis-project/ibis/commit/1cba8bd9f43fc5756a32341c6bcf18f45a1c8b1d)) @@ -1355,7 +1355,7 @@ Release Notes * **repr:** when formatting DestructValue selections, use struct field names as column names ([d01fe42](https://github.com/ibis-project/ibis/commit/d01fe42b4b8055b29cca8dc5048477616405c176)) * **sqlalchemy:** fix parsing and construction of nested array types ([e20bcc0](https://github.com/ibis-project/ibis/commit/e20bcc0941ac90a38b1263018f32ba8af5e5c267)) * **sqlalchemy:** remove unused second argument when creating temporary views ([8766b40](https://github.com/ibis-project/ibis/commit/8766b40ec8a5a853402bf7ab51629b6fb0ab252e)) -* **sqlite:** register coversion to isoformat for `pandas.Timestamp` ([fe95dca](https://github.com/ibis-project/ibis/commit/fe95dca312511b1c43b915d640434c5e3104d79c)) +* **sqlite:** register conversion to isoformat for `pandas.Timestamp` ([fe95dca](https://github.com/ibis-project/ibis/commit/fe95dca312511b1c43b915d640434c5e3104d79c)) * **sqlite:** test case with whitespace at the end of the line 
([7623ae9](https://github.com/ibis-project/ibis/commit/7623ae9597e8b82fe12ca3b5ccac5e6e8540c6fb)) * **sql:** use isoformat for timestamp literals ([70d0ba6](https://github.com/ibis-project/ibis/commit/70d0ba625fe6008ee34e233c40f6b489a751bfa5)) * **type-system:** infer null datatype for empty sequence of expressions ([f67d5f9](https://github.com/ibis-project/ibis/commit/f67d5f911fe8e1791584c051cd0bd8a007f5b8f7)) @@ -1504,7 +1504,7 @@ behavior from UUID datatypes will need to add an explicit cast first. * **expr:** fix formatting of table info using tabulate ([b110636](https://github.com/ibis-project/ibis/commit/b110636f09b11df108cf91ac8d20fd8db7ee28d3)) * fix float vs real data type detection in sqlalchemy ([24e6774](https://github.com/ibis-project/ibis/commit/24e677480f830caf367283c6815c6f759ac33d7a)) * fix list_schemas argument ([69c1abf](https://github.com/ibis-project/ibis/commit/69c1abf21fff25d877a71c791f78b0e3ece552f0)) -* fix postgres udfs and reenable ci tests ([7d480d2](https://github.com/ibis-project/ibis/commit/7d480d225d713274f8068af07cb7fcffac438691)) +* fix postgres udfs and re-enable ci tests ([7d480d2](https://github.com/ibis-project/ibis/commit/7d480d225d713274f8068af07cb7fcffac438691)) * fix tablecolumn execution for filter following join ([064595b](https://github.com/ibis-project/ibis/commit/064595b9c2a85f6532b93b7b8b5343fabe2dbe29)) * **format:** remove some newlines from formatted expr repr ([ed4fa78](https://github.com/ibis-project/ibis/commit/ed4fa78a484f1b6a08531fa406558c471dd5762f)) * **histogram:** cross_join needs onclause=True ([5d36a58](https://github.com/ibis-project/ibis/commit/5d36a58d2df83b045487e9701e309978c3dd777d)), closes [#622](https://github.com/ibis-project/ibis/issues/622) @@ -1526,7 +1526,7 @@ behavior from UUID datatypes will need to add an explicit cast first. 
* remove passing schema into register_parquet ([bdcbb08](https://github.com/ibis-project/ibis/commit/bdcbb083a112d3cc81bb98ee63a26674b5397563)) * **repr:** add ops.TimeAdd to repr binop lookup table ([fd94275](https://github.com/ibis-project/ibis/commit/fd94275e945137be3b95b4aa4c0b1cdb16b7a41d)) * **repr:** allow ops.TableNode in fmt_value ([6f57003](https://github.com/ibis-project/ibis/commit/6f57003620d21e07d31ca5d2013302ba2899fdb0)) -* reverse the predicate pushdown subsitution ([f3cd358](https://github.com/ibis-project/ibis/commit/f3cd3581b078a7f297f303866aeb30c8f826b19d)) +* reverse the predicate pushdown substitution ([f3cd358](https://github.com/ibis-project/ibis/commit/f3cd3581b078a7f297f303866aeb30c8f826b19d)) * sort_index to satisfy pandas 1.4.x ([6bac0fc](https://github.com/ibis-project/ibis/commit/6bac0fc2bec2434f5d2eb8b1c2b0328a0e5a80a3)) * **sqlalchemy:** ensure correlated subqueries FROM clauses are rendered ([3175321](https://github.com/ibis-project/ibis/commit/3175321844897ad4fa88547c9474724736685209)) * **sqlalchemy:** use corresponding_column to prevent spurious cross joins ([fdada21](https://github.com/ibis-project/ibis/commit/fdada217afbf0e8b07d421fd2da0092a20c578c7)) @@ -2068,7 +2068,7 @@ this release. ([#1498](https://github.com/ibis-project/ibis/issues/1498)) - Add `mean` and `sum` for `boolean` types in BigQuery ([#1516](https://github.com/ibis-project/ibis/issues/1516)) -- All recent versions of SQLAlchemy are now suppported +- All recent versions of SQLAlchemy are now supported ([#1384](https://github.com/ibis-project/ibis/issues/1384)) - Add support for `NUMERIC` types in the BigQuery backend ([#1534](https://github.com/ibis-project/ibis/issues/1534)) @@ -2178,7 +2178,7 @@ that all users upgrade from earlier versions of Ibis. 
call with no operation ([#1378](https://github.com/ibis-project/ibis/issues/1378)) - Fix parameterized subqueries ([#1300](https://github.com/ibis-project/ibis/issues/1300), [#1331](https://github.com/ibis-project/ibis/issues/1331), [#1303](https://github.com/ibis-project/ibis/issues/1303), [#1378](https://github.com/ibis-project/ibis/issues/1378)) -- Fix subquery extraction, which wasn\'t happening in topological +- Fix subquery extraction, which wasn't happening in topological order ([#1342](https://github.com/ibis-project/ibis/issues/1342)) - Fix parenthesization if `isnull` ([#1307](https://github.com/ibis-project/ibis/issues/1307)) - Calling drop after mutate did not work ([#1296](https://github.com/ibis-project/ibis/issues/1296), [#1299](https://github.com/ibis-project/ibis/issues/1299)) diff --git a/ibis/backends/base/sql/alchemy/__init__.py b/ibis/backends/base/sql/alchemy/__init__.py index 691c27b0bbf5..c685d97a2b2f 100644 --- a/ibis/backends/base/sql/alchemy/__init__.py +++ b/ibis/backends/base/sql/alchemy/__init__.py @@ -770,10 +770,10 @@ def create_view( database: str | None = None, overwrite: bool = False, ) -> ir.Table: - import sqlalchemy_views as sav + from sqlalchemy_views import CreateView source = self.compile(obj) - view = sav.CreateView( + view = CreateView( sa.Table( name, sa.MetaData(), @@ -790,9 +790,9 @@ def create_view( def drop_view( self, name: str, *, database: str | None = None, force: bool = False ) -> None: - import sqlalchemy_views as sav + from sqlalchemy_views import DropView - view = sav.DropView( + view = DropView( sa.Table( name, sa.MetaData(), diff --git a/ibis/backends/base/sql/alchemy/query_builder.py b/ibis/backends/base/sql/alchemy/query_builder.py index 2af0cf649a13..eeca3ac65560 100644 --- a/ibis/backends/base/sql/alchemy/query_builder.py +++ b/ibis/backends/base/sql/alchemy/query_builder.py @@ -268,8 +268,8 @@ def _add_select(self, table_set): return result if unnest_children: - # get all the unnests plus the current 
froms of the result selection - # and build up the cross join + # get all the unnests plus the current FROM clauses of the result + # selection and build up the cross join table_set = functools.reduce( functools.partial(sa.sql.FromClause.join, onclause=sa.true()), toolz.unique(toolz.concatv(unnest_children, result.get_final_froms())), diff --git a/ibis/backends/dask/tests/execution/test_operations.py b/ibis/backends/dask/tests/execution/test_operations.py index 8775bed02446..721289eb8f31 100644 --- a/ibis/backends/dask/tests/execution/test_operations.py +++ b/ibis/backends/dask/tests/execution/test_operations.py @@ -776,12 +776,12 @@ def test_where_series(t, df, left_f, right_f): col_expr > col_expr.mean(), left_f(col_expr), right_f(col_expr) ).execute() - ser = df['plain_int64'].compute() - cond = ser > ser.mean() - left = left_f(ser) + series = df['plain_int64'].compute() + cond = series > series.mean() + left = left_f(series) if not isinstance(left, pd.Series): left = pd.Series(np.repeat(left, len(cond)), name=cond.name) - expected = left.where(cond, right_f(ser)).astype(result.dtype) + expected = left.where(cond, right_f(series)).astype(result.dtype) tm.assert_series_equal(result, expected, check_index=False, check_names=False) @@ -804,8 +804,8 @@ def test_where_long(batting, batting_df): col_expr = batting['AB'] result = ibis.where(col_expr > col_expr.mean(), col_expr, 0.0).compile() - ser = batting_df['AB'] - expected = ser.where(ser > ser.mean(), other=0.0) + series = batting_df['AB'] + expected = series.where(series > series.mean(), other=0.0) tm.assert_series_equal(result.compute(), expected.compute(), check_index=False) diff --git a/ibis/backends/impala/tests/test_udf.py b/ibis/backends/impala/tests/test_udf.py index 4b9e3836fe0e..60623e5057b7 100644 --- a/ibis/backends/impala/tests/test_udf.py +++ b/ibis/backends/impala/tests/test_udf.py @@ -621,8 +621,8 @@ def test_delete_udf_db(name, inputs, snapshot): snapshot.assert_match(result, "out.sql") 
-@pytest.mark.parametrize("ser", [True, False]) -def test_create_uda(name, inputs, output, ser, snapshot): +@pytest.mark.parametrize("serialize", [True, False]) +def test_create_uda(name, inputs, output, serialize, snapshot): func = api.wrap_uda( '/foo/bar.so', inputs, @@ -631,7 +631,7 @@ def test_create_uda(name, inputs, output, ser, snapshot): init_fn='Init', merge_fn='Merge', finalize_fn='Finalize', - serialize_fn='Serialize' if ser else None, + serialize_fn='Serialize' if serialize else None, ) stmt = ddl.CreateUDA(func, name=name, database='bar') result = stmt.compile() diff --git a/ibis/backends/pandas/tests/execution/test_operations.py b/ibis/backends/pandas/tests/execution/test_operations.py index 92de36347ca6..74239d64d15d 100644 --- a/ibis/backends/pandas/tests/execution/test_operations.py +++ b/ibis/backends/pandas/tests/execution/test_operations.py @@ -575,12 +575,12 @@ def test_where_series(t, df, left_f, right_f): col_expr > col_expr.mean(), left_f(col_expr), right_f(col_expr) ).execute() - ser = df['plain_int64'] - cond = ser > ser.mean() - left = left_f(ser) + series = df['plain_int64'] + cond = series > series.mean() + left = left_f(series) if not isinstance(left, pd.Series): left = pd.Series(np.repeat(left, len(cond)), name=cond.name) - expected = left.where(cond, right_f(ser)) + expected = left.where(cond, right_f(series)) tm.assert_series_equal(result, expected, check_dtype=False) @@ -603,8 +603,8 @@ def test_where_long(batting, batting_df): col_expr = batting['AB'] result = ibis.where(col_expr > col_expr.mean(), col_expr, 0.0).execute() - ser = batting_df['AB'] - expected = ser.where(ser > ser.mean(), other=0.0).astype("float64") + series = batting_df['AB'] + expected = series.where(series > series.mean(), other=0.0).astype("float64") tm.assert_series_equal(result, expected) diff --git a/ibis/common/tests/test_grounds.py b/ibis/common/tests/test_grounds.py index 70529a3a9966..31d7bbce4e47 100644 --- a/ibis/common/tests/test_grounds.py +++
b/ibis/common/tests/test_grounds.py @@ -203,7 +203,7 @@ def __init__(self, a, b): def test_annotable(): - class InBetween(BetweenSimple): + class Between(BetweenSimple): pass argnames = ('value', 'lower', 'upper') @@ -221,12 +221,12 @@ class InBetween(BetweenSimple): assert not hasattr(obj, "__dict__") # test that a child without additional arguments doesn't have __dict__ - obj = InBetween(10, lower=2) + obj = Between(10, lower=2) assert obj.__slots__ == tuple() assert not hasattr(obj, "__dict__") assert obj == obj.copy() assert obj == copy.copy(obj) - obj2 = InBetween(10, lower=8) + obj2 = Between(10, lower=8) assert obj.copy(lower=8) == obj2 diff --git a/ibis/formats/tests/test_pandas.py b/ibis/formats/tests/test_pandas.py index 1e02ceec03bf..60a08de9747f 100644 --- a/ibis/formats/tests/test_pandas.py +++ b/ibis/formats/tests/test_pandas.py @@ -76,18 +76,18 @@ def test_dtype_to_pandas(pandas_type, ibis_type): ids=str, ) def test_dtype_from_pandas_arrow_dtype(pandas_type, ibis_type): - ser = pd.Series([], dtype=f"{pandas_type}[pyarrow]") - assert dtype_from_pandas(ser.dtype) == ibis_type + series = pd.Series([], dtype=f"{pandas_type}[pyarrow]") + assert dtype_from_pandas(series.dtype) == ibis_type def test_dtype_from_pandas_arrow_string_dtype(): - ser = pd.Series([], dtype="string[pyarrow]") - assert dtype_from_pandas(ser.dtype) == dt.String() + series = pd.Series([], dtype="string[pyarrow]") + assert dtype_from_pandas(series.dtype) == dt.String() def test_dtype_from_pandas_arrow_list_dtype(): - ser = pd.Series([], dtype=pd.ArrowDtype(pa.list_(pa.string()))) - assert dtype_from_pandas(ser.dtype) == dt.Array(dt.string) + series = pd.Series([], dtype=pd.ArrowDtype(pa.list_(pa.string()))) + assert dtype_from_pandas(series.dtype) == dt.Array(dt.string) @pytest.mark.parametrize( diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 709751e5ad03..c83b0e3159b8 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ 
-343,8 +343,8 @@ def test_add_predicate_coalesce(table): assert_equal(result, expected) # 59, if we are not careful, we can obtain broken refs - interm = table[pred1] - result = interm.filter([interm['b'] > 0]) + subset = table[pred1] + result = subset.filter([subset['b'] > 0]) assert_equal(result, expected) diff --git a/poetry.lock b/poetry.lock index a68fe5932102..b7320ff8b2fb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -444,6 +444,18 @@ files = [ {file = "cfgv-3.3.1.tar.gz", hash = "sha256:f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736"}, ] +[[package]] +name = "chardet" +version = "5.1.0" +description = "Universal encoding detector for Python 3" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.1.0-py3-none-any.whl", hash = "sha256:362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9"}, + {file = "chardet-5.1.0.tar.gz", hash = "sha256:0d62712b956bc154f85fb0a266e2a3c5913c2967e00348701b32411d6def31e5"}, +] + [[package]] name = "charset-normalizer" version = "3.1.0" @@ -685,6 +697,28 @@ files = [ {file = "cloudpickle-2.2.1.tar.gz", hash = "sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5"}, ] +[[package]] +name = "codespell" +version = "2.2.4" +description = "Codespell" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "codespell-2.2.4-py3-none-any.whl", hash = "sha256:7d984b8130108e6f82524b7d09f8b7bf2fb1e398c5d4b37d9e2bd310145b3e29"}, + {file = "codespell-2.2.4.tar.gz", hash = "sha256:0b4620473c257d9cde1ff8998b26b2bb209a35c2b7489f5dc3436024298ce83a"}, +] + +[package.dependencies] +chardet = {version = "*", optional = true, markers = "extra == \"hard-encoding-detection\""} +tomli = {version = "*", optional = true, markers = "python_version < \"3.11\" and extra == \"toml\""} + +[package.extras] +dev = ["Pygments", "build", "chardet", "flake8", "flake8-pyproject", "pytest", "pytest-cov", "pytest-dependency", "tomli"] 
+hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + [[package]] name = "colorama" version = "0.4.6" @@ -5329,4 +5363,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "c582997f4e396b85db8aa623bdf793c5151eceb28c4ecd450307fc1e083663cd" +content-hash = "5a6bc5a767eeaeedff23cff78442dee5ba020c794612013869810ea45ee2ae4a" diff --git a/pyproject.toml b/pyproject.toml index 6fe32deb95cb..51a1610c03e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,10 @@ trino = { version = ">=0.321,<1", optional = true, extras = ["sqlalchemy"] } [tool.poetry.group.dev.dependencies] black = ">=22.1.0,<24" +codespell = { version = ">=2.2.4,<3", extras = [ + "hard-encoding-detection", + "toml", +] } google-cloud-storage = ">=2.7.0,<3" ipython = ">=8.7.0,<9" poetry-dynamic-versioning = ">=0.18.0,<1" @@ -362,6 +366,13 @@ show_dot = false no_dot = true show_deps = true +[tool.codespell] +# notebooks are skipped because there's no straightforward way to ignore base64 +# encoded strings +skip = "*.lock,.direnv,.git,*.ipynb" +ignore-regex = '\b(DOUB|i[if]f|I[IF]F)\b' +builtin = "clear,rare,names" + [tool.ruff] line-length = 88 select = [ @@ -426,7 +437,7 @@ ignore = [ "SIM108", # convert everything to ternary operator "SIM114", # combine `if` branches using logical `or` operator "SIM116", # dictionary instead of `if` statements - "SIM117", # nested withs + "SIM117", # nested with statements "SIM118", # remove .keys() calls from dictionaries "SIM300", # yoda conditions "UP037", # remove quotes from type annotation diff --git a/requirements.txt b/requirements.txt index 7eea9325acd8..e2e2a6e79582 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,12 +15,14 @@ cachetools==5.3.1 ; python_version >= "3.8" and python_version < "4.0" certifi==2023.5.7 ; python_version >= "3.8" and python_version < "4.0" cffi==1.15.1 ; python_version >= 
"3.8" and python_version < "4.0" cfgv==3.3.1 ; python_version >= "3.8" and python_version < "4.0" +chardet==5.1.0 ; python_version >= "3.8" and python_version < "4.0" charset-normalizer==3.1.0 ; python_version >= "3.8" and python_version < "4.0" click-plugins==1.1.1 ; python_version >= "3.8" and python_version < "4.0" click==8.1.3 ; python_version >= "3.8" and python_version < "4.0" clickhouse-connect[arrow,numpy,pandas]==0.5.25 ; python_version >= "3.8" and python_version < "4.0" cligj==0.7.2 ; python_version >= "3.8" and python_version < "4" cloudpickle==2.2.1 ; python_version >= "3.8" and python_version < "4.0" +codespell[hard-encoding-detection,toml]==2.2.4 ; python_version >= "3.8" and python_version < "4.0" colorama==0.4.6 ; python_version >= "3.8" and python_version < "4.0" comm==0.1.3 ; python_version >= "3.8" and python_version < "4.0" coverage[toml]==7.2.7 ; python_version >= "3.8" and python_version < "4.0"