From 41f73f3a8160c08ed1557ac5243cc1bad6a41a86 Mon Sep 17 00:00:00 2001 From: Robin Date: Thu, 8 Feb 2024 14:16:41 +0100 Subject: [PATCH] docs: Realign file structure of user guide (#14360) --- .../data-structures.py} | 0 .../expressions/{null.py => missing-data.py} | 0 .../expressions.py | 0 .../{basics => getting-started}/joins.py | 0 .../reading-writing.py | 0 docs/src/rust/Cargo.toml | 20 +++--- .../data-structures.rs} | 0 .../expressions/{null.rs => missing-data.rs} | 0 .../expressions.rs | 0 .../{basics => getting-started}/joins.rs | 0 .../reading-writing.rs | 0 docs/user-guide/concepts/data-structures.md | 24 +++---- docs/user-guide/expressions/index.md | 2 +- .../expressions/{null.md => missing-data.md} | 62 +++++++++---------- docs/user-guide/getting-started.md | 60 +++++++++--------- docs/user-guide/migration/pandas.md | 4 +- mkdocs.yml | 2 +- 17 files changed, 87 insertions(+), 87 deletions(-) rename docs/src/python/user-guide/{basics/series-dataframes.py => concepts/data-structures.py} (100%) rename docs/src/python/user-guide/expressions/{null.py => missing-data.py} (100%) rename docs/src/python/user-guide/{basics => getting-started}/expressions.py (100%) rename docs/src/python/user-guide/{basics => getting-started}/joins.py (100%) rename docs/src/python/user-guide/{basics => getting-started}/reading-writing.py (100%) rename docs/src/rust/user-guide/{basics/series-dataframes.rs => concepts/data-structures.rs} (100%) rename docs/src/rust/user-guide/expressions/{null.rs => missing-data.rs} (100%) rename docs/src/rust/user-guide/{basics => getting-started}/expressions.rs (100%) rename docs/src/rust/user-guide/{basics => getting-started}/joins.rs (100%) rename docs/src/rust/user-guide/{basics => getting-started}/reading-writing.rs (100%) rename docs/user-guide/expressions/{null.md => missing-data.md} (68%) diff --git a/docs/src/python/user-guide/basics/series-dataframes.py b/docs/src/python/user-guide/concepts/data-structures.py similarity index 100% rename 
from docs/src/python/user-guide/basics/series-dataframes.py rename to docs/src/python/user-guide/concepts/data-structures.py diff --git a/docs/src/python/user-guide/expressions/null.py b/docs/src/python/user-guide/expressions/missing-data.py similarity index 100% rename from docs/src/python/user-guide/expressions/null.py rename to docs/src/python/user-guide/expressions/missing-data.py diff --git a/docs/src/python/user-guide/basics/expressions.py b/docs/src/python/user-guide/getting-started/expressions.py similarity index 100% rename from docs/src/python/user-guide/basics/expressions.py rename to docs/src/python/user-guide/getting-started/expressions.py diff --git a/docs/src/python/user-guide/basics/joins.py b/docs/src/python/user-guide/getting-started/joins.py similarity index 100% rename from docs/src/python/user-guide/basics/joins.py rename to docs/src/python/user-guide/getting-started/joins.py diff --git a/docs/src/python/user-guide/basics/reading-writing.py b/docs/src/python/user-guide/getting-started/reading-writing.py similarity index 100% rename from docs/src/python/user-guide/basics/reading-writing.py rename to docs/src/python/user-guide/getting-started/reading-writing.py diff --git a/docs/src/rust/Cargo.toml b/docs/src/rust/Cargo.toml index fea9ad2e736c..96e31ebd04b6 100644 --- a/docs/src/rust/Cargo.toml +++ b/docs/src/rust/Cargo.toml @@ -25,19 +25,19 @@ path = "home/example.rs" required-features = ["polars/lazy"] [[bin]] -name = "user-guide-basics-expressions" -path = "user-guide/basics/expressions.rs" +name = "user-guide-getting-started-expressions" +path = "user-guide/getting-started/expressions.rs" required-features = ["polars/lazy"] [[bin]] -name = "user-guide-basics-joins" -path = "user-guide/basics/joins.rs" +name = "user-guide-getting-started-joins" +path = "user-guide/getting-started/joins.rs" [[bin]] -name = "user-guide-basics-reading-writing" -path = "user-guide/basics/reading-writing.rs" +name = "user-guide-getting-started-reading-writing" 
+path = "user-guide/getting-started/reading-writing.rs" required-features = ["polars/json"] [[bin]] -name = "user-guide-basics-series-dataframes" -path = "user-guide/basics/series-dataframes.rs" +name = "user-guide-concepts-data-structures" +path = "user-guide/concepts/data-structures.rs" [[bin]] name = "user-guide-concepts-contexts" @@ -81,8 +81,8 @@ name = "user-guide-expressions-lists" path = "user-guide/expressions/lists.rs" required-features = ["polars/lazy"] [[bin]] -name = "user-guide-expressions-null" -path = "user-guide/expressions/null.rs" +name = "user-guide-expressions-missing-data" +path = "user-guide/expressions/missing-data.rs" required-features = ["polars/lazy"] [[bin]] name = "user-guide-expressions-operators" diff --git a/docs/src/rust/user-guide/basics/series-dataframes.rs b/docs/src/rust/user-guide/concepts/data-structures.rs similarity index 100% rename from docs/src/rust/user-guide/basics/series-dataframes.rs rename to docs/src/rust/user-guide/concepts/data-structures.rs diff --git a/docs/src/rust/user-guide/expressions/null.rs b/docs/src/rust/user-guide/expressions/missing-data.rs similarity index 100% rename from docs/src/rust/user-guide/expressions/null.rs rename to docs/src/rust/user-guide/expressions/missing-data.rs diff --git a/docs/src/rust/user-guide/basics/expressions.rs b/docs/src/rust/user-guide/getting-started/expressions.rs similarity index 100% rename from docs/src/rust/user-guide/basics/expressions.rs rename to docs/src/rust/user-guide/getting-started/expressions.rs diff --git a/docs/src/rust/user-guide/basics/joins.rs b/docs/src/rust/user-guide/getting-started/joins.rs similarity index 100% rename from docs/src/rust/user-guide/basics/joins.rs rename to docs/src/rust/user-guide/getting-started/joins.rs diff --git a/docs/src/rust/user-guide/basics/reading-writing.rs b/docs/src/rust/user-guide/getting-started/reading-writing.rs similarity index 100% rename from docs/src/rust/user-guide/basics/reading-writing.rs rename to 
docs/src/rust/user-guide/getting-started/reading-writing.rs diff --git a/docs/user-guide/concepts/data-structures.md b/docs/user-guide/concepts/data-structures.md index 54e0e2ff9771..860ac9da99bb 100644 --- a/docs/user-guide/concepts/data-structures.md +++ b/docs/user-guide/concepts/data-structures.md @@ -7,20 +7,20 @@ The core base data structures provided by Polars are `Series` and `DataFrame`. Series are a 1-dimensional data structure. Within a series all elements have the same [Data Type](data-types/overview.md) . The snippet below shows how to create a simple named `Series` object. -{{code_block('user-guide/basics/series-dataframes','series',['Series'])}} +{{code_block('user-guide/concepts/data-structures','series',['Series'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:series" +--8<-- "python/user-guide/concepts/data-structures.py:series" ``` ## DataFrame A `DataFrame` is a 2-dimensional data structure that is backed by a `Series`, and it can be seen as an abstraction of a collection (e.g. list) of `Series`. Operations that can be executed on a `DataFrame` are very similar to what is done in a `SQL` like query. You can `GROUP BY`, `JOIN`, `PIVOT`, but also define custom functions. -{{code_block('user-guide/basics/series-dataframes','dataframe',['DataFrame'])}} +{{code_block('user-guide/concepts/data-structures','dataframe',['DataFrame'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:dataframe" +--8<-- "python/user-guide/concepts/data-structures.py:dataframe" ``` ### Viewing data @@ -31,38 +31,38 @@ This part focuses on viewing data in a `DataFrame`. We will use the `DataFrame` The `head` function shows by default the first 5 rows of a `DataFrame`. You can specify the number of rows you want to see (e.g. `df.head(10)`). 
-{{code_block('user-guide/basics/series-dataframes','head',['head'])}} +{{code_block('user-guide/concepts/data-structures','head',['head'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:head" +--8<-- "python/user-guide/concepts/data-structures.py:head" ``` #### Tail The `tail` function shows the last 5 rows of a `DataFrame`. You can also specify the number of rows you want to see, similar to `head`. -{{code_block('user-guide/basics/series-dataframes','tail',['tail'])}} +{{code_block('user-guide/concepts/data-structures','tail',['tail'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:tail" +--8<-- "python/user-guide/concepts/data-structures.py:tail" ``` #### Sample If you want to get an impression of the data of your `DataFrame`, you can also use `sample`. With `sample` you get an _n_ number of random rows from the `DataFrame`. -{{code_block('user-guide/basics/series-dataframes','sample',['sample'])}} +{{code_block('user-guide/concepts/data-structures','sample',['sample'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:sample" +--8<-- "python/user-guide/concepts/data-structures.py:sample" ``` #### Describe `Describe` returns summary statistics of your `DataFrame`. It will provide several quick statistics if possible. 
-{{code_block('user-guide/basics/series-dataframes','describe',['describe'])}} +{{code_block('user-guide/concepts/data-structures','describe',['describe'])}} ```python exec="on" result="text" session="user-guide/data-structures" ---8<-- "python/user-guide/basics/series-dataframes.py:describe" +--8<-- "python/user-guide/concepts/data-structures.py:describe" ``` diff --git a/docs/user-guide/expressions/index.md b/docs/user-guide/expressions/index.md index 3724e09ce15e..32550974782e 100644 --- a/docs/user-guide/expressions/index.md +++ b/docs/user-guide/expressions/index.md @@ -8,7 +8,7 @@ In the `Contexts` sections we outlined what `Expressions` are and how they are i - [Casting](casting.md) - [Strings](strings.md) - [Aggregation](aggregation.md) -- [Null](null.md) +- [Missing data](missing-data.md) - [Window](window.md) - [Folds](folds.md) - [Lists](lists.md) diff --git a/docs/user-guide/expressions/null.md b/docs/user-guide/expressions/missing-data.md similarity index 68% rename from docs/user-guide/expressions/null.md rename to docs/user-guide/expressions/missing-data.md index 8092a7187cdd..8b95efabe847 100644 --- a/docs/user-guide/expressions/null.md +++ b/docs/user-guide/expressions/missing-data.md @@ -10,11 +10,11 @@ Polars also allows `NotaNumber` or `NaN` values for float columns. These `NaN` v You can manually define a missing value with the python `None` value: -{{code_block('user-guide/expressions/null','dataframe',['DataFrame'])}} +{{code_block('user-guide/expressions/missing-data','dataframe',['DataFrame'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:setup" ---8<-- "python/user-guide/expressions/null.py:dataframe" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:setup" +--8<-- "python/user-guide/expressions/missing-data.py:dataframe" ``` !!! 
info @@ -27,10 +27,10 @@ Each Arrow array used by Polars stores two kinds of metadata related to missing The first piece of metadata is the `null_count` - this is the number of rows with `null` values in the column: -{{code_block('user-guide/expressions/null','count',['null_count'])}} +{{code_block('user-guide/expressions/missing-data','count',['null_count'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:count" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:count" ``` The `null_count` method can be called on a `DataFrame`, a column from a `DataFrame` or a `Series`. The `null_count` method is a cheap operation as `null_count` is already calculated for the underlying Arrow array. @@ -40,10 +40,10 @@ The validity bitmap is memory efficient as it is bit encoded - each value is eit You can return a `Series` based on the validity bitmap for a column in a `DataFrame` or a `Series` with the `is_null` method: -{{code_block('user-guide/expressions/null','isnull',['is_null'])}} +{{code_block('user-guide/expressions/missing-data','isnull',['is_null'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:isnull" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:isnull" ``` The `is_null` method is a cheap operation that does not require scanning the full column for `null` values. This is because the validity bitmap already exists and can be returned as a Boolean array. @@ -59,30 +59,30 @@ Missing data in a `Series` can be filled with the `fill_null` method. 
You have t We illustrate each way to fill nulls by defining a simple `DataFrame` with a missing value in `col2`: -{{code_block('user-guide/expressions/null','dataframe2',['DataFrame'])}} +{{code_block('user-guide/expressions/missing-data','dataframe2',['DataFrame'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:dataframe2" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:dataframe2" ``` ### Fill with specified literal value We can fill the missing data with a specified literal value with `pl.lit`: -{{code_block('user-guide/expressions/null','fill',['fill_null'])}} +{{code_block('user-guide/expressions/missing-data','fill',['fill_null'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:fill" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:fill" ``` ### Fill with a strategy We can fill the missing data with a strategy such as filling forward: -{{code_block('user-guide/expressions/null','fillstrategy',['fill_null'])}} +{{code_block('user-guide/expressions/missing-data','fillstrategy',['fill_null'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:fillstrategy" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:fillstrategy" ``` You can find other fill strategies in the API docs. @@ -92,10 +92,10 @@ You can find other fill strategies in the API docs. For more flexibility we can fill the missing data with an expression. 
For example, to fill nulls with the median value from that column: -{{code_block('user-guide/expressions/null','fillexpr',['fill_null'])}} +{{code_block('user-guide/expressions/missing-data','fillexpr',['fill_null'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:fillexpr" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:fillexpr" ``` In this case the column is cast from integer to float because the median is a float statistic. @@ -104,20 +104,20 @@ In this case the column is cast from integer to float because the median is a fl In addition, we can fill nulls with interpolation (without using the `fill_null` function): -{{code_block('user-guide/expressions/null','fillinterpolate',['interpolate'])}} +{{code_block('user-guide/expressions/missing-data','fillinterpolate',['interpolate'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:fillinterpolate" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:fillinterpolate" ``` ## `NotaNumber` or `NaN` values Missing data in a `Series` has a `null` value. However, you can use `NotaNumber` or `NaN` values in columns with float datatypes. These `NaN` values can be created from Numpy's `np.nan` or the native python `float('nan')`: -{{code_block('user-guide/expressions/null','nan',['DataFrame'])}} +{{code_block('user-guide/expressions/missing-data','nan',['DataFrame'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:nan" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:nan" ``` !!! 
info @@ -133,8 +133,8 @@ Polars has `is_nan` and `fill_nan` methods which work in a similar way to the `i One further difference between `null` and `NaN` values is that taking the `mean` of a column with `null` values excludes the `null` values from the calculation but with `NaN` values taking the mean results in a `NaN`. This behaviour can be avoided by replacing the `NaN` values with `null` values; -{{code_block('user-guide/expressions/null','nanfill',['fill_nan'])}} +{{code_block('user-guide/expressions/missing-data','nanfill',['fill_nan'])}} -```python exec="on" result="text" session="user-guide/null" ---8<-- "python/user-guide/expressions/null.py:nanfill" +```python exec="on" result="text" session="user-guide/missing-data" +--8<-- "python/user-guide/expressions/missing-data.py:nanfill" ``` diff --git a/docs/user-guide/getting-started.md b/docs/user-guide/getting-started.md index c93ca1ab10e0..4a841961986d 100644 --- a/docs/user-guide/getting-started.md +++ b/docs/user-guide/getting-started.md @@ -24,18 +24,18 @@ This chapter is here to help you get started with Polars. It covers all the fund Polars supports reading and writing for common file formats (e.g. csv, json, parquet), cloud storage (S3, Azure Blob, BigQuery) and databases (e.g. postgres, mysql). Below we show the concept of reading and writing to disk. -{{code_block('user-guide/basics/reading-writing','dataframe',['DataFrame'])}} +{{code_block('user-guide/getting-started/reading-writing','dataframe',['DataFrame'])}} ```python exec="on" result="text" session="getting-started/reading" ---8<-- "python/user-guide/basics/reading-writing.py:dataframe" +--8<-- "python/user-guide/getting-started/reading-writing.py:dataframe" ``` In the example below we write the DataFrame to a csv file called `output.csv`. After that, we read it back using `read_csv` and then `print` the result for inspection. 
-{{code_block('user-guide/basics/reading-writing','csv',['read_csv','write_csv'])}} +{{code_block('user-guide/getting-started/reading-writing','csv',['read_csv','write_csv'])}} ```python exec="on" result="text" session="getting-started/reading" ---8<-- "python/user-guide/basics/reading-writing.py:csv" +--8<-- "python/user-guide/getting-started/reading-writing.py:csv" ``` For more examples on the CSV file format and other data formats, start with the [IO section](io/index.md) of the user guide. @@ -60,22 +60,22 @@ To select a column we need to do two things: In the example below you see that we select `col('*')`. The asterisk stands for all columns. -{{code_block('user-guide/basics/expressions','select',['select'])}} +{{code_block('user-guide/getting-started/expressions','select',['select'])}} ```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/basics/expressions.py:setup" +--8<-- "python/user-guide/getting-started/expressions.py:setup" print( - --8<-- "python/user-guide/basics/expressions.py:select" + --8<-- "python/user-guide/getting-started/expressions.py:select" ) ``` You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to pass the column names, as seen below. -{{code_block('user-guide/basics/expressions','select2',['select'])}} +{{code_block('user-guide/getting-started/expressions','select2',['select'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:select2" + --8<-- "python/user-guide/getting-started/expressions.py:select2" ) ``` @@ -85,21 +85,21 @@ Follow these links to other parts of the user guide to learn more about [basic o The `filter` option allows us to create a subset of the `DataFrame`. We use the same `DataFrame` as earlier and we filter between two specified dates. 
-{{code_block('user-guide/basics/expressions','filter',['filter'])}} +{{code_block('user-guide/getting-started/expressions','filter',['filter'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:filter" + --8<-- "python/user-guide/getting-started/expressions.py:filter" ) ``` With `filter` you can also create more complex filters that include multiple columns. -{{code_block('user-guide/basics/expressions','filter2',['filter'])}} +{{code_block('user-guide/getting-started/expressions','filter2',['filter'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:filter2" + --8<-- "python/user-guide/getting-started/expressions.py:filter2" ) ``` @@ -107,11 +107,11 @@ print( `with_columns` allows you to create new columns for your analyses. We create two new columns `e` and `b+42`. First we sum all values from column `b` and store the results in column `e`. After that we add `42` to the values of `b`. Creating a new column `b+42` to store these results. -{{code_block('user-guide/basics/expressions','with_columns',['with_columns'])}} +{{code_block('user-guide/getting-started/expressions','with_columns',['with_columns'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:with_columns" + --8<-- "python/user-guide/getting-started/expressions.py:with_columns" ) ``` @@ -119,26 +119,26 @@ print( We will create a new `DataFrame` for the Group by functionality. This new `DataFrame` will include several 'groups' that we want to group by. 
-{{code_block('user-guide/basics/expressions','dataframe2',['DataFrame'])}} +{{code_block('user-guide/getting-started/expressions','dataframe2',['DataFrame'])}} ```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/basics/expressions.py:dataframe2" +--8<-- "python/user-guide/getting-started/expressions.py:dataframe2" print(df2) ``` -{{code_block('user-guide/basics/expressions','group_by',['group_by'])}} +{{code_block('user-guide/getting-started/expressions','group_by',['group_by'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:group_by" + --8<-- "python/user-guide/getting-started/expressions.py:group_by" ) ``` -{{code_block('user-guide/basics/expressions','group_by2',['group_by'])}} +{{code_block('user-guide/getting-started/expressions','group_by2',['group_by'])}} ```python exec="on" result="text" session="getting-started/expressions" print( - --8<-- "python/user-guide/basics/expressions.py:group_by2" + --8<-- "python/user-guide/getting-started/expressions.py:group_by2" ) ``` @@ -146,16 +146,16 @@ print( Below are some examples on how to combine operations to create the `DataFrame` you require. 
-{{code_block('user-guide/basics/expressions','combine',['select','with_columns'])}} +{{code_block('user-guide/getting-started/expressions','combine',['select','with_columns'])}} ```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/basics/expressions.py:combine" +--8<-- "python/user-guide/getting-started/expressions.py:combine" ``` -{{code_block('user-guide/basics/expressions','combine2',['select','with_columns'])}} +{{code_block('user-guide/getting-started/expressions','combine2',['select','with_columns'])}} ```python exec="on" result="text" session="getting-started/expressions" ---8<-- "python/user-guide/basics/expressions.py:combine2" +--8<-- "python/user-guide/getting-started/expressions.py:combine2" ``` ## Combining DataFrames @@ -166,11 +166,11 @@ There are two ways `DataFrame`s can be combined depending on the use case: join Polars supports all types of join (e.g. left, right, inner, outer). Let's have a closer look on how to `join` two `DataFrames` into a single `DataFrame`. Our two `DataFrames` both have an 'id'-like column: `a` and `x`. We can use those columns to `join` the `DataFrames` in this example. -{{code_block('user-guide/basics/joins','join',['join'])}} +{{code_block('user-guide/getting-started/joins','join',['join'])}} ```python exec="on" result="text" session="getting-started/joins" ---8<-- "python/user-guide/basics/joins.py:setup" ---8<-- "python/user-guide/basics/joins.py:join" +--8<-- "python/user-guide/getting-started/joins.py:setup" +--8<-- "python/user-guide/getting-started/joins.py:join" ``` To see more examples with other types of joins, see the [Transformations section](transformations/joins.md) in the user guide. @@ -179,8 +179,8 @@ To see more examples with other types of joins, see the [Transformations section We can also `concatenate` two `DataFrames`. Vertical concatenation will make the `DataFrame` longer. Horizontal concatenation will make the `DataFrame` wider. 
Below you can see the result of an horizontal concatenation of our two `DataFrames`. -{{code_block('user-guide/basics/joins','hstack',['hstack'])}} +{{code_block('user-guide/getting-started/joins','hstack',['hstack'])}} ```python exec="on" result="text" session="getting-started/joins" ---8<-- "python/user-guide/basics/joins.py:hstack" +--8<-- "python/user-guide/getting-started/joins.py:hstack" ``` diff --git a/docs/user-guide/migration/pandas.md b/docs/user-guide/migration/pandas.md index dc57354c43ab..d0d2f292e447 100644 --- a/docs/user-guide/migration/pandas.md +++ b/docs/user-guide/migration/pandas.md @@ -314,7 +314,7 @@ For float columns Polars permits the use of `NaN` values. These `NaN` values are In pandas an integer column with missing values is cast to be a float column with `NaN` values for the missing values (unless using optional nullable integer dtypes). In Polars any missing values in an integer column are simply `null` values and the column remains an integer column. -See the [missing data](../expressions/null.md) section for more details. +See the [missing data](../expressions/missing-data.md) section for more details. ## Pipe littering @@ -344,7 +344,7 @@ def add_ham(df: pd.DataFrame) -> pd.DataFrame: ) ``` -If we do this in polars, we would create 3 `with_column` contexts, that forces Polars to run the 3 pipes sequentially, +If we do this in polars, we would create 3 `with_columns` contexts, that forces Polars to run the 3 pipes sequentially, utilizing zero parallelism. The way to get similar abstractions in polars is creating functions that create expressions. 
diff --git a/mkdocs.yml b/mkdocs.yml index 58ff35d90bcb..6673d17741ce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -30,7 +30,7 @@ nav: - user-guide/expressions/casting.md - user-guide/expressions/strings.md - user-guide/expressions/aggregation.md - - user-guide/expressions/null.md + - user-guide/expressions/missing-data.md - user-guide/expressions/window.md - user-guide/expressions/folds.md - user-guide/expressions/lists.md