Skip to content

Commit

Permalink
docs(clean): add documentation for multiple clean functions for number types
Browse files Browse the repository at this point in the history
  • Loading branch information
NoirTree authored and qidanrui committed Sep 23, 2021
1 parent 9f6f5b2 commit 732480f
Show file tree
Hide file tree
Showing 146 changed files with 42,474 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dataprep/clean/clean_cy_vat.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def clean_cy_vat(
>>> df = pd.DataFrame({
"vat": [
'12302 6635',
'CY-10259033P',
'CY-10259033Z',]
})
>>> clean_cy_vat(df, 'vat')
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_de_vat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Clean and validate a DataFrame column containing German VAT numberss (VATs).
Clean and validate a DataFrame column containing German VAT numbers (VATs).
"""
# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
Expand Down
3 changes: 2 additions & 1 deletion dataprep/clean/clean_de_wkn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
Clean and validate a DataFrame column containing Wertpapierkennnummer (WKNs).
Clean and validate a DataFrame column containing
German Securities Identification Codes (WKNs).
"""
# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_fi_associationid.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def clean_fi_associationid(
--------
Clean a column of Finnish association registry id data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"associationid": [
"1234",
"12df",]
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_fr_siret.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def clean_fr_siret(
progress: bool = True,
) -> pd.DataFrame:
"""
Clean French company establishment identification numbers (SIRETs) data in a DataFrame column.
Clean French Company Establishment Identification Numbers (SIRETs) in a DataFrame column.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_in_aadhaar.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def clean_in_aadhaar(
The output format of standardized number string.
If output_format = 'compact', return string without any separators or whitespace.
If output_format = 'standard', return string with proper separators and whitespace.
If output_format = 'mask', Masks the first 8 digits as per MeitY guidelines for
If output_format = 'mask', mask the first 8 digits as per MeitY guidelines for
securing identity information and Sensitive personal data.
(default: "standard")
Expand Down
14 changes: 7 additions & 7 deletions dataprep/clean/clean_lv_pvn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Clean and validate a DataFrame column containing Latvian PVN (VAT) numbers (PVTs).
Clean and validate a DataFrame column containing Latvian PVN (VAT) numbers (PVNs).
"""
# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
Expand All @@ -23,21 +23,21 @@ def clean_lv_pvn(
progress: bool = True,
) -> pd.DataFrame:
"""
Clean Latvian PVN (VAT) numbers (PVTs) type data in a DataFrame column.
Clean Latvian PVN (VAT) numbers (PVNs) type data in a DataFrame column.
Parameters
----------
df
A pandas or Dask DataFrame containing the data to be cleaned.
col
The name of the column containing data of PVT type.
The name of the column containing data of PVN type.
output_format
The output format of standardized number string.
If output_format = 'compact', return string without any separators or whitespace.
If output_format = 'standard', return string with proper separators and whitespace.
If output_format = 'birthdate', return the birthdate of the person. Note that this
format is only available when the PVN refers to a person (not a legal entity).
Note: in the case of PVT, the compact format is the same as the standard one.
Note: in the case of PVN, the compact format is the same as the standard one.
(default: "standard")
inplace
Expand All @@ -59,7 +59,7 @@ def clean_lv_pvn(
Examples
--------
Clean a column of PVT data.
Clean a column of PVN data.
>>> df = pd.DataFrame({
"pvn": [
Expand Down Expand Up @@ -114,7 +114,7 @@ def validate_lv_pvn(
column: str = "",
) -> Union[bool, pd.Series, pd.DataFrame]:
"""
Validate if a data cell is PVT in a DataFrame column. For each cell, return True or False.
Validate if a data cell is PVN in a DataFrame column. For each cell, return True or False.
Parameters
----------
Expand Down Expand Up @@ -146,7 +146,7 @@ def _format(val: Any, output_format: str = "standard", errors: str = "coarse") -
If output_format = 'standard', return string with proper separators and whitespace.
If output_format = 'birthdate', return the birthdate of the person. Note that this
format is only available when the PVN refers to a person (not a legal entity).
Note: in the case of PVT, the compact format is the same as the standard one.
Note: in the case of PVN, the compact format is the same as the standard one.
"""
# pylint: disable=bare-except

Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_no_fodselsnummer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def clean_no_fodselsnummer(
progress: bool = True,
) -> pd.DataFrame:
"""
Clean Estonian Personcal ID number type data in a DataFrame column.
Clean Norwegian birth number data in a DataFrame column.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_no_iban.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def clean_no_iban(
--------
Clean a column of IBAN data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"iban": [
'NO9386011117947',
'NO92 8601 1117 947',]
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_no_kontonr.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def clean_no_kontonr(
--------
Clean a column of kontonr data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"kontonr": [
"8601 11 17947",
"8601 11 17949",]
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_no_orgnr.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def clean_no_orgnr(
--------
Clean a column of Orgnr data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"orgnr": [
"988077917",
"988 077 918",]
Expand Down
6 changes: 3 additions & 3 deletions dataprep/clean/clean_nz_bankaccount.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ def clean_nz_bankaccount(
--------
Clean a column of bankaccount data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"bankaccount": [
"51824753556",
"99999999999",]
"0102420100194000",
"01-0242-0100195-00",]
})
>>> clean_nz_bankaccount(df, 'bankaccount')
bankaccount bankaccount_clean
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_nz_ird.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def clean_nz_ird(
--------
Clean a column of IRD data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"ird": [
"49091850",
"136410133",]
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_pl_regon.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def clean_pl_regon(
--------
Clean a column of REGON data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"regon": [
'192598184',
'192598183',]
Expand Down
4 changes: 2 additions & 2 deletions dataprep/clean/clean_pt_nif.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def clean_pt_nif(
--------
Clean a column of NIF data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"nif": [
'PT 501 964 843',
'PT 501 964 842',]
})
>>> clean_pt_nif(df, 'nif')
nif nif_clean
0 PT 501 964 843 PT 501 964 843
0 PT 501 964 843 501964843
1 PT 501 964 842 NaN
"""

Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_py_ruc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def clean_py_ruc(
--------
Clean a column of RUC data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"ruc": [
"800000358",
"80123456789",]
Expand Down
4 changes: 2 additions & 2 deletions dataprep/clean/clean_ro_cf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Clean and validate a DataFrame column containing Romanian CF (CF) numbers (CFs).
Clean and validate a DataFrame column containing Romanian CF (VAT) numbers (CFs).
"""
# pylint: disable=too-many-lines, too-many-arguments, too-many-branches
from typing import Any, Union
Expand All @@ -23,7 +23,7 @@ def clean_ro_cf(
progress: bool = True,
) -> pd.DataFrame:
"""
Clean Romanian CF (CF) numbers (CFs) type data in a DataFrame column.
Clean Romanian CF (VAT) numbers (CFs) type data in a DataFrame column.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion dataprep/clean/clean_ro_onrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def clean_ro_onrc(
--------
Clean a column of ONRC data.
>>> df = pd.DataFrame({{
>>> df = pd.DataFrame({
"onrc": [
"J52/750/2012",
"X52/750/2012",]
Expand Down
Loading

0 comments on commit 732480f

Please sign in to comment.