diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..cd5e81d --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,48 @@ +name: docs + +on: + push: + branches: + - main + # Alternative: only build for tags. + # tags: + # - '*' + +# security: restrict permissions for CI jobs. +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install -e . + + - name: Build docs + run: pdoc -o docs/pdoc -d google datasus_db + + - name: Upload docs artifacts + uses: actions/upload-pages-artifact@v2 + with: + path: docs/pdoc + + deploy: + needs: build + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@v2 diff --git a/.gitignore b/.gitignore index f872300..7a0e370 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ __pycache__ *.parquet .venv dist - +docs/pdoc diff --git a/datasus_db/__init__.py b/datasus_db/__init__.py index 09f919c..94b4539 100644 --- a/datasus_db/__init__.py +++ b/datasus_db/__init__.py @@ -1,6 +1,28 @@ -from .datasources.auxiliar import import_auxiliar_tables +""" +[![PyPI version](https://badge.fury.io/py/datasus-db.svg)](https://pypi.org/project/datasus-db/) + +A Python package to **download and import** publicly available data from **DATASUS's** ftp servers into a [DuckDB](https://duckdb.org/) database. 
+ + +# Import functions +Below is the list of all **import functions**: +- `datasus_db.datasources.sih_rd.import_sih_rd` +- `datasus_db.datasources.sim_do.import_sim_do` +- `datasus_db.datasources.po.import_po` +- `datasus_db.datasources.ibge_pop.import_ibge_pop` +- `datasus_db.datasources.ibge_pop_tcu.import_ibge_pop_tcu` +- `datasus_db.datasources.auxiliar.import_auxiliar_tables` + + +## Datasources +The list of all available DATASUS datasources can be seen here: https://datasus.saude.gov.br/transferencia-de-arquivos/ + +If `datasus_db` is missing a datasource that you need, feel free to create an issue here: https://github.com/mymatsubara/datasus-db/issues/new +""" + +from .datasources.sih_rd import import_sih_rd +from .datasources.sim_do import import_sim_do from .datasources.po import import_po from .datasources.ibge_pop import import_ibge_pop from .datasources.ibge_pop_tcu import import_ibge_pop_tcu -from .datasources.sih_rd import import_sih_rd -from .datasources.sim_do import import_sim_do +from .datasources.auxiliar import import_auxiliar_tables diff --git a/datasus_db/cnv.py b/datasus_db/cnv.py index 1133373..84001f2 100644 --- a/datasus_db/cnv.py +++ b/datasus_db/cnv.py @@ -1,3 +1,7 @@ +""" +Module with functions to deal with DATASUS conversion files (*.cnv), which are usually files that map ids to readable names. +""" + import io import re import polars as pl diff --git a/datasus_db/datasources/__init__.py b/datasus_db/datasources/__init__.py index e69de29..4f6f728 100644 --- a/datasus_db/datasources/__init__.py +++ b/datasus_db/datasources/__init__.py @@ -0,0 +1,3 @@ +""" +Module with the implemented DATASUS datasource imports. 
+""" diff --git a/datasus_db/datasources/auxiliar.py b/datasus_db/datasources/auxiliar.py index 805c55b..2538eee 100644 --- a/datasus_db/datasources/auxiliar.py +++ b/datasus_db/datasources/auxiliar.py @@ -13,11 +13,16 @@ def import_auxiliar_tables(db_file="datasus.db"): - """ - Import auxiliar tables with some datasus codes definitions (eg: municipios, doenças, ...) + """Import auxiliar tables with some datasus codes definitions (eg: municipios, doenças, ...) Args: - `db_file (str)`: path to the duckdb file in which the data will be imported to. + db_file (str, optional): path to the duckdb file in which the data will be imported to. + + --- + + Extra: + - **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf + - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip """ logging.info(f"⏳ [AUX_TABLES] Starting import...") diff --git a/datasus_db/datasources/ibge_pop.py b/datasus_db/datasources/ibge_pop.py index 71496c5..4f1e7fd 100644 --- a/datasus_db/datasources/ibge_pop.py +++ b/datasus_db/datasources/ibge_pop.py @@ -17,6 +17,12 @@ def import_ibge_pop(db_file="datasus.db", years=["*"]): Args: db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]. Defaults to ["*"]. 
+ + --- + + Extra: + - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf + - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip """ logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") diff --git a/datasus_db/datasources/ibge_pop_tcu.py b/datasus_db/datasources/ibge_pop_tcu.py index 834750d..2985169 100644 --- a/datasus_db/datasources/ibge_pop_tcu.py +++ b/datasus_db/datasources/ibge_pop_tcu.py @@ -11,13 +11,17 @@ def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]): - """ - Import population estimated per city by TCU (Tribunal de Contas da União). + """Import population estimated per city by TCU (Tribunal de Contas da União). Args: - `db_file (str)`: path to the duckdb file in which the data will be imported to. + db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". + years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. + + --- - `years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]` + Extra: + - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf + - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip """ logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") diff --git a/datasus_db/datasources/po.py b/datasus_db/datasources/po.py index 01a5ae0..ef5ec01 100644 --- a/datasus_db/datasources/po.py +++ b/datasus_db/datasources/po.py @@ -9,15 +9,18 @@ def import_po(db_file="datasus.db", years=["*"]): - """ - Import PO (Painel de Oncologia) data (since 2013). + """Import PO (Painel de Oncologia) data (since 2013). Args: - `db_file (str)`: path to the duckdb file in which the data will be imported to. + db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". 
+ years (list, optional): list of years for which data will be imported (if available). Eg: `[2013, 2020]`. Defaults to ["*"]. - `years (list[int])`: list of years for which data will be imported (if available). Eg: `[2013, 2020]` - """ + --- + Extra: + - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/po.pdf + - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR*.dbc + """ logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") import_from_ftp( diff --git a/datasus_db/datasources/sih_rd.py b/datasus_db/datasources/sih_rd.py index e2d0b0e..310d2a9 100644 --- a/datasus_db/datasources/sih_rd.py +++ b/datasus_db/datasources/sih_rd.py @@ -9,17 +9,19 @@ def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]): - """ - Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS). + """Import RD (Autorização de Internação Hospitalar Reduzida) from SIHSUS (Sistema de Informações Hospitalares do SUS). Args: - `db_file (str)`: path to the duckdb file in which the data will be imported to. - - `years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]` + db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". + years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. + states (list, optional): list of Brazilian two-letter state codes for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. + months (list, optional): list of month numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"]. - `states (list[str])`: list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]` + --- - `months (list[int])`: list of months numbers (1-12) for which data will be imported (if available). 
Eg: `[1, 12, 6]` + Extra: + - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf + - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc """ logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") diff --git a/datasus_db/datasources/sim_do.py b/datasus_db/datasources/sim_do.py index 4d28cb7..6e1d1f9 100644 --- a/datasus_db/datasources/sim_do.py +++ b/datasus_db/datasources/sim_do.py @@ -18,15 +18,19 @@ def import_sim_do(db_file="datasus.db", years=["*"], states=["*"]): - """ - Import DO (Declaração de Óbito) from SIM (Sistema de informações de Mortalidade). + """Import DO (Declaração de Óbito) from SIM (Sistema de informações de Mortalidade). Args: - `db_file (str)`: path to the duckdb file in which the data will be imported to. + db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". + years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. + states (list, optional): list of Brazilian two-letter state codes for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. - `years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]` + --- - `states (list[str])`: list of brazilian 2 letters state for which data will be imported (if available). 
Eg: `["SP", "RJ"]` + Extra: + - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sim_do.pdf + - **ftp path non-preliminary data**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DORES/DO*.dbc + - **ftp path preliminary data**: ftp.datasus.gov.br/dissemin/publicos/SIM/PRELIM/DORES/DO*.dbc """ logging.info(f"⏳ [{MAIN_TABLE}] Starting import for non preliminary data...") import_from_ftp( diff --git a/datasus_db/datasus.py b/datasus_db/datasus.py index 2dbe152..a5fa202 100644 --- a/datasus_db/datasus.py +++ b/datasus_db/datasus.py @@ -1,3 +1,7 @@ +""" +Module with functions used to batch multiple imports from DATASUS's ftp server in parallel +""" + from typing import Callable import os.path as path import duckdb diff --git a/datasus_db/db.py b/datasus_db/db.py index 3b8ee69..8d45129 100644 --- a/datasus_db/db.py +++ b/datasus_db/db.py @@ -1,3 +1,7 @@ +""" +Module with common functions used to interact with DuckDB +""" + import duckdb import os.path as path import polars as pl diff --git a/datasus_db/dbf.py b/datasus_db/dbf.py index 2dbd94f..15402ad 100644 --- a/datasus_db/dbf.py +++ b/datasus_db/dbf.py @@ -1,3 +1,6 @@ +""" +Module with helper functions to handle *.dbf files +""" import os.path as path import polars as pl from dbfread import DBF diff --git a/datasus_db/ftp.py b/datasus_db/ftp.py index f246c0b..128d152 100644 --- a/datasus_db/ftp.py +++ b/datasus_db/ftp.py @@ -1,3 +1,7 @@ +""" +Module with helper functions to interact with DATASUS ftp server +""" + import urllib.request as request import ftplib import logging @@ -27,7 +31,7 @@ def fetch_dbc_as_df(ftp_path: str) -> pl.DataFrame: ) as f: f.write(dbc_raw) - dbc_2_dbf(dbc_file, dbf_file) + datasus_dbc.decompress(dbc_file, dbf_file) df = pl.DataFrame(iter(DBF(dbf_file, encoding="iso-8859-1"))) @@ -52,10 +56,6 @@ def try_nlst(pattern: str, ftp: ftplib.FTP): return files -def dbc_2_dbf(dbc: str, dbf: str): - datasus_dbc.decompress(dbc, dbf) - - def fetch_from_zip(ftp_path: 
str, files: list[str]): response = request.urlopen(ftp_path) zip_file = ZipFile(io.BytesIO(response.read())) diff --git a/datasus_db/pl_utils.py b/datasus_db/pl_utils.py index c508369..dab657d 100644 --- a/datasus_db/pl_utils.py +++ b/datasus_db/pl_utils.py @@ -1,3 +1,7 @@ +""" +Module with helper functions to work with polars dataframes. +""" + import polars as pl from dataclasses import dataclass diff --git a/datasus_db/utils.py b/datasus_db/utils.py index 62f5d31..28ea607 100644 --- a/datasus_db/utils.py +++ b/datasus_db/utils.py @@ -1,3 +1,5 @@ +"""Module with generic helper functions""" + import itertools import os diff --git a/datasus_db/views/__init__.py b/datasus_db/views/__init__.py index e69de29..70b735b 100644 --- a/datasus_db/views/__init__.py +++ b/datasus_db/views/__init__.py @@ -0,0 +1,3 @@ +""" +Module used to create DuckDB views +"""