Skip to content

Commit

Permalink
docs: update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
mymatsubara committed Dec 7, 2023
1 parent 40d7c6e commit ed83f4c
Show file tree
Hide file tree
Showing 18 changed files with 154 additions and 33 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Build the pdoc API documentation on every push to main and publish it to GitHub Pages.
name: docs

on:
  push:
    branches:
      - main
    # Alternative: only build for tags.
    # tags:
    #   - '*'

# security: restrict permissions for CI jobs.
permissions:
  contents: read

jobs:
  # Build the static HTML documentation and upload it as a Pages artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      # NOTE(review): assumes `pip install -e .` also installs pdoc — confirm it is a declared dependency.
      - name: Install dependencies
        run: pip install -e .

      - name: Build docs
        run: pdoc -o docs/pdoc -d google datasus_db

      # The official action is named "upload-pages-artifact" ("pages", plural).
      - name: Upload docs artifacts
        uses: actions/upload-pages-artifact@v2
        with:
          path: docs/pdoc

  # Deploy the uploaded artifact. Kept as a separate job so that only this job
  # is granted the write permissions needed to publish.
  deploy:
    needs: build
    runs-on: ubuntu-latest
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - id: deployment
        uses: actions/deploy-pages@v2
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ __pycache__
*.parquet
.venv
dist

docs/pdoc
28 changes: 25 additions & 3 deletions datasus_db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
from .datasources.auxiliar import import_auxiliar_tables
"""
[![PyPI version](https://badge.fury.io/py/datasus-db.svg)](https://pypi.org/project/datasus-db/)
A Python package to **download and import** publicly available data from **DATASUS's** FTP servers into a [DuckDB](https://duckdb.org/) database.
# Import functions
Below is the list of all **import functions**:
- `datasus_db.datasources.sih_rd.import_sih_rd`
- `datasus_db.datasources.sim_do.import_sim_do`
- `datasus_db.datasources.po.import_po`
- `datasus_db.datasources.ibge_pop.import_ibge_pop`
- `datasus_db.datasources.ibge_pop_tcu.import_ibge_pop_tcu`
- `datasus_db.datasources.auxiliar.import_auxiliar_tables`
## Datasources
The list of all available DATASUS's datasources can be seen here: https://datasus.saude.gov.br/transferencia-de-arquivos/
If `datasus_db` is missing a datasource that you need, feel free to create an issue here: https://github.com/mymatsubara/datasus-db/issues/new
"""

from .datasources.sih_rd import import_sih_rd
from .datasources.sim_do import import_sim_do
from .datasources.po import import_po
from .datasources.ibge_pop import import_ibge_pop
from .datasources.ibge_pop_tcu import import_ibge_pop_tcu
from .datasources.sih_rd import import_sih_rd
from .datasources.sim_do import import_sim_do
from .datasources.auxiliar import import_auxiliar_tables
4 changes: 4 additions & 0 deletions datasus_db/cnv.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module with functions to deal with DATASUS convention files (*.cnv), which are usually files that map ids to readable names.
"""

import io
import re
import polars as pl
Expand Down
3 changes: 3 additions & 0 deletions datasus_db/datasources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Module with the implemented DATASUS's datasources imports.
"""
11 changes: 8 additions & 3 deletions datasus_db/datasources/auxiliar.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,16 @@


def import_auxiliar_tables(db_file="datasus.db"):
"""
Import auxiliary tables with some DATASUS code definitions (e.g. municipios, doenças, ...)
"""Import auxiliary tables with some DATASUS code definitions (e.g. municipios, doenças, ...)
Args:
`db_file (str)`: path to the duckdb file in which the data will be imported to.
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
---
Extra:
- **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
- **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
"""
logging.info(f"⏳ [AUX_TABLES] Starting import...")

Expand Down
6 changes: 6 additions & 0 deletions datasus_db/datasources/ibge_pop.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ def import_ibge_pop(db_file="datasus.db", years=["*"]):
Args:
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
---
Extra:
- **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop.pdf
- **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POP/POPBR*.zip
"""
logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

Expand Down
12 changes: 8 additions & 4 deletions datasus_db/datasources/ibge_pop_tcu.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@


def import_ibge_pop_tcu(db_file="datasus.db", years=["*"]):
"""
Import population estimated per city by TCU (Tribunal de Contas da União).
"""Import population estimated per city by TCU (Tribunal de Contas da União).
Args:
`db_file (str)`: path to the duckdb file in which the data will be imported to.
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
---
`years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`
Extra:
- **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/ibge_pop_tcu.pdf
- **ftp path**: ftp.datasus.gov.br/dissemin/publicos/IBGE/POPTCU/POPTBR*.zip
"""
logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

Expand Down
13 changes: 8 additions & 5 deletions datasus_db/datasources/po.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@


def import_po(db_file="datasus.db", years=["*"]):
"""
Import PO (Painel de Oncologia) data (since 2013).
"""Import PO (Painel de Oncologia) data (since 2013).
Args:
`db_file (str)`: path to the duckdb file in which the data will be imported to.
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
years (list, optional): list of years for which data will be imported (if available). Eg: `[2013, 2020]`. Defaults to ["*"].
`years (list[int])`: list of years for which data will be imported (if available). Eg: `[2013, 2020]`
"""
---
Extra:
- **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/po.pdf
- **ftp path**: ftp.datasus.gov.br/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR*.dbc
"""
logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

import_from_ftp(
Expand Down
16 changes: 9 additions & 7 deletions datasus_db/datasources/sih_rd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@


def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]):
"""
Import RD (Autorização de Internação Hospitalar Reduzida) from SIHSUS (Sistema de Informações Hospitalares do SUS).
"""Import RD (Autorização de Internação Hospitalar Reduzida) from SIHSUS (Sistema de Informações Hospitalares do SUS).
Args:
`db_file (str)`: path to the duckdb file in which the data will be imported to.
`years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
states (list, optional): list of two-letter Brazilian state codes for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
months (list, optional): list of month numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"].
`states (list[str])`: list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`
---
`months (list[int])`: list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`
Extra:
- **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf
- **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc
"""
logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

Expand Down
14 changes: 9 additions & 5 deletions datasus_db/datasources/sim_do.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@


def import_sim_do(db_file="datasus.db", years=["*"], states=["*"]):
"""
Import DO (Declaração de Óbito) from SIM (Sistema de informações de Mortalidade).
"""Import DO (Declaração de Óbito) from SIM (Sistema de informações de Mortalidade).
Args:
`db_file (str)`: path to the duckdb file in which the data will be imported to.
db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
states (list, optional): list of two-letter Brazilian state codes for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
`years (list[int])`: list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`
---
`states (list[str])`: list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`
Extra:
- **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sim_do.pdf
- **ftp path non preliminary data**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DORES/DO*.dbc
- **ftp path preliminary data**: ftp.datasus.gov.br/dissemin/publicos/SIM/PRELIM/DORES/DO*.dbc
"""
logging.info(f"⏳ [{MAIN_TABLE}] Starting import for non preliminary data...")
import_from_ftp(
Expand Down
4 changes: 4 additions & 0 deletions datasus_db/datasus.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module with functions used to batch multiple imports from DATASUS's ftp server in parallel
"""

from typing import Callable
import os.path as path
import duckdb
Expand Down
4 changes: 4 additions & 0 deletions datasus_db/db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module with common functions used to interact with DuckDB
"""

import duckdb
import os.path as path
import polars as pl
Expand Down
3 changes: 3 additions & 0 deletions datasus_db/dbf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Module with helper functions to handle *.dbf files
"""
import os.path as path
import polars as pl
from dbfread import DBF
Expand Down
10 changes: 5 additions & 5 deletions datasus_db/ftp.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module with helper functions to interact with DATASUS ftp server
"""

import urllib.request as request
import ftplib
import logging
Expand Down Expand Up @@ -27,7 +31,7 @@ def fetch_dbc_as_df(ftp_path: str) -> pl.DataFrame:
) as f:
f.write(dbc_raw)

dbc_2_dbf(dbc_file, dbf_file)
datasus_dbc.decompress(dbc_file, dbf_file)

df = pl.DataFrame(iter(DBF(dbf_file, encoding="iso-8859-1")))

Expand All @@ -52,10 +56,6 @@ def try_nlst(pattern: str, ftp: ftplib.FTP):
return files


def dbc_2_dbf(dbc: str, dbf: str):
datasus_dbc.decompress(dbc, dbf)


def fetch_from_zip(ftp_path: str, files: list[str]):
response = request.urlopen(ftp_path)
zip_file = ZipFile(io.BytesIO(response.read()))
Expand Down
4 changes: 4 additions & 0 deletions datasus_db/pl_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Module with helper functions to work with polars dataframes.
"""

import polars as pl
from dataclasses import dataclass

Expand Down
2 changes: 2 additions & 0 deletions datasus_db/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Module with generic helper functions"""

import itertools
import os

Expand Down
3 changes: 3 additions & 0 deletions datasus_db/views/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Module used to create DuckDB views
"""

0 comments on commit ed83f4c

Please sign in to comment.