Skip to content

Commit

Permalink
feat(PNI): include PNI Database (#181)
Browse files Browse the repository at this point in the history
* feat(PNI): include PNI Database

* feat(databases): include CIHA database

* Implement get_available_years for all online_data databases

* Remove online_data.__init__ legacy methods

* Mock SIM tests

* Mock SIA tests

* Mock SIH tests

* Remove legacy tests

* Update CIHA, SINAN and CNES tests

* Update SINASC test

* Update PNI test & update pandas to 2.1.0

* Update decode tests

* Move tests to a proper directory

* Remove tests for python 3.10

* Update fastparquet version

* Fix PNI & SIM tests

* Enable ibge tests

* Increase runner timeout to 15 min

* Include unittests for ftp.File

* add back CI tests for 3.10

* Remove states for all get_available_years

* Include get_city_name_by_geocode

* fix PNI tests
  • Loading branch information
luabida authored Dec 21, 2023
1 parent 357b7ca commit 67fca92
Show file tree
Hide file tree
Showing 51 changed files with 49,948 additions and 1,525 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on: [push, pull_request]
jobs:
tests:
runs-on: ubuntu-latest
timeout-minutes: 10
timeout-minutes: 15

defaults:
run:
Expand Down
3,965 changes: 3,965 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ python = ">=3.10,<3.12"
python-dateutil = "2.8.2"
cffi = "1.15.1"
dbfread = "2.0.7"
fastparquet = "^0.8.1"
fastparquet = ">=2023.10.1"
numpy = "1.26.2"
pyarrow = ">=11.0.0"
pycparser = "2.21"
Expand All @@ -22,7 +22,7 @@ wget = "^3.2"
loguru = "^0.6.0"
Unidecode = "^1.3.6"
dateparser = "^1.1.8"
pandas = ">=1.5.3"
pandas = ">=2.1.0"
urwid = "^2.1.2"
elasticsearch = { version = "7.16.2", extras=["preprocessing"] }
# FTP
Expand Down
39 changes: 35 additions & 4 deletions pysus/ftp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib
from datetime import datetime
from ftplib import FTP
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Dict, List, Optional, Set, Union, Self

import humanize
from aioftp import Client
Expand All @@ -16,6 +16,9 @@
"PYSUS_CACHEPATH", os.path.join(str(pathlib.Path.home()), "pysus")
)

# Local cache directory (the default `local_dir` used by downloads), created
# eagerly at import time so later writes can assume it exists.
__cachepath__ = pathlib.Path(CACHEPATH)
# parents=True: a custom CACHEPATH may be nested inside directories that do
# not exist yet; without it the import itself fails with FileNotFoundError.
__cachepath__.mkdir(parents=True, exist_ok=True)


def to_list(ite: Any) -> list:
"""Parse any builtin data type into a list"""
Expand Down Expand Up @@ -48,6 +51,7 @@ class File:
extension: str
basename: str
path: str
# parent: Directory # TODO: This causes too much overhead
__info__: Set[Union[int, str, datetime]]

def __init__(self, path: str, name: str, info: dict) -> None:
Expand All @@ -60,6 +64,12 @@ def __init__(self, path: str, name: str, info: dict) -> None:
if path.endswith("/")
else path + "/" + self.basename
)
ppath = self.path.replace(self.basename, "")
self.parent_path = (
ppath[:-1]
if ppath.endswith("/")
else ppath
)
self.__info__ = info

def __str__(self) -> str:
Expand Down Expand Up @@ -119,7 +129,7 @@ def download(self, local_dir: str = CACHEPATH, _pbar=None) -> Data:
_pbar.set_description(f"{self.basename}")

try:
ftp = ftp = FTP("ftp.datasus.gov.br")
ftp = FTP("ftp.datasus.gov.br")
ftp.login()
output = open(f"{filepath}", "wb")

Expand Down Expand Up @@ -327,7 +337,7 @@ def content(self):

def load(self):
"""
The content of a Directory must be explicity loaded
The content of a Directory must be explicitly loaded
"""
self.__content__ |= load_path(self.path)
self.loaded = True
Expand All @@ -340,6 +350,27 @@ def reload(self):
self.loaded = False
return self.load()

def is_parent(self, other: Union[Self, File]) -> bool:
    """
    Check whether `other` is located at `self` or anywhere beneath it.

    Parameters
    ----------
    other : Directory or File
        The entry whose ancestry is inspected.

    Returns
    -------
    bool
        True when `self` is the root ("/"), when `self.path` equals
        `other.path`, or when `self.path` equals the path of any ancestor
        of `other`; False otherwise.
    """
    if self.path == "/":
        # Every path lives under the root, no need to walk the tree.
        return True

    target = other
    while target.path != "/":
        if self.path == target.path:
            return True

        # Decide how to climb based on the *current* node, not on `other`:
        # checking `other` here re-created the File's direct parent on every
        # iteration, so the walk never advanced and looped forever whenever
        # `self` was not that direct parent.
        if isinstance(target, File):
            # TODO: Implement parent logic on File (too much overhead)
            target = Directory(target.parent_path)
        else:
            target = target.parent

    return False


CACHE["/"] = Directory("/")

Expand Down Expand Up @@ -444,7 +475,7 @@ def __repr__(self) -> str:
def content(self) -> List[Union[Directory, File]]:
"""
Lists Database content. The `paths` will be loaded if this property is
called or if explicty using `load()`. To add specific Directory inside
called or if explicitly using `load()`. To add specific Directory inside
content, `load()` the directory and call `content` again.
"""
if not self.__content__:
Expand Down
97 changes: 97 additions & 0 deletions pysus/ftp/databases/ciha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from typing import List, Union, Optional

from pysus.ftp import Database, Directory, File
from pysus.ftp.utils import zfill_year, to_list, parse_UFs, UFs, MONTHS


class CIHA(Database):
    """
    CIHA database: file naming rules, descriptions and file filtering.

    File names are parsed positionally by `format`: the first four
    characters are the group, the next two the UF, and the last four the
    two-digit year followed by the two-digit month.
    """

    name = "CIHA"
    # Trailing comma is required: without it the parentheses are only
    # grouping and `paths` would be a bare Directory instead of a tuple.
    paths = (Directory("/dissemin/publicos/CIHA/201101_/Dados"),)
    metadata = {
        "long_name": "Comunicação de Internação Hospitalar e Ambulatorial",
        "source": "http://ciha.datasus.gov.br/CIHA/index.php",
        "description": (
            "A CIHA foi criada para ampliar o processo de planejamento, programação, "
            "controle, avaliação e regulação da assistência à saúde permitindo um "
            "conhecimento mais abrangente e profundo dos perfis nosológico e "
            "epidemiológico da população brasileira, da capacidade instalada e do "
            "potencial de produção de serviços do conjunto de estabelecimentos de saúde "
            "do País. O sistema permite o acompanhamento das ações e serviços de saúde "
            "custeados por: planos privados de assistência à saúde; planos públicos; "
            "pagamento particular por pessoa física; pagamento particular por pessoa "
            "jurídica; programas e projetos federais (PRONON, PRONAS, PROADI); recursos "
            "próprios das secretarias municipais e estaduais de saúde; DPVAT; gratuidade "
            "e, a partir da publicação da Portaria GM/MS nº 2.905/2022, consórcios públicos. "
            "As informações registradas na CIHA servem como base para o processo de "
            "Certificação de Entidades Beneficentes de Assistência Social em Saúde (CEBAS) "
            "e para monitoramento dos programas PRONAS e PRONON."
        ),
    }
    groups = {
        "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial",
    }

    def describe(self, file: File):
        """
        Build a human readable description of a CIHA data file.

        Returns a dict with name, group, uf, month, year, size and
        last_update for `.dbc`/`.dbf` files. Anything else (non-File
        objects or files with another extension) is returned untouched.
        """
        if not isinstance(file, File):
            return file

        if file.extension.upper() in [".DBC", ".DBF"]:
            group, _uf, year, month = self.format(file)

            # Fall back to the raw two-letter code when it is not a key
            # of the UFs mapping.
            try:
                uf = UFs[_uf]
            except KeyError:
                uf = _uf

            description = {
                "name": str(file.basename),
                "group": self.groups[group],
                "uf": uf,
                "month": MONTHS[int(month)],
                "year": zfill_year(year),
                "size": file.info["size"],
                "last_update": file.info["modify"],
            }

            return description
        return file

    def format(self, file: File) -> tuple:
        """
        Split a file name into `(group, uf, year, month)`.

        Group and UF are upper-cased; the year is passed through
        `zfill_year`; the month is the raw last two characters.
        """
        group, _uf = file.name[:4].upper(), file.name[4:6].upper()
        year, month = file.name[-4:-2], file.name[-2:]
        return group, _uf, zfill_year(year), month

    def get_files(
        self,
        uf: Optional[Union[List[str], str]] = None,
        year: Optional[Union[list, str, int]] = None,
        month: Optional[Union[list, str, int]] = None,
        group: Union[List[str], str] = "CIHA",
    ) -> List[File]:
        """
        List the `.dbc`/`.dbf` files matching the given filters.

        Each of `uf`, `year` and `month` accepts a single value or a list
        and is skipped when falsy (year 0 / "0" / "00" is still applied).

        Raises
        ------
        ValueError
            If any requested group is not a key of `self.groups`.
        """
        files = list(filter(
            lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
        ))

        groups = [gr.upper() for gr in to_list(group)]

        unknown = set(groups).difference(list(self.groups))
        if unknown:
            raise ValueError(f"Unknown CIHA Group(s): {unknown}")

        files = list(filter(lambda f: self.format(f)[0] in groups, files))

        if uf:
            ufs = parse_UFs(uf)
            files = list(filter(lambda f: self.format(f)[1] in ufs, files))

        # `0`/`"0"`/`"00"` would be skipped by a plain truthiness test, so
        # they are let through explicitly.
        if year or str(year) in ["0", "00"]:
            years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
            files = list(filter(lambda f: self.format(f)[2] in years, files))

        if month:
            months = [str(m)[-2:].zfill(2) for m in to_list(month)]
            files = list(filter(lambda f: self.format(f)[3] in months, files))

        return files
20 changes: 15 additions & 5 deletions pysus/ftp/databases/cnes.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def load(
group in self.groups for group in [gr.upper() for gr in groups]
):
raise ValueError(
f"Unknown CNES group(s): {set(groups).difference(self.groups)}"
"Unknown CNES group(s): "
f"{set(groups).difference(self.groups)}"
)

for group in groups:
Expand All @@ -68,25 +69,34 @@ def load(
self.__loaded__.add(directory.name)
return self

def describe(self, file: File):
def describe(self, file: File) -> dict:
if not isinstance(file, File):
return file
return {}

if file.name == "GMufAAmm":
# Leftover
return {}

if file.extension.upper() in [".DBC", ".DBF"]:
group, _uf, year, month = self.format(file)

try:
uf = UFs[_uf]
except KeyError:
uf = _uf

description = {
"name": str(file.basename),
"group": self.groups[group],
"uf": UFs[_uf],
"uf": uf,
"month": MONTHS[int(month)],
"year": zfill_year(year),
"size": file.info["size"],
"last_update": file.info["modify"],
}

return description
return file
return {}

def format(self, file: File) -> tuple:
group, _uf = file.name[:2].upper(), file.name[2:4].upper()
Expand Down
94 changes: 94 additions & 0 deletions pysus/ftp/databases/pni.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List, Union, Optional, Literal

from pysus.ftp import Database, Directory, File
from pysus.ftp.utils import zfill_year, to_list, parse_UFs, UFs


class PNI(Database):
    """
    PNI database: file naming rules, descriptions and file filtering.

    File names must be exactly eight characters long and are parsed
    positionally by `format`: four characters of group, two of UF and the
    last two as the two-digit year.
    """

    name = "PNI"
    paths = (
        Directory("/dissemin/publicos/PNI/DADOS"),
    )
    metadata = {
        "long_name": "Sistema de Informações do Programa Nacional de Imunizações",
        # NOTE(review): both URLs below point to SIH-SUS pages rather than
        # PNI ones; looks like a copy/paste leftover, confirm the right
        # source before changing.
        "source": (
            "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/",
            "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/",
        ),
        "description": (
            "O SI-PNI é um sistema desenvolvido para possibilitar aos gestores "
            "envolvidos no Programa Nacional de Imunização, a avaliação dinâmica "
            "do risco quanto à ocorrência de surtos ou epidemias, a partir do "
            "registro dos imunobiológicos aplicados e do quantitativo populacional "
            "vacinado, agregados por faixa etária, período de tempo e área geográfica. "
            "Possibilita também o controle do estoque de imunobiológicos necessário "
            "aos administradores que têm a incumbência de programar sua aquisição e "
            "distribuição. Controla as indicações de aplicação de vacinas de "
            "imunobiológicos especiais e seus eventos adversos, dentro dos Centros "
            "de Referências em imunobiológicos especiais."
        ),
    }
    groups = {
        "CPNI": "Cobertura Vacinal",  # TODO: may be incorrect
        "DPNI": "Doses Aplicadas",  # TODO: may be incorrect
    }

    def describe(self, file: File) -> dict:
        """
        Build a human readable description of a PNI data file.

        Returns a dict with name, group, uf, year, size and last_update
        for `.dbc`/`.dbf` files, and an empty dict for anything else
        (including objects that are not a File).
        """
        if not isinstance(file, File):
            # Same guard the other databases apply; avoids failing on
            # objects that are not File instances.
            return {}

        if file.extension.upper() in [".DBC", ".DBF"]:
            group, _uf, year = self.format(file)

            # Fall back to the raw two-letter code when it is not a key
            # of the UFs mapping.
            try:
                uf = UFs[_uf]
            except KeyError:
                uf = _uf

            description = {
                "name": file.basename,
                "group": self.groups[group],
                "uf": uf,
                "year": zfill_year(year),
                "size": file.info["size"],
                "last_update": file.info["modify"],
            }

            return description
        return {}

    def format(self, file: File) -> tuple:
        """
        Split a file name into `(group, uf, year)`.

        Raises
        ------
        ValueError
            If the file name is not exactly eight characters long.
        """
        if len(file.name) != 8:
            raise ValueError(f"Can't format {file.name}")

        n = file.name
        group, _uf, year = n[:4], n[4:6], n[-2:]
        return group, _uf, zfill_year(year)

    def get_files(
        self,
        group: Union[list, Literal["CPNI", "DPNI"]],
        uf: Optional[Union[List[str], str]] = None,
        year: Optional[Union[list, str, int]] = None,
    ) -> List[File]:
        """
        List the `.dbc`/`.dbf` files matching the given filters.

        `group` is mandatory (a single group or a list of them, matched
        case-insensitively against `self.groups`). `uf` and `year` accept
        a single value or a list and are skipped when falsy (year
        0 / "0" / "00" is still applied).

        Raises
        ------
        ValueError
            If any requested group is not a key of `self.groups`, or,
            via `format`, if a candidate file name is not eight
            characters long.
        """
        files = list(filter(
            lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
        ))

        groups = [gr.upper() for gr in to_list(group)]

        unknown = set(groups).difference(list(self.groups))
        if unknown:
            raise ValueError(f"Unknown PNI Group(s): {unknown}")

        files = list(filter(lambda f: self.format(f)[0] in groups, files))

        if uf:
            ufs = parse_UFs(uf)
            files = list(filter(lambda f: self.format(f)[1] in ufs, files))

        # `0`/`"0"`/`"00"` would be skipped by a plain truthiness test, so
        # they are let through explicitly.
        if year or str(year) in ["0", "00"]:
            years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
            files = list(filter(lambda f: self.format(f)[2] in years, files))

        return files
Loading

0 comments on commit 67fca92

Please sign in to comment.