-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculo_pobreza_enaho_2020.py
102 lines (84 loc) · 4.07 KB
/
calculo_pobreza_enaho_2020.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -*- coding: utf-8 -*-
"""Calculo pobreza: Enaho 2020.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1p6ermtPh5cmEpQ87cH8hjzfMOLuzRAN-
"""
# Install the third-party packages needed by this script.
# NOTE: lines starting with "!" are IPython/Colab shell escapes; they only work
# inside a notebook kernel and are a syntax error under the plain interpreter.
# pyreadstat is never imported directly below — presumably it is the backend
# that pd.read_spss needs; verify before removing.
!pip install pyreadstat
!pip install weightedcalcs
# Import the libraries used by the analysis
import os
import pandas as pd
import numpy as np
import weightedcalcs as wc
# Show the current working directory
!pwd
# Set the working directory so the data file can be opened by its bare name.
# Assumes Google Drive is already mounted at /content/drive — TODO confirm,
# no mount call is visible in this script.
path = "/content/drive/MyDrive/Colab Notebooks/enaho_2020"
os.chdir(path)
# Confirm the directory change and list the files found there
!pwd
print(os.listdir())
# Load the "Sumaria" SPSS file from the working directory
sumaria = pd.read_spss("Sumaria-2020.sav")
# Normalise every column name to lower case so later lookups use one spelling
sumaria.columns = [nombre.lower() for nombre in sumaria.columns]
# Notebook-style inspection: first two rows, then the (rows, columns) shape
sumaria.head(2)
sumaria.shape
# Build the two per-capita monthly variables:
#   1) "ingreso": "inghog1d" divided by 12 * "mieperho"
#   2) "gasto":   "gashog2d" divided by 12 * "mieperho"
# Presumably the inputs are annual household totals and "mieperho" is the
# household size — confirm against the survey dictionary.
meses_persona = 12 * sumaria["mieperho"]
sumaria["ingreso"] = sumaria["inghog1d"] / meses_persona
sumaria["gasto"] = sumaria["gashog2d"] / meses_persona
# Notebook-style inspection of the first two rows
sumaria.head(2)
# Build two poverty indicators.
#
# "pobre": computed here by comparing per-capita monthly spending ("gasto")
# with the poverty line ("linea"). Any row where the comparison is False,
# including rows with a missing value on either side, is labelled "no pobre".
sumaria["pobre"] = np.where(sumaria["gasto"] < sumaria["linea"], "pobre", "no pobre")
# "pc_pobre": recode of the survey's own "pobreza" variable, collapsing the
# two poor categories into a single "Pobre" label; every other value
# (e.g. "No pobre") is left as it is.
# pd.read_spss returns value-labelled variables as categoricals by default,
# and Series.replace on a categorical is deprecated (pandas 2.2+) when it
# changes the set of categories, as merging two labels does. Casting to
# object first keeps the recode working on old and new pandas alike; the
# resulting column holds plain strings instead of a categorical.
sumaria["pc_pobre"] = sumaria["pobreza"].astype(object).replace({
    "Pobre Extremo": "Pobre",
    "Pobre No Extremo": "Pobre",
})
# Notebook-style inspection of the first two rows
sumaria.head(2)
# Comparison report: print the unweighted frequency table of each poverty
# variable, with a separator line before every section and one at the end.
separador = "*-----------------------------------*"
print(separador)
print("Comparación de variables de pobreza")
for etiqueta, columna in (
    ("Pobreza", "pobreza"),
    ("Pobre", "pobre"),
    ("pc_pobre", "pc_pobre"),
):
    print(separador)
    print(etiqueta)
    print(sumaria[columna].value_counts())
print(separador)
# Build the department variable "dpto": the first two characters of "ubigeo"
# are looked up in a code -> department-name table.
# Names are listed in code order, so position 1 maps to "01", ..., 25 to "25".
departamentos = (
    "Amazonas", "Ancash", "Apurimac", "Arequipa", "Ayacucho",
    "Cajamarca", "Callao", "Cusco", "Huancavelica", "Huanuco",
    "Ica", "Junin", "La Libertad", "Lambayeque", "Lima",
    "Loreto", "Madre de Dios", "Moquegua", "Pasco", "Piura",
    "Puno", "San Martin", "Tacna", "Tumbes", "Ucayali",
)
codigo_a_dpto = {
    f"{numero:02d}": nombre
    for numero, nombre in enumerate(departamentos, start=1)
}
# replace() (not map()) so that a code missing from the table is kept as-is
sumaria["dpto"] = sumaria["ubigeo"].str[:2].replace(codigo_a_dpto)
# Notebook-style inspection of the first two rows
sumaria.head(2)
# Build the geographic "area" variable (Urbana / Rural) from the labelled
# "estrato" variable: strata of 2,000 inhabitants or more are urban, the
# remaining ones are rural.
estratos_urbanos = (
    "de 500,000 a más habitantes",
    "de 100,000 a 499,999 habitantes",
    "de 50,000 a 99,999 habitantes",
    "de 20,000 a 49,999 habitantes",
    "de 2,000 a 19,999 habitantes",
)
estratos_rurales = (
    "de 500 a 1,999 habitantes",
    "Área de empadronamiento rural (aer) simple",
    "Área de empadronamiento rural (aer) compuesto",
)
estrato_a_area = {
    **dict.fromkeys(estratos_urbanos, "Urbana"),
    **dict.fromkeys(estratos_rurales, "Rural"),
}
# pd.read_spss returns value-labelled variables as categoricals by default,
# and Series.replace on a categorical is deprecated (pandas 2.2+) when it
# changes the set of categories, as collapsing eight labels into two does.
# Casting to object first keeps the recode working on old and new pandas;
# labels missing from the table are kept unchanged.
# NOTE(review): if "estrato" is ever loaded as numeric codes instead of
# labels, the urban group would presumably be codes <= 5 — confirm.
sumaria["area"] = sumaria["estrato"].astype(object).replace(estrato_a_area)
# Notebook-style check of how many rows fall in each area
sumaria["area"].value_counts()
# Unweighted cross-tabulation (raw row counts): department by poverty status
pd.crosstab(index=sumaria["dpto"], columns=sumaria["pc_pobre"])
# Unweighted cross-tabulation: area and "estrsocial" by poverty status,
# with row and column totals added
pd.crosstab(
    index=[sumaria["area"], sumaria["estrsocial"]],
    columns=sumaria["pc_pobre"],
    margins=True,
)
# Poverty rate using the expansion factor as weight.
# "facpop" is the weight used below: "factor07" multiplied by "mieperho".
sumaria["facpop"] = sumaria["factor07"].mul(sumaria["mieperho"])
calc = wc.Calculator("facpop")
# Weighted distribution of "pc_pobre", rounded to three decimals and shown
# from the largest share to the smallest
distribucion = calc.distribution(sumaria, "pc_pobre")
distribucion.round(3).sort_values(ascending=False)