diff --git a/Labs/tarea6/GRUPO3_T6.ipynb b/Labs/tarea6/GRUPO3_T6.ipynb
new file mode 100644
index 0000000..fa42696
--- /dev/null
+++ b/Labs/tarea6/GRUPO3_T6.ipynb
@@ -0,0 +1,1133 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "4137dd69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import re\n",
+ "import warnings\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "warnings.filterwarnings('ignore') "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "2ef139e0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " numpanh15 | \n",
+ " numpanh16 | \n",
+ " numpanh17 | \n",
+ " numpanh18 | \n",
+ " numpanh19 | \n",
+ " mes_15 | \n",
+ " ubigeo_15 | \n",
+ " dominio_15 | \n",
+ " p400a3_15 | \n",
+ " ... | \n",
+ " mes_18 | \n",
+ " ubigeo_18 | \n",
+ " dominio_18 | \n",
+ " p400a3_18 | \n",
+ " p4022_18 | \n",
+ " mes_19 | \n",
+ " ubigeo_19 | \n",
+ " dominio_19 | \n",
+ " p400a3_19 | \n",
+ " p4022_19 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 5541 | \n",
+ " 34848 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5542 | \n",
+ " 34846 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 5543 | \n",
+ " 34847 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5544 | \n",
+ " 34849 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 5545 | \n",
+ " 34850 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 1.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5546 rows × 31 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " numper numpanh15 numpanh16 numpanh17 numpanh18 numpanh19 mes_15 \\\n",
+ "0 15368 1158 1158 1158 1158 1158 11 \n",
+ "1 15369 1158 1158 1158 1158 1158 11 \n",
+ "2 15380 1162 1162 1162 1162 1162 11 \n",
+ "3 15381 1162 1162 1162 1162 1162 11 \n",
+ "4 15410 1185 1185 1185 1185 1185 7 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "5541 34848 18549 18549 18549 18549 18549 12 \n",
+ "5542 34846 18549 18549 18549 18549 18549 12 \n",
+ "5543 34847 18549 18549 18549 18549 18549 12 \n",
+ "5544 34849 18549 18549 18549 18549 18549 12 \n",
+ "5545 34850 18549 18549 18549 18549 18549 12 \n",
+ "\n",
+ " ubigeo_15 dominio_15 p400a3_15 ... mes_18 ubigeo_18 \\\n",
+ "0 10101 sierra norte 1946.0 ... 12 10101 \n",
+ "1 10101 sierra norte 1973.0 ... 12 10101 \n",
+ "2 10101 sierra norte 1987.0 ... 12 10101 \n",
+ "3 10101 sierra norte 2009.0 ... 12 10101 \n",
+ "4 10101 sierra norte 1955.0 ... 7 10101 \n",
+ "... ... ... ... ... ... ... \n",
+ "5541 250101 selva 2002.0 ... 12 250101 \n",
+ "5542 250101 selva 1985.0 ... 12 250101 \n",
+ "5543 250101 selva 1976.0 ... 12 250101 \n",
+ "5544 250101 selva 2007.0 ... 12 250101 \n",
+ "5545 250101 selva 2011.0 ... 12 250101 \n",
+ "\n",
+ " dominio_18 p400a3_18 p4022_18 mes_19 ubigeo_19 dominio_19 \\\n",
+ "0 sierra norte 1946.0 0.0 11 10101 sierra norte \n",
+ "1 sierra norte 1973.0 0.0 11 10101 sierra norte \n",
+ "2 sierra norte 1987.0 0.0 11 10101 sierra norte \n",
+ "3 sierra norte 2009.0 0.0 11 10101 sierra norte \n",
+ "4 sierra norte 1955.0 0.0 7 10101 sierra norte \n",
+ "... ... ... ... ... ... ... \n",
+ "5541 selva 2002.0 0.0 10 250101 selva \n",
+ "5542 selva 1985.0 0.0 10 250101 selva \n",
+ "5543 selva 1976.0 0.0 10 250101 selva \n",
+ "5544 selva 2007.0 0.0 10 250101 selva \n",
+ "5545 selva 2011.0 1.0 10 250101 selva \n",
+ "\n",
+ " p400a3_19 p4022_19 \n",
+ "0 1946.0 0.0 \n",
+ "1 1973.0 0.0 \n",
+ "2 1987.0 1.0 \n",
+ "3 2009.0 0.0 \n",
+ "4 1955.0 1.0 \n",
+ "... ... ... \n",
+ "5541 2002.0 1.0 \n",
+ "5542 1985.0 0.0 \n",
+ "5543 1976.0 1.0 \n",
+ "5544 2007.0 1.0 \n",
+ "5545 2011.0 0.0 \n",
+ "\n",
+ "[5546 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Folder that holds the Stata input files. Set the TAREA6_DATA_DIR environment variable to\n",
+ "# run this notebook on another machine; the fallback is the original author's local folder.\n",
+ "DATA_DIR = Path(os.environ.get(\"TAREA6_DATA_DIR\", \"C:/Users/ALICIA/Documents/GitHub/ultima tarea/data\"))\n",
+ "\n",
+ "# convert_categoricals=False: do not turn Stata value labels into pandas categoricals.\n",
+ "panel = pd.read_stata(DATA_DIR / \"data.dta\", convert_categoricals=False)\n",
+ "panel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cce469c2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['numper', 'numpanh15', 'numpanh16', 'numpanh17', 'numpanh18',\n",
+ " 'numpanh19', 'mes_15', 'ubigeo_15', 'dominio_15', 'p400a3_15',\n",
+ " 'p4022_15', 'mes_16', 'ubigeo_16', 'dominio_16', 'p400a3_16',\n",
+ " 'p4022_16', 'mes_17', 'ubigeo_17', 'dominio_17', 'p400a3_17',\n",
+ " 'p4022_17', 'mes_18', 'ubigeo_18', 'dominio_18', 'p400a3_18',\n",
+ " 'p4022_18', 'mes_19', 'ubigeo_19', 'dominio_19', 'p400a3_19',\n",
+ " 'p4022_19'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show every column label of the wide panel.\n",
+ "panel.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9b9d749e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Insert an underscore before the two-digit year (numpanh15 -> numpanh_15) so these columns\n",
+ "# follow the same <stub>_<yy> pattern as the other yearly columns.\n",
+ "# rename() returns a new frame; rebinding `panel` avoids in-place mutation and is safe to re-run\n",
+ "# because labels that are already renamed are simply not found in the mapping.\n",
+ "year_suffixes = ['15', '16', '17', '18', '19']\n",
+ "panel = panel.rename(columns={f'numpanh{yy}': f'numpanh_{yy}' for yy in year_suffixes})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "f7412527",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plain Python list of the panel's column labels.\n",
+ "filter_list = panel.columns.tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "7e38e765",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['numper',\n",
+ " 'numpanh_15',\n",
+ " 'numpanh_16',\n",
+ " 'numpanh_17',\n",
+ " 'numpanh_18',\n",
+ " 'numpanh_19',\n",
+ " 'mes_15',\n",
+ " 'ubigeo_15',\n",
+ " 'dominio_15',\n",
+ " 'p400a3_15',\n",
+ " 'p4022_15',\n",
+ " 'mes_16',\n",
+ " 'ubigeo_16',\n",
+ " 'dominio_16',\n",
+ " 'p400a3_16',\n",
+ " 'p4022_16',\n",
+ " 'mes_17',\n",
+ " 'ubigeo_17',\n",
+ " 'dominio_17',\n",
+ " 'p400a3_17',\n",
+ " 'p4022_17',\n",
+ " 'mes_18',\n",
+ " 'ubigeo_18',\n",
+ " 'dominio_18',\n",
+ " 'p400a3_18',\n",
+ " 'p4022_18',\n",
+ " 'mes_19',\n",
+ " 'ubigeo_19',\n",
+ " 'dominio_19',\n",
+ " 'p400a3_19',\n",
+ " 'p4022_19']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Drop repeated labels while keeping first-seen order (dict keys preserve insertion order).\n",
+ "new_list = [*dict.fromkeys(filter_list)]\n",
+ "new_list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "416a041f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " period | \n",
+ " numpanh | \n",
+ " mes | \n",
+ " ubigeo | \n",
+ " dominio | \n",
+ " p400a3 | \n",
+ " p4022 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 15 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 27725 | \n",
+ " 34848 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 27726 | \n",
+ " 34846 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 27727 | \n",
+ " 34847 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 27728 | \n",
+ " 34849 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 27729 | \n",
+ " 34850 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
27730 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " numper period numpanh mes ubigeo dominio p400a3 p4022\n",
+ "0 15368 15 1158 11 10101 sierra norte 1946.0 0.0\n",
+ "1 15369 15 1158 11 10101 sierra norte 1973.0 0.0\n",
+ "2 15380 15 1162 11 10101 sierra norte 1987.0 1.0\n",
+ "3 15381 15 1162 11 10101 sierra norte 2009.0 1.0\n",
+ "4 15410 15 1185 7 10101 sierra norte 1955.0 0.0\n",
+ "... ... ... ... ... ... ... ... ...\n",
+ "27725 34848 19 18549 10 250101 selva 2002.0 1.0\n",
+ "27726 34846 19 18549 10 250101 selva 1985.0 0.0\n",
+ "27727 34847 19 18549 10 250101 selva 1976.0 1.0\n",
+ "27728 34849 19 18549 10 250101 selva 2007.0 1.0\n",
+ "27729 34850 19 18549 10 250101 selva 2011.0 0.0\n",
+ "\n",
+ "[27730 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reshape wide -> long: each group of <stub>_<yy> columns collapses into one <stub> column,\n",
+ "# the numeric suffix goes into `period`, and `numper` identifies the rows of the wide table.\n",
+ "stub_columns = ['numpanh', 'mes', 'ubigeo', 'dominio', 'p400a3', 'p4022']\n",
+ "long_panel = pd.wide_to_long(panel, stubnames=stub_columns, i=['numper'], j='period', sep='_')\n",
+ "# Move the (numper, period) index back into regular columns.\n",
+ "reshape_panel = long_panel.reset_index()\n",
+ "reshape_panel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "e9ab148b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ubigeo | \n",
+ " distrito | \n",
+ " provincia | \n",
+ " region | \n",
+ " unidos | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 10101 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 10102 | \n",
+ " Asuncion | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 10103 | \n",
+ " Balsas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 10104 | \n",
+ " Cheto | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 10105 | \n",
+ " Chiliquin | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1869 | \n",
+ " 250302 | \n",
+ " Irazola | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1870 | \n",
+ " 250303 | \n",
+ " Curimana | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1871 | \n",
+ " 250304 | \n",
+ " Neshuya | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1872 | \n",
+ " 250305 | \n",
+ " Alexander von Humboldt | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1873 | \n",
+ " 250401 | \n",
+ " Purus | \n",
+ " Purus | \n",
+ " Ucayali | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1874 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ubigeo distrito provincia region unidos\n",
+ "0 10101 Chachapoyas Chachapoyas Amazonas 1\n",
+ "1 10102 Asuncion Chachapoyas Amazonas 1\n",
+ "2 10103 Balsas Chachapoyas Amazonas 0\n",
+ "3 10104 Cheto Chachapoyas Amazonas 1\n",
+ "4 10105 Chiliquin Chachapoyas Amazonas 0\n",
+ "... ... ... ... ... ...\n",
+ "1869 250302 Irazola Padre Abad Ucayali 0\n",
+ "1870 250303 Curimana Padre Abad Ucayali 0\n",
+ "1871 250304 Neshuya Padre Abad Ucayali 0\n",
+ "1872 250305 Alexander von Humboldt Padre Abad Ucayali 0\n",
+ "1873 250401 Purus Purus Ucayali 1\n",
+ "\n",
+ "[1874 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Folder that holds the Stata input files. Set the TAREA6_DATA_DIR environment variable to\n",
+ "# run this notebook on another machine; the fallback is the original author's local folder.\n",
+ "DATA_DIR = Path(os.environ.get(\"TAREA6_DATA_DIR\", \"C:/Users/ALICIA/Documents/GitHub/ultima tarea/data\"))\n",
+ "\n",
+ "# convert_categoricals=False: do not turn Stata value labels into pandas categoricals.\n",
+ "unidos = pd.read_stata(DATA_DIR / \"unidos.dta\", convert_categoricals=False)\n",
+ "unidos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a13973a0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " period | \n",
+ " numpanh | \n",
+ " mes | \n",
+ " ubigeo | \n",
+ " dominio | \n",
+ " p400a3 | \n",
+ " p4022 | \n",
+ " distrito | \n",
+ " provincia | \n",
+ " region | \n",
+ " unidos | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 1.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 15 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 27725 | \n",
+ " 34777 | \n",
+ " 19 | \n",
+ " 18472 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1942.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27726 | \n",
+ " 34779 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1988.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27727 | \n",
+ " 34781 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 2015.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27728 | \n",
+ " 34778 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1984.0 | \n",
+ " 1.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 27729 | \n",
+ " 34780 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 2010.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
27730 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " numper period numpanh mes ubigeo dominio p400a3 p4022 \\\n",
+ "0 15368 15 1158 11 10101 sierra norte 1946.0 0.0 \n",
+ "1 15369 15 1158 11 10101 sierra norte 1973.0 0.0 \n",
+ "2 15380 15 1162 11 10101 sierra norte 1987.0 1.0 \n",
+ "3 15381 15 1162 11 10101 sierra norte 2009.0 1.0 \n",
+ "4 15410 15 1185 7 10101 sierra norte 1955.0 0.0 \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "27725 34777 19 18472 7 220301 selva 1942.0 0.0 \n",
+ "27726 34779 19 18473 7 220301 selva 1988.0 0.0 \n",
+ "27727 34781 19 18473 7 220301 selva 2015.0 0.0 \n",
+ "27728 34778 19 18473 7 220301 selva 1984.0 1.0 \n",
+ "27729 34780 19 18473 7 220301 selva 2010.0 0.0 \n",
+ "\n",
+ " distrito provincia region unidos \n",
+ "0 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "1 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "2 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "3 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "4 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "... ... ... ... ... \n",
+ "27725 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27726 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27727 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27728 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27729 San Jose de Sisa El Dorado San Martin 0 \n",
+ "\n",
+ "[27730 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Inner join on `ubigeo`: attach the columns of `unidos` to every long-panel row.\n",
+ "juntos = reshape_panel.merge(unidos, how='inner', on='ubigeo')\n",
+ "juntos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee08714c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Labs/tarea6/GRUPO_3_GoogleCollab.ipynb b/Labs/tarea6/GRUPO_3_GoogleCollab.ipynb
new file mode 100644
index 0000000..1889542
--- /dev/null
+++ b/Labs/tarea6/GRUPO_3_GoogleCollab.ipynb
@@ -0,0 +1,1759 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install ghostscript\n",
+ "!pip install camelot-py[cv]\n",
+ "!pip install excalibur-py\n",
+ "!apt install ghostscript python3-tk"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mvCaIqLfjQ4D",
+ "outputId": "e5176b58-3f55-49c5-f836-791cfce83878"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting ghostscript\n",
+ " Downloading ghostscript-0.7-py2.py3-none-any.whl (25 kB)\n",
+ "Requirement already satisfied: setuptools>=38.6.0 in /usr/local/lib/python3.10/dist-packages (from ghostscript) (67.7.2)\n",
+ "Installing collected packages: ghostscript\n",
+ "Successfully installed ghostscript-0.7\n",
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting camelot-py[cv]\n",
+ " Downloading camelot_py-0.11.0-py3-none-any.whl (40 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: chardet>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (4.0.0)\n",
+ "Requirement already satisfied: click>=6.7 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (8.1.3)\n",
+ "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (1.22.4)\n",
+ "Requirement already satisfied: openpyxl>=2.5.8 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (3.0.10)\n",
+ "Requirement already satisfied: pandas>=0.23.4 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (1.5.3)\n",
+ "Collecting pdfminer.six>=20200726 (from camelot-py[cv])\n",
+ " Downloading pdfminer.six-20221105-py3-none-any.whl (5.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m82.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting pypdf>=3.0.0 (from camelot-py[cv])\n",
+ " Downloading pypdf-3.9.1-py3-none-any.whl (249 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m249.3/249.3 kB\u001b[0m \u001b[31m31.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (0.8.10)\n",
+ "Requirement already satisfied: ghostscript>=0.7 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (0.7)\n",
+ "Requirement already satisfied: opencv-python>=3.4.2.17 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (4.7.0.72)\n",
+ "INFO: pip is looking at multiple versions of camelot-py[cv] to determine which version is compatible with other requirements. This could take a while.\n",
+ "Collecting camelot-py[cv]\n",
+ " Downloading camelot_py-0.10.1-py3-none-any.whl (40 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: PyPDF2>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]) (2.12.1)\n",
+ " Downloading camelot_py-0.10.0-py3-none-any.whl (40 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading camelot_py-0.9.0-py3-none-any.whl (43 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl>=2.5.8->camelot-py[cv]) (1.1.0)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23.4->camelot-py[cv]) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23.4->camelot-py[cv]) (2022.7.1)\n",
+ "Requirement already satisfied: charset-normalizer>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six>=20200726->camelot-py[cv]) (2.0.12)\n",
+ "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six>=20200726->camelot-py[cv]) (40.0.2)\n",
+ "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six>=20200726->camelot-py[cv]) (1.15.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas>=0.23.4->camelot-py[cv]) (1.16.0)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six>=20200726->camelot-py[cv]) (2.21)\n",
+ "Installing collected packages: pdfminer.six, camelot-py\n",
+ "Successfully installed camelot-py-0.9.0 pdfminer.six-20221105\n",
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting excalibur-py\n",
+ " Downloading excalibur_py-0.4.3-py3-none-any.whl (1.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m30.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: camelot-py[cv]>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from excalibur-py) (0.9.0)\n",
+ "Collecting celery>=4.1.1 (from excalibur-py)\n",
+ " Downloading celery-5.3.0-py3-none-any.whl (420 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m420.3/420.3 kB\u001b[0m \u001b[31m46.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: Click>=7.0 in /usr/local/lib/python3.10/dist-packages (from excalibur-py) (8.1.3)\n",
+ "Collecting configparser<3.6.0,>=3.5.0 (from excalibur-py)\n",
+ " Downloading configparser-3.5.3-py3-none-any.whl (21 kB)\n",
+ "Requirement already satisfied: Flask>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from excalibur-py) (2.2.4)\n",
+ "Requirement already satisfied: SQLAlchemy>=1.2.12 in /usr/local/lib/python3.10/dist-packages (from excalibur-py) (2.0.10)\n",
+ "Collecting Werkzeug<1.0.0 (from excalibur-py)\n",
+ " Downloading Werkzeug-0.16.1-py2.py3-none-any.whl (327 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m327.4/327.4 kB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: chardet>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (4.0.0)\n",
+ "Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (1.22.4)\n",
+ "Requirement already satisfied: openpyxl>=2.5.8 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (3.0.10)\n",
+ "Requirement already satisfied: pandas>=0.23.4 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (1.5.3)\n",
+ "Requirement already satisfied: pdfminer.six>=20200726 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (20221105)\n",
+ "Requirement already satisfied: PyPDF2>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (2.12.1)\n",
+ "Requirement already satisfied: opencv-python>=3.4.2.17 in /usr/local/lib/python3.10/dist-packages (from camelot-py[cv]>=0.7.1->excalibur-py) (4.7.0.72)\n",
+ "Collecting billiard<5.0,>=4.1.0 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading billiard-4.1.0-py3-none-any.whl (86 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.7/86.7 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting click-didyoumean>=0.3.0 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading click_didyoumean-0.3.0-py3-none-any.whl (2.7 kB)\n",
+ "Collecting click-plugins>=1.1.1 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)\n",
+ "Collecting click-repl>=0.2.0 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading click_repl-0.2.0-py3-none-any.whl (5.2 kB)\n",
+ "Collecting kombu<6.0,>=5.3.0 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading kombu-5.3.0-py3-none-any.whl (198 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m198.5/198.5 kB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from celery>=4.1.1->excalibur-py) (2.8.2)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from celery>=4.1.1->excalibur-py) (2023.3)\n",
+ "Collecting vine<6.0,>=5.0.0 (from celery>=4.1.1->excalibur-py)\n",
+ " Downloading vine-5.0.0-py2.py3-none-any.whl (9.4 kB)\n",
+ "INFO: pip is looking at multiple versions of flask to determine which version is compatible with other requirements. This could take a while.\n",
+ "Collecting Flask>=1.0.2 (from excalibur-py)\n",
+ " Downloading Flask-2.3.2-py3-none-any.whl (96 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.9/96.9 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.3.1-py3-none-any.whl (96 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.0/97.0 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.3.0-py3-none-any.whl (96 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.0/97.0 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.2.5-py3-none-any.whl (101 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.2.3-py3-none-any.whl (101 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.8/101.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.2.2-py3-none-any.whl (101 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.5/101.5 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.2.1-py3-none-any.whl (101 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.5/101.5 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hINFO: pip is looking at multiple versions of flask to determine which version is compatible with other requirements. This could take a while.\n",
+ " Downloading Flask-2.2.0-py3-none-any.whl (101 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.1/101.1 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.1.3-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.1.2-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.1.1-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.1.0-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hINFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. If you want to abort this run, press Ctrl + C.\n",
+ " Downloading Flask-2.0.3-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.0.2-py3-none-any.whl (95 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.0.1-py3-none-any.whl (94 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-2.0.0-py3-none-any.whl (93 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.2/93.2 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-1.1.4-py2.py3-none-any.whl (94 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting Jinja2<3.0,>=2.10.1 (from Flask>=1.0.2->excalibur-py)\n",
+ " Downloading Jinja2-2.11.3-py2.py3-none-any.whl (125 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.7/125.7 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting itsdangerous<2.0,>=0.24 (from Flask>=1.0.2->excalibur-py)\n",
+ " Downloading itsdangerous-1.1.0-py2.py3-none-any.whl (16 kB)\n",
+ "Collecting Flask>=1.0.2 (from excalibur-py)\n",
+ " Downloading Flask-1.1.3-py2.py3-none-any.whl (94 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Downloading Flask-1.1.2-py2.py3-none-any.whl (94 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.6/94.6 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: Jinja2>=2.10.1 in /usr/local/lib/python3.10/dist-packages (from Flask>=1.0.2->excalibur-py) (3.1.2)\n",
+ "Requirement already satisfied: itsdangerous>=0.24 in /usr/local/lib/python3.10/dist-packages (from Flask>=1.0.2->excalibur-py) (2.1.2)\n",
+ "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.2.12->excalibur-py) (4.5.0)\n",
+ "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy>=1.2.12->excalibur-py) (2.0.2)\n",
+ "Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from click-repl>=0.2.0->celery>=4.1.1->excalibur-py) (3.0.38)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from click-repl>=0.2.0->celery>=4.1.1->excalibur-py) (1.16.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2>=2.10.1->Flask>=1.0.2->excalibur-py) (2.1.2)\n",
+ "Collecting amqp<6.0.0,>=5.1.1 (from kombu<6.0,>=5.3.0->celery>=4.1.1->excalibur-py)\n",
+ " Downloading amqp-5.1.1-py3-none-any.whl (50 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl>=2.5.8->camelot-py[cv]>=0.7.1->excalibur-py) (1.1.0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23.4->camelot-py[cv]>=0.7.1->excalibur-py) (2022.7.1)\n",
+ "Requirement already satisfied: charset-normalizer>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six>=20200726->camelot-py[cv]>=0.7.1->excalibur-py) (2.0.12)\n",
+ "Requirement already satisfied: cryptography>=36.0.0 in /usr/local/lib/python3.10/dist-packages (from pdfminer.six>=20200726->camelot-py[cv]>=0.7.1->excalibur-py) (40.0.2)\n",
+ "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography>=36.0.0->pdfminer.six>=20200726->camelot-py[cv]>=0.7.1->excalibur-py) (1.15.1)\n",
+ "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->click-repl>=0.2.0->celery>=4.1.1->excalibur-py) (0.2.6)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six>=20200726->camelot-py[cv]>=0.7.1->excalibur-py) (2.21)\n",
+ "Installing collected packages: Werkzeug, vine, configparser, click-plugins, click-didyoumean, billiard, Flask, click-repl, amqp, kombu, celery, excalibur-py\n",
+ " Attempting uninstall: Werkzeug\n",
+ " Found existing installation: Werkzeug 2.3.0\n",
+ " Uninstalling Werkzeug-2.3.0:\n",
+ " Successfully uninstalled Werkzeug-2.3.0\n",
+ " Attempting uninstall: Flask\n",
+ " Found existing installation: Flask 2.2.4\n",
+ " Uninstalling Flask-2.2.4:\n",
+ " Successfully uninstalled Flask-2.2.4\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tensorboard 2.12.2 requires werkzeug>=1.0.1, but you have werkzeug 0.16.1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed Flask-1.1.2 Werkzeug-0.16.1 amqp-5.1.1 billiard-4.1.0 celery-5.3.0 click-didyoumean-0.3.0 click-plugins-1.1.1 click-repl-0.2.0 configparser-3.5.3 excalibur-py-0.4.3 kombu-5.3.0 vine-5.0.0\n",
+ "Reading package lists... Done\n",
+ "Building dependency tree \n",
+ "Reading state information... Done\n",
+ "The following additional packages will be installed:\n",
+ " blt fonts-droid-fallback fonts-noto-mono fonts-urw-base35 libgs9\n",
+ " libgs9-common libidn11 libijs-0.35 libjbig2dec0 poppler-data tk8.6-blt2.5\n",
+ "Suggested packages:\n",
+ " blt-demo fonts-noto fonts-freefont-otf | fonts-freefont-ttf fonts-texgyre\n",
+ " ghostscript-x poppler-utils fonts-japanese-mincho | fonts-ipafont-mincho\n",
+ " fonts-japanese-gothic | fonts-ipafont-gothic fonts-arphic-ukai\n",
+ " fonts-arphic-uming fonts-nanum tix python3-tk-dbg\n",
+ "The following NEW packages will be installed:\n",
+ " blt fonts-droid-fallback fonts-noto-mono fonts-urw-base35 ghostscript libgs9\n",
+ " libgs9-common libidn11 libijs-0.35 libjbig2dec0 poppler-data python3-tk\n",
+ " tk8.6-blt2.5\n",
+ "0 upgraded, 13 newly installed, 0 to remove and 38 not upgraded.\n",
+ "Need to get 13.4 MB of archives.\n",
+ "After this operation, 54.8 MB of additional disk space will be used.\n",
+ "Get:1 http://archive.ubuntu.com/ubuntu focal/main amd64 fonts-droid-fallback all 1:6.0.1r16-1.1 [1,805 kB]\n",
+ "Get:2 http://archive.ubuntu.com/ubuntu focal/main amd64 poppler-data all 0.4.9-2 [1,475 kB]\n",
+ "Get:3 http://archive.ubuntu.com/ubuntu focal/main amd64 tk8.6-blt2.5 amd64 2.5.3+dfsg-4 [572 kB]\n",
+ "Get:4 http://archive.ubuntu.com/ubuntu focal/main amd64 blt amd64 2.5.3+dfsg-4 [4,944 B]\n",
+ "Get:5 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 fonts-noto-mono all 20200323-1build1~ubuntu20.04.1 [80.6 kB]\n",
+ "Get:6 http://archive.ubuntu.com/ubuntu focal/main amd64 fonts-urw-base35 all 20170801.1-3 [6,333 kB]\n",
+ "Get:7 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libgs9-common all 9.50~dfsg-5ubuntu4.7 [681 kB]\n",
+ "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 libidn11 amd64 1.33-2.2ubuntu2 [46.2 kB]\n",
+ "Get:9 http://archive.ubuntu.com/ubuntu focal/main amd64 libijs-0.35 amd64 0.35-15 [15.7 kB]\n",
+ "Get:10 http://archive.ubuntu.com/ubuntu focal/main amd64 libjbig2dec0 amd64 0.18-1ubuntu1 [60.0 kB]\n",
+ "Get:11 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 libgs9 amd64 9.50~dfsg-5ubuntu4.7 [2,173 kB]\n",
+ "Get:12 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 ghostscript amd64 9.50~dfsg-5ubuntu4.7 [51.9 kB]\n",
+ "Get:13 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 python3-tk amd64 3.8.10-0ubuntu1~20.04 [104 kB]\n",
+ "Fetched 13.4 MB in 0s (37.4 MB/s)\n",
+ "Selecting previously unselected package fonts-droid-fallback.\n",
+ "(Reading database ... 122541 files and directories currently installed.)\n",
+ "Preparing to unpack .../00-fonts-droid-fallback_1%3a6.0.1r16-1.1_all.deb ...\n",
+ "Unpacking fonts-droid-fallback (1:6.0.1r16-1.1) ...\n",
+ "Selecting previously unselected package poppler-data.\n",
+ "Preparing to unpack .../01-poppler-data_0.4.9-2_all.deb ...\n",
+ "Unpacking poppler-data (0.4.9-2) ...\n",
+ "Selecting previously unselected package tk8.6-blt2.5.\n",
+ "Preparing to unpack .../02-tk8.6-blt2.5_2.5.3+dfsg-4_amd64.deb ...\n",
+ "Unpacking tk8.6-blt2.5 (2.5.3+dfsg-4) ...\n",
+ "Selecting previously unselected package blt.\n",
+ "Preparing to unpack .../03-blt_2.5.3+dfsg-4_amd64.deb ...\n",
+ "Unpacking blt (2.5.3+dfsg-4) ...\n",
+ "Selecting previously unselected package fonts-noto-mono.\n",
+ "Preparing to unpack .../04-fonts-noto-mono_20200323-1build1~ubuntu20.04.1_all.deb ...\n",
+ "Unpacking fonts-noto-mono (20200323-1build1~ubuntu20.04.1) ...\n",
+ "Selecting previously unselected package fonts-urw-base35.\n",
+ "Preparing to unpack .../05-fonts-urw-base35_20170801.1-3_all.deb ...\n",
+ "Unpacking fonts-urw-base35 (20170801.1-3) ...\n",
+ "Selecting previously unselected package libgs9-common.\n",
+ "Preparing to unpack .../06-libgs9-common_9.50~dfsg-5ubuntu4.7_all.deb ...\n",
+ "Unpacking libgs9-common (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Selecting previously unselected package libidn11:amd64.\n",
+ "Preparing to unpack .../07-libidn11_1.33-2.2ubuntu2_amd64.deb ...\n",
+ "Unpacking libidn11:amd64 (1.33-2.2ubuntu2) ...\n",
+ "Selecting previously unselected package libijs-0.35:amd64.\n",
+ "Preparing to unpack .../08-libijs-0.35_0.35-15_amd64.deb ...\n",
+ "Unpacking libijs-0.35:amd64 (0.35-15) ...\n",
+ "Selecting previously unselected package libjbig2dec0:amd64.\n",
+ "Preparing to unpack .../09-libjbig2dec0_0.18-1ubuntu1_amd64.deb ...\n",
+ "Unpacking libjbig2dec0:amd64 (0.18-1ubuntu1) ...\n",
+ "Selecting previously unselected package libgs9:amd64.\n",
+ "Preparing to unpack .../10-libgs9_9.50~dfsg-5ubuntu4.7_amd64.deb ...\n",
+ "Unpacking libgs9:amd64 (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Selecting previously unselected package ghostscript.\n",
+ "Preparing to unpack .../11-ghostscript_9.50~dfsg-5ubuntu4.7_amd64.deb ...\n",
+ "Unpacking ghostscript (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Selecting previously unselected package python3-tk:amd64.\n",
+ "Preparing to unpack .../12-python3-tk_3.8.10-0ubuntu1~20.04_amd64.deb ...\n",
+ "Unpacking python3-tk:amd64 (3.8.10-0ubuntu1~20.04) ...\n",
+ "Setting up tk8.6-blt2.5 (2.5.3+dfsg-4) ...\n",
+ "Setting up fonts-noto-mono (20200323-1build1~ubuntu20.04.1) ...\n",
+ "Setting up libijs-0.35:amd64 (0.35-15) ...\n",
+ "Setting up blt (2.5.3+dfsg-4) ...\n",
+ "Setting up python3-tk:amd64 (3.8.10-0ubuntu1~20.04) ...\n",
+ "Setting up fonts-urw-base35 (20170801.1-3) ...\n",
+ "Setting up poppler-data (0.4.9-2) ...\n",
+ "Setting up libjbig2dec0:amd64 (0.18-1ubuntu1) ...\n",
+ "Setting up libidn11:amd64 (1.33-2.2ubuntu2) ...\n",
+ "Setting up fonts-droid-fallback (1:6.0.1r16-1.1) ...\n",
+ "Setting up libgs9-common (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Setting up libgs9:amd64 (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Setting up ghostscript (9.50~dfsg-5ubuntu4.7) ...\n",
+ "Processing triggers for fontconfig (2.13.1-2ubuntu3) ...\n",
+ "Processing triggers for libc-bin (2.31-0ubuntu9.9) ...\n",
+ "Processing triggers for man-db (2.9.1-1) ...\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "m07h1PVqi5o7",
+ "outputId": "46571940-7515-4734-c483-7816759b8f78"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Collecting PyPDF2==2.12.1\n",
+ " Downloading pypdf2-2.12.1-py3-none-any.whl (222 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.8/222.8 kB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: PyPDF2\n",
+ "Successfully installed PyPDF2-2.12.1\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install --upgrade PyPDF2==2.12.1\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import camelot as cm # libreria de camelot\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from pandas import Series, DataFrame\n",
+ "import os\n",
+ "from PyPDF2 import PdfFileReader\n",
+ "import re # expresiones regulares"
+ ],
+ "metadata": {
+ "id": "gqdYIizhi_lx"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "tables = cm.read_pdf(\"/content/sample_data/G3_JUNIN.pdf\")\n"
+ ],
+ "metadata": {
+ "id": "K6YPDnp_jdnk"
+ },
+ "execution_count": 6,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "tables"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "SOt__8yzj4h8",
+ "outputId": "7d68f20f-f9d7-4888-c2b2-37893527fba7"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "dir(tables\n",
+ " )"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Jlkpc5OmjoQh",
+ "outputId": "3c7e721e-3e41-4f79-81e4-6c6bc7efa6f5"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['__class__',\n",
+ " '__delattr__',\n",
+ " '__dict__',\n",
+ " '__dir__',\n",
+ " '__doc__',\n",
+ " '__eq__',\n",
+ " '__format__',\n",
+ " '__ge__',\n",
+ " '__getattribute__',\n",
+ " '__getitem__',\n",
+ " '__gt__',\n",
+ " '__hash__',\n",
+ " '__init__',\n",
+ " '__init_subclass__',\n",
+ " '__le__',\n",
+ " '__len__',\n",
+ " '__lt__',\n",
+ " '__module__',\n",
+ " '__ne__',\n",
+ " '__new__',\n",
+ " '__reduce__',\n",
+ " '__reduce_ex__',\n",
+ " '__repr__',\n",
+ " '__setattr__',\n",
+ " '__sizeof__',\n",
+ " '__str__',\n",
+ " '__subclasshook__',\n",
+ " '__weakref__',\n",
+ " '_compress_dir',\n",
+ " '_format_func',\n",
+ " '_tables',\n",
+ " '_write_file',\n",
+ " 'export',\n",
+ " 'n']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "dir(tables[0])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ad5c0C74kITO",
+ "outputId": "698760b7-574c-4e3a-9678-99f45898a56b"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['__class__',\n",
+ " '__delattr__',\n",
+ " '__dict__',\n",
+ " '__dir__',\n",
+ " '__doc__',\n",
+ " '__eq__',\n",
+ " '__format__',\n",
+ " '__ge__',\n",
+ " '__getattribute__',\n",
+ " '__gt__',\n",
+ " '__hash__',\n",
+ " '__init__',\n",
+ " '__init_subclass__',\n",
+ " '__le__',\n",
+ " '__lt__',\n",
+ " '__module__',\n",
+ " '__ne__',\n",
+ " '__new__',\n",
+ " '__reduce__',\n",
+ " '__reduce_ex__',\n",
+ " '__repr__',\n",
+ " '__setattr__',\n",
+ " '__sizeof__',\n",
+ " '__str__',\n",
+ " '__subclasshook__',\n",
+ " '__weakref__',\n",
+ " '_bbox',\n",
+ " '_image',\n",
+ " '_segments',\n",
+ " '_text',\n",
+ " '_textedges',\n",
+ " 'accuracy',\n",
+ " 'cells',\n",
+ " 'cols',\n",
+ " 'data',\n",
+ " 'df',\n",
+ " 'flavor',\n",
+ " 'order',\n",
+ " 'page',\n",
+ " 'parsing_report',\n",
+ " 'rows',\n",
+ " 'set_all_edges',\n",
+ " 'set_border',\n",
+ " 'set_edges',\n",
+ " 'set_span',\n",
+ " 'shape',\n",
+ " 'to_csv',\n",
+ " 'to_excel',\n",
+ " 'to_html',\n",
+ " 'to_json',\n",
+ " 'to_sqlite',\n",
+ " 'whitespace']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "tables[0].cells "
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5IAXhe2OkgAV",
+ "outputId": "1e02f6df-776c-47c1-e5f5-9f4d4b2e8ede"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[[,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ],\n",
+ " [,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ,\n",
+ " ]]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "tables[0].data # entrega cada fila de la tabla en formato lista "
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "rhjmfd4tkmgc",
+ "outputId": "a30d8fc2-cebb-4bb0-ca23-31c457259ecb"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[['#',\n",
+ " 'Año JEC',\n",
+ " 'Código \\nmodular',\n",
+ " 'Código \\nlocal',\n",
+ " 'Nombre de la IE',\n",
+ " 'DRE',\n",
+ " 'UGEL',\n",
+ " 'Ubigeo',\n",
+ " 'Departamento',\n",
+ " 'Provincia',\n",
+ " 'Distrito',\n",
+ " 'Dirección',\n",
+ " 'Área Geográfica'],\n",
+ " ['1073',\n",
+ " '2015',\n",
+ " '1214410',\n",
+ " '234382',\n",
+ " 'SANTA ROSA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL \\nCHANCHAMAYO',\n",
+ " '120305',\n",
+ " 'JUNIN',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'SAN RAMON',\n",
+ " 'AVENIDA 06 DE AGOSTO S/N',\n",
+ " 'URBANA'],\n",
+ " ['1078',\n",
+ " '2015',\n",
+ " '0692814',\n",
+ " '230077',\n",
+ " 'MARIA AUXILIADORA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL \\nCHANCHAMAYO',\n",
+ " '120301',\n",
+ " 'JUNIN',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'CALLE SAN CARLOS S/N',\n",
+ " 'URBANA'],\n",
+ " ['1080',\n",
+ " '2015',\n",
+ " '0738799',\n",
+ " '232509',\n",
+ " 'PUCHARINI',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL \\nCHANCHAMAYO',\n",
+ " '120302',\n",
+ " 'JUNIN',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'PERENE',\n",
+ " 'CARRETERA MARGINAL KM \\n53 S/N',\n",
+ " 'URBANA'],\n",
+ " ['1081',\n",
+ " '2015',\n",
+ " '0599175',\n",
+ " '230058',\n",
+ " 'NUESTRA SEÑORA DE LAS \\nMERCEDES',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL \\nCHANCHAMAYO',\n",
+ " '120301',\n",
+ " 'JUNIN',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'JIRON JOSE DE SAN MARTIN \\nS/N',\n",
+ " 'URBANA'],\n",
+ " ['1082',\n",
+ " '2015',\n",
+ " '0373290',\n",
+ " '230906',\n",
+ " 'Perené',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL \\nCHANCHAMAYO',\n",
+ " '120302',\n",
+ " 'JUNIN',\n",
+ " 'CHANCHAMAYO',\n",
+ " 'PERENÉ',\n",
+ " 'JIRON LOS CAFETOS PAMPA \\nSILVA S/N',\n",
+ " 'URBANA'],\n",
+ " ['1085',\n",
+ " '2015',\n",
+ " '0525428',\n",
+ " '229917',\n",
+ " 'AUGUSTO SALAZAR BONDY',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CHUPACA',\n",
+ " '120214',\n",
+ " 'JUNIN',\n",
+ " 'CONCEPCION',\n",
+ " 'SAN JOSE DE \\nQUERO',\n",
+ " 'AVENIDA CONCEPCION S/N',\n",
+ " 'URBANA'],\n",
+ " ['1086',\n",
+ " '2015',\n",
+ " '0590919',\n",
+ " '249109',\n",
+ " 'HEROES DE LA BREÑA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CHUPACA',\n",
+ " '120905',\n",
+ " 'JUNIN',\n",
+ " 'CHUPACA',\n",
+ " 'HUAMANCACA \\nCHICO',\n",
+ " 'AVENIDA MARIA PARADO DE \\nBELLIDO S/N',\n",
+ " 'URBANA'],\n",
+ " ['1087',\n",
+ " '2015',\n",
+ " '0372912',\n",
+ " '248850',\n",
+ " 'AMAUTA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CHUPACA',\n",
+ " '120902',\n",
+ " 'JUNIN',\n",
+ " 'CHUPACA',\n",
+ " 'AHUAC',\n",
+ " 'AVENIDA MARISCAL CACERES \\n1007',\n",
+ " 'URBANA'],\n",
+ " ['1088',\n",
+ " '2015',\n",
+ " '0372961',\n",
+ " '248930',\n",
+ " 'SANTIAGO LEON',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CHUPACA',\n",
+ " '120903',\n",
+ " 'JUNIN',\n",
+ " 'CHUPACA',\n",
+ " 'CHONGOS BAJO',\n",
+ " 'CALLE LA MAR S/N',\n",
+ " 'RURAL'],\n",
+ " ['1090',\n",
+ " '2015',\n",
+ " '0580290',\n",
+ " '249492',\n",
+ " 'CAHUIDE',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CHUPACA',\n",
+ " '120909',\n",
+ " 'JUNIN',\n",
+ " 'CHUPACA',\n",
+ " 'YANACANCHA',\n",
+ " 'CARRETERA PRINCIPAL LAIVE \\nS/N',\n",
+ " 'URBANA'],\n",
+ " ['1093',\n",
+ " '2015',\n",
+ " '1099852',\n",
+ " '227555',\n",
+ " '31511 LORENZO ALCALA POMALAZA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CONCEPCION',\n",
+ " '120201',\n",
+ " 'JUNIN',\n",
+ " 'CONCEPCION',\n",
+ " 'CONCEPCION',\n",
+ " 'AVENIDA AGRICULTURA 542',\n",
+ " 'URBANA'],\n",
+ " ['1094',\n",
+ " '2015',\n",
+ " '0372730',\n",
+ " '227490',\n",
+ " 'HEROINAS TOLEDO',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CONCEPCION',\n",
+ " '120201',\n",
+ " 'JUNIN',\n",
+ " 'CONCEPCION',\n",
+ " 'CONCEPCION',\n",
+ " 'AVENIDA AGRICULTURA 596-\\n598',\n",
+ " 'URBANA'],\n",
+ " ['1095',\n",
+ " '2015',\n",
+ " '0373027',\n",
+ " '229012',\n",
+ " 'APU INCA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL CONCEPCION',\n",
+ " '120206',\n",
+ " 'JUNIN',\n",
+ " 'CONCEPCION',\n",
+ " 'COMAS',\n",
+ " 'CALLE ESTADIO MUNICIPAL \\nS/N',\n",
+ " 'URBANA'],\n",
+ " ['1100',\n",
+ " '2015',\n",
+ " '0372946',\n",
+ " '224608',\n",
+ " 'JOSE OLAYA',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL HUANCAYO',\n",
+ " '120117',\n",
+ " 'JUNIN',\n",
+ " 'HUANCAYO',\n",
+ " 'HUALHUAS',\n",
+ " 'AVENIDA ALFONSO UGARTE \\n944-970',\n",
+ " 'URBANA'],\n",
+ " ['1101',\n",
+ " '2015',\n",
+ " '0667022',\n",
+ " '224383',\n",
+ " 'SALESIANO DON BOSCO',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL HUANCAYO',\n",
+ " '120114',\n",
+ " 'JUNIN',\n",
+ " 'HUANCAYO',\n",
+ " 'EL TAMBO',\n",
+ " 'AVENIDA HUANCAVELICA \\n165',\n",
+ " 'URBANA'],\n",
+ " ['1103',\n",
+ " '2015',\n",
+ " '0372870',\n",
+ " '226551',\n",
+ " 'CHINCHAYSUYO',\n",
+ " 'DRE JUNIN',\n",
+ " 'UGEL HUANCAYO',\n",
+ " '120133',\n",
+ " 'JUNIN',\n",
+ " 'HUANCAYO',\n",
+ " 'SAPALLANGA',\n",
+ " 'AVENIDA PEÐALOZA S/N',\n",
+ " 'RURAL']]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data = pd.concat(tables, ignore_index=True)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 310
+ },
+ "id": "qzjescS-kUhJ",
+ "outputId": "c4598f0d-b235-4e7b-c90d-25c50cac0e5f"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "TypeError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtables\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfind_stack_level\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m )\n\u001b[0;32m--> 331\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 332\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[0;31m# error: \"Callable[[VarArg(Any), KwArg(Any)], Any]\" has no\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;36m3\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 367\u001b[0m \"\"\"\n\u001b[0;32m--> 368\u001b[0;31m op = _Concatenator(\n\u001b[0m\u001b[1;32m 369\u001b[0m \u001b[0mobjs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/reshape/concat.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;34m\"only Series and DataFrame objs are valid\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m )\n\u001b[0;32m--> 458\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 459\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 460\u001b[0m \u001b[0mndims\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mTypeError\u001b[0m: cannot concatenate object of type ''; only Series and DataFrame objs are valid"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "# Crea un solo DataFrame combinando todas las tablas\n",
+ "data_frames = [table.df for table in tables]\n",
+ "data = pd.concat(data_frames, ignore_index=True)"
+ ],
+ "metadata": {
+ "id": "JjFfbw1LmIKT"
+ },
+ "execution_count": 15,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(data)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5BuTw6kxmKMy",
+ "outputId": "3d5cf343-29dd-464d-d5c9-552d44e166b9"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " 0 1 2 3 \\\n",
+ "0 # Año JEC Código \\nmodular Código \\nlocal \n",
+ "1 1073 2015 1214410 234382 \n",
+ "2 1078 2015 0692814 230077 \n",
+ "3 1080 2015 0738799 232509 \n",
+ "4 1081 2015 0599175 230058 \n",
+ "5 1082 2015 0373290 230906 \n",
+ "6 1085 2015 0525428 229917 \n",
+ "7 1086 2015 0590919 249109 \n",
+ "8 1087 2015 0372912 248850 \n",
+ "9 1088 2015 0372961 248930 \n",
+ "10 1090 2015 0580290 249492 \n",
+ "11 1093 2015 1099852 227555 \n",
+ "12 1094 2015 0372730 227490 \n",
+ "13 1095 2015 0373027 229012 \n",
+ "14 1100 2015 0372946 224608 \n",
+ "15 1101 2015 0667022 224383 \n",
+ "16 1103 2015 0372870 226551 \n",
+ "\n",
+ " 4 5 6 7 \\\n",
+ "0 Nombre de la IE DRE UGEL Ubigeo \n",
+ "1 SANTA ROSA DRE JUNIN UGEL \\nCHANCHAMAYO 120305 \n",
+ "2 MARIA AUXILIADORA DRE JUNIN UGEL \\nCHANCHAMAYO 120301 \n",
+ "3 PUCHARINI DRE JUNIN UGEL \\nCHANCHAMAYO 120302 \n",
+ "4 NUESTRA SEÑORA DE LAS \\nMERCEDES DRE JUNIN UGEL \\nCHANCHAMAYO 120301 \n",
+ "5 Perené DRE JUNIN UGEL \\nCHANCHAMAYO 120302 \n",
+ "6 AUGUSTO SALAZAR BONDY DRE JUNIN UGEL CHUPACA 120214 \n",
+ "7 HEROES DE LA BREÑA DRE JUNIN UGEL CHUPACA 120905 \n",
+ "8 AMAUTA DRE JUNIN UGEL CHUPACA 120902 \n",
+ "9 SANTIAGO LEON DRE JUNIN UGEL CHUPACA 120903 \n",
+ "10 CAHUIDE DRE JUNIN UGEL CHUPACA 120909 \n",
+ "11 31511 LORENZO ALCALA POMALAZA DRE JUNIN UGEL CONCEPCION 120201 \n",
+ "12 HEROINAS TOLEDO DRE JUNIN UGEL CONCEPCION 120201 \n",
+ "13 APU INCA DRE JUNIN UGEL CONCEPCION 120206 \n",
+ "14 JOSE OLAYA DRE JUNIN UGEL HUANCAYO 120117 \n",
+ "15 SALESIANO DON BOSCO DRE JUNIN UGEL HUANCAYO 120114 \n",
+ "16 CHINCHAYSUYO DRE JUNIN UGEL HUANCAYO 120133 \n",
+ "\n",
+ " 8 9 10 \\\n",
+ "0 Departamento Provincia Distrito \n",
+ "1 JUNIN CHANCHAMAYO SAN RAMON \n",
+ "2 JUNIN CHANCHAMAYO CHANCHAMAYO \n",
+ "3 JUNIN CHANCHAMAYO PERENE \n",
+ "4 JUNIN CHANCHAMAYO CHANCHAMAYO \n",
+ "5 JUNIN CHANCHAMAYO PERENÉ \n",
+ "6 JUNIN CONCEPCION SAN JOSE DE \\nQUERO \n",
+ "7 JUNIN CHUPACA HUAMANCACA \\nCHICO \n",
+ "8 JUNIN CHUPACA AHUAC \n",
+ "9 JUNIN CHUPACA CHONGOS BAJO \n",
+ "10 JUNIN CHUPACA YANACANCHA \n",
+ "11 JUNIN CONCEPCION CONCEPCION \n",
+ "12 JUNIN CONCEPCION CONCEPCION \n",
+ "13 JUNIN CONCEPCION COMAS \n",
+ "14 JUNIN HUANCAYO HUALHUAS \n",
+ "15 JUNIN HUANCAYO EL TAMBO \n",
+ "16 JUNIN HUANCAYO SAPALLANGA \n",
+ "\n",
+ " 11 12 \n",
+ "0 Dirección Área Geográfica \n",
+ "1 AVENIDA 06 DE AGOSTO S/N URBANA \n",
+ "2 CALLE SAN CARLOS S/N URBANA \n",
+ "3 CARRETERA MARGINAL KM \\n53 S/N URBANA \n",
+ "4 JIRON JOSE DE SAN MARTIN \\nS/N URBANA \n",
+ "5 JIRON LOS CAFETOS PAMPA \\nSILVA S/N URBANA \n",
+ "6 AVENIDA CONCEPCION S/N URBANA \n",
+ "7 AVENIDA MARIA PARADO DE \\nBELLIDO S/N URBANA \n",
+ "8 AVENIDA MARISCAL CACERES \\n1007 URBANA \n",
+ "9 CALLE LA MAR S/N RURAL \n",
+ "10 CARRETERA PRINCIPAL LAIVE \\nS/N URBANA \n",
+ "11 AVENIDA AGRICULTURA 542 URBANA \n",
+ "12 AVENIDA AGRICULTURA 596-\\n598 URBANA \n",
+ "13 CALLE ESTADIO MUNICIPAL \\nS/N URBANA \n",
+ "14 AVENIDA ALFONSO UGARTE \\n944-970 URBANA \n",
+ "15 AVENIDA HUANCAVELICA \\n165 URBANA \n",
+ "16 AVENIDA PEÐALOZA S/N RURAL \n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 582
+ },
+ "id": "xzaSSCXcmNTC",
+ "outputId": "907cbbd5-e99f-4805-ac24-9af0bac9cee7"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " 0 1 2 3 \\\n",
+ "0 # Año JEC Código \\nmodular Código \\nlocal \n",
+ "1 1073 2015 1214410 234382 \n",
+ "2 1078 2015 0692814 230077 \n",
+ "3 1080 2015 0738799 232509 \n",
+ "4 1081 2015 0599175 230058 \n",
+ "5 1082 2015 0373290 230906 \n",
+ "6 1085 2015 0525428 229917 \n",
+ "7 1086 2015 0590919 249109 \n",
+ "8 1087 2015 0372912 248850 \n",
+ "9 1088 2015 0372961 248930 \n",
+ "10 1090 2015 0580290 249492 \n",
+ "11 1093 2015 1099852 227555 \n",
+ "12 1094 2015 0372730 227490 \n",
+ "13 1095 2015 0373027 229012 \n",
+ "14 1100 2015 0372946 224608 \n",
+ "15 1101 2015 0667022 224383 \n",
+ "16 1103 2015 0372870 226551 \n",
+ "\n",
+ " 4 5 6 7 \\\n",
+ "0 Nombre de la IE DRE UGEL Ubigeo \n",
+ "1 SANTA ROSA DRE JUNIN UGEL \\nCHANCHAMAYO 120305 \n",
+ "2 MARIA AUXILIADORA DRE JUNIN UGEL \\nCHANCHAMAYO 120301 \n",
+ "3 PUCHARINI DRE JUNIN UGEL \\nCHANCHAMAYO 120302 \n",
+ "4 NUESTRA SEÑORA DE LAS \\nMERCEDES DRE JUNIN UGEL \\nCHANCHAMAYO 120301 \n",
+ "5 Perené DRE JUNIN UGEL \\nCHANCHAMAYO 120302 \n",
+ "6 AUGUSTO SALAZAR BONDY DRE JUNIN UGEL CHUPACA 120214 \n",
+ "7 HEROES DE LA BREÑA DRE JUNIN UGEL CHUPACA 120905 \n",
+ "8 AMAUTA DRE JUNIN UGEL CHUPACA 120902 \n",
+ "9 SANTIAGO LEON DRE JUNIN UGEL CHUPACA 120903 \n",
+ "10 CAHUIDE DRE JUNIN UGEL CHUPACA 120909 \n",
+ "11 31511 LORENZO ALCALA POMALAZA DRE JUNIN UGEL CONCEPCION 120201 \n",
+ "12 HEROINAS TOLEDO DRE JUNIN UGEL CONCEPCION 120201 \n",
+ "13 APU INCA DRE JUNIN UGEL CONCEPCION 120206 \n",
+ "14 JOSE OLAYA DRE JUNIN UGEL HUANCAYO 120117 \n",
+ "15 SALESIANO DON BOSCO DRE JUNIN UGEL HUANCAYO 120114 \n",
+ "16 CHINCHAYSUYO DRE JUNIN UGEL HUANCAYO 120133 \n",
+ "\n",
+ " 8 9 10 \\\n",
+ "0 Departamento Provincia Distrito \n",
+ "1 JUNIN CHANCHAMAYO SAN RAMON \n",
+ "2 JUNIN CHANCHAMAYO CHANCHAMAYO \n",
+ "3 JUNIN CHANCHAMAYO PERENE \n",
+ "4 JUNIN CHANCHAMAYO CHANCHAMAYO \n",
+ "5 JUNIN CHANCHAMAYO PERENÉ \n",
+ "6 JUNIN CONCEPCION SAN JOSE DE \\nQUERO \n",
+ "7 JUNIN CHUPACA HUAMANCACA \\nCHICO \n",
+ "8 JUNIN CHUPACA AHUAC \n",
+ "9 JUNIN CHUPACA CHONGOS BAJO \n",
+ "10 JUNIN CHUPACA YANACANCHA \n",
+ "11 JUNIN CONCEPCION CONCEPCION \n",
+ "12 JUNIN CONCEPCION CONCEPCION \n",
+ "13 JUNIN CONCEPCION COMAS \n",
+ "14 JUNIN HUANCAYO HUALHUAS \n",
+ "15 JUNIN HUANCAYO EL TAMBO \n",
+ "16 JUNIN HUANCAYO SAPALLANGA \n",
+ "\n",
+ " 11 12 \n",
+ "0 Dirección Área Geográfica \n",
+ "1 AVENIDA 06 DE AGOSTO S/N URBANA \n",
+ "2 CALLE SAN CARLOS S/N URBANA \n",
+ "3 CARRETERA MARGINAL KM \\n53 S/N URBANA \n",
+ "4 JIRON JOSE DE SAN MARTIN \\nS/N URBANA \n",
+ "5 JIRON LOS CAFETOS PAMPA \\nSILVA S/N URBANA \n",
+ "6 AVENIDA CONCEPCION S/N URBANA \n",
+ "7 AVENIDA MARIA PARADO DE \\nBELLIDO S/N URBANA \n",
+ "8 AVENIDA MARISCAL CACERES \\n1007 URBANA \n",
+ "9 CALLE LA MAR S/N RURAL \n",
+ "10 CARRETERA PRINCIPAL LAIVE \\nS/N URBANA \n",
+ "11 AVENIDA AGRICULTURA 542 URBANA \n",
+ "12 AVENIDA AGRICULTURA 596-\\n598 URBANA \n",
+ "13 CALLE ESTADIO MUNICIPAL \\nS/N URBANA \n",
+ "14 AVENIDA ALFONSO UGARTE \\n944-970 URBANA \n",
+ "15 AVENIDA HUANCAVELICA \\n165 URBANA \n",
+ "16 AVENIDA PEÐALOZA S/N RURAL "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 8 | \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " # | \n",
+ " Año JEC | \n",
+ " Código \\nmodular | \n",
+ " Código \\nlocal | \n",
+ " Nombre de la IE | \n",
+ " DRE | \n",
+ " UGEL | \n",
+ " Ubigeo | \n",
+ " Departamento | \n",
+ " Provincia | \n",
+ " Distrito | \n",
+ " Dirección | \n",
+ " Área Geográfica | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 1073 | \n",
+ " 2015 | \n",
+ " 1214410 | \n",
+ " 234382 | \n",
+ " SANTA ROSA | \n",
+ " DRE JUNIN | \n",
+ " UGEL \\nCHANCHAMAYO | \n",
+ " 120305 | \n",
+ " JUNIN | \n",
+ " CHANCHAMAYO | \n",
+ " SAN RAMON | \n",
+ " AVENIDA 06 DE AGOSTO S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 1078 | \n",
+ " 2015 | \n",
+ " 0692814 | \n",
+ " 230077 | \n",
+ " MARIA AUXILIADORA | \n",
+ " DRE JUNIN | \n",
+ " UGEL \\nCHANCHAMAYO | \n",
+ " 120301 | \n",
+ " JUNIN | \n",
+ " CHANCHAMAYO | \n",
+ " CHANCHAMAYO | \n",
+ " CALLE SAN CARLOS S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 1080 | \n",
+ " 2015 | \n",
+ " 0738799 | \n",
+ " 232509 | \n",
+ " PUCHARINI | \n",
+ " DRE JUNIN | \n",
+ " UGEL \\nCHANCHAMAYO | \n",
+ " 120302 | \n",
+ " JUNIN | \n",
+ " CHANCHAMAYO | \n",
+ " PERENE | \n",
+ " CARRETERA MARGINAL KM \\n53 S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 1081 | \n",
+ " 2015 | \n",
+ " 0599175 | \n",
+ " 230058 | \n",
+ " NUESTRA SEÑORA DE LAS \\nMERCEDES | \n",
+ " DRE JUNIN | \n",
+ " UGEL \\nCHANCHAMAYO | \n",
+ " 120301 | \n",
+ " JUNIN | \n",
+ " CHANCHAMAYO | \n",
+ " CHANCHAMAYO | \n",
+ " JIRON JOSE DE SAN MARTIN \\nS/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " 1082 | \n",
+ " 2015 | \n",
+ " 0373290 | \n",
+ " 230906 | \n",
+ " Perené | \n",
+ " DRE JUNIN | \n",
+ " UGEL \\nCHANCHAMAYO | \n",
+ " 120302 | \n",
+ " JUNIN | \n",
+ " CHANCHAMAYO | \n",
+ " PERENÉ | \n",
+ " JIRON LOS CAFETOS PAMPA \\nSILVA S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 6 | \n",
+ " 1085 | \n",
+ " 2015 | \n",
+ " 0525428 | \n",
+ " 229917 | \n",
+ " AUGUSTO SALAZAR BONDY | \n",
+ " DRE JUNIN | \n",
+ " UGEL CHUPACA | \n",
+ " 120214 | \n",
+ " JUNIN | \n",
+ " CONCEPCION | \n",
+ " SAN JOSE DE \\nQUERO | \n",
+ " AVENIDA CONCEPCION S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 7 | \n",
+ " 1086 | \n",
+ " 2015 | \n",
+ " 0590919 | \n",
+ " 249109 | \n",
+ " HEROES DE LA BREÑA | \n",
+ " DRE JUNIN | \n",
+ " UGEL CHUPACA | \n",
+ " 120905 | \n",
+ " JUNIN | \n",
+ " CHUPACA | \n",
+ " HUAMANCACA \\nCHICO | \n",
+ " AVENIDA MARIA PARADO DE \\nBELLIDO S/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 8 | \n",
+ " 1087 | \n",
+ " 2015 | \n",
+ " 0372912 | \n",
+ " 248850 | \n",
+ " AMAUTA | \n",
+ " DRE JUNIN | \n",
+ " UGEL CHUPACA | \n",
+ " 120902 | \n",
+ " JUNIN | \n",
+ " CHUPACA | \n",
+ " AHUAC | \n",
+ " AVENIDA MARISCAL CACERES \\n1007 | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 9 | \n",
+ " 1088 | \n",
+ " 2015 | \n",
+ " 0372961 | \n",
+ " 248930 | \n",
+ " SANTIAGO LEON | \n",
+ " DRE JUNIN | \n",
+ " UGEL CHUPACA | \n",
+ " 120903 | \n",
+ " JUNIN | \n",
+ " CHUPACA | \n",
+ " CHONGOS BAJO | \n",
+ " CALLE LA MAR S/N | \n",
+ " RURAL | \n",
+ " \n",
+ " \n",
+ " 10 | \n",
+ " 1090 | \n",
+ " 2015 | \n",
+ " 0580290 | \n",
+ " 249492 | \n",
+ " CAHUIDE | \n",
+ " DRE JUNIN | \n",
+ " UGEL CHUPACA | \n",
+ " 120909 | \n",
+ " JUNIN | \n",
+ " CHUPACA | \n",
+ " YANACANCHA | \n",
+ " CARRETERA PRINCIPAL LAIVE \\nS/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 11 | \n",
+ " 1093 | \n",
+ " 2015 | \n",
+ " 1099852 | \n",
+ " 227555 | \n",
+ " 31511 LORENZO ALCALA POMALAZA | \n",
+ " DRE JUNIN | \n",
+ " UGEL CONCEPCION | \n",
+ " 120201 | \n",
+ " JUNIN | \n",
+ " CONCEPCION | \n",
+ " CONCEPCION | \n",
+ " AVENIDA AGRICULTURA 542 | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 12 | \n",
+ " 1094 | \n",
+ " 2015 | \n",
+ " 0372730 | \n",
+ " 227490 | \n",
+ " HEROINAS TOLEDO | \n",
+ " DRE JUNIN | \n",
+ " UGEL CONCEPCION | \n",
+ " 120201 | \n",
+ " JUNIN | \n",
+ " CONCEPCION | \n",
+ " CONCEPCION | \n",
+ " AVENIDA AGRICULTURA 596-\\n598 | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 13 | \n",
+ " 1095 | \n",
+ " 2015 | \n",
+ " 0373027 | \n",
+ " 229012 | \n",
+ " APU INCA | \n",
+ " DRE JUNIN | \n",
+ " UGEL CONCEPCION | \n",
+ " 120206 | \n",
+ " JUNIN | \n",
+ " CONCEPCION | \n",
+ " COMAS | \n",
+ " CALLE ESTADIO MUNICIPAL \\nS/N | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 14 | \n",
+ " 1100 | \n",
+ " 2015 | \n",
+ " 0372946 | \n",
+ " 224608 | \n",
+ " JOSE OLAYA | \n",
+ " DRE JUNIN | \n",
+ " UGEL HUANCAYO | \n",
+ " 120117 | \n",
+ " JUNIN | \n",
+ " HUANCAYO | \n",
+ " HUALHUAS | \n",
+ " AVENIDA ALFONSO UGARTE \\n944-970 | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 15 | \n",
+ " 1101 | \n",
+ " 2015 | \n",
+ " 0667022 | \n",
+ " 224383 | \n",
+ " SALESIANO DON BOSCO | \n",
+ " DRE JUNIN | \n",
+ " UGEL HUANCAYO | \n",
+ " 120114 | \n",
+ " JUNIN | \n",
+ " HUANCAYO | \n",
+ " EL TAMBO | \n",
+ " AVENIDA HUANCAVELICA \\n165 | \n",
+ " URBANA | \n",
+ " \n",
+ " \n",
+ " 16 | \n",
+ " 1103 | \n",
+ " 2015 | \n",
+ " 0372870 | \n",
+ " 226551 | \n",
+ " CHINCHAYSUYO | \n",
+ " DRE JUNIN | \n",
+ " UGEL HUANCAYO | \n",
+ " 120133 | \n",
+ " JUNIN | \n",
+ " HUANCAYO | \n",
+ " SAPALLANGA | \n",
+ " AVENIDA PEÐALOZA S/N | \n",
+ " RURAL | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from google.colab import files"
+ ],
+ "metadata": {
+ "id": "LH3goXVIn41V"
+ },
+ "execution_count": 22,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Exportar el DataFrame a formato .dta (Stata)\n",
+ "data.to_stata(\"jec_data_Grupo3.dta\")\n",
+ "\n",
+ "# Descargar el archivo .dta\n",
+ "files.download(\"jec_data_Grupo3.dta\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 416
+ },
+ "id": "eYFDa0A6mlJ9",
+ "outputId": "ab267151-21ea-4b98-fbfd-849e4cce5583"
+ },
+ "execution_count": 24,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ ":2: InvalidColumnName: \n",
+ "Not all pandas column names were valid Stata variable names.\n",
+ "The following replacements have been made:\n",
+ "\n",
+ " 0 -> _0\n",
+ " 1 -> _1\n",
+ " 2 -> _2\n",
+ " 3 -> _3\n",
+ " 4 -> _4\n",
+ " 5 -> _5\n",
+ " 6 -> _6\n",
+ " 7 -> _7\n",
+ " 8 -> _8\n",
+ " 9 -> _9\n",
+ " 10 -> _10\n",
+ " 11 -> _11\n",
+ " 12 -> _12\n",
+ "\n",
+ "If this is not what you expect, please make sure you have Stata-compliant\n",
+ "column names in your DataFrame (strings only, max 32 characters, only\n",
+ "alphanumerics and underscores, no Stata reserved words)\n",
+ "\n",
+ " data.to_stata(\"jec_data_Grupo3.dta\")\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "application/javascript": [
+ "\n",
+ " async function download(id, filename, size) {\n",
+ " if (!google.colab.kernel.accessAllowed) {\n",
+ " return;\n",
+ " }\n",
+ " const div = document.createElement('div');\n",
+ " const label = document.createElement('label');\n",
+ " label.textContent = `Downloading \"${filename}\": `;\n",
+ " div.appendChild(label);\n",
+ " const progress = document.createElement('progress');\n",
+ " progress.max = size;\n",
+ " div.appendChild(progress);\n",
+ " document.body.appendChild(div);\n",
+ "\n",
+ " const buffers = [];\n",
+ " let downloaded = 0;\n",
+ "\n",
+ " const channel = await google.colab.kernel.comms.open(id);\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ "\n",
+ " for await (const message of channel.messages) {\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ " if (message.buffers) {\n",
+ " for (const buffer of message.buffers) {\n",
+ " buffers.push(buffer);\n",
+ " downloaded += buffer.byteLength;\n",
+ " progress.value = downloaded;\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
+ " const a = document.createElement('a');\n",
+ " a.href = window.URL.createObjectURL(blob);\n",
+ " a.download = filename;\n",
+ " div.appendChild(a);\n",
+ " a.click();\n",
+ " div.remove();\n",
+ " }\n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "application/javascript": [
+ "download(\"download_74d964e6-d0d1-4840-9368-412f805a6513\", \"jec_data_Grupo3.dta\", 6268)"
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Labs/tarea6/GRUPO_3_RShape.ipynb b/Labs/tarea6/GRUPO_3_RShape.ipynb
new file mode 100644
index 0000000..c8e0d1e
--- /dev/null
+++ b/Labs/tarea6/GRUPO_3_RShape.ipynb
@@ -0,0 +1,1157 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "87bb4297",
+ "metadata": {},
+ "source": [
+ "#### Tarea 6 - Grupo 3\n",
+ "#### Integrantes\n",
+ "* Narumi Miyamoto \n",
+ "* Rodrigo Cervera\n",
+ "* Alicia Chaquila"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "4137dd69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Importamos las librerías que necesitaremos\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import re \n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore') "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "2ef139e0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " numpanh15 | \n",
+ " numpanh16 | \n",
+ " numpanh17 | \n",
+ " numpanh18 | \n",
+ " numpanh19 | \n",
+ " mes_15 | \n",
+ " ubigeo_15 | \n",
+ " dominio_15 | \n",
+ " p400a3_15 | \n",
+ " ... | \n",
+ " mes_18 | \n",
+ " ubigeo_18 | \n",
+ " dominio_18 | \n",
+ " p400a3_18 | \n",
+ " p4022_18 | \n",
+ " mes_19 | \n",
+ " ubigeo_19 | \n",
+ " dominio_19 | \n",
+ " p400a3_19 | \n",
+ " p4022_19 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 0.0 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 5541 | \n",
+ " 34848 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 5542 | \n",
+ " 34846 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 5543 | \n",
+ " 34847 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 5544 | \n",
+ " 34849 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 0.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 5545 | \n",
+ " 34850 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 18549 | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " ... | \n",
+ " 12 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 1.0 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 5546 rows × 31 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " numper numpanh15 numpanh16 numpanh17 numpanh18 numpanh19 mes_15 \\\n",
+ "0 15368 1158 1158 1158 1158 1158 11 \n",
+ "1 15369 1158 1158 1158 1158 1158 11 \n",
+ "2 15380 1162 1162 1162 1162 1162 11 \n",
+ "3 15381 1162 1162 1162 1162 1162 11 \n",
+ "4 15410 1185 1185 1185 1185 1185 7 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "5541 34848 18549 18549 18549 18549 18549 12 \n",
+ "5542 34846 18549 18549 18549 18549 18549 12 \n",
+ "5543 34847 18549 18549 18549 18549 18549 12 \n",
+ "5544 34849 18549 18549 18549 18549 18549 12 \n",
+ "5545 34850 18549 18549 18549 18549 18549 12 \n",
+ "\n",
+ " ubigeo_15 dominio_15 p400a3_15 ... mes_18 ubigeo_18 \\\n",
+ "0 10101 sierra norte 1946.0 ... 12 10101 \n",
+ "1 10101 sierra norte 1973.0 ... 12 10101 \n",
+ "2 10101 sierra norte 1987.0 ... 12 10101 \n",
+ "3 10101 sierra norte 2009.0 ... 12 10101 \n",
+ "4 10101 sierra norte 1955.0 ... 7 10101 \n",
+ "... ... ... ... ... ... ... \n",
+ "5541 250101 selva 2002.0 ... 12 250101 \n",
+ "5542 250101 selva 1985.0 ... 12 250101 \n",
+ "5543 250101 selva 1976.0 ... 12 250101 \n",
+ "5544 250101 selva 2007.0 ... 12 250101 \n",
+ "5545 250101 selva 2011.0 ... 12 250101 \n",
+ "\n",
+ " dominio_18 p400a3_18 p4022_18 mes_19 ubigeo_19 dominio_19 \\\n",
+ "0 sierra norte 1946.0 0.0 11 10101 sierra norte \n",
+ "1 sierra norte 1973.0 0.0 11 10101 sierra norte \n",
+ "2 sierra norte 1987.0 0.0 11 10101 sierra norte \n",
+ "3 sierra norte 2009.0 0.0 11 10101 sierra norte \n",
+ "4 sierra norte 1955.0 0.0 7 10101 sierra norte \n",
+ "... ... ... ... ... ... ... \n",
+ "5541 selva 2002.0 0.0 10 250101 selva \n",
+ "5542 selva 1985.0 0.0 10 250101 selva \n",
+ "5543 selva 1976.0 0.0 10 250101 selva \n",
+ "5544 selva 2007.0 0.0 10 250101 selva \n",
+ "5545 selva 2011.0 1.0 10 250101 selva \n",
+ "\n",
+ " p400a3_19 p4022_19 \n",
+ "0 1946.0 0.0 \n",
+ "1 1973.0 0.0 \n",
+ "2 1987.0 1.0 \n",
+ "3 2009.0 0.0 \n",
+ "4 1955.0 1.0 \n",
+ "... ... ... \n",
+ "5541 2002.0 1.0 \n",
+ "5542 1985.0 0.0 \n",
+ "5543 1976.0 1.0 \n",
+ "5544 2007.0 1.0 \n",
+ "5545 2011.0 0.0 \n",
+ "\n",
+ "[5546 rows x 31 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Leemos la base de datos\n",
+ "# convert_categoricals : se usa para especificar si se deben convertir las variables categóricas del archivo Stata a variables categóricas de Pandas (aquí se pasa False, por lo que no se convierten)\n",
+ "panel = pd.read_stata(\"C:/Users/ALICIA/Documents/GitHub/ultima tarea/data/data.dta\",convert_categoricals=False) \n",
+ "panel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "cce469c2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['numper', 'numpanh15', 'numpanh16', 'numpanh17', 'numpanh18',\n",
+ " 'numpanh19', 'mes_15', 'ubigeo_15', 'dominio_15', 'p400a3_15',\n",
+ " 'p4022_15', 'mes_16', 'ubigeo_16', 'dominio_16', 'p400a3_16',\n",
+ " 'p4022_16', 'mes_17', 'ubigeo_17', 'dominio_17', 'p400a3_17',\n",
+ " 'p4022_17', 'mes_18', 'ubigeo_18', 'dominio_18', 'p400a3_18',\n",
+ " 'p4022_18', 'mes_19', 'ubigeo_19', 'dominio_19', 'p400a3_19',\n",
+ " 'p4022_19'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Nos devuelve una lista de todos los nombres de las columnas \n",
+ "panel.columns[:]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9b9d749e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Renombramos a las columnas\n",
+ "panel.rename(columns = {'numpanh15':'numpanh_15', 'numpanh16':'numpanh_16','numpanh17':'numpanh_17','numpanh18':'numpanh_18','numpanh19':'numpanh_19'}, inplace = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "f7412527",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Crearemos una lista llamada filter_list que contiene los nombres de todas las columnas del DataFrame llamado \"panel\"\n",
+ "filter_list = list(panel.columns)[:] "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "7e38e765",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['numper',\n",
+ " 'numpanh_15',\n",
+ " 'numpanh_16',\n",
+ " 'numpanh_17',\n",
+ " 'numpanh_18',\n",
+ " 'numpanh_19',\n",
+ " 'mes_15',\n",
+ " 'ubigeo_15',\n",
+ " 'dominio_15',\n",
+ " 'p400a3_15',\n",
+ " 'p4022_15',\n",
+ " 'mes_16',\n",
+ " 'ubigeo_16',\n",
+ " 'dominio_16',\n",
+ " 'p400a3_16',\n",
+ " 'p4022_16',\n",
+ " 'mes_17',\n",
+ " 'ubigeo_17',\n",
+ " 'dominio_17',\n",
+ " 'p400a3_17',\n",
+ " 'p4022_17',\n",
+ " 'mes_18',\n",
+ " 'ubigeo_18',\n",
+ " 'dominio_18',\n",
+ " 'p400a3_18',\n",
+ " 'p4022_18',\n",
+ " 'mes_19',\n",
+ " 'ubigeo_19',\n",
+ " 'dominio_19',\n",
+ " 'p400a3_19',\n",
+ " 'p4022_19']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Crearemos una nueva lista llamada new_list que contiene los elementos únicos de la lista filter_list, eliminando cualquier duplicado\n",
+ "new_list = list(dict.fromkeys(filter_list))\n",
+ "new_list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "416a041f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " period | \n",
+ " numpanh | \n",
+ " mes | \n",
+ " ubigeo | \n",
+ " dominio | \n",
+ " p400a3 | \n",
+ " p4022 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 15 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 27725 | \n",
+ " 34848 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2002.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 27726 | \n",
+ " 34846 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1985.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " 27727 | \n",
+ " 34847 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 1976.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 27728 | \n",
+ " 34849 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2007.0 | \n",
+ " 1.0 | \n",
+ " \n",
+ " \n",
+ " 27729 | \n",
+ " 34850 | \n",
+ " 19 | \n",
+ " 18549 | \n",
+ " 10 | \n",
+ " 250101 | \n",
+ " selva | \n",
+ " 2011.0 | \n",
+ " 0.0 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 27730 rows × 8 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " numper period numpanh mes ubigeo dominio p400a3 p4022\n",
+ "0 15368 15 1158 11 10101 sierra norte 1946.0 0.0\n",
+ "1 15369 15 1158 11 10101 sierra norte 1973.0 0.0\n",
+ "2 15380 15 1162 11 10101 sierra norte 1987.0 1.0\n",
+ "3 15381 15 1162 11 10101 sierra norte 2009.0 1.0\n",
+ "4 15410 15 1185 7 10101 sierra norte 1955.0 0.0\n",
+ "... ... ... ... ... ... ... ... ...\n",
+ "27725 34848 19 18549 10 250101 selva 2002.0 1.0\n",
+ "27726 34846 19 18549 10 250101 selva 1985.0 0.0\n",
+ "27727 34847 19 18549 10 250101 selva 1976.0 1.0\n",
+ "27728 34849 19 18549 10 250101 selva 2007.0 1.0\n",
+ "27729 34850 19 18549 10 250101 selva 2011.0 0.0\n",
+ "\n",
+ "[27730 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Transformaremos los datos a un formato largo (long format)\n",
+ "# reset_index : Lo utilizaremos para reiniciar el índice del DataFrame resultante\n",
+ "\n",
+ "reshape_panel = pd.wide_to_long(panel, stubnames = ['numpanh','mes','ubigeo','dominio','p400a3','p4022'], i = ['numper'] , \n",
+ " j = 'period' , sep = '_').reset_index()\n",
+ "reshape_panel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "e9ab148b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " ubigeo | \n",
+ " distrito | \n",
+ " provincia | \n",
+ " region | \n",
+ " unidos | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 10101 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 10102 | \n",
+ " Asuncion | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 10103 | \n",
+ " Balsas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 10104 | \n",
+ " Cheto | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 10105 | \n",
+ " Chiliquin | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 1869 | \n",
+ " 250302 | \n",
+ " Irazola | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 1870 | \n",
+ " 250303 | \n",
+ " Curimana | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 1871 | \n",
+ " 250304 | \n",
+ " Neshuya | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 1872 | \n",
+ " 250305 | \n",
+ " Alexander von Humboldt | \n",
+ " Padre Abad | \n",
+ " Ucayali | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 1873 | \n",
+ " 250401 | \n",
+ " Purus | \n",
+ " Purus | \n",
+ " Ucayali | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1874 rows × 5 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " ubigeo distrito provincia region unidos\n",
+ "0 10101 Chachapoyas Chachapoyas Amazonas 1\n",
+ "1 10102 Asuncion Chachapoyas Amazonas 1\n",
+ "2 10103 Balsas Chachapoyas Amazonas 0\n",
+ "3 10104 Cheto Chachapoyas Amazonas 1\n",
+ "4 10105 Chiliquin Chachapoyas Amazonas 0\n",
+ "... ... ... ... ... ...\n",
+ "1869 250302 Irazola Padre Abad Ucayali 0\n",
+ "1870 250303 Curimana Padre Abad Ucayali 0\n",
+ "1871 250304 Neshuya Padre Abad Ucayali 0\n",
+ "1872 250305 Alexander von Humboldt Padre Abad Ucayali 0\n",
+ "1873 250401 Purus Purus Ucayali 1\n",
+ "\n",
+ "[1874 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Llamaremos a una nueva base de datos\n",
+ "unidos = pd.read_stata(\"C:/Users/ALICIA/Documents/GitHub/ultima tarea/data/unidos.dta\",\n",
+ " convert_categoricals=False)\n",
+ "unidos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a13973a0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " numper | \n",
+ " period | \n",
+ " numpanh | \n",
+ " mes | \n",
+ " ubigeo | \n",
+ " dominio | \n",
+ " p400a3 | \n",
+ " p4022 | \n",
+ " distrito | \n",
+ " provincia | \n",
+ " region | \n",
+ " unidos | \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15368 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1946.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 15369 | \n",
+ " 15 | \n",
+ " 1158 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1973.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 15380 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1987.0 | \n",
+ " 1.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " 15381 | \n",
+ " 15 | \n",
+ " 1162 | \n",
+ " 11 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 2009.0 | \n",
+ " 1.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 15410 | \n",
+ " 15 | \n",
+ " 1185 | \n",
+ " 7 | \n",
+ " 10101 | \n",
+ " sierra norte | \n",
+ " 1955.0 | \n",
+ " 0.0 | \n",
+ " Chachapoyas | \n",
+ " Chachapoyas | \n",
+ " Amazonas | \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " \n",
+ " \n",
+ " 27725 | \n",
+ " 34777 | \n",
+ " 19 | \n",
+ " 18472 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1942.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 27726 | \n",
+ " 34779 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1988.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 27727 | \n",
+ " 34781 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 2015.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 27728 | \n",
+ " 34778 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 1984.0 | \n",
+ " 1.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " 27729 | \n",
+ " 34780 | \n",
+ " 19 | \n",
+ " 18473 | \n",
+ " 7 | \n",
+ " 220301 | \n",
+ " selva | \n",
+ " 2010.0 | \n",
+ " 0.0 | \n",
+ " San Jose de Sisa | \n",
+ " El Dorado | \n",
+ " San Martin | \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " \n",
+ " 27730 rows × 12 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " numper period numpanh mes ubigeo dominio p400a3 p4022 \\\n",
+ "0 15368 15 1158 11 10101 sierra norte 1946.0 0.0 \n",
+ "1 15369 15 1158 11 10101 sierra norte 1973.0 0.0 \n",
+ "2 15380 15 1162 11 10101 sierra norte 1987.0 1.0 \n",
+ "3 15381 15 1162 11 10101 sierra norte 2009.0 1.0 \n",
+ "4 15410 15 1185 7 10101 sierra norte 1955.0 0.0 \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "27725 34777 19 18472 7 220301 selva 1942.0 0.0 \n",
+ "27726 34779 19 18473 7 220301 selva 1988.0 0.0 \n",
+ "27727 34781 19 18473 7 220301 selva 2015.0 0.0 \n",
+ "27728 34778 19 18473 7 220301 selva 1984.0 1.0 \n",
+ "27729 34780 19 18473 7 220301 selva 2010.0 0.0 \n",
+ "\n",
+ " distrito provincia region unidos \n",
+ "0 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "1 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "2 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "3 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "4 Chachapoyas Chachapoyas Amazonas 1 \n",
+ "... ... ... ... ... \n",
+ "27725 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27726 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27727 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27728 San Jose de Sisa El Dorado San Martin 0 \n",
+ "27729 San Jose de Sisa El Dorado San Martin 0 \n",
+ "\n",
+ "[27730 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Realizaremos una fusión (merge) entre dos DataFrames: reshape_panel y unidos\n",
+ "juntos = pd.merge(reshape_panel, unidos, on='ubigeo', how='inner')\n",
+ "juntos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ee08714c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Labs/tarea6/Grupo_3_Rshape.R b/Labs/tarea6/Grupo_3_Rshape.R
new file mode 100644
index 0000000..8c58679
--- /dev/null
+++ b/Labs/tarea6/Grupo_3_Rshape.R
@@ -0,0 +1,121 @@
+# Install stringr and rebus only when they are not available yet, so that
+# re-running the script does not reinstall them (or require network access)
+# on every execution.
+for (pkg in c("stringr", "rebus")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    install.packages(pkg)
+  }
+}
+
+# Attach the packages used by the script.
+library(stringr)
+library(rebus)
+library(haven)
+library(dplyr)
+library(tidyverse)
+library(tidyr)
+
+# Read the wide-format panel from the Stata file data.dta.
+# NOTE(review): the path is absolute and machine-specific; it has to be
+# adjusted before the script can run on another computer.
+panel <- haven::read_dta(
+  file = "C:\\Users\\ALICIA\\Documents\\GitHub\\ultima tarea\\data\\data.dta"
+)
+panel
+
+# Keep the column names of 'panel' as they are before the rename.
+columnas <- names(panel)
+
+
+# Insert an underscore before the two-digit suffix of the five numpanh
+# columns (numpanh15 -> numpanh_15, ..., numpanh19 -> numpanh_19).
+panel <- rename(
+  panel,
+  numpanh_15 = numpanh15,
+  numpanh_16 = numpanh16,
+  numpanh_17 = numpanh17,
+  numpanh_18 = numpanh18,
+  numpanh_19 = numpanh19
+)
+
+# Display the renamed table.
+panel
+
+# Column names of 'panel' after the rename.
+filter_list <- names(panel)
+
+# The same names with any duplicates removed.
+new_list <- unique(filter_list)
+
+# Display the de-duplicated names.
+new_list
+
+
+
+
+# Take the column names from 'panel' itself instead of a hand-typed copy, so
+# the vector cannot drift out of sync with the data that was actually loaded.
+columnas <- colnames(panel)
+
+# Distinct variable stubs: skip the first name (the id column) and strip
+# everything from the first underscore onwards in the remaining names.
+prefixo <- unique(sub("_.*", "", columnas[-1]))
+
+# Reshape 'panel' from wide to long: one row per person (numper) and period.
+# pivot_longer() with the ".value" sentinel stacks every <stub>_<yy> group into
+# a single <stub> column and keeps each variable's own type. The previous
+# gather()/spread() round trip sent all values through one shared 'value'
+# column, so the numeric variables came back as character.
+stubs <- c("numpanh", "mes", "ubigeo", "dominio", "p400a3", "p4022")
+stub_pattern <- paste0("^(", paste(stubs, collapse = "|"), ")_")
+
+reshape_panel <- panel %>%
+  # keep only the id column and the variables that are reshaped
+  select(numper, matches(stub_pattern)) %>%
+  pivot_longer(
+    cols = -numper,
+    # text before "_" names the output column, text after it is the period
+    names_to = c(".value", "period"),
+    names_sep = "_"
+  ) %>%
+  # same row order as before: sorted by person, then by period
+  arrange(numper, period)
+
+# Print the long-format table.
+print(reshape_panel)
+
+
+
+
+# Load the second Stata file, unidos.dta.
+unidos <- haven::read_dta(
+  file = "C:\\Users\\ALICIA\\Documents\\GitHub\\ultima tarea\\data\\unidos.dta"
+)
+unidos
+
+# Inner join of the long panel with 'unidos' on their shared 'ubigeo' column;
+# rows without a match in the other table are dropped (merge's default).
+juntos <- merge(x = reshape_panel, y = unidos, by = "ubigeo")
+juntos
+
+
+
+
+# Attach a descriptive "label" attribute to each column of 'reshape_panel'.
+# The labels live in a vector named by column: the loop looks them up by
+# column name, and indexing an unnamed vector by name returns NA every time.
+labels <- c(
+  numper  = "Número de persona",
+  period  = "Período",
+  numpanh = "Número de panh",
+  mes     = "Mes",
+  ubigeo  = "Ubigeo",
+  dominio = "Dominio",
+  p400a3  = "P400a3",
+  p4022   = "P4022"
+)
+# Only columns that have an entry in 'labels' are touched.
+for (var in intersect(names(reshape_panel), names(labels))) {
+  attr(reshape_panel[[var]], "label") <- labels[[var]]
+}
+
+# Print the labelled table.
+print(reshape_panel)
+
+
+
+
+
+# Install sjlabelled only if it cannot be loaded, then attach it.
+if (!require(sjlabelled)) {
+  install.packages("sjlabelled")
+}
+library(sjlabelled)
+
+# Variable labels for the columns of 'juntos'.
+set_label(juntos$numper) <- "Número de persona"
+set_label(juntos$period) <- "Período"
+set_label(juntos$numpanh) <- "Número de panh"
+set_label(juntos$mes) <- "Mes"
+set_label(juntos$ubigeo) <- "Ubigeo"
+set_label(juntos$dominio) <- "Dominio"
+set_label(juntos$p400a3) <- "P400a3"
+set_label(juntos$p4022) <- "P4022"
+set_label(juntos$unidos) <- "Etiqueta de 'unidos'"
+
+# Print the table with its variable labels.
+# (The former set_labels() calls at this point discarded their return value,
+# so they changed nothing; the value labels are applied by the factor
+# conversion below.)
+print(juntos)
+
+
+
+# Value labels for 'p4022' and 'unidos', stored as factor levels.
+# The values visible in the data are 0 and 1, so those are the levels; the
+# earlier levels 1..5 turned every 0 into NA.
+# TODO confirm there are no other codes, and replace the placeholder label
+# texts with the meanings given in the survey codebook.
+juntos$p4022 <- factor(juntos$p4022, levels = c(0, 1), labels = c("Valor1", "Valor2"))
+juntos$unidos <- factor(juntos$unidos, levels = c(0, 1), labels = c("Etiqueta1", "Etiqueta2"))
+
+# factor() builds a new vector without the attributes of its input, so the
+# variable labels of these two columns are set again.
+set_label(juntos$p4022) <- "P4022"
+set_label(juntos$unidos) <- "Etiqueta de 'unidos'"
+
+# Print the final table.
+print(juntos)
diff --git a/Labs/tarea6/jec_data_Grupo3.dta b/Labs/tarea6/jec_data_Grupo3.dta
new file mode 100644
index 0000000..1eaecf0
Binary files /dev/null and b/Labs/tarea6/jec_data_Grupo3.dta differ
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |