{ "cells": [ { "cell_type": "markdown", "id": "5c8decec", "metadata": {}, "source": [ "# Propósito del Modelo\n", "El objetivo es hacer análisis que involucran múltiples variables de diferentes bases de datos combinadas." ] }, { "cell_type": "code", "execution_count": 1, "id": "ff4e01c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "( 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.201874 -0.200982 -0.200327 \n", " Africa Eastern and Southern AFE -0.094914 -0.098259 -0.094392 \n", " Afghanistan AFG -0.201874 -0.200982 -0.200327 \n", " Africa Western and Central AFW -0.135881 -0.136426 -0.136892 \n", " Angola AGO -0.201874 -0.200982 -0.200327 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.199863 -0.200426 -0.202832 \n", " Africa Eastern and Southern AFE -0.094446 -0.097204 -0.095992 \n", " Afghanistan AFG -0.199863 -0.200426 -0.202832 \n", " Africa Western and Central AFW -0.135198 -0.136542 -0.136252 \n", " Angola AGO -0.199863 -0.200426 -0.202832 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.202705 -0.202662 -0.203255 \n", " Africa Eastern and Southern AFE -0.097136 -0.094766 -0.097880 \n", " Afghanistan AFG -0.202705 -0.202662 -0.203255 \n", " Africa Western and Central AFW -0.141240 -0.150569 -0.154342 \n", " Angola AGO -0.202705 -0.202662 -0.203255 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.204786 ... -0.288371 -0.289320 \n", " Africa Eastern and Southern AFE -0.099362 ... -0.173739 -0.173601 \n", " Afghanistan AFG -0.204786 ... -0.286345 -0.287284 \n", " Africa Western and Central AFW -0.149334 ... -0.195458 -0.193785 \n", " Angola AGO -0.204786 ... -0.277472 -0.278209 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.289823 -0.290696 -0.291427 \n", " Africa Eastern and Southern AFE -0.174254 -0.175796 -0.177555 \n", " Afghanistan AFG -0.287813 -0.288695 -0.289457 \n", " Africa Western and Central AFW -0.194550 -0.197907 -0.199728 \n", " Angola AGO -0.278940 -0.280400 -0.281517 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.292036 -0.292556 -0.292998 \n", " Africa Eastern and Southern AFE -0.179122 -0.180347 -0.180619 \n", " Afghanistan AFG -0.290112 -0.290586 -0.290938 \n", " Africa Western and Central AFW -0.200799 -0.200851 -0.199375 \n", " Angola AGO -0.282588 -0.283415 -0.284049 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.293201 -0.294042 \n", " Africa Eastern and Southern AFE -0.183041 -0.183463 \n", " Afghanistan AFG -0.291793 -0.287261 \n", " Africa Western and Central AFW -0.201723 -0.201925 \n", " Angola AGO -0.284755 -0.285617 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.084868 -0.08484 -0.083201 \n", " Africa Eastern and Southern AFE -0.084868 -0.08484 -0.083201 \n", " Afghanistan AFG -0.084868 -0.08484 -0.083201 \n", " Africa Western and Central AFW -0.084868 -0.08484 -0.083201 \n", " Angola AGO -0.084868 -0.08484 -0.083201 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.082048 -0.080368 -0.074607 \n", " Africa Eastern and Southern AFE -0.082048 -0.080368 -0.074607 \n", " Afghanistan AFG -0.082048 -0.080368 -0.074607 \n", " Africa Western and Central AFW -0.082048 -0.080368 -0.074607 \n", " Angola AGO -0.082048 -0.080368 -0.074607 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.075705 -0.073737 -0.072911 \n", " Africa Eastern and Southern AFE -0.075705 -0.073737 -0.072911 \n", " Afghanistan AFG -0.075705 -0.073737 -0.072911 \n", " Africa Western and Central AFW -0.075705 -0.073737 -0.072911 \n", " Angola AGO -0.075705 -0.073737 -0.072911 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.071835 ... -0.105875 -0.107196 \n", " Africa Eastern and Southern AFE -0.071835 ... -0.105875 -0.107196 \n", " Afghanistan AFG -0.071835 ... -0.105875 -0.107196 \n", " Africa Western and Central AFW -0.071835 ... -0.105875 -0.107196 \n", " Angola AGO -0.071835 ... -0.104616 -0.106276 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.105721 -0.104515 -0.105551 \n", " Africa Eastern and Southern AFE -0.105721 -0.104515 -0.105551 \n", " Afghanistan AFG -0.105721 -0.104515 -0.105551 \n", " Africa Western and Central AFW -0.105721 -0.104515 -0.105551 \n", " Angola AGO -0.105172 -0.104014 -0.105000 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.107831 -0.111312 -0.111963 \n", " Africa Eastern and Southern AFE -0.107831 -0.111312 -0.111963 \n", " Afghanistan AFG -0.107831 -0.111312 -0.111067 \n", " Africa Western and Central AFW -0.107831 -0.111312 -0.111963 \n", " Angola AGO -0.107297 -0.110756 -0.111903 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.110932 -0.111682 \n", " Africa Eastern and Southern AFE -0.110932 -0.111682 \n", " Afghanistan AFG -0.110692 -0.111682 \n", " Africa Western and Central AFW -0.110932 -0.111682 \n", " Angola AGO -0.111045 -0.111847 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.146825 -0.158595 -0.153408 \n", " Africa Eastern and Southern AFE -0.146825 -0.158595 -0.153408 \n", " Afghanistan AFG -0.146825 -0.158595 -0.153408 \n", " Africa Western and Central AFW -0.146825 -0.158595 -0.153408 \n", " Angola AGO -0.146825 -0.158595 -0.153408 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.158273 -0.158734 -0.162173 \n", " Africa Eastern and Southern AFE -0.158273 -0.158734 -0.162173 \n", " Afghanistan AFG -0.158273 -0.158734 -0.162173 \n", " Africa Western and Central AFW -0.158273 -0.158734 -0.162173 \n", " Angola AGO -0.158273 -0.158734 -0.162173 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.158459 -0.162375 -0.1682 \n", " Africa Eastern and Southern AFE -0.158459 -0.162375 -0.1682 \n", " Afghanistan AFG -0.158459 -0.162375 -0.1682 \n", " Africa Western and Central AFW -0.158459 -0.162375 -0.1682 \n", " Angola AGO -0.158459 -0.162375 -0.1682 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.184395 ... -0.203738 -0.202930 \n", " Africa Eastern and Southern AFE -0.184395 ... -0.071547 -0.081470 \n", " Afghanistan AFG -0.184395 ... -0.203738 -0.202930 \n", " Africa Western and Central AFW -0.184395 ... -0.203738 -0.202930 \n", " Angola AGO -0.184395 ... -0.194796 -0.193997 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.285333 -0.202682 -0.202571 \n", " Africa Eastern and Southern AFE -0.195116 -0.092256 -0.090919 \n", " Afghanistan AFG -0.285333 -0.202682 -0.202571 \n", " Africa Western and Central AFW -0.285333 -0.202682 -0.202571 \n", " Angola AGO -0.280513 -0.198833 -0.198563 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.202676 -0.202183 -0.201677 \n", " Africa Eastern and Southern AFE -0.093306 -0.094302 -0.094236 \n", " Afghanistan AFG -0.202676 -0.202183 -0.201677 \n", " Africa Western and Central AFW -0.202676 -0.202183 -0.201677 \n", " Angola AGO -0.200401 -0.201239 -0.199575 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.201440 -0.201034 \n", " Africa Eastern and Southern AFE -0.093547 -0.090401 \n", " Afghanistan AFG -0.201440 -0.201034 \n", " Africa Western and Central AFW -0.201440 -0.201034 \n", " Angola AGO -0.201274 -0.203768 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.090809 -0.091864 -0.09344 \n", " Africa Eastern and Southern AFE -0.090809 -0.091864 -0.09344 \n", " Afghanistan AFG -0.090809 -0.091864 -0.09344 \n", " Africa Western and Central AFW -0.090809 -0.091864 -0.09344 \n", " Angola AGO -0.090809 -0.091864 -0.09344 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.093163 -0.094031 -0.092187 \n", " Africa Eastern and Southern AFE -0.093163 -0.094031 -0.092187 \n", " Afghanistan AFG -0.093163 -0.094031 -0.092187 \n", " Africa Western and Central AFW -0.093163 -0.094031 -0.092187 \n", " Angola AGO -0.093163 -0.094031 -0.092187 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.090953 -0.089691 -0.090635 \n", " Africa Eastern and Southern AFE -0.090953 -0.089691 -0.090635 \n", " Afghanistan AFG -0.090953 -0.089691 -0.090635 \n", " Africa Western and Central AFW -0.090953 -0.089691 -0.090635 \n", " Angola AGO -0.090953 -0.089691 -0.090635 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.089882 ... -0.113248 -0.114293 \n", " Africa Eastern and Southern AFE -0.089882 ... -0.113248 -0.114293 \n", " Afghanistan AFG -0.089882 ... -0.113248 -0.114293 \n", " Africa Western and Central AFW -0.089882 ... -0.113248 -0.114293 \n", " Angola AGO -0.089882 ... -0.112900 -0.113945 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.115194 -0.114932 -0.115308 \n", " Africa Eastern and Southern AFE -0.115194 -0.114932 -0.115308 \n", " Afghanistan AFG -0.115194 -0.114932 -0.115308 \n", " Africa Western and Central AFW -0.115194 -0.114932 -0.115308 \n", " Angola AGO -0.114668 -0.114465 -0.114863 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.116877 -0.119049 -0.118598 \n", " Africa Eastern and Southern AFE -0.116877 -0.119049 -0.118598 \n", " Afghanistan AFG -0.116877 -0.119049 -0.117498 \n", " Africa Western and Central AFW -0.116877 -0.119049 -0.118598 \n", " Angola AGO -0.116601 -0.118793 -0.118486 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.118736 -0.117705 \n", " Africa Eastern and Southern AFE -0.118736 -0.117705 \n", " Afghanistan AFG -0.118341 -0.117705 \n", " Africa Western and Central AFW -0.118736 -0.117705 \n", " Angola AGO -0.118741 -0.118102 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.162675 -0.152507 -0.175345 \n", " Africa Eastern and Southern AFE 2.381755 2.202534 2.959211 \n", " Afghanistan AFG -0.162675 -0.152507 -0.175345 \n", " Africa Western and Central AFW -0.162675 -0.152507 -0.175345 \n", " Angola AGO -0.162675 -0.109791 -0.142530 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.199606 -0.207111 -0.257548 \n", " Africa Eastern and Southern AFE 2.810270 2.750210 3.317384 \n", " Afghanistan AFG -0.199606 -0.207111 -0.257548 \n", " Africa Western and Central AFW -0.199606 -0.207111 -0.257548 \n", " Angola AGO -0.165347 -0.144329 -0.250614 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.288931 -0.277482 -0.278466 \n", " Africa Eastern and Southern AFE 4.608648 3.940742 3.916744 \n", " Afghanistan AFG -0.288931 -0.277482 -0.278466 \n", " Africa Western and Central AFW -0.288931 -0.277482 0.066949 \n", " Angola AGO -0.261803 -0.277482 -0.278466 \n", " \n", " 1969 ... 2011 2012 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.270550 ... -0.325080 -0.311868 \n", " Africa Eastern and Southern AFE 4.802118 ... -0.325080 -0.311868 \n", " Afghanistan AFG -0.270550 ... -0.325080 -0.311868 \n", " Africa Western and Central AFW 0.209348 ... 0.935966 0.632917 \n", " Angola AGO -0.270550 ... 0.266913 0.170396 \n", " \n", " 2013 2014 2015 \\\n", " Country Name Country Code \n", " Aruba ABW -0.306099 -0.309704 -0.250354 \n", " Africa Eastern and Southern AFE -0.306099 -0.309704 -0.250354 \n", " Afghanistan AFG -0.306099 -0.309704 -0.250354 \n", " Africa Western and Central AFW 0.353438 0.164271 -0.175233 \n", " Angola AGO 0.224552 0.035088 -0.153382 \n", " \n", " 2016 2017 2018 \\\n", " Country Name Country Code \n", " Aruba ABW -0.252749 -0.257753 -0.291725 \n", " Africa Eastern and Southern AFE -0.252749 -0.257753 -0.291725 \n", " Afghanistan AFG -0.252749 -0.257753 -0.291725 \n", " Africa Western and Central AFW -0.252749 -0.133328 0.042448 \n", " Angola AGO -0.115071 -0.036075 0.042644 \n", " \n", " 2019 2020 \n", " Country Name Country Code \n", " Aruba ABW -0.281673 -0.251303 \n", " Africa Eastern and Southern AFE -0.281673 -0.251303 \n", " Afghanistan AFG -0.281673 -0.251303 \n", " Africa Western and Central AFW -0.051936 -0.239934 \n", " Angola AGO -0.088127 -0.217208 \n", " \n", " [5 rows x 61 columns])" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.formula.api as smf\n", "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "main_gdp = pd.read_csv('main_gdp.csv', index_col=['Country Name', 'Country Code'])\n", "main_government = pd.read_csv('main_government.csv', index_col=['Country Name', 'Country Code'])\n", "main_investments = pd.read_csv('main_investments.csv', index_col=['Country Name', 'Country Code'])\n", "main_consumption = pd.read_csv('main_consumption.csv', index_col=['Country Name', 'Country Code'])\n", "main_trade = pd.read_csv('main_trade.csv', index_col=['Country Name', 'Country Code'])\n", "\n", "(main_gdp.head(), main_government.head(), main_investments.head(), main_consumption.head(), main_trade.head())" ] }, { "cell_type": "markdown", "id": "5c1b62dd", "metadata": {}, "source": [ "#### Unir mediante sufijos especificados" ] }, { "cell_type": "code", "execution_count": 2, "id": "f35b32d2", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GDP_calculated_1960GDP_calculated_1961GDP_calculated_1962GDP_calculated_1963GDP_calculated_1964GDP_calculated_1965GDP_calculated_1966GDP_calculated_1967GDP_calculated_1968GDP_calculated_1969...GDP_calculated_2013GDP_calculated_2014GDP_calculated_2015GDP_calculated_2016GDP_calculated_2017GDP_calculated_2018GDP_calculated_2019GDP_calculated_2020GDP_calculated_2021GDP_calculated_2022
Country NameCountry Code
ArubaABW-0.687051-0.688787-0.705721-0.732952-0.740670-0.789346-0.816753-0.805947-0.813468-0.821448...-1.017331-1.023442-1.046426-0.965575-0.972609-1.011146-1.006774-0.976539-0.925748-0.925497
Africa Eastern and SouthernAFE1.9643401.7689772.5347702.3823402.3198732.8924254.1863963.5201733.4871174.356643...-0.770508-0.786264-0.840639-0.740249-0.747085-0.788861-0.786684-0.756719-0.599803-0.593651
AfghanistanAFG-0.687051-0.688787-0.705721-0.732952-0.740670-0.789346-0.816753-0.805947-0.813468-0.821448...-1.015306-1.021407-1.044415-0.963573-0.970639-1.009221-1.004804-0.972484-0.923705-0.918717
Africa Western and CentralAFW-0.621058-0.624231-0.642287-0.668287-0.676786-0.722767-0.755287-0.753854-0.419140-0.286099...-0.264882-0.453932-0.876033-0.872786-0.756485-0.585735-0.685331-0.871547-0.834270-0.833380
AngolaAGO-0.687051-0.646070-0.672906-0.698693-0.677888-0.782412-0.789625-0.805947-0.813468-0.821448...-0.465233-0.657339-0.932675-0.812782-0.736018-0.664243-0.802329-0.931221-0.917089-0.923103
\n", "

5 rows × 63 columns

\n", "
" ], "text/plain": [ " GDP_calculated_1960 \\\n", "Country Name Country Code \n", "Aruba ABW -0.687051 \n", "Africa Eastern and Southern AFE 1.964340 \n", "Afghanistan AFG -0.687051 \n", "Africa Western and Central AFW -0.621058 \n", "Angola AGO -0.687051 \n", "\n", " GDP_calculated_1961 \\\n", "Country Name Country Code \n", "Aruba ABW -0.688787 \n", "Africa Eastern and Southern AFE 1.768977 \n", "Afghanistan AFG -0.688787 \n", "Africa Western and Central AFW -0.624231 \n", "Angola AGO -0.646070 \n", "\n", " GDP_calculated_1962 \\\n", "Country Name Country Code \n", "Aruba ABW -0.705721 \n", "Africa Eastern and Southern AFE 2.534770 \n", "Afghanistan AFG -0.705721 \n", "Africa Western and Central AFW -0.642287 \n", "Angola AGO -0.672906 \n", "\n", " GDP_calculated_1963 \\\n", "Country Name Country Code \n", "Aruba ABW -0.732952 \n", "Africa Eastern and Southern AFE 2.382340 \n", "Afghanistan AFG -0.732952 \n", "Africa Western and Central AFW -0.668287 \n", "Angola AGO -0.698693 \n", "\n", " GDP_calculated_1964 \\\n", "Country Name Country Code \n", "Aruba ABW -0.740670 \n", "Africa Eastern and Southern AFE 2.319873 \n", "Afghanistan AFG -0.740670 \n", "Africa Western and Central AFW -0.676786 \n", "Angola AGO -0.677888 \n", "\n", " GDP_calculated_1965 \\\n", "Country Name Country Code \n", "Aruba ABW -0.789346 \n", "Africa Eastern and Southern AFE 2.892425 \n", "Afghanistan AFG -0.789346 \n", "Africa Western and Central AFW -0.722767 \n", "Angola AGO -0.782412 \n", "\n", " GDP_calculated_1966 \\\n", "Country Name Country Code \n", "Aruba ABW -0.816753 \n", "Africa Eastern and Southern AFE 4.186396 \n", "Afghanistan AFG -0.816753 \n", "Africa Western and Central AFW -0.755287 \n", "Angola AGO -0.789625 \n", "\n", " GDP_calculated_1967 \\\n", "Country Name Country Code \n", "Aruba ABW -0.805947 \n", "Africa Eastern and Southern AFE 3.520173 \n", "Afghanistan AFG -0.805947 \n", "Africa Western and Central AFW -0.753854 \n", "Angola AGO -0.805947 \n", "\n", " GDP_calculated_1968 \\\n", "Country Name Country Code \n", "Aruba ABW -0.813468 \n", "Africa Eastern and Southern AFE 3.487117 \n", "Afghanistan AFG -0.813468 \n", "Africa Western and Central AFW -0.419140 \n", "Angola AGO -0.813468 \n", "\n", " GDP_calculated_1969 ... \\\n", "Country Name Country Code ... \n", "Aruba ABW -0.821448 ... \n", "Africa Eastern and Southern AFE 4.356643 ... \n", "Afghanistan AFG -0.821448 ... \n", "Africa Western and Central AFW -0.286099 ... \n", "Angola AGO -0.821448 ... \n", "\n", " GDP_calculated_2013 \\\n", "Country Name Country Code \n", "Aruba ABW -1.017331 \n", "Africa Eastern and Southern AFE -0.770508 \n", "Afghanistan AFG -1.015306 \n", "Africa Western and Central AFW -0.264882 \n", "Angola AGO -0.465233 \n", "\n", " GDP_calculated_2014 \\\n", "Country Name Country Code \n", "Aruba ABW -1.023442 \n", "Africa Eastern and Southern AFE -0.786264 \n", "Afghanistan AFG -1.021407 \n", "Africa Western and Central AFW -0.453932 \n", "Angola AGO -0.657339 \n", "\n", " GDP_calculated_2015 \\\n", "Country Name Country Code \n", "Aruba ABW -1.046426 \n", "Africa Eastern and Southern AFE -0.840639 \n", "Afghanistan AFG -1.044415 \n", "Africa Western and Central AFW -0.876033 \n", "Angola AGO -0.932675 \n", "\n", " GDP_calculated_2016 \\\n", "Country Name Country Code \n", "Aruba ABW -0.965575 \n", "Africa Eastern and Southern AFE -0.740249 \n", "Afghanistan AFG -0.963573 \n", "Africa Western and Central AFW -0.872786 \n", "Angola AGO -0.812782 \n", "\n", " GDP_calculated_2017 \\\n", "Country Name Country Code \n", "Aruba ABW -0.972609 \n", "Africa Eastern and Southern AFE -0.747085 \n", "Afghanistan AFG -0.970639 \n", "Africa Western and Central AFW -0.756485 \n", "Angola AGO -0.736018 \n", "\n", " GDP_calculated_2018 \\\n", "Country Name Country Code \n", "Aruba ABW -1.011146 \n", "Africa Eastern and Southern AFE -0.788861 \n", "Afghanistan AFG -1.009221 \n", "Africa Western and Central AFW -0.585735 \n", "Angola AGO -0.664243 \n", "\n", " GDP_calculated_2019 \\\n", "Country Name Country Code \n", "Aruba ABW -1.006774 \n", "Africa Eastern and Southern AFE -0.786684 \n", "Afghanistan AFG -1.004804 \n", "Africa Western and Central AFW -0.685331 \n", "Angola AGO -0.802329 \n", "\n", " GDP_calculated_2020 \\\n", "Country Name Country Code \n", "Aruba ABW -0.976539 \n", "Africa Eastern and Southern AFE -0.756719 \n", "Afghanistan AFG -0.972484 \n", "Africa Western and Central AFW -0.871547 \n", "Angola AGO -0.931221 \n", "\n", " GDP_calculated_2021 \\\n", "Country Name Country Code \n", "Aruba ABW -0.925748 \n", "Africa Eastern and Southern AFE -0.599803 \n", "Afghanistan AFG -0.923705 \n", "Africa Western and Central AFW -0.834270 \n", "Angola AGO -0.917089 \n", "\n", " GDP_calculated_2022 \n", "Country Name Country Code \n", "Aruba ABW -0.925497 \n", "Africa Eastern and Southern AFE -0.593651 \n", "Afghanistan AFG -0.918717 \n", "Africa Western and Central AFW -0.833380 \n", "Angola AGO -0.923103 \n", "\n", "[5 rows x 63 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Unir los DataFrames uno por uno\n", "df_combined = main_gdp.join(main_government, lsuffix='_gdp', rsuffix='_gov')\n", "df_combined = df_combined.join(main_investments, rsuffix='_inv')\n", "df_combined = df_combined.join(main_consumption, rsuffix='_con')\n", "df_combined = df_combined.join(main_trade, rsuffix='_trade')\n", "\n", "# Corregir la fórmula para calcular el GDP correctamente usando los sufijos adecuados para cada año\n", "for year in range(1960, 2023):\n", " gdp_col = f'{year}_gdp' if f'{year}_gdp' in df_combined.columns else str(year)\n", " gov_col = f'{year}_gov' if f'{year}_gov' in df_combined.columns else str(year)\n", " inv_col = f'{year}_inv' if f'{year}_inv' in df_combined.columns else str(year)\n", " con_col = f'{year}_con' if f'{year}_con' in df_combined.columns else str(year)\n", " trade_col = f'{year}_trade' if f'{year}_trade' in df_combined.columns else str(year)\n", "\n", " df_combined[f'GDP_calculated_{year}'] = (df_combined[gdp_col] +\n", " df_combined[gov_col] +\n", " df_combined[inv_col] +\n", " df_combined[con_col] +\n", " df_combined[trade_col])\n", "\n", "# Mostrar las primeras filas del DataFrame combinado para confirmar que los cálculos están correctos\n", "df_combined[[f'GDP_calculated_{year}' for year in range(1960, 2023)]].head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "8d4ef2f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "MultiIndex: 239 entries, ('Aruba', 'ABW') to ('Zimbabwe', 'ZWE')\n", "Columns: 376 entries, 1960_gdp to GDP_calculated_2022\n", "dtypes: float64(376)\n", "memory usage: 731.2+ KB\n" ] }, { "data": { "text/plain": [ "( 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp \\\n", " count 239.000000 239.000000 239.000000 239.000000 239.000000 239.000000 \n", " mean -0.081045 -0.080586 -0.079957 -0.079715 -0.080026 -0.081028 \n", " std 0.811894 0.811040 0.811016 0.810860 0.810938 0.811302 \n", " min -0.224510 -0.223332 -0.222338 -0.222336 -0.222801 -0.221246 \n", " 25% -0.202029 -0.205308 -0.204461 -0.204545 -0.204212 -0.207263 \n", " 50% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " 75% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " max 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 \n", " \n", " 1966_gdp 1967_gdp 1968_gdp 1969_gdp ... \\\n", " count 239.000000 239.000000 239.000000 239.000000 ... \n", " mean -0.080735 -0.080437 -0.081089 -0.082057 ... \n", " std 0.811470 0.810851 0.810718 0.810380 ... \n", " min -0.220064 -0.219031 -0.219934 -0.221195 ... \n", " 25% -0.208238 -0.209050 -0.210544 -0.212014 ... \n", " 50% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " 75% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " max 8.893228 8.892912 8.894431 8.920468 ... \n", " \n", " GDP_calculated_2013 GDP_calculated_2014 GDP_calculated_2015 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.213543 -0.243189 -0.328689 \n", " std 2.892561 2.864930 2.860354 \n", " min -1.030556 -1.036704 -1.056181 \n", " 25% -1.017694 -1.023788 -1.047886 \n", " 50% -1.010545 -1.016673 -1.039728 \n", " 75% -0.773237 -0.793074 -0.879409 \n", " max 28.144082 27.835159 27.572665 \n", " \n", " GDP_calculated_2016 GDP_calculated_2017 GDP_calculated_2018 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.247591 -0.239064 -0.249890 \n", " std 2.856636 2.864796 2.849596 \n", " min -0.978107 -0.985575 -1.024731 \n", " 25% -0.965917 -0.973220 -1.012258 \n", " 50% -0.959720 -0.966302 -1.004253 \n", " 75% -0.808664 -0.790391 -0.808545 \n", " max 27.837297 27.701692 27.367289 \n", " \n", " GDP_calculated_2019 GDP_calculated_2020 GDP_calculated_2021 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.257321 -0.264348 -0.200992 \n", " std 2.831007 2.824620 3.298784 \n", " min -1.021194 -0.990254 -0.954674 \n", " 25% -1.007834 -0.976800 -0.933824 \n", " 50% -0.999458 -0.970490 -0.921164 \n", " 75% -0.807603 -0.831460 -0.816629 \n", " max 26.262588 26.264463 31.407289 \n", " \n", " GDP_calculated_2022 \n", " count 239.000000 \n", " mean -0.202149 \n", " std 3.301215 \n", " min -0.962321 \n", " 25% -0.936761 \n", " 50% -0.918836 \n", " 75% -0.826804 \n", " max 31.527472 \n", " \n", " [8 rows x 376 columns],\n", " None,\n", " (239, 376),\n", " Index(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp', '1965_gdp',\n", " '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " ...\n", " 'GDP_calculated_2013', 'GDP_calculated_2014', 'GDP_calculated_2015',\n", " 'GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020', 'GDP_calculated_2021',\n", " 'GDP_calculated_2022'],\n", " dtype='object', length=376),\n", " MultiIndex([( 'Aruba', 'ABW'),\n", " ('Africa Eastern and Southern', 'AFE'),\n", " ( 'Afghanistan', 'AFG'),\n", " ( 'Africa Western and Central', 'AFW'),\n", " ( 'Angola', 'AGO'),\n", " ( 'Albania', 'ALB'),\n", " ( 'Andorra', 'AND'),\n", " ( 'Arab World', 'ARB'),\n", " ( 'United Arab Emirates', 'ARE'),\n", " ( 'Argentina', 'ARG'),\n", " ...\n", " ( 'Virgin Islands (U.S.)', 'VIR'),\n", " ( 'Viet Nam', 'VNM'),\n", " ( 'Vanuatu', 'VUT'),\n", " ( 'World', 'WLD'),\n", " ( 'Samoa', 'WSM'),\n", " ( 'Kosovo', 'XKX'),\n", " ( 'Yemen, Rep.', 'YEM'),\n", " ( 'South Africa', 'ZAF'),\n", " ( 'Zambia', 'ZMB'),\n", " ( 'Zimbabwe', 'ZWE')],\n", " names=['Country Name', 'Country Code'], length=239),\n", " 1960_gdp float64\n", " 1961_gdp float64\n", " 1962_gdp float64\n", " 1963_gdp float64\n", " 1964_gdp float64\n", " ... \n", " GDP_calculated_2018 float64\n", " GDP_calculated_2019 float64\n", " GDP_calculated_2020 float64\n", " GDP_calculated_2021 float64\n", " GDP_calculated_2022 float64\n", " Length: 376, dtype: object,\n", " 1960_gdp 0\n", " 1961_gdp 0\n", " 1962_gdp 0\n", " 1963_gdp 0\n", " 1964_gdp 0\n", " ..\n", " GDP_calculated_2018 0\n", " GDP_calculated_2019 0\n", " GDP_calculated_2020 0\n", " GDP_calculated_2021 0\n", " GDP_calculated_2022 0\n", " Length: 376, dtype: int64)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_combined.describe(), df_combined.info(), df_combined.shape, df_combined.columns, df_combined.index, df_combined.dtypes, df_combined.isnull().sum()" ] }, { "cell_type": "markdown", "id": "de9cbf31", "metadata": {}, "source": [ "# WORLD | COUNTRIES" ] }, { "cell_type": "markdown", "id": "1bc499c2", "metadata": {}, "source": [ "### World" ] }, { "cell_type": "code", "execution_count": 4, "id": "4bc0270b", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1960_gdp1961_gdp1962_gdp1963_gdp1964_gdp1965_gdp1966_gdp1967_gdp1968_gdp1969_gdp...GDP_calculated_2013GDP_calculated_2014GDP_calculated_2015GDP_calculated_2016GDP_calculated_2017GDP_calculated_2018GDP_calculated_2019GDP_calculated_2020GDP_calculated_2021GDP_calculated_2022
Country Code
WLD8.9253258.9165768.9032618.9036588.9098468.9059358.8932288.8929128.8944318.920468...19.94583919.91139817.39171119.90971419.89727719.86540119.84448219.8014331.40728931.527472
\n", "

1 rows × 376 columns

\n", "
" ], "text/plain": [ " 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp \\\n", "Country Code \n", "WLD 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 \n", "\n", " 1966_gdp 1967_gdp 1968_gdp 1969_gdp ... \\\n", "Country Code ... \n", "WLD 8.893228 8.892912 8.894431 8.920468 ... \n", "\n", " GDP_calculated_2013 GDP_calculated_2014 GDP_calculated_2015 \\\n", "Country Code \n", "WLD 19.945839 19.911398 17.391711 \n", "\n", " GDP_calculated_2016 GDP_calculated_2017 GDP_calculated_2018 \\\n", "Country Code \n", "WLD 19.909714 19.897277 19.865401 \n", "\n", " GDP_calculated_2019 GDP_calculated_2020 GDP_calculated_2021 \\\n", "Country Code \n", "WLD 19.844482 19.80143 31.407289 \n", "\n", " GDP_calculated_2022 \n", "Country Code \n", "WLD 31.527472 \n", "\n", "[1 rows x 376 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world = df_combined.loc['World']\n", "df_world" ] }, { "cell_type": "markdown", "id": "46a3d25f", "metadata": {}, "source": [ "### Countries" ] }, { "cell_type": "code", "execution_count": 5, "id": "dbca801f", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1960_gdp1961_gdp1962_gdp1963_gdp1964_gdp1965_gdp1966_gdp1967_gdp1968_gdp1969_gdp...GDP_calculated_2013GDP_calculated_2014GDP_calculated_2015GDP_calculated_2016GDP_calculated_2017GDP_calculated_2018GDP_calculated_2019GDP_calculated_2020GDP_calculated_2021GDP_calculated_2022
Country NameCountry Code
ArubaABW-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.017331-1.023442-1.046426-0.965575-0.972609-1.011146-1.006774-0.976539-0.925748-0.925497
AfghanistanAFG-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.015306-1.021407-1.044415-0.963573-0.970639-1.009221-1.004804-0.972484-0.923705-0.918717
AngolaAGO-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-0.465233-0.657339-0.932675-0.812782-0.736018-0.664243-0.802329-0.931221-0.917089-0.923103
AlbaniaALB-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.024486-1.030680-1.050654-0.972267-0.979659-1.018762-1.015177-0.984239-0.946538-0.954034
AndorraAND-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.017377-1.023472-1.046447-0.965590-0.972647-1.011185-1.006798-0.976514-0.925774-0.925529
.....................................................................
KosovoXKX-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.028729-1.034928-1.054572-0.976205-0.983589-1.022714-1.019152-0.988298-0.951340-0.959118
Yemen, Rep.YEM-0.201874-0.200982-0.200327-0.199863-0.200426-0.202832-0.202705-0.202662-0.203255-0.204786...-1.018770-1.025404-1.049979-0.972641-0.980441-1.018888-1.000565-0.970490-0.919628-0.918717
South AfricaZAF-0.169603-0.168437-0.167114-0.165969-0.165662-0.163839-0.163451-0.160740-0.162603-0.164274...-0.933638-0.942189-0.971863-0.888590-0.883052-0.940374-0.938708-0.791299-0.852809-0.859582
ZambiaZMB-0.221483-0.220372-0.219614-0.219669-0.219984-0.218117-0.217284-0.216134-0.217168-0.218587...-1.009544-1.021177-1.047666-0.963273-0.970319-1.008834-1.003544-0.951182-0.923430-0.923146
ZimbabweZWE-0.220994-0.219722-0.218875-0.218843-0.219564-0.218025-0.216980-0.215805-0.216831-0.217879...-1.026594-1.032658-1.052336-0.974104-0.981528-1.020672-1.017468-0.986864-0.950039-0.957146
\n", "

226 rows × 376 columns

\n", "
" ], "text/plain": [ " 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp \\\n", "Country Name Country Code \n", "Aruba ABW -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "Afghanistan AFG -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "Angola AGO -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "Albania ALB -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "Andorra AND -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "... ... ... ... ... ... \n", "Kosovo XKX -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "Yemen, Rep. YEM -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "South Africa ZAF -0.169603 -0.168437 -0.167114 -0.165969 -0.165662 \n", "Zambia ZMB -0.221483 -0.220372 -0.219614 -0.219669 -0.219984 \n", "Zimbabwe ZWE -0.220994 -0.219722 -0.218875 -0.218843 -0.219564 \n", "\n", " 1965_gdp 1966_gdp 1967_gdp 1968_gdp 1969_gdp \\\n", "Country Name Country Code \n", "Aruba ABW -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "Afghanistan AFG -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "Angola AGO -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "Albania ALB -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "Andorra AND -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "... ... ... ... ... ... \n", "Kosovo XKX -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "Yemen, Rep. YEM -0.202832 -0.202705 -0.202662 -0.203255 -0.204786 \n", "South Africa ZAF -0.163839 -0.163451 -0.160740 -0.162603 -0.164274 \n", "Zambia ZMB -0.218117 -0.217284 -0.216134 -0.217168 -0.218587 \n", "Zimbabwe ZWE -0.218025 -0.216980 -0.215805 -0.216831 -0.217879 \n", "\n", " ... GDP_calculated_2013 GDP_calculated_2014 \\\n", "Country Name Country Code ... \n", "Aruba ABW ... -1.017331 -1.023442 \n", "Afghanistan AFG ... -1.015306 -1.021407 \n", "Angola AGO ... -0.465233 -0.657339 \n", "Albania ALB ... -1.024486 -1.030680 \n", "Andorra AND ... -1.017377 -1.023472 \n", "... ... ... ... \n", "Kosovo XKX ... -1.028729 -1.034928 \n", "Yemen, Rep. YEM ... -1.018770 -1.025404 \n", "South Africa ZAF ... -0.933638 -0.942189 \n", "Zambia ZMB ... -1.009544 -1.021177 \n", "Zimbabwe ZWE ... -1.026594 -1.032658 \n", "\n", " GDP_calculated_2015 GDP_calculated_2016 \\\n", "Country Name Country Code \n", "Aruba ABW -1.046426 -0.965575 \n", "Afghanistan AFG -1.044415 -0.963573 \n", "Angola AGO -0.932675 -0.812782 \n", "Albania ALB -1.050654 -0.972267 \n", "Andorra AND -1.046447 -0.965590 \n", "... ... ... \n", "Kosovo XKX -1.054572 -0.976205 \n", "Yemen, Rep. YEM -1.049979 -0.972641 \n", "South Africa ZAF -0.971863 -0.888590 \n", "Zambia ZMB -1.047666 -0.963273 \n", "Zimbabwe ZWE -1.052336 -0.974104 \n", "\n", " GDP_calculated_2017 GDP_calculated_2018 \\\n", "Country Name Country Code \n", "Aruba ABW -0.972609 -1.011146 \n", "Afghanistan AFG -0.970639 -1.009221 \n", "Angola AGO -0.736018 -0.664243 \n", "Albania ALB -0.979659 -1.018762 \n", "Andorra AND -0.972647 -1.011185 \n", "... ... ... \n", "Kosovo XKX -0.983589 -1.022714 \n", "Yemen, Rep. YEM -0.980441 -1.018888 \n", "South Africa ZAF -0.883052 -0.940374 \n", "Zambia ZMB -0.970319 -1.008834 \n", "Zimbabwe ZWE -0.981528 -1.020672 \n", "\n", " GDP_calculated_2019 GDP_calculated_2020 \\\n", "Country Name Country Code \n", "Aruba ABW -1.006774 -0.976539 \n", "Afghanistan AFG -1.004804 -0.972484 \n", "Angola AGO -0.802329 -0.931221 \n", "Albania ALB -1.015177 -0.984239 \n", "Andorra AND -1.006798 -0.976514 \n", "... ... ... \n", "Kosovo XKX -1.019152 -0.988298 \n", "Yemen, Rep. YEM -1.000565 -0.970490 \n", "South Africa ZAF -0.938708 -0.791299 \n", "Zambia ZMB -1.003544 -0.951182 \n", "Zimbabwe ZWE -1.017468 -0.986864 \n", "\n", " GDP_calculated_2021 GDP_calculated_2022 \n", "Country Name Country Code \n", "Aruba ABW -0.925748 -0.925497 \n", "Afghanistan AFG -0.923705 -0.918717 \n", "Angola AGO -0.917089 -0.923103 \n", "Albania ALB -0.946538 -0.954034 \n", "Andorra AND -0.925774 -0.925529 \n", "... ... ... \n", "Kosovo XKX -0.951340 -0.959118 \n", "Yemen, Rep. YEM -0.919628 -0.918717 \n", "South Africa ZAF -0.852809 -0.859582 \n", "Zambia ZMB -0.923430 -0.923146 \n", "Zimbabwe ZWE -0.950039 -0.957146 \n", "\n", "[226 rows x 376 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Excluir filas que pertenecen a regiones y al mundo\n", "\n", "lista_regiones = [\n", " \"Africa Eastern and Southern\", \"Africa Western and Central\", \"Central Europe and the Baltics\",\n", " \"East Asia & Pacific\", \"Europe & Central Asia\", \"European Union\", \"Latin America & Caribbean\",\n", " \"Middle East & North Africa\", \"North America\", \"OECD members\", \"Sub-Saharan Africa (excluding high income)\",\n", " \"South Asia (IDA & IBRD)\"\n", "]\n", "country_mask = (~df_combined.index.get_level_values('Country Name').isin(lista_regiones + ['World']))\n", "df_countries = df_combined[country_mask]\n", "df_countries" ] }, { "cell_type": "markdown", "id": "b6c3c8fa", "metadata": {}, "source": [ "### Regions (De referencia, no se aplicará)" ] }, { "cell_type": "code", "execution_count": 6, "id": "3ef53e13", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1960_gdp1961_gdp1962_gdp1963_gdp1964_gdp1965_gdp1966_gdp1967_gdp1968_gdp1969_gdp...GDP_calculated_2013GDP_calculated_2014GDP_calculated_2015GDP_calculated_2016GDP_calculated_2017GDP_calculated_2018GDP_calculated_2019GDP_calculated_2020GDP_calculated_2021GDP_calculated_2022
Region
East Asia & Pacific-6.824558-6.801649-6.773692-6.739758-6.723409-6.765620-6.720207-6.669891-6.659509-6.649542...-3.674142-2.3588340.1215311.4594651.6903200.2263572.4951863.851648-0.500491-0.310485
Europe & Central Asia-9.691664-9.603688-9.578843-9.550707-9.599877-9.727407-9.740002-9.710470-9.759768-9.842725...-41.788147-43.454942-49.563682-43.578853-42.284450-43.163723-44.222898-45.346960-46.765078-46.937925
Latin America & Caribbean-8.464656-8.408003-8.379139-8.387486-8.404706-8.423448-8.409930-8.381006-8.407552-8.457507...-38.686483-38.834487-40.279676-36.497662-36.286598-37.868593-37.789479-36.552207-34.779918-34.828626
Middle East & North Africa-4.249103-4.229591-4.217366-4.204654-4.214414-4.276927-4.277744-4.264914-4.259781-4.281930...16.65988714.1861889.04005510.89264911.53191112.37387910.4001598.7803819.0380838.685722
North America2.4383802.3962442.4181132.3968262.3774472.4038342.4252442.3812012.3474752.280019...1.9299531.9331121.3271172.0662892.0230861.9276991.9333612.0908024.8299244.690498
\n", "

5 rows × 376 columns

\n", "
" ], "text/plain": [ " 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp \\\n", "Region \n", "East Asia & Pacific -6.824558 -6.801649 -6.773692 -6.739758 -6.723409 \n", "Europe & Central Asia -9.691664 -9.603688 -9.578843 -9.550707 -9.599877 \n", "Latin America & Caribbean -8.464656 -8.408003 -8.379139 -8.387486 -8.404706 \n", "Middle East & North Africa -4.249103 -4.229591 -4.217366 -4.204654 -4.214414 \n", "North America 2.438380 2.396244 2.418113 2.396826 2.377447 \n", "\n", " 1965_gdp 1966_gdp 1967_gdp 1968_gdp 1969_gdp \\\n", "Region \n", "East Asia & Pacific -6.765620 -6.720207 -6.669891 -6.659509 -6.649542 \n", "Europe & Central Asia -9.727407 -9.740002 -9.710470 -9.759768 -9.842725 \n", "Latin America & Caribbean -8.423448 -8.409930 -8.381006 -8.407552 -8.457507 \n", "Middle East & North Africa -4.276927 -4.277744 -4.264914 -4.259781 -4.281930 \n", "North America 2.403834 2.425244 2.381201 2.347475 2.280019 \n", "\n", " ... GDP_calculated_2013 GDP_calculated_2014 \\\n", "Region ... \n", "East Asia & Pacific ... -3.674142 -2.358834 \n", "Europe & Central Asia ... -41.788147 -43.454942 \n", "Latin America & Caribbean ... -38.686483 -38.834487 \n", "Middle East & North Africa ... 16.659887 14.186188 \n", "North America ... 1.929953 1.933112 \n", "\n", " GDP_calculated_2015 GDP_calculated_2016 \\\n", "Region \n", "East Asia & Pacific 0.121531 1.459465 \n", "Europe & Central Asia -49.563682 -43.578853 \n", "Latin America & Caribbean -40.279676 -36.497662 \n", "Middle East & North Africa 9.040055 10.892649 \n", "North America 1.327117 2.066289 \n", "\n", " GDP_calculated_2017 GDP_calculated_2018 \\\n", "Region \n", "East Asia & Pacific 1.690320 0.226357 \n", "Europe & Central Asia -42.284450 -43.163723 \n", "Latin America & Caribbean -36.286598 -37.868593 \n", "Middle East & North Africa 11.531911 12.373879 \n", "North America 2.023086 1.927699 \n", "\n", " GDP_calculated_2019 GDP_calculated_2020 \\\n", "Region \n", "East Asia & Pacific 2.495186 3.851648 \n", "Europe & Central Asia -44.222898 -45.346960 \n", "Latin America & Caribbean -37.789479 -36.552207 \n", "Middle East & North Africa 10.400159 8.780381 \n", "North America 1.933361 2.090802 \n", "\n", " GDP_calculated_2021 GDP_calculated_2022 \n", "Region \n", "East Asia & Pacific -0.500491 -0.310485 \n", "Europe & Central Asia -46.765078 -46.937925 \n", "Latin America & Caribbean -34.779918 -34.828626 \n", "Middle East & North Africa 9.038083 8.685722 \n", "North America 4.829924 4.690498 \n", "\n", "[5 rows x 376 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Archivo Codes es un metadata sobre códigos de paises.\n", "codes = pd.read_excel('Codes.xlsx')\n", "\n", "# Crear un mapeo de 'Country Code' a 'Region' usando solo las entradas en 'lista_regiones'\n", "filtered_codes = codes[codes['Region'].isin(lista_regiones)]\n", "country_to_region = filtered_codes.set_index('Country Code')['Region']\n", "\n", "# Aplicar este mapeo al DataFrame 'df_combined'\n", "df_combined['Region'] = df_combined.index.get_level_values('Country Code').map(country_to_region)\n", "\n", "# Filtrar el DataFrame para incluir solo filas donde la 'Region' esté definida\n", "df_regions = df_combined.dropna(subset=['Region'])\n", "\n", "# Agrupar por 'Region' y sumar los valores para cada columna\n", "df_regions = df_regions.groupby('Region').sum()\n", "\n", "df_regions.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "4307b4b6", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Configurar Estilo de Seaborn\n", "sns.set(style=\"whitegrid\")\n", "\n", "# Seleccionar un subconjunto de columnas para la visualización por simplicidad\n", "columns_to_plot = [f'GDP_calculated_{year}' for year in range(2010, 2023)]\n", "\n", "# Crear un DataFrame para la visualización\n", "df_plot = df_regions[columns_to_plot].transpose()\n", "\n", "plt.figure(figsize=(20, 10))\n", "for column in df_plot.columns:\n", " plt.plot(df_plot.index, df_plot[column], marker='o', label=column)\n", "\n", "plt.title('GDP Calculated Over Time by Region')\n", "plt.xlabel('Year')\n", "plt.ylabel('GDP Calculated')\n", "plt.legend(title='Region', bbox_to_anchor=(1.05, 1), loc='upper left')\n", "plt.xticks(rotation=45)\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "8ff34dfc", "metadata": {}, "source": [ "Por alguna razón, después de tantos errores, descubro que al crear 'Regions' se crean **NaN**, por lo que no trabajaré con éste DataFrame." ] }, { "cell_type": "markdown", "id": "6e06a067", "metadata": {}, "source": [ "## Guardar los DataFrames como CSV" ] }, { "cell_type": "code", "execution_count": 8, "id": "be01121b", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp',\n", " '1965_gdp', '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " '1970_gdp', '1971_gdp', '1972_gdp', '1973_gdp', '1974_gdp',\n", " '1975_gdp', '1976_gdp', '1977_gdp', '1978_gdp', '1979_gdp',\n", " '1980_gdp', '1981_gdp', '1982_gdp', '1983_gdp', '1984_gdp',\n", " '1985_gdp', '1986_gdp', '1987_gdp', '1988_gdp', '1989_gdp',\n", " '1990_gdp', '1991_gdp', '1992_gdp', '1993_gdp', '1994_gdp',\n", " '1995_gdp', '1996_gdp', '1997_gdp', '1998_gdp', '1999_gdp',\n", " '2000_gdp', '2001_gdp', '2002_gdp', '2003_gdp', '2004_gdp',\n", " '2005_gdp', '2006_gdp', '2007_gdp', '2008_gdp', '2009_gdp',\n", " '2010_gdp', '2011_gdp', '2012_gdp', '2013_gdp', '2014_gdp',\n", " '2015_gdp', '2016_gdp', '2017_gdp', '2018_gdp', '2019_gdp',\n", " '2020_gdp', '2021_gdp', '2022_gdp', '1960_gov', '1961_gov',\n", " '1962_gov', '1963_gov', '1964_gov', '1965_gov', '1966_gov',\n", " '1967_gov', '1968_gov', '1969_gov', '1970_gov', '1971_gov',\n", " '1972_gov', '1973_gov', '1974_gov', '1975_gov', '1976_gov',\n", " '1977_gov', '1978_gov', '1979_gov', '1980_gov', '1981_gov',\n", " '1982_gov', '1983_gov', '1984_gov', '1985_gov', '1986_gov',\n", " '1987_gov', '1988_gov', '1989_gov', '1990_gov', '1991_gov',\n", " '1992_gov', '1993_gov', '1994_gov', '1995_gov', '1996_gov',\n", " '1997_gov', '1998_gov', '1999_gov', '2000_gov', '2001_gov',\n", " '2002_gov', '2003_gov', '2004_gov', '2005_gov', '2006_gov',\n", " '2007_gov', '2008_gov', '2009_gov', '2010_gov', '2011_gov',\n", " '2012_gov', '2013_gov', '2014_gov', '2015_gov', '2016_gov',\n", " '2017_gov', '2018_gov', '2019_gov', '2020_gov', '2021_gov',\n", " '2022_gov', '1960', '1961', '1962', '1963', '1964', '1965', '1966',\n", " '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',\n", " '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982',\n", " '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990',\n", " '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',\n", " '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006',\n", " '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',\n", " '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',\n", " '1960_con', '1961_con', '1962_con', '1963_con', '1964_con',\n", " '1965_con', '1966_con', '1967_con', '1968_con', '1969_con',\n", " '1970_con', '1971_con', '1972_con', '1973_con', '1974_con',\n", " '1975_con', '1976_con', '1977_con', '1978_con', '1979_con',\n", " '1980_con', '1981_con', '1982_con', '1983_con', '1984_con',\n", " '1985_con', '1986_con', '1987_con', '1988_con', '1989_con',\n", " '1990_con', '1991_con', '1992_con', '1993_con', '1994_con',\n", " '1995_con', '1996_con', '1997_con', '1998_con', '1999_con',\n", " '2000_con', '2001_con', '2002_con', '2003_con', '2004_con',\n", " '2005_con', '2006_con', '2007_con', '2008_con', '2009_con',\n", " '2010_con', '2011_con', '2012_con', '2013_con', '2014_con',\n", " '2015_con', '2016_con', '2017_con', '2018_con', '2019_con',\n", " '2020_con', '2021_con', '2022_con', '1960_trade', '1961_trade',\n", " '1962_trade', '1963_trade', '1964_trade', '1965_trade',\n", " '1966_trade', '1967_trade', '1968_trade', '1969_trade',\n", " '1970_trade', '1971_trade', '1972_trade', '1973_trade',\n", " '1974_trade', '1975_trade', '1976_trade', '1977_trade',\n", " '1978_trade', '1979_trade', '1980_trade', '1981_trade',\n", " '1982_trade', '1983_trade', '1984_trade', '1985_trade',\n", " '1986_trade', '1987_trade', '1988_trade', '1989_trade',\n", " '1990_trade', '1991_trade', '1992_trade', '1993_trade',\n", " '1994_trade', '1995_trade', '1996_trade', '1997_trade',\n", " '1998_trade', '1999_trade', '2000_trade', '2001_trade',\n", " '2002_trade', '2003_trade', '2004_trade', '2005_trade',\n", " '2006_trade', '2007_trade', '2008_trade', '2009_trade',\n", " '2010_trade', '2011_trade', '2012_trade', '2013_trade',\n", " '2014_trade', '2015_trade', '2016_trade', '2017_trade',\n", " '2018_trade', '2019_trade', '2020_trade', 'GDP_calculated_1960',\n", " 'GDP_calculated_1961', 'GDP_calculated_1962',\n", " 'GDP_calculated_1963', 'GDP_calculated_1964',\n", " 'GDP_calculated_1965', 'GDP_calculated_1966',\n", " 'GDP_calculated_1967', 'GDP_calculated_1968',\n", " 'GDP_calculated_1969', 'GDP_calculated_1970',\n", " 'GDP_calculated_1971', 'GDP_calculated_1972',\n", " 'GDP_calculated_1973', 'GDP_calculated_1974',\n", " 'GDP_calculated_1975', 'GDP_calculated_1976',\n", " 'GDP_calculated_1977', 'GDP_calculated_1978',\n", " 'GDP_calculated_1979', 'GDP_calculated_1980',\n", " 'GDP_calculated_1981', 'GDP_calculated_1982',\n", " 'GDP_calculated_1983', 'GDP_calculated_1984',\n", " 'GDP_calculated_1985', 'GDP_calculated_1986',\n", " 'GDP_calculated_1987', 'GDP_calculated_1988',\n", " 'GDP_calculated_1989', 'GDP_calculated_1990',\n", " 'GDP_calculated_1991', 'GDP_calculated_1992',\n", " 'GDP_calculated_1993', 'GDP_calculated_1994',\n", " 'GDP_calculated_1995', 'GDP_calculated_1996',\n", " 'GDP_calculated_1997', 'GDP_calculated_1998',\n", " 'GDP_calculated_1999', 'GDP_calculated_2000',\n", " 'GDP_calculated_2001', 'GDP_calculated_2002',\n", " 'GDP_calculated_2003', 'GDP_calculated_2004',\n", " 'GDP_calculated_2005', 'GDP_calculated_2006',\n", " 'GDP_calculated_2007', 'GDP_calculated_2008',\n", " 'GDP_calculated_2009', 'GDP_calculated_2010',\n", " 'GDP_calculated_2011', 'GDP_calculated_2012',\n", " 'GDP_calculated_2013', 'GDP_calculated_2014',\n", " 'GDP_calculated_2015', 'GDP_calculated_2016',\n", " 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020',\n", " 'GDP_calculated_2021', 'GDP_calculated_2022'], dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Detalles de las columnas\n", "df_world.columns.values[:376]\n", "df_countries.columns.values[:376]" ] }, { "cell_type": "code", "execution_count": 9, "id": "def5e36d", "metadata": {}, "outputs": [], "source": [ "# Guardar el DataFrame como CSV\n", "df_world.to_csv('df_world.csv')\n", "df_countries.to_csv('df_countries.csv')" ] }, { "cell_type": "markdown", "id": "057d910d", "metadata": {}, "source": [ "### Diagnósticos" ] }, { "cell_type": "code", "execution_count": 10, "id": "65f693bf", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "( 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp 1966_gdp \\\n", " count 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n", " mean 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " std NaN NaN NaN NaN NaN NaN NaN \n", " min 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " 25% 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " 50% 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " 75% 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " max 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 8.893228 \n", " \n", " 1967_gdp 1968_gdp 1969_gdp ... GDP_calculated_2013 \\\n", " count 1.000000 1.000000 1.000000 ... 1.000000 \n", " mean 8.892912 8.894431 8.920468 ... 19.945839 \n", " std NaN NaN NaN ... NaN \n", " min 8.892912 8.894431 8.920468 ... 19.945839 \n", " 25% 8.892912 8.894431 8.920468 ... 19.945839 \n", " 50% 8.892912 8.894431 8.920468 ... 19.945839 \n", " 75% 8.892912 8.894431 8.920468 ... 19.945839 \n", " max 8.892912 8.894431 8.920468 ... 19.945839 \n", " \n", " GDP_calculated_2014 GDP_calculated_2015 GDP_calculated_2016 \\\n", " count 1.000000 1.000000 1.000000 \n", " mean 19.911398 17.391711 19.909714 \n", " std NaN NaN NaN \n", " min 19.911398 17.391711 19.909714 \n", " 25% 19.911398 17.391711 19.909714 \n", " 50% 19.911398 17.391711 19.909714 \n", " 75% 19.911398 17.391711 19.909714 \n", " max 19.911398 17.391711 19.909714 \n", " \n", " GDP_calculated_2017 GDP_calculated_2018 GDP_calculated_2019 \\\n", " count 1.000000 1.000000 1.000000 \n", " mean 19.897277 19.865401 19.844482 \n", " std NaN NaN NaN \n", " min 19.897277 19.865401 19.844482 \n", " 25% 19.897277 19.865401 19.844482 \n", " 50% 19.897277 19.865401 19.844482 \n", " 75% 19.897277 19.865401 19.844482 \n", " max 19.897277 19.865401 19.844482 \n", " \n", " GDP_calculated_2020 GDP_calculated_2021 GDP_calculated_2022 \n", " count 1.00000 1.000000 1.000000 \n", " mean 19.80143 31.407289 31.527472 \n", " std NaN NaN NaN \n", " min 19.80143 31.407289 31.527472 \n", " 25% 19.80143 31.407289 31.527472 \n", " 50% 19.80143 31.407289 31.527472 \n", " 75% 19.80143 31.407289 31.527472 \n", " max 19.80143 31.407289 31.527472 \n", " \n", " [8 rows x 376 columns],\n", " 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp \\\n", " count 226.000000 226.000000 226.000000 226.000000 226.000000 226.000000 \n", " mean -0.169781 -0.169241 -0.168657 -0.168409 -0.168818 -0.169926 \n", " std 0.228428 0.225131 0.224893 0.223695 0.223447 0.224810 \n", " min -0.224510 -0.223332 -0.222338 -0.222336 -0.222801 -0.221246 \n", " 25% -0.205611 -0.208913 -0.208103 -0.206509 -0.207993 -0.209509 \n", " 50% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " 75% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " max 2.672641 2.629604 2.645718 2.624166 2.605340 2.627229 \n", " \n", " 1966_gdp 1967_gdp 1968_gdp 1969_gdp ... \\\n", " count 226.000000 226.000000 226.000000 226.000000 ... \n", " mean -0.169695 -0.169360 -0.170062 -0.171161 ... \n", " std 0.225328 0.223013 0.221838 0.220208 ... \n", " min -0.220064 -0.219031 -0.219934 -0.221195 ... \n", " 25% -0.211203 -0.211326 -0.213195 -0.214294 ... \n", " 50% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " 75% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " max 2.644509 2.600452 2.570434 2.506399 ... \n", " \n", " GDP_calculated_2013 GDP_calculated_2014 GDP_calculated_2015 \\\n", " count 226.000000 226.000000 226.000000 \n", " mean -0.477203 -0.499640 -0.564891 \n", " std 2.360871 2.346446 2.403204 \n", " min -1.030556 -1.036704 -1.056181 \n", " 25% -1.017725 -1.023818 -1.048722 \n", " 50% -1.011065 -1.017360 -1.040619 \n", " 75% -0.861807 -0.875602 -0.940122 \n", " max 28.144082 27.835159 27.572665 \n", " \n", " GDP_calculated_2016 GDP_calculated_2017 GDP_calculated_2018 \\\n", " count 226.000000 226.000000 226.000000 \n", " mean -0.495029 -0.487934 -0.501394 \n", " std 2.330991 2.336694 2.339151 \n", " min -0.978107 -0.985575 -1.024731 \n", " 25% -0.966506 -0.973869 -1.012617 \n", " 50% -0.959720 -0.966623 -1.005069 \n", " 75% -0.875190 -0.860159 -0.883994 \n", " max 27.837297 27.701692 27.367289 \n", " \n", " GDP_calculated_2019 GDP_calculated_2020 GDP_calculated_2021 \\\n", " count 226.000000 226.000000 226.000000 \n", " mean -0.504525 -0.502495 -0.510326 \n", " std 2.318477 2.319114 2.262333 \n", " min -1.021194 -0.990254 -0.954674 \n", " 25% -1.008344 -0.977401 -0.936421 \n", " 50% -1.000565 -0.970490 -0.923756 \n", " 75% -0.879247 -0.891251 -0.853708 \n", " max 26.262588 26.264463 26.564552 \n", " \n", " GDP_calculated_2022 \n", " count 226.000000 \n", " mean -0.512282 \n", " std 2.262235 \n", " min -0.962321 \n", " 25% -0.939380 \n", " 50% -0.921459 \n", " 75% -0.867499 \n", " max 26.579051 \n", " \n", " [8 rows x 376 columns])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.describe(), df_countries.describe()" ] }, { "cell_type": "code", "execution_count": 11, "id": "93ba0e45", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0, 0)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.isnull().sum().sum(), df_countries.isnull().sum().sum()" ] }, { "cell_type": "code", "execution_count": 12, "id": "a38896aa", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "(1960_gdp float64\n", " 1961_gdp float64\n", " 1962_gdp float64\n", " 1963_gdp float64\n", " 1964_gdp float64\n", " ... \n", " GDP_calculated_2018 float64\n", " GDP_calculated_2019 float64\n", " GDP_calculated_2020 float64\n", " GDP_calculated_2021 float64\n", " GDP_calculated_2022 float64\n", " Length: 376, dtype: object,\n", " 1960_gdp float64\n", " 1961_gdp float64\n", " 1962_gdp float64\n", " 1963_gdp float64\n", " 1964_gdp float64\n", " ... \n", " GDP_calculated_2018 float64\n", " GDP_calculated_2019 float64\n", " GDP_calculated_2020 float64\n", " GDP_calculated_2021 float64\n", " GDP_calculated_2022 float64\n", " Length: 376, dtype: object)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.dtypes, df_countries.dtypes" ] }, { "cell_type": "code", "execution_count": 13, "id": "606c081e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((1, 376), (226, 376))" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.shape, df_countries.shape" ] }, { "cell_type": "code", "execution_count": 14, "id": "ceeafd60", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(Index(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp', '1965_gdp',\n", " '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " ...\n", " 'GDP_calculated_2013', 'GDP_calculated_2014', 'GDP_calculated_2015',\n", " 'GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020', 'GDP_calculated_2021',\n", " 'GDP_calculated_2022'],\n", " dtype='object', length=376),\n", " Index(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp', '1965_gdp',\n", " '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " ...\n", " 'GDP_calculated_2013', 'GDP_calculated_2014', 'GDP_calculated_2015',\n", " 'GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020', 'GDP_calculated_2021',\n", " 'GDP_calculated_2022'],\n", " dtype='object', length=376))" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.columns, df_countries.columns" ] }, { "cell_type": "code", "execution_count": 15, "id": "2c78a78d", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(Index(['WLD'], dtype='object', name='Country Code'),\n", " MultiIndex([( 'Aruba', 'ABW'),\n", " ( 'Afghanistan', 'AFG'),\n", " ( 'Angola', 'AGO'),\n", " ( 'Albania', 'ALB'),\n", " ( 'Andorra', 'AND'),\n", " ( 'Arab World', 'ARB'),\n", " ( 'United Arab Emirates', 'ARE'),\n", " ( 'Argentina', 'ARG'),\n", " ( 'Armenia', 'ARM'),\n", " ( 'American Samoa', 'ASM'),\n", " ...\n", " ('British Virgin Islands', 'VGB'),\n", " ( 'Virgin Islands (U.S.)', 'VIR'),\n", " ( 'Viet Nam', 'VNM'),\n", " ( 'Vanuatu', 'VUT'),\n", " ( 'Samoa', 'WSM'),\n", " ( 'Kosovo', 'XKX'),\n", " ( 'Yemen, Rep.', 'YEM'),\n", " ( 'South Africa', 'ZAF'),\n", " ( 'Zambia', 'ZMB'),\n", " ( 'Zimbabwe', 'ZWE')],\n", " names=['Country Name', 'Country Code'], length=226))" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.index, df_countries.index" ] }, { "cell_type": "code", "execution_count": 16, "id": "e2ec36d8", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 1 entries, WLD to WLD\n", "Columns: 376 entries, 1960_gdp to GDP_calculated_2022\n", "dtypes: float64(376)\n", "memory usage: 2.9+ KB\n", "\n", "MultiIndex: 226 entries, ('Aruba', 'ABW') to ('Zimbabwe', 'ZWE')\n", "Columns: 376 entries, 1960_gdp to GDP_calculated_2022\n", "dtypes: float64(376)\n", "memory usage: 684.8+ KB\n" ] }, { "data": { "text/plain": [ "(None, None)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_world.info(), df_countries.info()" ] }, { "cell_type": "markdown", "id": "02ba87fe", "metadata": {}, "source": [ "## Pasos Claves Faltantes\n", "\n", "\n" ] }, { "cell_type": "markdown", "id": "aa2cab2e", "metadata": {}, "source": [ "#### Verificación de Outliers" ] }, { "cell_type": "code", "execution_count": 17, "id": "6fe3d883", "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "def plot_boxplots(df, title):\n", " plt.figure(figsize=(22, 6))\n", " df.boxplot()\n", " plt.xticks(rotation=90)\n", " plt.title(title)\n", " plt.grid(False)\n", " plt.show()\n", "\n", "plot_boxplots(df_world, \"Boxplot for World Data\")\n", "plot_boxplots(df_countries, \"Boxplot for Countries Data\")" ] }, { "cell_type": "code", "execution_count": 18, "id": "2c4ee1f1", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Outliers in World Data:\n", " 1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", "1964_gdp 0\n", " ..\n", "GDP_calculated_2018 0\n", "GDP_calculated_2019 0\n", "GDP_calculated_2020 0\n", "GDP_calculated_2021 0\n", "GDP_calculated_2022 0\n", "Length: 376, dtype: int64\n", "Outliers in Countries Data:\n", " 1960_gdp 82\n", "1961_gdp 58\n", "1962_gdp 58\n", "1963_gdp 74\n", "1964_gdp 65\n", " ..\n", "GDP_calculated_2018 38\n", "GDP_calculated_2019 33\n", "GDP_calculated_2020 41\n", "GDP_calculated_2021 34\n", "GDP_calculated_2022 34\n", "Length: 376, dtype: int64\n" ] } ], "source": [ "# Función para calcular outliers basados en IQR\n", "def calculate_outliers(df):\n", " Q1 = df.quantile(0.25)\n", " Q3 = df.quantile(0.75)\n", " IQR = Q3 - Q1\n", " outliers = ((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).sum()\n", " return outliers\n", "\n", "# Aplicar la función a cada DataFrame y mostrar resultados\n", "outliers_world = calculate_outliers(df_world)\n", "outliers_countries = calculate_outliers(df_countries)\n", "\n", "print(\"Outliers in World Data:\\n\", outliers_world)\n", "print(\"Outliers in Countries Data:\\n\", outliers_countries)" ] }, { "cell_type": "markdown", "id": "88dae679", "metadata": {}, "source": [ "## Decisiones:\n", "- En este caso, no se requiere realizar ninguna corrección de outliers. En contextos económicos, los outliers pueden representar situaciones económicas reales (como crisis o booms económicos) y pueden ser críticos para ciertos análisis.\n", "- Dado que mi objetivo de análisis se centra en comprender el comportamiento general de la economía, considero poco relevante los **outliers** y no se requiera una transformación especial.\n", "- Además, los outliers son pocos y se distribuyen aleatoriamente, su efecto en el modelo puede ser mínimo." ] }, { "cell_type": "markdown", "id": "4b5794bc", "metadata": {}, "source": [ "### Escala de Estimación NO APLICAR\n", "- Los datos presentan estacionalidad, tendría que desestacionalizar antes de aplicar la Escala de Estimación, ya que esta técnica es sensible a los patrones estacionales.\n", "- La Regresión Robusta M-estimada minimiza una función de pérdida robusta en lugar de los mínimos cuadrados, como la **regresión Huber** que es menos sensible a los outliers.\n", "- Sin embargo, como se han realizado cambios en los datos después de la estandarización original, puede ser necesario volver a estandarizar los datos antes de aplicar la Escala de Estimación.\n", "- Entonces, el siguiente código queda **reservado** y no se aplicará:" ] }, { "cell_type": "code", "execution_count": null, "id": "64ae51cf", "metadata": {}, "outputs": [], "source": [ "#import statsmodels.api as sm\n", "\n", "#X = df_countries[['GDP_calculated_2019', 'GDP_calculated_2020']] # Ejemplo\n", "#y = df_countries['GDP_calculated_2021']\n", "\n", "# Añadir constante para el intercepto\n", "#X = sm.add_constant(X)\n", "\n", "# Crear y ajustar el modelo de regresión robusta\n", "#robust_model = sm.RLM(y, X, M=sm.robust.norms.HuberT())\n", "#results = robust_model.fit()\n", "\n", "#print(results.summary())" ] }, { "cell_type": "markdown", "id": "7667d088", "metadata": {}, "source": [ "### Si se hubiese aplicado...\n", "La regresión robusta mostró que el modelo ajustado es estadísticamente significativo y los coeficientes para **GDP_calculated_2019** y **GDP_calculated_2020** como predictores de **GDP_calculated_2021** eran ambos significativos con p-valores extremadamente bajos (prácticamente cero).\n", "\n", "**Interpretación de los Coeficientes**:\n", "\n", "**const**: El intercepto de **0.0103** indica el valor base de GDP_calculated_2021 cuando los predictores son cero.\n", "\n", "**GDP_calculated_2019**: El coeficiente de **-0.3149**, osea que si todo lo demás es constante, un incremento de una unidad en el GDP_calculated_2019 está asociado con una disminución de 0.3149 unidades en el GDP_calculated_2021. Esto podría interpretarse como un efecto retardado negativo o una corrección en el crecimiento del PIB.\n", "\n", "**GDP_calculated_2020**: El coeficiente de **1.2905** indica un fuerte impacto positivo en el GDP_calculated_2021 por cada unidad incrementada en el GDP_calculated_2020. \n", "\n", "**Consideraciones Estadísticas**:\n", "\n", "**Escala de Estimación (Scale Est.)**: Indica que el modelo es resistente a la influencia de los outliers.\n", "\n", "**Iteraciones (No. Iterations)**: El modelo convergió después de **24** iteraciones.\n", "\n", "**Intervalos de Confianza**: Los intervalos de confianza para los coeficientes son estrechos, indicando precisión en las estimaciones de los parámetros del modelo.\n", "\n", "Este modelo proporciona una base sólida para inferencias sobre cómo los valores pasados del PIB calculado podrían estar influyendo en los valores futuros, bajo el contexto de un modelo robusto a outliers. Estos resultados pueden ser útiles para tomar decisiones económicas informadas o para realizar proyecciones futuras basadas en tendencias pasadas." ] }, { "cell_type": "markdown", "id": "6fc9ead7", "metadata": {}, "source": [ "### Análisis de Componentes Temporales\n", "Para series temporales, es crucial entender tendencias y ciclicidad, especialmente si los datos se usarán para proyecciones o análisis predictivos." ] }, { "cell_type": "code", "execution_count": 19, "id": "d1c6857a", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "def decompose_time_series(series, title):\n", " if len(series) > 2: \n", " decomposition = sm.tsa.seasonal_decompose(series, model='additive', period=1) \n", " \n", " fig, ax = plt.subplots(4, 1, figsize=(16, 10), sharex=True)\n", " series.plot(ax=ax[0], color='b', title=title)\n", " ax[0].set_ylabel('Original')\n", " decomposition.trend.plot(ax=ax[1], color='r')\n", " ax[1].set_ylabel('Trend')\n", " decomposition.seasonal.plot(ax=ax[2], color='g')\n", " ax[2].set_ylabel('Seasonal')\n", " decomposition.resid.plot(ax=ax[3], color='k')\n", " ax[3].set_ylabel('Residual')\n", " plt.tight_layout()\n", " plt.show()\n", " else:\n", " print(f\"Not enough data to decompose {title}\")\n", "\n", "decompose_time_series(df_countries['GDP_calculated_2021'], 'Countries GDP 2021')\n" ] }, { "cell_type": "markdown", "id": "8326609e", "metadata": {}, "source": [ "Los gráficos muestran la descomposición de la serie temporal del PIB calculado para el año 2021 para los países. \n", "\n", "**Original**: Los picos pueden representar crecimientos económicos extraordinarios o caídas, posiblemente debido a eventos económicos específicos como reformas, crisis, o booms de recursos naturales.\n", "\n", "**Tendencia**: Algunas variaciones significativas indica que en esos países hubo cambios sustanciales en el tiempo en su actividad económica. \n", "\n", "**Estacional**: La componente estacional es esencialmente plana y cercana a cero en todos los casos, ya que sólo estamos viendo un año.\n", "\n", "**Residual**: Los residuos parecen ser pequeños para la mayoría de los países, sin embargo, los picos en los residuos pueden indicar modelos de comportamiento económico que no se explican completamente por la tendencia y que podrían ser objeto de una investigación más detallada." ] }, { "cell_type": "markdown", "id": "942ae41b", "metadata": {}, "source": [ "### Estandarización de Datos No Aplicada\n", "Si los nuevos DataFrames son derivados de DataFrames que ya han sido estandarizados, normalmente no necesitarías re-estandarizarlos, a menos que las transformaciones realizadas en los datos (como sumas o agrupaciones por región) pudieran haber alterado la escala o la distribución de los datos. Las estadísticas descriptivas muestran valores óptimos.." ] }, { "cell_type": "code", "execution_count": null, "id": "d84a8cce", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "\n", "# Seleccionar las columnas numéricas para la estandarización\n", "columns_to_scale = df_countries.columns[df_countries.columns.str.contains('GDP')]\n", "\n", "# Inicializar el objeto StandardScaler\n", "scaler = StandardScaler()\n", "\n", "# Estandarizar las columnas seleccionadas\n", "df_countries.loc[:, columns_to_scale] = scaler.fit_transform(df_countries[columns_to_scale])\n", "\n", "# Verificar los cambios\n", "print(df_countries[columns_to_scale].head())\n", "print(df_countries[columns_to_scale].describe())" ] }, { "cell_type": "markdown", "id": "bb9fe7f1", "metadata": {}, "source": [ "### Verificación de Estacionariedad\n", "Si los nuevos DataFrames son derivados de otros que ya han verificado la ausencia de estacionariedad, generalmente no necesitarías volver a verificarlo para las mismas series temporales. Sin embargo, si se han agregado nuevas transformaciones o datos (por ejemplo, sumas o promedios de nuevas variables), sería prudente realizar una verificación de estacionariedad sobre las series resultantes para asegurar la validez de los análisis temporales." ] }, { "cell_type": "markdown", "id": "19cf4012", "metadata": {}, "source": [ "#### ADF Test 2021 Countries" ] }, { "cell_type": "code", "execution_count": 22, "id": "0d63434e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Augmented Dickey-Fuller Test on \"GDP_calculated_2021\" \n", " -----------------------------------------------\n", " Null Hypothesis: Data has unit root. Non-Stationary.\n", " Significance Level = 0.05\n", " Test Statistic = -5.1314\n", " No. Lags Chosen = 3\n", " Critical value 1% = -3.46\n", " Critical value 5% = -2.875\n", " Critical value 10% = -2.574\n", " => P-Value = 0.0. Rejecting Null Hypothesis.\n", " => Series is Stationary.\n" ] } ], "source": [ "from statsmodels.tsa.stattools import adfuller\n", "\n", "def test_stationarity(series, signif=0.05, name='', verbose=False):\n", " r = adfuller(series, autolag='AIC')\n", " output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4), 'n_lags': r[2], 'n_obs': r[3]}\n", " p_value = output['pvalue'] \n", " def adjust(val, length= 6): return str(val).ljust(length)\n", "\n", " # Imprimir Resultados de la Prueba\n", " if verbose:\n", " print(f' Augmented Dickey-Fuller Test on \"{name}\"', \"\\n \", '-'*47)\n", " print(f' Null Hypothesis: Data has unit root. Non-Stationary.')\n", " print(f' Significance Level = {signif}')\n", " print(f' Test Statistic = {output[\"test_statistic\"]}')\n", " print(f' No. Lags Chosen = {output[\"n_lags\"]}')\n", "\n", " for key, val in r[4].items():\n", " print(f' Critical value {adjust(key)} = {round(val, 3)}')\n", "\n", " if p_value <= signif:\n", " print(f\" => P-Value = {p_value}. Rejecting Null Hypothesis.\")\n", " print(f\" => Series is Stationary.\")\n", " else:\n", " print(f\" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.\")\n", " print(f\" => Series is Non-Stationary.\")\n", " \n", " return output\n", "\n", "# Ejemplo de aplicación\n", "series = df_countries['GDP_calculated_2021']\n", "result = test_stationarity(series, name='GDP_calculated_2021', verbose=True)" ] }, { "cell_type": "markdown", "id": "b9415edc", "metadata": {}, "source": [ "#### ADF Anual Countries" ] }, { "cell_type": "markdown", "id": "b6127d83", "metadata": {}, "source": [ "## Regresión lineal" ] }, { "cell_type": "code", "execution_count": 26, "id": "db0c4b7e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Squared Error: 0.1084106968304278\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_squared_error\n", "\n", "# Variables predictoras: PIB de 2016 a 2020\n", "predictors = ['GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018', 'GDP_calculated_2019', 'GDP_calculated_2020']\n", "X = df_countries[predictors]\n", "y = df_countries['GDP_calculated_2021']\n", "\n", "# Dividir los datos en entrenamiento y prueba\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "# Modelo de regresión lineal\n", "model = LinearRegression()\n", "model.fit(X_train, y_train)\n", "\n", "# Predicciones y evaluación\n", "y_pred = model.predict(X_test)\n", "mse = mean_squared_error(y_test, y_pred)\n", "print(\"Mean Squared Error:\", mse)" ] }, { "cell_type": "markdown", "id": "a285371e", "metadata": {}, "source": [ "Estos MSE son relativamente bajos dado que los datos están estandarizados. Estos son relativamente pequeños errores considerando la escala de los datos." ] }, { "cell_type": "code", "execution_count": 27, "id": "2f3ae8c4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Variance of GDP_calculated_2021: 5.095504994892357\n", "Root Mean Squared Error (RMSE): 0.3292577969166832\n" ] } ], "source": [ "import numpy as np\n", "\n", "# Calculando la varianza de GDP_calculated_2021\n", "variance = np.var(df_countries['GDP_calculated_2021'])\n", "\n", "# Calculando el RMSE\n", "rmse = np.sqrt(mse)\n", "\n", "print(\"Variance of GDP_calculated_2021:\", variance)\n", "print(\"Root Mean Squared Error (RMSE):\", rmse)" ] }, { "cell_type": "markdown", "id": "957a5aaa", "metadata": {}, "source": [ "- La varianza de los datos estandarizados es más alta que 1, lo cual puede indicar una distribución más dispersa de lo esperado para datos puramente estandarizados. Tal vez porque los datos tienen variabilidad inherente que podría estar afectando el desempeño del modelo.\n", "- Dado que el RMSE es considerablemente menor que la raíz cuadrada de la varianza de los datos (~2.26), esto indica que el modelo está haciendo un buen trabajo al capturar la variabilidad de los datos. Sin embargo, el RMSE todavía representa un error significativo en términos de la escala de los datos." ] }, { "cell_type": "code", "execution_count": 28, "id": "91f8f9ba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Resultados de MSE de cada fold: [0.6766614 0.08180557 0.59952202 0.26042559 0.16903139]\n", "MSE promedio: 0.35748919398012907\n", "Desviación estándar de MSE: 0.23722981798694917\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import cross_val_score\n", "import numpy as np\n", "\n", "predictors = ['GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018', 'GDP_calculated_2019', 'GDP_calculated_2020']\n", "X = df_countries[predictors]\n", "y = df_countries['GDP_calculated_2021']\n", "\n", "model = LinearRegression()\n", "\n", "# Cross-Validation\n", "# Usamos 5 pliegues (folds) y medimos el error cuadrático medio negativo\n", "scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')\n", "\n", "# Convertimos los scores a positivo porque 'neg_mean_squared_error' devuelve valores negativos\n", "mse_scores = -scores\n", "\n", "# Calculamos el promedio y la desviación estándar de los MSE para evaluar la consistencia del modelo\n", "print(\"Resultados de MSE de cada fold:\", mse_scores)\n", "print(\"MSE promedio:\", mse_scores.mean())\n", "print(\"Desviación estándar de MSE:\", mse_scores.std())\n", "\n", "# Esto proporciona una visión de cuán variados son los resultados del modelo entre diferentes subconjuntos de datos." ] }, { "cell_type": "markdown", "id": "bb38b6a8", "metadata": {}, "source": [ "- Con el MSE de cada fold muestra inestabilidad en cómo el modelo se comporta con diferentes subconjuntos de datos.\n", "- La Desviación estándar de MSE es alta, hay que ver la división entre subconjuntos de datos." ] }, { "cell_type": "markdown", "id": "eaf77199", "metadata": {}, "source": [ "### Regresión de Cresta (Ridge Regression)\n", "La regresión de cresta ajusta un modelo de regresión lineal que también incluye un término de penalización L2. Esta penalización puede ayudar a reducir la varianza del modelo sin aumentar significativamente el sesgo, lo cual es útil en situaciones de alta variabilidad entre los folds de validación cruzada." ] }, { "cell_type": "code", "execution_count": 29, "id": "f5f6dcda", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE de Regresión de Cresta: 0.46595401888464466\n" ] } ], "source": [ "from sklearn.linear_model import Ridge\n", "from sklearn.model_selection import cross_val_score\n", "\n", "# Definir el modelo de regresión de cresta\n", "ridge_model = Ridge(alpha=1.0) # Alpha es el parámetro de regularización\n", "\n", "# Aplicar Cross-Validation\n", "ridge_scores = cross_val_score(ridge_model, X, y, cv=5, scoring='neg_mean_squared_error')\n", "print(\"MSE de Regresión de Cresta:\", -ridge_scores.mean())" ] }, { "cell_type": "markdown", "id": "39e0670a", "metadata": {}, "source": [ "La Regresión de Cresta ha mostrado un MSE más alto en comparación con tus modelos lineales iniciales. Recordando que el MSE inicial más bajo fue de aproximadamente 0.036 y otro fue 0.108, el MSE de 0.466 sugiere que la regresión de cresta no ha mejorado el rendimiento y podría estar demasiado regularizada o no adecuadamente configurada para los datos. " ] }, { "cell_type": "markdown", "id": "7719f1ea", "metadata": {}, "source": [ "### Regresión Lasso\n", "Lasso es similar a la regresión de cresta, pero utiliza una penalización L1, que puede llevar a coeficientes a cero, ofreciendo una especie de selección automática de características. Esto puede ser útil si algunos predictores son redundantes o menos relevantes." ] }, { "cell_type": "code", "execution_count": 30, "id": "a8ad4630", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE de Lasso: 0.317814287588202\n" ] } ], "source": [ "from sklearn.linear_model import Lasso\n", "\n", "# Definir el modelo Lasso\n", "lasso_model = Lasso(alpha=0.1) # Alpha controla la cantidad de shrinkage\n", "\n", "# Aplicar validación cruzada\n", "lasso_scores = cross_val_score(lasso_model, X, y, cv=5, scoring='neg_mean_squared_error')\n", "print(\"MSE de Lasso:\", -lasso_scores.mean())" ] }, { "cell_type": "markdown", "id": "1a878093", "metadata": {}, "source": [ "Aun es más alto que los modelos lineales básicos anteriores. Parece que no todos los predictores son igualmente útiles o que la penalización está eliminando información valiosa. " ] }, { "cell_type": "markdown", "id": "9d5dcc1b", "metadata": {}, "source": [ "### Modelos de Árboles de Decisión\n", "Los árboles de decisión son más flexibles que los modelos lineales y pueden capturar patrones no lineales y complejidades que los modelos lineales no pueden." ] }, { "cell_type": "code", "execution_count": 31, "id": "cbb236ec", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE de Árbol de Decisión: 1.2236968662198517\n" ] } ], "source": [ "from sklearn.tree import DecisionTreeRegressor\n", "\n", "# Definir el modelo de árbol de decisión\n", "tree_model = DecisionTreeRegressor(max_depth=5) # Controla la profundidad del árbol\n", "\n", "# Aplicar validación cruzada\n", "tree_scores = cross_val_score(tree_model, X, y, cv=5, scoring='neg_mean_squared_error')\n", "print(\"MSE de Árbol de Decisión:\", -tree_scores.mean())" ] }, { "cell_type": "markdown", "id": "5a2874d8", "metadata": {}, "source": [ "Es un MSE mucho más alto (1.47) en comparación con cualquier otro modelo probado. Esto sugiere que, aunque los árboles de decisión pueden modelar relaciones no lineales, pueden estar sobreajustándose a los datos de entrenamiento o simplemente no se adaptan bien a la estructura de tu conjunto de datos específico. La naturaleza de los árboles de decisión los hace muy sensibles a la variabilidad en los datos, lo que puede llevar a un rendimiento inconsistente." ] }, { "cell_type": "markdown", "id": "6d609bcf", "metadata": {}, "source": [ "### Random Forest\n", "Random Forest es un método de ensamble que utiliza múltiples árboles de decisión para reducir el riesgo de sobreajuste, lo que es común en árboles de decisión simples. Ofrece un buen balance entre sesgo y varianza y es muy efectivo en muchos problemas prácticos." ] }, { "cell_type": "code", "execution_count": 32, "id": "2c8a1243", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE de Random Forest: 1.8480368646500238\n" ] } ], "source": [ "from sklearn.ensemble import RandomForestRegressor\n", "\n", "# Definir el modelo de Random Forest\n", "forest_model = RandomForestRegressor(n_estimators=100, random_state=42) # n_estimators controla el número de árboles\n", "\n", "# Aplicar validación cruzada\n", "forest_scores = cross_val_score(forest_model, X, y, cv=5, scoring='neg_mean_squared_error')\n", "print(\"MSE de Random Forest:\", -forest_scores.mean())" ] }, { "cell_type": "markdown", "id": "05ed4eb0", "metadata": {}, "source": [ "Aunque generalmente es un modelo robusto y efectivo para muchos problemas de regresión, ha mostrado el peor rendimiento con un MSE de 1.848. Esto es sorprendente ya que los modelos de ensamble como Random Forest suelen mejorar el rendimiento de los árboles de decisión individuales a través de la agregación de resultados. En este caso, el alto MSE podría indicar una mala configuración de hiperparámetros, una necesidad de más árboles en el ensamble, o simplemente que los datos no son adecuados para este tipo de modelado debido a su estructura o el tipo de relación entre las variables." ] }, { "cell_type": "markdown", "id": "61ca8060", "metadata": {}, "source": [ "**Conclusiones**\n", "Basándote en estos resultados, parece que los modelos más simples, como el lineal básico y Lasso, están funcionando mejor en este conjunto de datos. Esto podría indicar que la relación entre las variables predictoras y la variable objetivo es más lineal, o que la naturaleza de los datos no se presta a la complejidad adicional introducida por los árboles de decisión o los modelos de ensamble." ] }, { "cell_type": "markdown", "id": "7e64852a", "metadata": {}, "source": [ "# Mi Perspectiva como Economista" ] }, { "cell_type": "markdown", "id": "5896a646", "metadata": {}, "source": [ "1. Naturaleza de los Datos Macroeconómicos\n", "Los datos macroeconómicos, como el PIB, el consumo, la inversión, y los gastos del gobierno, a menudo exhiben características como tendencias a largo plazo, ciclicidad, y posibles no linealidades. Además, estos datos pueden estar sujetos a cambios estructurales debido a políticas económicas, crisis financieras, o cambios tecnológicos significativos.\n", "\n", "- Tendencias y Estacionariedad: Muchos indicadores macroeconómicos son no estacionarios, es decir, tienen propiedades estadísticas que cambian con el tiempo. Esto puede incluir una media o varianza que varía con el tiempo. Los modelos lineales y algunos modelos no lineales asumen estacionariedad, lo que puede llevar a predicciones sesgadas o incorrectas si se aplican directamente a datos no estacionarios.\n", "\n", "- Cointegración: En macroeconomía, es común que series temporales múltiples compartan una tendencia común a largo plazo a pesar de ser no estacionarias individualmente. Esto se conoce como cointegración. Modelos que no consideran la posibilidad de cointegración pueden no captar adecuadamente la dinámica a largo plazo entre las variables.\n", "\n", "2. Relaciones Entre Variables\n", "En macroeconomía, las relaciones entre variables a menudo son complejas y pueden ser influenciadas por numerosos factores externos e internos.\n", "\n", "- Endogeneidad y Exogeneidad: Las relaciones entre variables macroeconómicas pueden ser endógenas, lo que significa que pueden influirse mutuamente. Por ejemplo, el consumo y el PIB pueden afectarse mutuamente. Los modelos que no manejan adecuadamente la endogeneidad pueden dar lugar a estimaciones sesgadas.\n", "\n", "- Cambios Estructurales: Los cambios en la política económica, grandes eventos económicos (como crisis financieras), o innovaciones tecnológicas pueden causar cambios estructurales en las relaciones económicas. Si un modelo no puede adaptarse a cambios estructurales a lo largo del tiempo, puede perder precisión a medida que el contexto económico subyacente evoluciona." ] }, { "cell_type": "markdown", "id": "261ccc0b", "metadata": {}, "source": [ "## Verificación de Supuestos Económicos y Estadísticos\n", "Sin embargo continuaré con otros supuestos, como la prueba de Breusch-Pagan o la prueba de Durbin-Watson." ] }, { "cell_type": "markdown", "id": "bd3f1488", "metadata": {}, "source": [ "### Modelo ARIMA\n", "El modelo ARIMA es útil para modelar series temporales que muestran patrones claros de tendencias o estacionalidad." ] }, { "cell_type": "code", "execution_count": 33, "id": "3a65bf0a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " SARIMAX Results \n", "==============================================================================\n", "Dep. Variable: 2014 No. Observations: 226\n", "Model: ARIMA(1, 1, 1) Log Likelihood 47.490\n", "Date: Sun, 12 May 2024 AIC -88.980\n", "Time: 21:25:40 BIC -78.732\n", "Sample: 0 HQIC -84.844\n", " - 226 \n", "Covariance Type: opg \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "ar.L1 -0.0259 0.471 -0.055 0.956 -0.948 0.896\n", "ma.L1 -1.0000 21.365 -0.047 0.963 -42.874 40.874\n", "sigma2 0.0375 0.800 0.047 0.963 -1.531 1.606\n", "===================================================================================\n", "Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 72165.12\n", "Prob(Q): 0.95 Prob(JB): 0.00\n", "Heteroskedasticity (H): 2.19 Skew: 8.56\n", "Prob(H) (two-sided): 0.00 Kurtosis: 89.05\n", "===================================================================================\n", "\n", "Warnings:\n", "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from statsmodels.tsa.arima.model import ARIMA\n", "import statsmodels.api as sm\n", "\n", "data = pd.read_csv('df_countries.csv') \n", "time_series = data['2014']\n", "\n", "model_arima = ARIMA(time_series, order=(1,1,1)) \n", "results_arima = model_arima.fit()\n", "print(results_arima.summary())" ] }, { "cell_type": "markdown", "id": "4232f806", "metadata": {}, "source": [ "- Un valor P alto (0.95) sugiere que no hay autocorrelación significativa en los residuos, lo cual es bueno.\n", "- Jarque-Bera: Prueba la normalidad de los residuos; un valor P de 0.00 rechaza la hipótesis de normalidad, indicando que los residuos no son normales.\n", "- Heteroskedasticity: Con un P de 0.00, hay evidencia de heterocedasticidad.\n", "\n", "**Interpretación**\n", "El modelo SARIMAX aplicado no parece capturar todas las dinámicas de los datos, como se evidencia por los coeficientes de AR y MA no significativos y las pruebas de diagnóstico que muestran problemas con la normalidad y la heterocedasticidad de los residuos. \n", "\n", "**Opinión personal**: Esto puede deberse a la necesidad de un modelo más complejo o diferentemente especificado, o puede reflejar la naturaleza desafiante de modelar datos macroeconómicos con estructuras subyacentes complejas." ] }, { "cell_type": "markdown", "id": "7b038680", "metadata": {}, "source": [ "### Modelo GARCH\n", "Utilizado para modelar la volatilidad de series financieras o económicas." ] }, { "cell_type": "code", "execution_count": 34, "id": "ac33d14a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Iteration: 5, Func. Count: 33, Neg. LLF: -4.4343562005323705\n", "Iteration: 10, Func. Count: 68, Neg. LLF: 74.82844976640274\n", "Iteration: 15, Func. Count: 101, Neg. LLF: 74.88308704571364\n", "Optimization terminated successfully (Exit mode 0)\n", " Current function value: -59.97551055030441\n", " Iterations: 23\n", " Function evaluations: 123\n", " Gradient evaluations: 19\n", " Constant Mean - GARCH Model Results \n", "==============================================================================\n", "Dep. Variable: 2014 R-squared: 0.000\n", "Mean Model: Constant Mean Adj. R-squared: 0.000\n", "Vol Model: GARCH Log-Likelihood: 59.9755\n", "Distribution: Normal AIC: -111.951\n", "Method: Maximum Likelihood BIC: -98.2689\n", " No. Observations: 226\n", "Date: Sun, May 12 2024 Df Residuals: 225\n", "Time: 21:28:17 Df Model: 1\n", " Mean Model \n", "========================================================================\n", " coef std err t P>|t| 95.0% Conf. Int.\n", "------------------------------------------------------------------------\n", "mu -0.1672 1.910e-02 -8.755 2.046e-18 [ -0.205, -0.130]\n", " Volatility Model \n", "=============================================================================\n", " coef std err t P>|t| 95.0% Conf. Int.\n", "-----------------------------------------------------------------------------\n", "omega 4.0612e-04 1.371e-03 0.296 0.767 [-2.280e-03,3.093e-03]\n", "alpha[1] 5.3858e-08 3.473e-02 1.551e-06 1.000 [-6.807e-02,6.807e-02]\n", "beta[1] 0.9994 2.271e-02 44.018 0.000 [ 0.955, 1.044]\n", "=============================================================================\n", "\n", "Covariance estimator: robust\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\arch\\univariate\\base.py:311: DataScaleWarning: y is poorly scaled, which may affect convergence of the optimizer when\n", "estimating the model parameters. The scale of y is 0.03733. Parameter\n", "estimation work better when this value is between 1 and 1000. The recommended\n", "rescaling is 10 * y.\n", "\n", "This warning can be disabled by either rescaling y before initializing the\n", "model or by setting rescale=False.\n", "\n", " warnings.warn(\n" ] } ], "source": [ "from arch import arch_model\n", "\n", "# Ajuste del modelo GARCH\n", "garch_model = arch_model(time_series, vol='Garch', p=1, q=1) \n", "results_garch = garch_model.fit(update_freq=5)\n", "print(results_garch.summary())" ] }, { "cell_type": "markdown", "id": "aabf3a75", "metadata": {}, "source": [ "**Interpretación**\n", "El modelo GARCH(1,1) indica que la volatilidad de la serie para el año 2014 es altamente persistente (beta cercano a 1), lo que es típico en datos financieros donde la volatilidad tiende a agruparse. Sin embargo, los choques específicos en la serie (medidos por alpha) no parecen tener un impacto significativo en la volatilidad futura, lo cual es atípico para un modelo GARCH y podría sugerir que los datos no exhiben mucha volatilidad de choques o que el modelo necesita ser ajustado o complementado con otros componentes." ] }, { "cell_type": "code", "execution_count": null, "id": "6fcbb25e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "c14f784f", "metadata": {}, "source": [ "### Homocedasticidad\n", "Puedes verificar la homocedasticidad utilizando pruebas como la prueba de Breusch-Pagan o visualizando los residuos de un modelo regresivo contra los valores ajustados. La homogeneidad de la varianza en los residuos es crucial para inferencias confiables en la regresión." ] }, { "cell_type": "code", "execution_count": 35, "id": "9c333f73", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n", "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n", "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "p-value de la prueba de Breusch-Pagan: 5.3068079966277775e-05\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import statsmodels.api as sm\n", "from statsmodels.stats.diagnostic import het_breuschpagan\n", "from statsmodels.formula.api import ols\n", "import matplotlib.pyplot as plt\n", "\n", "modelo = 'GDP_calculated_2022 ~ GDP_calculated_2021'\n", "\n", "model = ols(modelo, data=df_countries).fit()\n", "\n", "residuos = model.resid\n", "\n", "# Prueba de Breusch-Pagan\n", "_, pvalue, _, _ = het_breuschpagan(residuos, model.model.exog)\n", "print(f\"p-value de la prueba de Breusch-Pagan: {pvalue}\")\n", "\n", "# Visualización de residuos\n", "plt.figure(figsize=(20, 6))\n", "plt.scatter(model.fittedvalues, residuos)\n", "plt.axhline(0, color='red', linestyle='--')\n", "plt.title('Residuos vs Valores Ajustados')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.show()\n", "\n", "# Si el p-value es menor a 0.05, existe evidencia de heterocedasticidad." ] }, { "cell_type": "markdown", "id": "344a7fc4", "metadata": {}, "source": [ "El bajo **p-value** de la prueba de **Breusch-Pagan (5.31e-05)** indica que hay evidencia significativa de heterocedasticidad en los residuos del modelo. E\n", "\n", "La visualización también muestra algunos signos de heterocedasticidad, dado que los residuos no parecen distribuirse uniformemente en torno a la línea roja horizontal, especialmente evidente con algunos puntos alejados de la línea central hacia los valores ajustados más altos." ] }, { "cell_type": "markdown", "id": "20f52e52", "metadata": {}, "source": [ "#### Weighted Least Squares (WLS)\n", "Utiliza los residuos de un modelo OLS para estimar los pesos y aplica WLS para considerar la heterocedasticidad. Los pesos pueden basarse en el inverso de los residuos al cuadrado de un modelo OLS preliminar." ] }, { "cell_type": "code", "execution_count": 36, "id": "19baf524", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " WLS Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 R-squared: 1.000\n", "Model: WLS Adj. R-squared: 1.000\n", "Method: Least Squares F-statistic: 1.160e+06\n", "Date: Sun, 12 May 2024 Prob (F-statistic): 0.00\n", "Time: 21:31:57 Log-Likelihood: 794.87\n", "No. Observations: 226 AIC: -1580.\n", "Df Residuals: 221 BIC: -1563.\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0760 0.005 -15.237 0.000 -0.086 -0.066\n", "2020_gov 1.1297 0.007 150.802 0.000 1.115 1.144\n", "2020 2.6438 0.025 106.767 0.000 2.595 2.693\n", "2020_con 0.8706 0.006 153.978 0.000 0.859 0.882\n", "2020_trade 0.3443 0.012 27.622 0.000 0.320 0.369\n", "==============================================================================\n", "Omnibus: 16.899 Durbin-Watson: 1.819\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 49.479\n", "Skew: -0.154 Prob(JB): 1.80e-11\n", "Kurtosis: 5.271 Cond. No. 153.\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "import statsmodels.api as sm\n", "\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade']] # G + I + C + T\n", "y = df_countries['GDP_calculated_2021'] # Traget = GDP\n", "\n", "# Constante\n", "X = sm.add_constant(X)\n", "\n", "# Modelo OLS para obtener los residuos\n", "model_ols = sm.OLS(y, X).fit()\n", "residuos = model_ols.resid\n", "\n", "# Inverso de los residuos al cuadrado\n", "pesos = 1.0 / (residuos ** 2)\n", "\n", "# Aplicar WLS con los pesos obtenidos\n", "model_wls = sm.WLS(y, X, weights=pesos).fit()\n", "\n", "print(model_wls.summary())" ] }, { "cell_type": "markdown", "id": "23b90234", "metadata": {}, "source": [ "#### Normalidad de los Residuos\n", "Utiliza pruebas como Shapiro-Wilk o Kolmogorov-Smirnov después de ajustar un modelo, o incluso visualiza un Q-Q plot de los residuos. La normalidad es esencial para la validez de muchas pruebas estadísticas, incluyendo aquellos en regresiones lineales. Un p-value pequeño en la prueba de Shapiro-Wilk (típicamente menor que 0.05) sugeriría que los residuos no se distribuyen normalmente, lo cual puede ser una indicación de que el modelo no captura toda la complejidad de los datos o que existen outliers." ] }, { "cell_type": "code", "execution_count": 37, "id": "ecbf4b03", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shapiro-Wilk Test:\n", "Statistic: 0.22526339595297296\n", "p-value: 1.2498590524151944e-29\n" ] } ], "source": [ "import scipy.stats as stats\n", "\n", "residuos = model_wls.resid\n", "\n", "shapiro_test = stats.shapiro(residuos)\n", "\n", "print(\"Shapiro-Wilk Test:\")\n", "print(\"Statistic:\", shapiro_test.statistic)\n", "print(\"p-value:\", shapiro_test.pvalue)" ] }, { "cell_type": "markdown", "id": "f401cf53", "metadata": {}, "source": [ "El test de Shapiro-Wilk tiene un valor muy bajo para la estadística y un p-valor extremadamente pequeño, es una falta de normalidad lo que afecta la confiabilidad de las pruebas de significancia y los intervalos de confianza para los coeficientes del modelo." ] }, { "cell_type": "markdown", "id": "c4792089", "metadata": {}, "source": [ "#### Ausencia de Multicolinealidad\n", "Antes de realizar un modelado regresivo, revisa la multicolinealidad entre variables independientes. Esto se puede hacer calculando el Factor de Inflación de la Varianza (VIF). Un VIF mayor a 10 (o en casos más estrictos, mayor a 5) puede indicar problemas significativos de multicolinealidad." ] }, { "cell_type": "code", "execution_count": 38, "id": "38853032", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " feature VIF\n", "0 const 1.666738\n", "1 2020_gov 16.399994\n", "2 2020 1.052399\n", "3 2020_con 16.402942\n", "4 2020_trade 1.050951\n" ] } ], "source": [ "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", "\n", "# VIF para cada variable en el modelo\n", "vif_data = pd.DataFrame()\n", "vif_data[\"feature\"] = X.columns\n", "vif_data[\"VIF\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", "\n", "print(vif_data)" ] }, { "cell_type": "markdown", "id": "94696e37", "metadata": {}, "source": [ "Los resultados del **Factor de Inflación de Varianza (VIF)** muestran que las variables **2020_gov** y **2020_con** tienen VIFs muy altos (mayores de 10), lo que sugiere una fuerte multicolinealidad. Esto significa que estas variables predictoras están altamente correlacionadas con otras predictoras en el modelo, lo que puede hacer que los coeficientes del modelo sean inestables y difíciles de interpretar. Tal vez los excluya para reducir la multicolinealidad." ] }, { "cell_type": "markdown", "id": "e71f383b", "metadata": {}, "source": [ "### Regresión Robusta\n", "Para mejorar el modelo usaré usando el método RLM (Robust Linear Model) de StatsModels" ] }, { "cell_type": "code", "execution_count": 39, "id": "f09082b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Robust linear Model Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 No. Observations: 226\n", "Model: RLM Df Residuals: 221\n", "Method: IRLS Df Model: 4\n", "Norm: HuberT \n", "Scale Est.: mad \n", "Cov Type: H1 \n", "Date: Sun, 12 May 2024 \n", "Time: 21:33:28 \n", "No. Iterations: 50 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0950 0.000 -194.216 0.000 -0.096 -0.094\n", "2020_gov 1.1203 0.001 790.858 0.000 1.117 1.123\n", "2020 2.8960 0.002 1504.097 0.000 2.892 2.900\n", "2020_con 0.8781 0.001 619.751 0.000 0.875 0.881\n", "2020_trade 0.0496 0.001 60.150 0.000 0.048 0.051\n", "==============================================================================\n", "\n", "If the model instance has been used for another fit with different fit parameters, then the fit options might not be the correct ones anymore .\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABJEAAAGECAYAAACGbbE9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAAsTAAALEwEAmpwYAAA4V0lEQVR4nO3deZhcVZ3/8XcnJJ2wg+DIErZRvjBGAZtFWSQqojCOGyAK84sLICIo48igM4AILqOOgAuIiiBGYFBWByUoW9wAlQZCAvGLCsEAQRKIEJLQ2fr3x70VKp3u3E6nK91d/X49Tx667rl16nurT1VSH8451dLZ2YkkSZIkSZK0OiMGugBJkiRJkiQNfoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqtN5AFyBJ0lASEZ3AdGAZ0AmsDzwHnJCZd/exz+8BV2bmLV2O7wlcnZk7rFXRDRIRLcCfgdMz83+7tH0T6MzMj/dw30uB6Zn51YYXuupjnwN8DNgpMx+rO34jcEpmPtiHPncEvpqZh63h/c4H5mbmZ9f0MRshImYCHcAiivHdCiyneF5uGsC69gQ+nZmH1x3bDngQeF1mTlvD/qYA2wPPlodGUlzr5zNzUsV9L6Ufx25E/AI4KjPn9kd/kiQ1kiGSJElr7g31H/gi4hTgm8Dr+tJZZh7bX4WtS5nZGREXAh8CVoRIETEWOJo+Ph+NFBFjgInA1cBJwKdrbZl56Fp0vT0Qa1fdoHF0fSAaEYcD3we2GqiCynoO73L4bcCn1jRAqvMfmXl17UYZVP02Iq7LzPl97LMv3rwOH0uSpLViiCRJ0lqIiPWA7YBn6o6dBhxGsWx8JvDRzHwiIt4NnE4xs2MZxYfYX5WzIs7PzKsj4gTgExQzJKbV9flZYIvMPKnr7YjYFrgQ2AFoAX6Qmf9T1vZNYH9gMfAw8MHMfL6u352BO4CtM3NxRIwEHgUOBnbprt4uT8ElwFkRsX1mPloeew/QDvwpIr4OvBbYqKzt2Mz8bZfn8ADgfyhmdS2mmNl0U0R8ADgG2AB4NjPfEBHHAB8tn9ungZMy848RsT9wLsWMkk7gvzPzmm5+Ze8F/lKe+4uIODszF5Z1zKQIKjYsfx/jy+MTarcjYhfgYmBMeT3fA75T/nebiPh5Zr4lIv4LeGd53gYUM3mui4iNy3N3A2YDS4HflI/zSuB84CXlNZyTmZMiYkOKEOcV5e+iHTg+M5d3eR63Ke+/HTCKYnbbFyNiB+BW4EZgH2Bz4LTM/FE3z89KytlmO7Ly+O7pd7BlWec/lsefpJix89mI6AB+Ul730cAC4OvltY4EvpGZl/R0rcDr634HmwAXALsDnRGxPfBfmbk0Il4AvkQRzGwNfD0zv1Z1naWdyro6yuv8MPBxirH/t/I6HyrP3b8M1zYGfkHx+11azlTcshYy124DL/RwXReX/d0eEYeWz89/AaOBl1K8ls/oZf2SJDWceyJJkrTmbo+IqRHxBFD7UPlBgIiYCLwK2Dszd6f44P698pz/oQiU9gTOACbUdxoRuwOfBV6fmXtRBCq9cTlwe2a+CtgP+NeIeC/FTKAJwKszs40iRHp1/R3LD8UPAG8vDx0MzCyXdK223vL+zwA/rl1/6cMUYcY+FB/kX5eZ/wT8gLqZP+U1v4RiVtDJmflq4P3AZeXyMIBXAhPKAOnAsv2AzNwD+ApwbXneWcC55XV+CHhjD8/VCcBl5cyW2WV/a+I/gBvKxzmUItzoBI4F/lIGSNsDBwEHltd0GnB2XZ2LKAK6IyhnL5WB3/8B3yzvcwjwxYh4HfAuYKNyPO1V9rNTN7X9ELikrG1v4KCIeE/d+T/PzL2BT1E8dz25PCLui4hZwKyy1n8p61zd7+AbwAOZuWt5bfvW9Tm6fN4CuI/id/7pstYDgVMi4rW9vNZvUIRUrwL2pAheTinbWimWB+5HEQh+qZx91p3/Ka9zZkT8rXzsN5Vh6huBUylmHe4GXAFcX4ZqANsCb6IIsnYDjuvx2Sx0e12ZWXvdvAF4DPgk8P7yNfda4D8jYouKviVJWmcMkSRJWnO1D5b/TDF75o7MfKpsexvFh7+7I+I+ir13asucrgSuK/dA2oxVP8i/CfhFZj5Z3v5uVSERsQFFcHQBQGY+C1xKEUJMo5hF8buI+BxwTWbe0U03FwEfKH/+IC+GXlX11nwL+EBEtJSzabYBfpqZd1LMZDo+Ir7Ki7N86u0D/Dkzf1fW/wDwW14MrO7PzOfKn/8ZeDlwR/ncfgXYPCI2pwiyLoiIy4E2itkcXZ+r11B86K8tvfsBcHJdMNAb1wGnRsS1wLuBj3edEVTOyHo/cHREfAn4SN11HwRMyszOzJxT9gewMzAmM68t+3gCuAZ4K8VMpVeWM9Y+DXwtM//c5do2oAhjPlc+N3dRzEjavTxlCUWgCXAPxWyknhxdhh2vp5iVMyMzHy7bVvc7OJRyzGbmbIqgqN6v6671H4FLyj5+CYwF9ujNtVKM7fPL57AD+HZ5rOYnddfZSjETrDv/URfq/BWYk5n3lm1vBX5U/o7IzEspxvUOZfsPM3NBZi4GLqN6SVrldWVmJ0VY1xYRZ1LMlmtZTf2SJK1zhkiSJPVR+YHzE8D3yiVDUCzN+XJm7l5+QN2TIuQhM08rf76bIrS5MyLq/y7upPjQWLN0NW2jy/+O6HK8dmxUZv6dF2dpLAN+FBGf6OZSrgb2iYhdKYKIH/ey3trz8AfgKYqA5Djg25m5LCL+GfhZedpPKD7sd1drVyMolmMBPF93fCTFh/fac/saiud3XmZ+h2Jmys3AW4D7y2VP9T5K8Zy2l0vXPk4RaBzS5byenmsy86cUS5J+TBF6TIuIf6y/cxlW3cGLS52+XNdfT7/jHp+HzHyEIrj577LPW8qlVPVGlv3uW/f8vBb4Ytm+uC7s6lpDt8rH/X/Af0fEPnWP0+3voLyW+n6Xdemy9rscCfy91kddrd/v5bV2fa7qxwsUM71qoQxV11oGRUcCx0bEET08Rq2f2uMs63J8SZfbRET9uKm8rjIIvJfiOb2HYtbbkqr6JUlalwyRJElaC1l8K9mdwNfKQz+n+DC6cXn7bOCHEbFeGVxskJnfpgg0dmXlD783AweXexzBi7ODAOZQzFBoKT9sHlw+/nyKWScnApTByUTg5oh4G8VeOHdk8e1fkyhCpa7X8ALFrKNLKWYrLexlvfW+RTH75jBenMn0ZoolTBcCf6DYI2hkl/vdVZQde5f1v5JiBsyUbh7jF8D7IqK2wfNHyusjIu4A9ihnjHwY2JRi9hRl+6bA+4C3ZeYO5Z9tKWaRdA3W5gDbRcRLy1lK76zr5wrgyMy8snxOngPGUQQotefm9cDdmXkuxSyb+uu+CTgmIkZExGbAO8rjCSyOYt8sImLr8rm8OYp9sr5PMUvtUxRjbHx9weVsrbuAf6+73t/W9d8n5cy1S4FvlQFij78DisDwmPLxX0KxhKuTVSXwQkT8a3nuOIpvPGzrzbWWx04sXwutFL/vm9fyOh8GvgCcV76+fg4cWe7zRER8kGIJXW320HsjorVcKvcBYHJ5fA5FqAbFTDXK+6/uupZRjJ1XUARMp2fmDRSBbiurvmYkSRowhkiSJK29k4BDIuItFAHKT4G7IuIBij2IPpCZS4F/A66IiHuAq4APlctxAMjiW6ZOBW6NiLspNmWuuZziA+qfKJYl3VnXdjTwpoiYBvyeYhnUpRQfbB8Appf97Uux51J3LqLYR+d7ZS2V9XZxJcUMoNvyxW+u+zZwYETcX9b7F2DH+tlM5blHAN8s67+CYvPvh+giM39OMavn5rLPo4B3lzNOTgXOjoh7gduBszJzZt3d3w88mJm3d+n288AbImJFUFHuB/UdihlYd1HsnVTzOYplalOB31EsR/slxfO8LCJ+T7FcbouIeJBiA+XnKZZ8bUTx/C8B/gjcQLl5emYuoQibTi6v7Rbg7LLeSRRBwoPl73Fjik2puzoKeG35PP4O+N/MvLyb89bUf1Is4/pwxe/gE8Au5eNfQ7FB+8KunZVLwN5BEbbeTxFMnZHFhuu9udaPU2w6Pa38kxQB0Nr6alnvGZl5M3AecFv5On4/RQBZm831CMUStXuBX1EsjazVdkH5mtmDF8fO6q7r2rKv5RTvHX8s7/924EGKGUySJA0KLZ2d3f0PIkmSpOGjnHE0F9g/M2cMdD1DUUR8FLg3M+8sZwj9GjgzMydX3FWSJA0R6w10AZIkSQOpXDr2e4pZQznA5QxlD1LMKBtJsY/UVQZIkiQ1F2ciSZIkSZIkqZJ7IkmSJEmSJKmSIZIkSZIkSZIqDck9kdrb21uBvSi+8WLZAJcjSZIkSZLUDEYCWwF/aGtrW+VbeYdkiEQRIP16oIuQJEmSJElqQgcAv+l6sKEhUkScDRwOdAIXZ+a5Xdo/AxwDzCsPXZSZF/Si69kAO++8M6NHj+7HijV9+nTGjx8/0GVIDeH4VrNzjKuZOb7VzBzfanaO8aFj8eLFPPTQQ1DmLl01LESKiAOBNwKvBkYBD0bEzzKz/qtz9wLem5l3rmH3ywBGjx5Na2trv9SrF/mcqpk5vtXsHONqZo5vNTPHt5qdY3zI6XbroIZtrJ2ZvwTekJlLgZdSBFYLupy2J/CpiLg/Is6PiDGNqkeSJEmSJEl919BvZ8vMJRFxFvAgcCvweK0tIjYE7gVOAV4DbAqc0ch6JEmSJEmS1DctnZ2dDX+QiFgfuAH4UWZ+t4dz9gAuycw9qvprb2/fAXikX4uUJEmSJEkSwI5tbW0zux5s5J5IuwBjMvO+zFwYEddS7I9Ua98OOCgzLykPtQBL1uQxxo8f77rKftbe3k5bW9tAlyE1hONbzc4xrmbm+FYzc3yr2TnGh46Ojg6mT5/eY3sjv51tJ+CsiNif4tvZ3gFcUte+CPhKRNwOzAROBK5rYD2SJEmSJEnqo0ZurH0jcCPFvkftwB2ZeWVE3BgRe2bmHOB4imVuSTET6ZxG1SNJkiRJkqS+a+RMJDLzTODMLscOrfv5GuCaRtYgSZIkSZKktdfQb2eTJEmSJElSczBEkiRJkiRJUqWGLmfT2pnSPotJk2cwd94itthsLBMP2ZUJbeMGuixJkiRJkjQMGSINUlPaZ3H+VVPpWLIMgDnzFnH+VVMBDJIkSZIkSdI653K2QWrS5BkrAqSajiXLmDR5xgBVJEmSJEmShjNDpEFq7rxFa3RckiRJkiSpkQyRBqktNhu7RsclSZIkSZIayRBpkJp4yK60jhq50rHWUSOZeMiuA1SRJEmSJEkaztxYe5CqbZ7tt7NJkiRJkqTBwBBpEJvQNs7QSJIkSZIkDQouZ5MkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUqX1Gtl5RJwNHA50Ahdn5rld2ncHLgI2AX4FfCQzlzayJkmSJEmSJK25hs1EiogDgTcCrwb2BD4WEdHltMuAj2XmzkALcFyj6pEkSZIkSVLfNSxEysxfAm8oZxa9lGLW04Jae0RsD4zNzLvKQ5cCRzSqHkmSJEmSJPVdQ5ezZeaSiDgLOAW4Cni8rnlrYHbd7dnAtmvS//Tp09e6Rq2qvb19oEuQGsbxrWbnGFczc3yrmTm+1ewc482hoSESQGaeGRFfBm6gWK723bKppZvTl69J3+PHj6e1tXUtK1S99vZ22traBroMqSEc32p2jnE1M8e3mpnjW83OMT50dHR0rHbCTiP3RNql3DibzFwIXEuxP1LN48DL6m5vBTzRqHokSZIkSZLUdw0LkYCdgIsiojUiRgPvAH5Ta8zMR4EXImK/8tBEYHID65EkSZIkSVIfNXJj7RuBG4F7gXbgjsy8MiJujIg9y9OOBs6LiBnABsA3GlWPJEmSJEmS+q7RG2ufCZzZ5dihdT9PBfZuZA2SJEmSJElae41cziZJkiRJkqQmYYgkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqZIgkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqZIgkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqZIgkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqZIgkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmqZIgkSZIkSZKkSus1svOIOBN4T3nzZ5l5apf2zwDHAPPKQxdl5gWNrEmSJEmSJElrrmEhUkQcBBwM7AF0AjdFxLsy87q60/YC3puZdzaqDkmSJEmSJK29Rs5Emg18MjMXA0TEDGC7LufsCXwqInYCfgWckpkvNLAmSZIkSZIk9UHDQqTMfKD2c0S8AjgS2Lfu2IbAvcApwEzgUuAM4LRG1SRJkiRJkqS+aens7GzoA0TEK4GfAWdm5g9Wc94ewCWZuUdVn+3t7TsAj/RbkZIkSZIkSarZsa2tbWbXg43eWHs/4Brg3zLzyi5t2wEHZeYl5aEWYMma9D9+/HhaW1v7pVYV2tvbaWtrG+gypIZwfKvZOcbVzBzfamaObzU7x/jQ0dHRwfTp03tsb+TG2uOA64EjM/O2bk5ZBHwlIm6nWM52InBdN+dJkiRJkiRpgDVyJtIpwBjg3IioHfs28HbgM5l5d0QcD9wAjAZ+A5zTwHokSZIkSZLUR43cWPtk4ORumr5dd841FMvdJEmSJEmSNIiNGOgCJEmSJEmSNPgZIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKnSeo3sPCLOBN5T3vxZZp7apX134CJgE+BXwEcyc2kja5IkSZIkSdKaa9hMpIg4CDgY2APYHWiLiHd1Oe0y4GOZuTPQAhzXqHokSZIkSZLUd41czjYb+GRmLs7MJcAMYLtaY0RsD4zNzLvKQ5cCRzSwHkmSJEmSJPVRw5azZeYDtZ8j4hXAkcC+dadsTRE01cwGtm1UPZIkSZIkSeq7hu6JBBARrwR+BpySmX+qa2rp5vTla9L39OnT16Y09aC9vX2gS5AaxvGtZucYVzNzfKuZOb7V7BzjzaHRG2vvB1wD/FtmXtml+XHgZXW3twKeWJP+x48fT2tr69oVqZW0t7fT1tY20GVIDeH4VrNzjKuZOb7VzBzfanaO8aGjo6NjtRN2Grmx9jjgeuCobgIkMvNR4IUyaAKYCExuVD2SJEmSJEnqu0bORDoFGAOcGxG1Y98G3g58JjPvBo4GLoqIjYB7gW80sB5JkiRJkiT1USM31j4ZOLmbpm/XnTMV2LtRNUiSJEmSJKl/NGw5myRJkiRJkpqHIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkir1KkSKiPUj4rXlzydHxCURsV1jS5MkSZIkSdJg0duZSN8H3hERewEnA38FLmpYVZIkSZIkSRpUehsi7ZSZ/wn8C3BpZn4W2LxhVUmSJEmSJGlQ6W2INLr871uA2yJiJLBhY0qSJEmSJEnSYLNeL8/7bUQ8CCwF7gBuBW5pWFWSJEmSJEkaVHo7E+ljwIeB/TNzOfBVir2RJEmSJEmSNAz0KkTKzGXA1sDXI+IyYIsyTJIkSZIkSdIw0KsQKSJOAf4LmArcA3wiIk5rZGGSJEmSJEkaPHq7J9JEiqVszwFExMXAXcAXGlWYJEmSJEmSBo/e7olELUAqf34WWNKQiiRJkiRJkjTo9HYm0syIOBn4Vnn7ROCvjSlJkiRJkiRJg01vZyKdALwLWFj+OYwiSJIkSZIkSdIw0KuZSJn5ODAhItYHRmTm840tS5IkSZIkSYNJr0KkiPhGl9sAZObHG1CTJEmSJEmSBpneLmd7uu7PfOB1QGejipIkSZIkSdLg0tvlbGfV346ILwI/bUhFkiRJkiRJGnR6OxNpJZm5ANimn2uRJEmSJEnSINWXPZFagDZgRkMqkiRJkiRJ0qDTqxCJYi+kmk7gh8Dl/V+OJEmSJEmSBqM+7YkkSZIkSZKk4WW1IVJEPMJqvoUtM3fq94okSZIkSZI06FTNRDq8/O9HgcXAd4GlwAeB0Q2sS5IkSZIkSYPIakOkzGwHiIjxmblPXdO/R8TvG1qZJEmSJEmSBo0RvTxv04jYsnYjIrYGNm5MSZIkSZIkSRpsevvtbF8DpkXEz4EW4GDg1EYVJUmSJEmSpMGlVzORMvNC4C3AVOBe4KDMnNTIwiRJkiRJkjR4VH072xsz87aIeHd5aGb5350jYufMvLbqASJiY+AO4G2ZObNL22eAY4B55aGLMvOCNahfkiRJkiRJ60DVcrb3AbcBH+umrRNYbYgUEfsAFwE793DKXsB7M/POijokSZIkSZI0gKq+ne248r9vqB2LiBZgvcxc0ov+jwNOBH7YQ/uewKciYifgV8ApmflCbwqXJEmSJEnSutOrPZEiYv+IOD0iRgPtwLMRcWTV/TLz2Mz8dQ99bkixv9IpwGuATYEzelu4JEmSJEmS1p2Wzs7OypMi4k6KgGdz4APAR4EfZ+bevXmQiJgJTOi6J1KXc/YALsnMPar6a29v3wF4pDePLUmSJEmSpDWyY1tb28yuB6v2RKoZmZm3RMRFwPWZOTMiRq5NNRGxHcW3vF1SHmoBerNEboXx48fT2tq6NmUMKVPaZzFp8gzmzlvEFpuNZeIhuzKhbVy/PkZ7ezttbW392qc0WDi+1ewc42pmjm81M8e3mp1jfOjo6Ohg+vTpPbb3OkSKiL2Bfwa+EBHjgVFrWdsi4CsRcTvFt76dCFy3ln02rSntszj/qql0LFkGwJx5izj/qqkA/R4kSZIkSZIkddWrPZGALwBXABeXS9JuAE7vywNGxI0RsWdmzgGOL/tKiplI5/Slz+Fg0uQZKwKkmo4ly5g0ecYAVSRJkiRJkoaTXs1EysxrgWvrDr08M5f1dH4399+h7udD636+Brimt/0MZ3PnLVqj45IkSZIkSf2pVyFSRLwMuBh4BXAAMCkiPpCZsxtZnF60xWZjmdNNYLTFZmMHoBpJkiRJkjTc9HY527eA6yn2MXoGuA/4XmNKUncmHrIrraNW3su8ddRIJh6y6wBVJEmSJEmShpPehkg7ZOZFwPLMXJKZnwK2a2Bd6mJC2zhOOmI3ttxsLC3AlpuN5aQjdnNTbUmSJEmStE709tvZlkfEisApIjai9wGU+smEtnGGRpIkSZIkaUD0Ngi6Frgc2CQijgduA37csKokSZIkSZI0qPQqRMrMLwI3An8A3gx8NzPPamRhkiRJkiRJGjx6u5yNzPwh8MPa7Yh4c2be3JCqJEmSJEmSNKisNkSKiDbgm8DTwAczc25EbAd8HXgr4PfLS5IkSZIkDQNVy9m+BVwDPAycHhHvAR4A1gd2a3BtkiRJkiRJGiSqlrNtkpnnRMRI4CHgPcBxmXll40uTJEmSJEnSYFE1E2khQGYuA8YAhxogSZIkSZIkDT9VIVJL3c9zMvO+BtYiSZIkSZKkQapqOduIiNiMIkxqqfsZgMx8ppHFSZIkSZIkaXCoCpFeBczlxeDo6bq2TmBkI4qSJEmSJEnS4LLaECkzq5a7SZIkSZIkaRgwJJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUqX1Gtl5RGwM3AG8LTNndmnbHbgI2AT4FfCRzFzayHokSZIkSZLUNw2biRQR+wC/AXbu4ZTLgI9l5s5AC3Bco2qRJEmSJEnS2mnkcrbjgBOBJ7o2RMT2wNjMvKs8dClwRANrkSRJkiRJ0lpo2HK2zDwWICK6a94amF13ezawbaNqkSRJkiRJ0tpp6J5Iq9HSzbHla9rJ9OnT+6EUddXe3j7QJUgN4/hWs3OMq5k5vtXMHN9qdo7x5jBQIdLjwMvqbm9FN8veqowfP57W1tZ+K0rFC7utrW2gy5AawvGtZucYVzNzfKuZOb7V7BzjQ0dHR8dqJ+w0ck+kHmXmo8ALEbFfeWgiMHkgapEkSZIkSVK1dRoiRcSNEbFnefNo4LyImAFsAHxjXdYiSZIkSZKk3mv4crbM3KHu50Prfp4K7N3ox5ckSZIkSdLaG5DlbJIkSZIkSRpaDJEkSZIkSZJUyRBJkiRJkiRJlQyRJEmSJEmSVMkQSZIkSZIkSZUMkSRJkiRJklTJEEmSJEmSJEmVDJEkSZIkSZJUyRBJkiRJkiRJlQyRJEmSJEmSVMkQSZIkSZIkSZUMkSRJkiRJklTJEEmSJEmSJEmVDJEkSZIkSZJUyRBJkiRJkiRJlQyRJEmSJEmSVMkQSZIkSZIkSZUMkSRJkiRJklTJEEmSJEmSJEmVDJEkSZIkSZJUyRBJkiRJkiRJlQyRJEmSJEmSVMkQSZIkSZIkSZUMkSRJkiRJklTJEEmSJEmSJEmVDJEkSZIkSZJUyRBJkiRJkiRJlQyRJEmSJEmSVMkQSZIkSZIkSZUMkSRJkiRJklTJEEmSJEmSJEmVDJEkSZIkSZJUab1Gdh4RRwGnA6OB8zLzgi7tnwGOAeaVhy7qeo4kSZIkSZIGXsNCpIjYBvgC0AZ0AHdExO2Z+WDdaXsB783MOxtVhyRJkiRJktZeI5ezHQTclpnPZOYC4Grg8C7n7Al8KiLuj4jzI2JMA+uRJEmSJElSHzUyRNoamF13ezawbe1GRGwI3AucArwG2BQ4o4H1SJIkSZIkqY8auSdSSzfHltd+yMzngUNrtyPiHOAS4LTePsD06dPXpj71oL29faBLkBrG8a1m5xhXM3N8q5k5vtXsHOPNoZEh0uPAAXW3twKeqN2IiO2AgzLzkvJQC7BkTR5g/PjxtLa2rm2dqtPe3k5bW9tAlyE1hONbzc4xrmbm+FYzc3yr2TnGh46Ojo7VTthpZIh0C/DZiNgSWAAcBny4rn0R8JWIuB2YCZwIXNfAeiRJkiRJktRHDdsTKTMfp1iadjtwH3BFZv4+Im6MiD0zcw5wPHADkBQzkc5pVD2SJEmSJEnqu0bORCIzrwCu6HLs0LqfrwGuaWQNkiRJkiRJWnuN/HY2SZIkSZIkNQlDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVWm+gC5DWxJT2WUyaPIO58xaxxWZjmXjIrkxoGzfQZUmSJEmS1PQMkTRkTGmfxflXTaVjyTIA5sxbxPlXTQUwSJIkSZIkqcFczqYhY9LkGSsCpJqOJcuYNHnGAFUkSZIkSdLwYYikIWPuvEVrdFySJEmSJPUfl7MNQ0N1X6EtNhvLnG4Coy02GzsA1UiSJEmSNLw4E2mYqe0rNGfeIjp5cV+hKe2zBrq0ShMP2ZXWUSNXOtY6aiQTD9l1gCqSJEmSJGn4cCZSk5jSPovvXj+N+QuXALDR+qP48DtftcoMo9XtKzTYZyPV6huKs6gkSZIkSRrqDJEG2IVX38dNv/sry5d3MmJEC2/dZztOOHz3NepjSvssvnblvSxb3rni2PyFS/j6j+4FVv7msqG+r9CEtnGGRpIkSZIkDQCXsw2gC6++jxvvfJTlZfizfHknN975KP/yyZ/woc//otdLzL57/bSVAqSapcs6V/nmsp72D3JfIUmSJEmStDrORBpAN975aI9ttb2KgG5n3tQ2x+5uo+l6XWcYTTxkV86/aupKS9rcV0iSJEmSJFVpaIgUEUcBpwOjgfMy84Iu7bsDFwGbAL8CPpKZSxtZ02Dx0S/fUnlOx5JlnHPFPXz3+mk8v3DJij2AgFWCoJ60jGhhSvusFUFUd/sK7bXLS5k0eQbnXnEPG68/kmOZ5ZIxSZIkSZK0koaFSBGxDfAFoA3oAO6IiNsz88G60y4Djs3MuyLiYuA44MJG1TSYzHpqQa/PrW2WPWfeolX2PqqyfHkn51xxD+dccQ9b1m1EXQuJat/WVguknl1YBFczHnm6T3sz1WZHjRjRwvLlnSs9pqTVq72G3Dhe0nDge54kqRkMt7/PGrkn0kHAbZn5TGYuAK4GDq81RsT2wNjMvKs8dClwRAPraQprEiB1VVsiV7/XUnff1gbFUrve7skEL4ZRteV1tX2euntMSauqfw114mtHUnPzPU+S1AyG499njVzOtjUwu+72bGDvivZt1+QB8qvn0rLwxT1/tthvX7Y69K0s6+jgwbO/sMr5L33jBP7hTW9kyXPP8ccvf3WV9pe99S1secB+dMyZy0Nf+8Yq7du841/YfO+9WPjY4/zlwu+s0j7uiMPYdPfdeP7hR3jk4u+v0r79vx7FxrvuwnMz/shRj/18lfZbttyLp1o3Z/uFT7DfM9NWab/ppa/lmdGb8PIFs9h73oOrtN/wD/szf9QG7DL/EV7z7EOrtF+31YEsYgw3f+cqRo55kk03GsOb/zx3RfuPt34TS0esxx7P/pFd5z/K3PNuZtr2m69of9UXzgbg8et+wjN3t6/U95OznqNjyzcAsO8z97PDwtkrtT/2zSlwafGcz5x0GfNz5fpaX/ISdv73kwF4+HuXsOCRmSu1j916K15+4gkA/PmCC1n0xMr9b7DjDux07IcAeOjcr9Px9NMrtW8UO7PDxH8F4I9f+gpL5j+/Uvumr34V444sMswHzvo8yxcvXql98z3b2OZd7wBg2mmfoauhNPYeveyKVdp3POaDbLjTjvz9vqnMuuqaVdr/8YTjWX/bbXjm93/g8Z/csEr7zv/2cVq33II5v/4tT9606tje5VOnMGrjjfnbrbfx1G1TVmn/p8+cxsjWVmbfeBNzf3vHKu2rG3sjRo/mlWeeDsCsH13F3+9f+bUzaqMN2eXTpwKw5NbbmXbtT1ZqHyxjb9LkGbz90Z8zavnKoe49l8xgQtt/AP0/9v4+/wWmjNyBP4zYhu3XX8Z7nr2LTTcas9I5jr3+GXvr6n2vY/78FWPc9z3HHgzev3Mn/WkTOpYs44gnblnpfW/ueTfx+GFv7nbs1ca3Y8+xB833772O+fN5Ztlyxx6OvWb9OxdoyrH32OML6XjJAQAcOPcetnlhDlD8fTZt+82H5NjrXH8svPXgVZ6nmkaGSC3dHFu+Bu2Vnn9+ASx4cVnYwr/+lSfa2+lcsoTF8+evcv7CmY/yWHs7nQsXdtu+6JGH+ev6Y+h89rlu2//8578wcuQIls99miXdtD/0pz8xctlSlj/5t27bM5MRCxewfNZja3KZ/a6zs5PHn3qeF154gZEjW1i2rPvZTUuWLGd+3XW0txcvqKWPPcayLte3uGJ/psWLl624/5Inn2R5l/s/3/Ji/0ueemqV9gVzR/FsrX3u3FXbn3qKeWX74meeprNr+5NP8nStfd7f6Vy08objC594gqdq7c8+S+fSlbfmWvjYYzxZtnd0N7aG0Njrrn3GjBmMmPcMyx5+hKXdtD/wwAOM+NuTLPvzX7ptnzZtGi2bbMyyRx7utn3q1Km0rL8+S2c+usrYAbj33ntpGTWKpX/9a7ftqxt7Leut92L7E0+s2r506Yp2YKUxDYNn7PW0Sf6CRUtW1NefY2/BC8t4+rmlPLfxYjo3gqeffWHF+8IGY0auOM+x1z9jb12+79XGuO97jj0YvH/nzpk3epVrhuLfHo+tZuzNnz/fsefYA/z3nmPPsbdS+1AYey/7Bx7605+abux1yWRWqH2WHpJjb/lyWru/LABaOjv7vjxqdSLi/cABmXlsefsMoCUzzy5vbw/cmpkvL28fAJyVmW+s6ru9vX0H4JHx48fT2rq6yxu8/uWTP6k+qcFq+xWdc8U9PbZfcnrPCWS9D33+F6v9prg16UtqlPb2dtra2ga6jG719Bpq1GtnXT+e1o3BPMalen15D3J8q5k5vtXsmnWMN+O/qTs6Opg+fTrAjm1tbTO7tjdyT6RbgDdFxJYRsT5wGHBTrTEzHwVeiIj9ykMTgckNrEddzJ23iAlt4zj0dduv0tY6auSKb4LrjYmH7ErrqJHdtq1pX9Jw1N1rqJGvnbk9hL49HZek/rSu3/MkSWqE4fj3WcNCpMx8HDgNuB24D7giM38fETdGxJ7laUcD50XEDGADYNUFmk3qhnPeMdAlsMVmYwE44fDd+eRRr2HL8vaWm43lpCN2W6Md5Se0jeOkI3Zb0ceIES197ksajupfQy00/rVTe/339rgk9ad1/Z4nSVIjDMe/zxq5JxKZeQVwRZdjh9b9PJWVN9seVm445x39tqxtxIgWRo1soWPJqttKbbT+KBYvWb7St7B1TUcntI1jQtu4tZpmWOtDUt+sy9fQxEN25fyrpq72fUGSGsl/N0iSmsFw+/usoSGSqtVmJE1pn9Xj3kQtLbDtlhsw66kFq7Qd+rrtOeHw3Vf00d2Hwg+/81UATJo8g7nzFrFFuRfScBroklZWe/37viBJkiSptwyRBokJbeN6DJHohG996iAuvPo+bvrdX1m+vJMRI1p46z7brQiQan1Azx8K/XAoqd5w+78mkiRJktaOIdIgsuVmY7vd2b1+76L60Kg7fiiUJEmSJEmNYIg0iAy1PUqmtM9yKYwkSZIkScOEIdIgsrrlaIMtsOm6/9KceYs4/6qpK12HJEmSJElqHoZIg0x3y9EGY2AzafKMlWZMAXQsWcakyTMMkYaQwRZOSpIkSZIGrxEDXYCqrS6wGShzu9m7aXXHNfjUwsk58xbRyYvh5JT2WQNdmiRJkiRpEDJEGgIGY2BT2+y7t8c1+AzGcFKSJEmSNHgZIg0BgzGwmXjIrrSOGrnSscG8CbhWNRjDSUmSJEnS4GWINAQMxsBmQts4TjpiN7bcbCwtwJabjeWkI3ZzP50hZDCGk5IkSZKkwcuNtYeA1X1r20DXNdA1DDZDaaPqiYfsutKG7bBm4eRQulZJkiRJ0tozRBoiDGwGv8H4LXqrszbh5FC7VkmSJEnS2jNEkvrJ6jaqHqzBSl/DyaF4rZIkSZKkteOeSFI/GU4bVQ+na5UkSZIkFQyRpH4ynDaqHk7XKkmSJEkqGCJJ/WQwfoteowyna5UkSZIkFdwTSeong/Vb9BphOF2rJEmSJKlgiCT1o+H0LXrD6VolSZIkSS5nkyRJkiRJUi8YIkmSJEmSJKmSIZIkSZIkSZIqGSJJkiRJkiSpkiGSJEmSJEmSKhkiSZIkSZIkqZIhkiRJkiRJkioZIkmSJEmSJKmSIZIkSZIkSZIqrTfQBfTRSIDFixcPdB1NqaOjY6BLkBrG8a1m5xhXM3N8q5k5vtXsHONDQ13OMrK79pbOzs51V00/aW9v3x/49UDXIUmSJEmS1IQOaGtr+03Xg0N1JtIfgAOA2cCyAa5FkiRJkiSpGYwEtqLIXVYxJGciSZIkSZIkad1yY21JkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSJEmSJFUyRJIkSZIkSVIlQyRJkiRJkiRVWm+gC9DgEBFHAacDo4HzMvOCAS5J6lcRcRvwD8CS8tDxmfm7ASxJWmsRsTFwB/C2zJwZEQcB5wJjgR9l5ukDWqC0FroZ35cABwALylPOyszrBqxAaS1ExJnAe8qbP8vMU30PV7PoYXz7Ht4kWjo7Owe6Bg2wiNgG+A3QBnRQ/IPtfZn54IAWJvWTiGgBHge2y8ylA12P1B8iYh/gImAXYGfgb0ACBwKzgJ8BX8vMyQNWpNRHXcd3GSJNAw7OzNkDW520dsqw6CzgDUAncBPwPeDL+B6uIa6H8X0+cDa+hzcFl7MJ4CDgtsx8JjMXAFcDhw9wTVJ/Coq/xCZHxNSIOGmgC5L6wXHAicAT5e29gT9l5iNlWHoZcMRAFSetpZXGd0RsAGwHXBQR90fEWRHhv2M1VM0GPpmZizNzCTCD4n8G+B6uZtDd+N4O38ObhsvZBLA1xYu9ZjbFhxGpWWwG3AqcQDFFfEpEZGbePLBlSX2XmccCRETtUHfv5duu47KkftHN+P4H4DbgeOB54KfAMRSzlaQhJTMfqP0cEa8AjgS+ge/hagI9jO/9gQn4Ht4UDJEE0NLNseXrvAqpQTLzTuDO8uaCiLgYOBQwRFIz8b1cTSszHwbeVbsdEd8EJuIHEA1hEfFKimVrp1Ds2RhdTvE9XENW/fjOzMT38KbhFDJBsVfMy+pub8WLyyOkIS8i9o+IN9UdauHFDbalZuF7uZpWRLwqIg6rO+T7uIa0iNiPYpb0pzPzB/geribSdXz7Ht5cnIkkgFuAz0bElhS75R8GfHhgS5L61abA2RGxLzAKeD/wkQGtSOp/vwMiIl4OPAIcBVwysCVJ/aYF+Fr5TZvPU/w75QcDW5LUNxExDrgeODIzbysP+x6uptDD+PY9vIk4E0lk5uPAacDtwH3AFZn5+wEtSupHmflTium09wLtwCXlEjepaWTmC8AHgGuAB4E/UnxRgjTkZeb9wH8Dv6UY3/dl5v8ObFVSn50CjAHOjYj7IuI+ivfvD+B7uIa+7sb3vvge3jRaOjs7B7oGSZIkSZIkDXLORJIkSZIkSVIlQyRJkiRJkiRVMkSSJEmSJElSJUMkSZIkSZIkVTJEkiRJkiRJUiVDJEmSNCRExO0R8Z/dHP9kRPzfau732Yg4v7HV9fjYH4uIzoh4bZfj34uIg/rY5yYRcVsf7ndKRFzal8eUJEkCQyRJkjR0XAB8sJvjxwEDEhL1wkeAy4F/qz+Ymcdm5i197HMzYO+1rEuSJGmNrTfQBUiSJPXS9cDXI+KAzPw1QEQcCLQAN0fEfwHvBMYAGwCnZOZ19R1ExCspAqeXAJ3AOZk5KSImAF8HFpT33Rs4GDgdGA0sLPu7MyJ2AS4uH6cF+F5mfqtrsWWfmwOnAn+JiHGZOatsm1LWcTcwPTM3LI/vULsdES8DJgFblF3+LDPPAL4PjI2I+4A24P3A8WWdmwNfyswLI2IU8A3gzcBTwN+AZ8vH2Ra4ENihvIYfZOb/RMR6wDeB/YHFwMPABzPz+dX/aiRJ0nDgTCRJkjQkZOZS4LvAMXWHPwx8C9gOOAg4MDNfDZwGnF1//zIg+T/gm+U5hwBfjIjXlaeMB96XmbuV/X0RODQz9ygf59qI2AD4D+CGzGwDDgVeHxHd/ZvqBODyzHwCuA04aQ0v+Tjg4cx8DXAA8IqI2IRiNtaizNwdGFueV6vzSOAr5f0/CuwM/BNFkLRdXd+XA7dn5quA/YB/jYj3Aq8DJgCvLq/vYeDVa1i3JElqUoZIkiRpKPku8I6I2CgiNgfeAlyamY9SzMg5OiK+RLGMbMMu990ZGJOZ1wKU4c41wFvL9lllP1CELlsBt5Yzfi4HlgMvB64DTo2Ia4F3Ax/PzOX1D1TOInoX8IPy0A+A48oQqrduAg6LiBspZhp9OjOfrT+hnCH0NuCfI+JzFOFZ7boPAq7IzMWZuaC8Bsoa9qNYHkjZ56UUodo0YBnwu7K/azLzjjWoWZIkNTFDJEmSNGRk5mzgZuC9wETg6sx8NiJeA9wBbAz8AvgyxTKtet39u2cEMKr8uX7J1kjg1szcvfYHeC3FUrOfAq8AfgzsAUyLiH/s0u+xFMvlboiImcBXy9re3+W8zi51jq671j8AO1IEZzsAv4+IfevvXC5Luw/YHvgNxfK7nvpeWnfN3T03ozLz78BuwCkUYdKPIuITSJIkYYgkSZKGnm8BR1MEMheUx14P3J2Z5wK/pNgbaWSX+yWwOCLeDRARWwOHUYRSXd0GHFzuf0REHArcD4yJiCuAIzPzSoolY88B42p3jIiRFMvfPpKZO5R/asvjTo6I+gDn78DoiPin8va76vr5EnBGZl4PnAw8QDGbaikwsuxnT2AO8PnM/DnFrKRaDTcBEyNiTESMoVjqRmbOB+4CTizP3YQikLs5It4G3ArckZmfpdiTabdunh9JkjQMGSJJkqQhJTOnUGyM/VxmTisP/y+wRUQ8CLRTzCraPCI2qrvfEopw6eSIuB+4BTg7M2/v5jEeoAiCroyIqcDngLeXy8I+R7FsbirwO4rlbb+su/vbKP6NdXmXbs8DXkaxj1LtcZ6l2Hh7ckT8gWL2UM3XgN0jYjrFBtyPlNc5G7gHmAH8AXgMyIi4l2LfozkUy+6+U95velnfI3V9Hw28KSKmAb+nWNZ3KTCZIqyaHhF3A/sCn+36/EiSpOGppbOzs/osSZIk9ZuIuAc4LTMnD3QtkiRJvbXeQBcgSZI0XJTfEDcDeBb47QCXI0mStEaciSRJkiRJkqRK7okkSZIkSZKkSoZIkiRJkiRJqmSIJEmSJEmSpEqGSJIkSZIkSapkiCRJkiRJkqRKhkiSJEmSJEmq9P8B0M/a/3na7OUAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import statsmodels.api as sm\n", "\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade']] \n", "y = df_countries['GDP_calculated_2021'] \n", "\n", "# Constante\n", "X = sm.add_constant(X)\n", "\n", "# Regresión Lineal Robusta\n", "model_robust = sm.RLM(y, X).fit()\n", "\n", "print(model_robust.summary())\n", "\n", "import matplotlib.pyplot as plt\n", "plt.figure(figsize=(20, 6))\n", "plt.scatter(model_robust.fittedvalues, model_robust.resid)\n", "plt.axhline(y=0, color='r', linestyle='--')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.title('Residuos vs Valores Ajustados en Regresión Robusta')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "3b666de7", "metadata": {}, "source": [ "Valores p extremadamente bajos para los coeficientes del modelo. Esto sugiere que los predictores son altamente significativos para explicar la variabilidad en el PIB calculado para 2021.\n", "\n", "En cuanto a la gráfica de residuos vs. valores ajustados, la dispersión de los residuos parece más controlada comparada con la regresión OLS tradicional, pero todavía se pueden observar algunos valores atípicos pronunciados, especialmente para valores ajustados bajos. Estos residuos podrían indicar que, aunque el modelo maneja mejor los outliers en comparación con OLS, aún podrían existir características no capturadas por el modelo actual." ] }, { "cell_type": "markdown", "id": "9d2fe2f7", "metadata": {}, "source": [ "### Transformación de Datos NO APLICARÉ\n", "Logaritmo para reducir la asimetría y estabilizar la varianza de los predictores que sean estrictamente positivos.\n", "Raíz cuadrada para reducir el efecto de los valores atípicos en los predictores con amplias variaciones." ] }, { "cell_type": "code", "execution_count": null, "id": "6941d2ad", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "88b99bd8", "metadata": {}, "source": [ "## Verificación de Supuestos y Diagnóstico del Modelo" ] }, { "cell_type": "code", "execution_count": 40, "id": "bf66ec6d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Robust linear Model Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 No. Observations: 226\n", "Model: RLM Df Residuals: 221\n", "Method: IRLS Df Model: 4\n", "Norm: HuberT \n", "Scale Est.: mad \n", "Cov Type: H1 \n", "Date: Sun, 12 May 2024 \n", "Time: 21:34:32 \n", "No. Iterations: 50 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0950 0.000 -194.216 0.000 -0.096 -0.094\n", "2020_gov 1.1203 0.001 790.858 0.000 1.117 1.123\n", "2020 2.8960 0.002 1504.097 0.000 2.892 2.900\n", "2020_con 0.8781 0.001 619.751 0.000 0.875 0.881\n", "2020_trade 0.0496 0.001 60.150 0.000 0.048 0.051\n", "==============================================================================\n", "\n", "If the model instance has been used for another fit with different fit parameters, then the fit options might not be the correct ones anymore .\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmMAAAFNCAYAAABMhmimAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAAsTAAALEwEAmpwYAAAwHElEQVR4nO3deZhcVZn48W8TSBNWQZgRZFd5QaPANOICKCqioCOjgBv+oqKIiMI4MugIyuIyigIuII4gYgQGRRYHISgIuBAQKCAkEF9FCLIESSBCSEJn698f9xapdLrT3Unfvp3u7+d58qTqnlun3nvqVtXb55w6t62rqwtJkiTVY626A5AkSRrNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUZr1x2AVJWI6AKmAUuALmA94GngyMy8fRXrPBe4ODOv67Z9d+DnmbndagVdkYhoA+4DTsjM/+1W9l2gKzOP7uWx5wPTMvOblQe64nOfBnwK2CEzH27ZfjVwbGbeuwp1bg98MzMPGuDjzgRmZ+ZJA33OKkTEDKATWEBxfrcDSyna5Zoa49od+FxmHtyybRvgXuA1mTl1gPXdCGwLPFVuGkNxrF/OzIl9PPZ8BvHcjYhfA+/PzNmDUZ/UZDKmke4NrR+cEXEs8F3gNatSWWZ+dLACG0qZ2RURZwOHAc8lYxExDjiUVWyPKkXEusAE4OfAJ4HPNcsy84DVqHpbIFYvumHj0NY/LCLiYOBHwBZ1BVTGc3C3zW8HPjvQRKzFf2bmz5t3yoTvpoi4PDPnrmKdq+LNQ/hcGkVMxjRqRMTawDbAky3bjgcOohiynwF8IjMfjYh3ASdQ9DQsofgy+F35V/qZmfnziDgS+DTFX+xTW+o8CdgsMz/Z/X5EbAWcDWwHtAE/zsxvlLF9F9gLWAjcD3w4M59pqXdHYDKwZWYujIgxwIPAfsBOPcXbrQnOA06OiG0z88Fy27uBBvCXiPg28GpgwzK2j2bmTd3acG/gGxS9jAspetquiYgPAR8B1geeysw3RMRHgE+UbfsE8MnM/FNE7AWcTtHD0QX8d2Ze2sNL9l7gr+W+v46IUzJzfhnHDIov/A3K12N8uX2f5v2I2An4IbBueTznAv9T/v/CiPhVZr4lIj4P/Fu53/oUPUuXR8RG5b67ADOBxcAfyud5GXAm8PzyGE7LzIkRsQFFMvSS8rVoAEdk5tJu7fjC8vHbAOtQ9LZ+NSK2A34DXA28CtgUOD4zf9pD+yyn7P3cnuXP795eg83LOF9Ubn+MogfppIjoBH5RHvehwDzg2+WxjgG+k5nn9XaswOtaXoONgbOAXYGuiNgW+HxmLo6IZ4GvUSQ4WwLfzsxv9XWcpR3KuDrL4/wYcDTFuf/38jj/XO67V5mkbgT8muL1XVz2nG/e/GOteR94tpfj+mFZ3w0RcUDZPp8HxgL/RPFe/kI/45eW45wxjXQ3RMSUiHgUaH44fxggIiYALwf2yMxdKb4Azy33+QZFYrY78AVgn9ZKI2JX4CTgdZn5SorEpD8uBG7IzJcDewIfiIj3UvRM7QO8IjM7KJKxV7Q+sPxyuQd4R7lpP2BGOVS30njLxz8J/Kx5/KWPUSQFr6L4QnxNZr4U+DEtPVHlMT+fopfqmMx8BfBB4IJy2A/gZcA+ZSL2+rJ878zcDTgVuKzc72Tg9PI4DwPe2EtbHQlcUPa0zCzrG4j/BK4sn+cAiiShC/go8NcyEdsW2Bd4fXlMxwOntMS5gCLRPYSyN61MnP8P+G75mP2Br0bEa4B3AhuW59Mry3p26CG2nwDnlbHtAewbEe9u2f9XmbkH8FmKtuvNhRFxV0Q8BDxUxvqvZZwrew2+A9yTmTuXx/baljrHlu0WwF0Ur/nnylhfDxwbEa/u57F+hyLZezmwO0UCc2xZ1k4x7LsnRWL9tbI3tCffKI9zRkT8vXzuN5V/lLwROI6iF3wX4CLgijI5BdgKeBNFQrgLcHivrVno8bgys/m+eQPwMPAZ4IPle+7VwH9FxGZ91C31yGRMI13zA/ptFL05kzPz8bLs7RQfordHxF0Uc5Oaw1cXA5eXc8Q2YcUvxDcBv87Mx8r7P+grkIhYnyIBOwsgM58Czqf4Mp9K8Vf9HyPiS8ClmTm5h2rOAT5U3v4wy5LHvuJt+h7woYhoK3t3Xgj8MjNvpuhZOyIivsmyXqdWrwLuy8w/lvHfA9zEssTv7sx8urz9NuDFwOSybU8FNo2ITSkSwrMi4kKgg6J3oXtb/QvFl2dzSPXHwDEtX7D9cTlwXERcBrwLOLp7D1XZQ/hB4NCI+Brw8Zbj3heYmJldmTmrrA9gR2DdzLysrONR4FLgrRQ9Zy8re1A/B3wrM+/rdmzrUyQ1Xyrb5haKHrJdy10WUfxhAHAHRe9Ybw4tk4bXUfQSTc/M+8uylb0GB1Ces5k5kyLhavX7lmN9EXBeWcdvgXHAbv05Vopz+8yyDTuB75fbmn7RcpztFD2TPfnPluTob8CszLyzLHsr8NPyNSIzz6c4r7cry3+SmfMycyFwAX0PNfZ5XJnZRZH0dkTEiRS9t20riV9aKZMxjQrlB/engXPLoSAohly+npm7lh/0u1MkS2Tm8eXt2ymSn5sjovX90kXx4du0eCVlY8v/1+q2vbltncz8B8t6DZYAP42IT/dwKD8HXhURO1N8of+sn/E22+E24HGKRONw4PuZuSQi3gZcVe72C4ovzZ5i7W4timE2gGdato+h+BJstu2/ULTvnMz8H4qekmuBtwB3l8NZrT5B0aaNckjyaIrEYP9u+/XW1mTmLymGmn5GkTxMjYgXtT64TPoms2wI6+st9fX2GvfaDpn5AEUC9N9lndeVQ2StxpT1vralfV4NfLUsX9iSNHaPoUfl8/4/4L8j4lUtz9Pja1AeS2u9S7pV2XwtxwD/aNbREuuP+nms3duq9XyBouexmdzQ17GWCdd7gI9GxCG9PEeznubzLOm2fVG3+0RE63nT53GVCfWdFG16B0Uv7KK+4pd6YzKmUSOLXxHeDHyr3PQrig/1jcr7pwA/iYi1ywRg/cz8PkVisDPLf4lcC+xXzgGDZb1VALMo/mJuKz+09yuffy5FL8hRAGUCMgG4NiLeTjFXaHIWv9abSJGcdT+GZyl6wc6n6D2b3894W32PojfoIJb1rL2ZYmjqbOA2ijlUY7o97pYi7NijjP9lFD0yN/bwHL8G3hcRzYnkHy+Pj4iYDOxW9mB8DHgeRW8eZfnzgPcBb8/M7cp/W1H0anRPUGcB20TEP5W9Zv/WUs9FwHsy8+KyTZ4GtqZIRJpt8zrg9sw8naLXp/W4rwE+EhFrRcQmwIHl9gQWRjGvkIjYsmzLa6OYR/gjil7Tz1KcY+NbAy57D28B/qPleG9qqX+VlD2p5wPfKxPxXl8DisT7I+XzP59iaK6LFSXwbER8oNx3a4pfKHf051jLbUeV74V2itf72tU8zvuBrwBnlO+vXwHvKefBEREfphgabfZmvTci2ssh0A8Bk8rtsyiSUyh6Tikfv7LjWkJx7ryEIlE7ITOvpPjDqJ0V3zNSv5iMabT5JLB/RLyFIhH5JXBLRNxDMUfrQ5m5GPh34KKIuAO4BDisHGYBIItfhR0H/CYibqeY/N10IcUH/V8ohptubik7FHhTREwFbqUY3jqf4gviHmBaWd9rKeak9eQcinlG55ax9BlvNxdT9Ehdn8t+afp94PURcXcZ71+B7Vt718p9DwG+W8Z/EcWPDP5MN5n5K4pepmvLOt8PvKvsATkOOCUi7gRuAE7OzBktD/8gcG9m3tCt2i8Db4iI577wy/ly/0PRI3gLxdyypi9RDD9OAf5IMcz4W4p2XhIRt1IMg24WEfdSTNR+hmIob0OK9l8E/Am4kvJHGpm5iCJpO6Y8tuuAU8p4J1J8Id9bvo4bUUx+7+79wKvLdvwj8L+ZeWEP+w3Uf1EMz32sj9fg08BO5fNfSvFDkPndKyuH9g6k+KPlbooE7wtZ/LCjP8d6NMXk9qnlv6RIpFbXN8t4v5CZ1wJnANeX7+MPUiTyzd7FByiGHu8Efkcx5N2M7azyPbMby86dlR3XZWVdSyk+O/5UPv4dFEt3vHgQjk2jUFtXV09/DEnS8FT2gM0G9srM6XXHsyaKiE8Ad2bmzWWP1e+BEzNzUh8PlVQBl7aQtMYohwRvpejFyprDWZPdS9HDOYZint0lJmJSfewZkyRJqpFzxiRJkmq0Rg5TNhqNdor1Zmay4k+yJUmShpMxFJcpu62jo2OFH1etkckYRSL2+z73kiRJGj72prysWqs1NRmbCbDjjjsyduzYvvat3bRp0xg/vvvyO6qK7T20bO+hZ5sPLdt7aI3E9l64cCF//vOfYfnld55TaTIWEadQXFalC/hhubBia/kXKRYenFNuOiczz+pH1UsAxo4dS3t7+yBGXJ01Jc6RwvYeWrb30LPNh5btPbRGcHv3OLWqsmQsiovUvpFiIc11KBbQuyozW3+O/krgveV18SRJkkadyn5NmZm/pbhI82KKFZjXBuZ122134LMRcXdEnFlerkKSJGnUqHydsYg4meLix5dQXDqlq9y+AcUFfI8GZlBcEubB8oLHK9VoNLajuMSFJEnSmmL7jo6OGd03DsmirxGxHsW13X6amT/oZZ/dgPMyc7e+6msmY+PHj18jxpUbjQYdHR11hzFq2N5Dy/Yeerb50LK9h9ZIbO/Ozk6mTZsGvSRjlQ1TRsROEbErQGbOp7jA6itayreJiMNaHtJGcVFeSZKkUaPKX1PuAJwcEXtR/JryQOC8lvIFwKkRcQPFMOVRwOUVxiNJkjTsVDmB/2rgauBOiov6Ts7MiyPi6ojYPTNnAUdQDF8mRc/YaVXFI0mSNBxVus5YZp4InNht2wEtty8FLq0yhsFwY+MhJk6azuw5C9hsk3FM2H9n9unYuu6wJEnSCLCmrsA/ZG5sPMSZl0yhc1GxTtusOQs485IpACZkkiRptVU2TDlSTJw0/blErKlz0RImTppeU0SSJGkkMRnrw+w5Cwa0XZIkaSBMxvqw2SbjBrRdkiRpIEzG+jBh/51pX2fMctva1xnDhP13rikiSZI0kjiBvw/NSfr+mlKSJFXBZKwf9unY2uRLkiRVwmFKSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1WjtKiuPiFOAg4Eu4IeZeXq38l2Bc4CNgd8BH8/MxVXGJEmSNJxU1jMWEa8H3gi8Atgd+FRERLfdLgA+lZk7Am3A4VXFI0mSNBxVloxl5m+BN5Q9Xf9E0Qs3r1keEdsC4zLzlnLT+cAhVcUjSZI0HFU6TJmZiyLiZOBY4BLgkZbiLYGZLfdnAlsNpP5p06atdoxDpdFo1B3CqGJ7Dy3be+jZ5kPL9h5ao629K03GADLzxIj4OnAlxTDkD8qith52XzqQusePH097e/tqRli9RqNBR0dH3WGMGrb30LK9h55tPrRs76E1Etu7s7NzpR1IVc4Z26mcoE9mzgcuo5g/1vQI8IKW+1sAj1YVjyRJ0nBU5dIWOwDnRER7RIwFDgT+0CzMzAeBZyNiz3LTBGBShfFIkiQNO1VO4L8auBq4E2gAkzPz4oi4OiJ2L3c7FDgjIqYD6wPfqSoeSZKk4ajqCfwnAid223ZAy+0pwB5VxiBJkjScuQK/JElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTJEmqkcmYJElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTJEmqkcmYJElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTJEmqkcmYJElSjdausvKIOBF4d3n3qsw8rlv5F4GPAHPKTedk5llVxiRJkjScVJaMRcS+wH7AbkAXcE1EvDMzL2/Z7ZXAezPz5qrikCRJGs6q7BmbCXwmMxcCRMR0YJtu++wOfDYidgB+Bxybmc9WGJMkSdKw0tbV1VX5k0TES4DJwGsz8y/ltg2AnwFHAzOA84EHM/P4vuprNBrbAQ9UFK4kSVIVtu/o6JjRfWOlc8YAIuJlwFUUvV5/aW7PzGeAA1r2Ow04D+gzGWsaP3487e3tgxhtNRqNBh0dHXWHMWrY3kPL9h56tvnQsr2H1khs787OTqZNm9ZreaW/poyIPYHfAJ/LzB93K9smIg5r2dQGLKoyHkmSpOGmygn8WwNXAO/JzOt72GUBcGpE3EAxTHkUcHkP+0mSJI1YVQ5THgusC5weEc1t3wfeAXwxM2+PiCOAK4GxwB+A0yqMR5IkadipLBnLzGOAY3oo+n7LPpcCl1YVgyRJ0nDnCvySJEk1MhmTJEmqkcmYJElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTJEmqkcmYJElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTJEmqkcmYJElSjUzGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1WrvKyiPiRODd5d2rMvO4buW7AucAGwO/Az6emYurjEmSJGk4qaxnLCL2BfYDdgN2BToi4p3ddrsA+FRm7gi0AYdXFY8kSdJwVOUw5UzgM5m5MDMXAdOBbZqFEbEtMC4zbyk3nQ8cUmE8kiRJw05lw5SZeU/zdkS8BHgP8NqWXbakSNiaZgJbVRWPJEnScFTpnDGAiHgZcBVwbGb+paWorYfdlw6k7mnTpq1OaEOq0WjUHcKoYnsPLdt76NnmQ8v2Hlqjrb2rnsC/J3Ap8O+ZeXG34keAF7Tc3wJ4dCD1jx8/nvb29tULcgg0Gg06OjrqDmPUsL2Hlu099GzzoWV7D62R2N6dnZ0r7UCqcgL/1sAVwPt7SMTIzAeBZ8uEDWACMKmqeCRJkoajKnvGjgXWBU6PiOa27wPvAL6YmbcDhwLnRMSGwJ3AdyqMR5IkadipcgL/McAxPRR9v2WfKcAeVcUgSZI03LkCvyRJUo36lYxFxHoR8ery9jERcV5EbNPX4yRJkrRy/e0Z+xFwYES8kmLo8W8UlzGSJEnSauhvMrZDZv4X8K/A+Zl5ErBpZVFJkiSNEv1NxsaW/78FuD4ixgAbVBOSJEnS6NHfX1PeFBH3AouBycBvgOsqi0qSJGmU6G/P2KeAjwF7ZeZS4Jv0vGyFJEmSBqBfyVhmLqG4sPe3I+ICYLMyKZMkSdJq6O/SFscCnwemAHcAn46I46sMTJIkaTTo75yxCRRDlE8DRMQPgVuAr1QVmCRJ0mjQ7xX4m4lYefspYFElEUmSJI0i/e0ZmxERxwDfK+8fRbHwqyRJklZDf3vGjgTeCcwv/x1EkZBJkiRpNfSrZywzHwH2iYj1gLUy85lqw5IkSRod+pWMRcR3ut0HIDOPriAmSZKkUaO/w5RPtPybC7wG6KoqKEmSpNGiv8OUJ7fej4ivAr+sJCJJkqRRpN9LW7TKzHnACwc5FkmSpFFnVeaMtQEdwPRKIpIkSRpF+rvO2BMtt7uAnwAXDn44kiRJo8sqzRmTJEnS4FhpMhYRD7CSX01m5g6DHpEkSdIo0lfP2MHl/58AFgI/ABYDHwbGVhiXJEnSqLDSZCwzGwARMT4zX9VS9B8RcWulkUmSJI0C/V3a4nkRsXnzTkRsCWxUTUiSJEmjR39/TfktYGpE/IpiaYv9gOOqCkqSJGm06O+vKc+OiMnAmygm9J+amdP689iI2AiYDLw9M2d0K/si8BFgTrnpnMw8q5+xS5IkrfH6+jXlGzPz+oh4V7lpRvn/jhGxY2Ze1sfjXwWcA+zYyy6vBN6bmTcPIGZJkqQRo6+esfcB1wOf6qGsC1hpMgYcDhxFsUhsT3YHPhsROwC/A47NzGf7qFOSJGnEaOvq6nUZsR5FRBuwdmYuGsBjZgD7tA5TRsQGwM+Aoyl63M4HHszM4/uqr9FobAc8MICwB83dD8zjN1Oe5qn5S9h4vTG8aZeNeMX269cRiiRJWrNs39HRMaP7xv5em3IvYB/gVOAWYKeI+HBm/nRVo8nMZ4ADWp7jNOA8oM9krGn8+PG0t7evaggDdmPjIa66fQqdi5YA8NT8JVx1+9Nsv/327NOxda+PazQadHR0DFWYo57tPbRs76Fnmw8t23tojcT27uzsZNq03qfa93dpi29QJGH/BjwGvBT4zOoEFhHbRMRhLZvagH73ttVh4qTpzyViTZ2LljBxktdMlyRJq6a/ydiYzLwOeDNwRTncOGY1n3sBcGpEbF8OfR4FXL6adVZq9pwFA9ouSZLUl34nYxGxB/A24NcRMR5YZ1WeMCKujojdM3MWcARwJZAUPWOnrUqdQ2WzTcYNaLskSVJf+rvo61eAi4AfZuaM8gLix/T3STJzu5bbB7TcvhS4tL/11G3C/jtz5iVTlhuqbF9nDBP237nGqCRJ0pqsv4u+Xsbyy1i8ODOX9Lb/SNWcpD9x0nRmz1nAZpuMY8L+O6908r4kSdLK9PfXlC8Afgi8BNgbmBgRH8rMmVUGNxzt07G1yZckSRo0/Z0z9j3gCopJ908CdwHnVhOSJEnS6NHfZGy7zDwHWJqZizLzs8A2FcYlSZI0KvQ3GVsaEc/tGxEbDuCxkiRJ6kV/E6rLgAuBjSPiCIrrVf6ssqgkSZJGiX4lY5n5VeBq4DaKhV9/kJknVxmYJEnSaNDfdcbIzJ8AP2nej4g3Z+a1lUQlSZI0Sqw0GYuIDuC7wBPAhzNzdkRsA3wbeCvg0vOSJEmroa9hyu9RrJB/P3BCRLwbuAdYD9il4tgkSZJGvL6GKTfOzNMiYgzwZ+DdwOGZeXH1oUmSJI18ffWMzQcoL320LnCAiZgkSdLg6SsZa2u5PSsz76owFkmSpFGnr2HKtSJiE4qkrK3lNgCZ+WSVwUmSJI10fSVjLwdmsywBe6KlrAsYU0VQkiRJo8VKk7HM9JJHkiRJFTLZkiRJqpHJmCRJUo1MxiRJkmpkMiZJklQjkzFJkqQamYxJkiTVyGRMkiSpRiZjkiRJNTIZkyRJqlFfl0NaLRGxETAZeHtmzuhWtitwDrAx8Dvg45m5uMp4JEmShpvKesYi4lXAH4Ade9nlAuBTmbkjxbUvD68qFkmSpOGqymHKw4GjgEe7F0TEtsC4zLyl3HQ+cEiFsUiSJA1LlQ1TZuZHASKip+ItgZkt92cCWw30OaZNm7ZKsdWh0WjUHcKoYnsPLdt76NnmQ8v2Hlqjrb0rnTO2Em09bFs60ErGjx9Pe3v7IIRTrUajQUdHR91hjBq299CyvYeebT60bO+hNRLbu7Ozc6UdSHX9mvIR4AUt97egh+FMSZKkka6WZCwzHwSejYg9y00TgEl1xCJJklSnIU3GIuLqiNi9vHsocEZETAfWB74zlLFIkiQNB5XPGcvM7VpuH9ByewqwR9XPL0mSNJy5Ar8kSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKN1q6y8oh4P3ACMBY4IzPP6lb+ReAjwJxy0znd95EkSRrJKkvGIuKFwFeADqATmBwRN2TmvS27vRJ4b2beXFUckiRJw1mVw5T7Atdn5pOZOQ/4OXBwt312Bz4bEXdHxJkRsW6F8UiSJA07VQ5TbgnMbLk/E9ijeSciNgDuBI4FZgDnA18Aju/vE0ybNm0QwhwajUaj7hBGFdt7aNneQ882H1q299Aabe1dZTLW1sO2pc0bmfkMcEDzfkScBpzHAJKx8ePH097evjoxDolGo0FHR0fdYYwatvfQsr2Hnm0+tGzvoTUS27uzs3OlHUhVDlM+Aryg5f4WwKPNOxGxTUQc1lLeBiyqMB5JkqRhp8qeseuAkyJic2AecBDwsZbyBcCpEXEDxTDlUcDlFcYjSZI07FTWM5aZj1AMOd4A3AVclJm3RsTVEbF7Zs4CjgCuBJKiZ+y0quKRJEkajipdZywzLwIu6rbtgJbblwKXVhmDJEnScOYK/JIkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTUyGZMkSaqRyZgkSVKNTMYkSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTVau+4ANDA3Nh5i4qTpzJ6zgM02GceE/Xdmn46t6w5LkiStIpOxNciNjYc485IpdC5aAsCsOQs485IpACZkkiStoRymXINMnDT9uUSsqXPREiZOml5TRJIkaXXZM1aB7kOJe++0Lh0dq1/v7DkLBrRdkiQNfyZjq6Gn+VvACkOJV976LNtv/9BqDyVutsk4ZvWQeG22ybjVqleSJNXHZKwXfU2U72n+1mkX3UH7OmvRuWjpcnUtWtLFxEnTVzsZm7D/zss9J0D7OmOeSwIlSdKaxzljPbix8RCnX3QHs+YsoItlidb7v3A1NzYeAuAHV0xdYf4WsEIi1jQYQ4n7dGzNJw/Zhc03GUcbsPkm4/jkIbs4eV+SpDWYPWM9OP2iO+jqYfvc+Yv41sV3ct2tDzJ3/qIB1bnBeusMSmz7dGxt8iVJ0ghiMtbN2T+/q8dErGnJ0i6m3PfEgOud/+xibmysOG/MdcOGN18fSVLVKk3GIuL9wAnAWOCMzDyrW/muwDnAxsDvgI9n5uIqY+rLNX/8WyX1LlnaxQ+umNqveWfTH3iCIw/etZI41H+u66Y6+YeAVL3h8j6rbM5YRLwQ+AqwF7AL8LGIeGm33S4APpWZOwJtwOFVxdNfS5eurF9s9cydv+i5OWfQ87phAFff/OBy+6keq7Ou242Nhzjsy7/mHZ/5BYd9+de+nhqQ5h8CrfNWz7xkiueRNIiG0/usyp6xfYHrM/NJgIj4OXAwcEp5f1tgXGbeUu5/PnAycHZ/nyC/eTpt85dNjN9sz9eyxQFvZUlnJ/ee8pUV9v+nN+7DP7/pjSx6+mn+9PVvrlD+gre+BYANF83jX//+hxXKb93kpdy3/tZsuvAp3vr4LSuU37Tpy3lwvS35p84n2XfWbSuU//b5uzFx0nT+Zb15PHjBRbz5vtnLlV+3+St5vH1Ttp3/KLPPuJap2266XPmLjjyC9bZ6IU/eehuP/OLKFerf8d+Ppn3zzZj1+5t47JpfrVC+02ePZZ2NNuLvv7mex6+/cYXyl37xeMa0tzPz6muYfdPkFcpf/pVTAHjk8l/w5O2N5crWGjuWl514AgAP/fQS/nH31OXK19lwA3b63HEAzJh4AXPzz8uVtz//+ez4H8cAcP+55zHvgRnLlY/bcgtefNSRANx31tkseHTmcuXrb78dO3z0MAAWXv4Lpl72i+XKN4wd2W7CBwD409dOZdHcZ5Yrf94rXs7W7zkEgHtO/jJLFy5c7vW5b/2tuHWTlwHw5qlXMPX45c+P1nNv8n9+gcdmzePNzcT+Ybj+4XuAg9nzJRv3eu5tvveedM6azZ+/9Z0Vyl944L+y6R6vZP7Dj/DXs/9nhfKtDzmI5+26C8/c/wAP/PBHK5Rv+4H3s9HOO/H09D/x4AUXrVC+/Uc+zAY7bM8/7prCQ5dcukL5ys69zrlz6dxmW8894M+nf5vOJ5afxrAq597sB5/koPLHQM1zr3PREmaf8XWmbrspnXPnPneOD8bn3pp67oGfe4N97rXadPcOXvjOAwGYevwX6a6qc695fld97v2yayc6F63PtvMfZc8nl712s8+4hqnbbjqo597fb/kjvHW/FfZpqjIZ2xJoPXNmAnv0Ub7VQJ7gmWfmwbx5z92f/7e/8WijQdeiRSycO3eF/efPeJCHGw265s/vsXzBA/cDGw0khAGbNWcBmcmiuXMZM6aNJUt67olbtGgpc7vFeM8997DW3x9jyX1/ZXEP8U+dOpW2jTdiyQP391g+ZcoU2tZbj8UzHmRJD+V33nknbeusw+K//a3H8kaj+CBa/PDDK5S3rb32svJHH12xfPHi58oXPfYYS7uVP9O2rP5Fjz++Qvm82evwVLN89uwVyx9/nDmNZR+U3dtu3mOP8URZvnDOP+hasPyvW+c/+iiPN8ufeoquxYt7fX3GjGlbof7Wc2/m7Hl0dethXbxkKedeMYUN9tu413Pvb+utS9dTT/dYft99f2XMmLVYOvsJFvVQ/ue//IUxSxaz9LG/91iemaw1fx5LH3q4x/Lp06ez1pwnWXL/Az2eO557/Tv3Fj75BF2DcO4t6uVX2a2fC83/B+Nzz3Ov73Pv4VFy7i1X/vDDPFaWd/Z0blV47s2dO7fyc2/O0k5oX3+F7c332aCee8/Mo32FPZZp6+qqZlguIj4PrJeZJ5T3PwrsnpkfL++/Fjg1M/cq778Y+GVm7tRX3Y1GYzvggfHjx9PevrLDG7h//cwv+t5pNWy+yTjOO6HIjm9sPMRpF93R534amEajQccgXPKg+5wxKNZ162s5kXd85hc9/gikDfi/0w5c7biGm8Fqby1z2Jd/3eMCz83PBdt8aNneQ2uo2ruv99lg6uzsZNq0aQDbd3R0zOheXuU6Y48AL2i5vwXw6ADKR5zuC7Tu07E1B7xm2z73Uz1WdV233q6I4JUS1F8T9t+Z9nXGLLfNzwVpcA2n91mVw5TXASdFxObAPOAg4GPNwsx8MCKejYg9M/MmYAIwqcJ4+uXK0w4c1N6xtdZqY+nSLjbv5VcaRx68Kztv//xh8WsOrWhV1nXzSglaXc1zzs8FqTrD6X1WWTKWmY9ExPHADRRLW5ybmbdGxNXAFzPzduBQ4JyI2BC4E1hx9mgNrjztwJUOIfZXc0hrQx5faZerC7mOLMPpDa41l58LUvWGy/us0nXGMvMi4KJu2w5ouT2F5Sf1Dxv7dGzNxEnTexxPbtpwvXXYe5ctue1PjzN7zoLnVtl/Zv6i5b6AG43Hl3vccFnXRNUZLm9wSdLw5wr8K9HbcFP3eUNHDqBOFxKVJEmtTMZWovtwU7Pn6/SL7mDipOmr1KO1soVER1IyZu+fJEn9YzLWh+Zw02D1aM3uZdizt+1rInv/JEnqvyqXthhRVufSOK1Gw7IHg9VWkiSNBiZj/TRYPVrDaV2TqoyG3j9JkgaLw5T9tNkm43r8ZeVAe7RGw7IHfbWV88kkSVrGZKyfBnMhz+G07EEVidHK2sr5ZJIkLc9hyn5a1UvjDGfNxGjWnAV0sSwxurHx0GrVu7K2cj6ZJEnLs2dsAIZTj9ZgqHKZjd7ayvlkkiQtz56xUayOxGg0/JpUkqSBMBkbxepIjEbDr0klSRoIk7FRrI7EaCTOvZMkaXU4Z2wUq2uZjZE2906SpNVhMjbKmRhJklQvhyklSZJqZDImSZJUI5MxSZKkGpmMSZIk1chkTJIkqUYmY5IkSTVaU5e2GAOwcOHCuuPot87OzrpDGFVs76Flew8923xo2d5Da6S1d0u+Mqan8raurq6hi2aQNBqNvYDf1x2HJEnSAOzd0dHxh+4b19SesduAvYGZwJKaY5EkSVqZMcAWFPnLCtbInjFJkqSRwgn8kiRJNTIZkyRJqpHJmCRJUo1MxiRJkmpkMiZJklQjkzFJkqQamYxJkiTVyGRMkiSpRmvqCvxrhIh4P3ACMBY4IzPPqjmkES8irgf+GVhUbjoiM/9YY0gjTkRsBEwG3p6ZMyJiX+B0YBzw08w8odYAR6Ae2vw8iquQzCt3OTkzL68twBEkIk4E3l3evSozj/Mcr04v7T3qzm9X4K9IRLwQ+APQAXRSfJC+LzPvrTWwESwi2oBHgG0yc3Hd8YxEEfEq4BxgJ2BH4O9AAq8HHgKuAr6VmZNqC3KE6d7mZTI2FdgvM2fWG93IUiZdJwNvALqAa4Bzga/jOT7oemnvM4FTGGXnt8OU1dkXuD4zn8zMecDPgYNrjmmkC4o39KSImBIRn6w7oBHocOAo4NHy/h7AXzLzgTIBvgA4pK7gRqjl2jwi1ge2Ac6JiLsj4uSI8LN8cMwEPpOZCzNzETCd4o8Oz/Fq9NTe2zAKz2+HKauzJcWJ1jST4otL1dkE+A1wJMVwwo0RkZl5bb1hjRyZ+VGAiGhu6uk832qIwxrRemjzfwauB44AngF+CXyEovdMqyEz72nejoiXAO8BvoPneCV6ae+9gH0YZee3yVh12nrYtnTIoxhFMvNm4Oby7ryI+CFwAGAyVh3P8yGWmfcD72zej4jvAhMY4V9WQykiXkYxHHksxfzT6LaL5/ggam3vzExG4fk94rv+avQI8IKW+1uwbGhHFYiIvSLiTS2b2lg2kV/V8DwfYhHx8og4qGWT5/kgiog9KXrYP5eZP8ZzvFLd23u0nt/2jFXnOuCkiNic4hchBwEfqzekEe95wCkR8VpgHeCDwMdrjWjk+yMQEfFi4AHg/cB59YY04rUB3yp/OfwMxefKj+sNaWSIiK2BK4D3ZOb15WbP8Yr00t6j8vy2Z6wimfkIcDxwA3AXcFFm3lprUCNcZv6Soqv7TqABnFcOXaoimfks8CHgUuBe4E8UP1ZRRTLzbuC/gZso2vyuzPzfeqMaMY4F1gVOj4i7IuIuivP7Q3iOV6Gn9n4to/D8dmkLSZKkGtkzJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlGJmOSJEk1MhmTNOgi4oaI+K8etn8mIv5vJY87KSLOrDa6Xp/7UxHRFRGv7rb93PKCxqtS58blekkDfdyxEXH+qjynpDWPyZikKpwFfLiH7YcDtSRb/fBx4ELg31s3ZuZHM/O6VaxzE7wmraQ+uAK/pCpcAXw7IvbOzN8DRMTrKVbXvjYiPg/8G8WCj+tTXJPu8tYKyuvVnQk8H+gCTsvMiRGxD/BtiitbrE+R7OwHnACMBeaX9d0cETsBPyyfpw04NzO/1z3Yss5NgeOAv0bE1pn5UFl2YxnH7cC0zNyg3L5d835EvACYCGxWVnlVZn4B+BEwrlzMsoPiqhBHlHFuCnwtM8+OiHUoLkj9ZuBx4O/AU+XzbAWcDWxXHsOPM/MbEbE28F2KCysvBO4HPpyZz6z8pZE03NgzJmnQZeZi4AfAR1o2fwz4HrANsC/w+sx8BcWVKk5pfXyZaPwf8N1yn/2Br0bEa8pdxgPvy8xdyvq+ChyQmbuVz3NZRKwP/CdwZWZ2UFw0/nUR0dPn3pHAhZn5KHA98MkBHvLhwP2Z+S/A3sBLImJjit7BBZm5KzCu3K8Z53uAU8vHfwLYEXgpRUK2TUvdFwI3ZObLgT2BD0TEe4HXAPsAryiP737gFQOMW9IwYDImqSo/AA6MiA0jYlPgLcD5mfkgRQ/RoRHxNYrhwQ26PXZHYN3MvAygTJIuBd5alj9U1gNF8rIF8JuyB+pCYCnwYuBy4LiIuAx4F3B0Zi5tfaKyV+udLLv+3Y+Bw8tkrr+uAQ6KiKsper4+l5lPte5Q9li9HXhbRHyJIgltHve+FJdMW5iZ88pjoIxhT4phX8o6z6dITqcCS4A/lvVdmpmTBxCzpGHCZExSJTJzJnAt8F5gAvDzzHwqIv4FmAxsBPwa+DrF8Furnj6b1qK4ADwUFxBuGgP8JjN3bf4DXk0xhPhL4CXAz4DdgKkR8aJu9X6UYhj0yoiYAXyzjO2D3fbr6hbn2JZjvQ3YniIB3Q64tbxg/XPK4ca7gG2BP1AMq/ZW9+KWY+6pbdbJzH8Au1Bc328J8NOI+DSS1jgmY5Kq9D3gUIrE5qxy2+uA2zPzdOC3FHPHxnR7XAILI+JdABGxJXAQRXLX3fXAfuX8MCLiAOBuYN2IuAh4T2ZeTDEU+DSwdfOBETGGYljz45m5XfmvOex5TES0JkL/AMZGxEvL++9sqedrwBcy8wrgGOAeit69xcCYsp7dgVnAlzPzVxS9ZM0YrgEmRMS6EbEuxRAmmTkXuAU4qtx3Y4rE9tqIeDvwG2ByZp5EMWdtlx7aR9IwZzImqTKZeSPFBPynM3Nqufl/gc0i4l6gQdHLtWlEbNjyuEUUSdoxEXE3cB1wSmbe0MNz3EORUF0cEVOALwHvKIf7vkQxHDoF+CPFsOVvWx7+dorPwQu7VXsG8AKKeWbN53mKYoL/pIi4jaI3q+lbwK4RMY1iov8D5XHOBO4ApgO3AQ8DGRF3UswLm0UxnPo/5eOmlfE90FL3ocCbImIqcCvFcO35wCSKpG9aRNwOvBY4qXv7SBr+2rq6uvreS5JGqYi4Azg+MyfVHYukkcmlLSSpB+UvOqdTLDFxU83hSBrB7BmTJEmqkXPGJEmSamQyJkmSVCOTMUmSpBqZjEmSJNXIZEySJKlG/x8VlUNK3N8AFQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Shapiro-Wilk test p-value: 3.1191042084835655e-31\n", " feature VIF\n", "0 const 1.666738\n", "1 2020_gov 16.399994\n", "2 2020 1.052399\n", "3 2020_con 16.402942\n", "4 2020_trade 1.050951\n" ] } ], "source": [ "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", "\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade']]\n", "y = df_countries['GDP_calculated_2021']\n", "X = sm.add_constant(X) # Añadir una constante\n", "\n", "# Ajustar el modelo robusto\n", "model_robust = sm.RLM(y, X).fit()\n", "\n", "# Imprimir el resumen del modelo\n", "print(model_robust.summary())\n", "\n", "# Residuos vs Valores Ajustados\n", "plt.figure(figsize=(10, 5))\n", "plt.scatter(model_robust.fittedvalues, model_robust.resid)\n", "plt.axhline(y=0, color='r', linestyle='--')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.title('Residuos vs Valores Ajustados en Regresión Robusta')\n", "plt.show()\n", "\n", "# Test de Shapiro-Wilk para normalidad de residuos\n", "from scipy import stats\n", "print(\"Shapiro-Wilk test p-value:\", stats.shapiro(model_robust.resid)[1])\n", "\n", "# VIF para detectar multicolinealidad\n", "vif_data = pd.DataFrame()\n", "vif_data[\"feature\"] = X.columns\n", "vif_data[\"VIF\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", "print(vif_data)" ] }, { "cell_type": "markdown", "id": "785a9e60", "metadata": {}, "source": [ "## Incluir GPI como variable dummy \n", "El GPI es un indicador de bienestar econémico de los paises o **Global Peace Index**.\n", "Después de observar que el modelo se desviaba por aplicar algunos argumentos, creo que es momento de incorporar nuevas variables." ] }, { "cell_type": "code", "execution_count": 41, "id": "81cc6f94", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Country Name 1960 1961 1962 1963 1964 1965 1966 1967 1968 ... \\\n", "0 Afghanistan 0 0 0 0 0 0 0 0 0 ... \n", "1 Albania 0 0 0 0 0 0 0 0 0 ... \n", "2 Algeria 0 0 0 0 0 0 0 0 0 ... \n", "3 Angola 0 0 0 0 0 0 0 0 0 ... \n", "4 Argentina 0 0 0 0 0 0 0 0 0 ... \n", "\n", " 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 \n", "0 0 3.0 3.23 3.4440 3.658 3.623 3.674 3.641 3.641 3.650 \n", "1 0 1.6 1.33 1.2795 1.229 1.237 1.225 1.403 1.403 1.403 \n", "2 0 2.2 2.00 2.0445 2.089 1.912 1.927 2.116 2.088 2.068 \n", "3 0 1.8 1.35 1.4270 1.504 1.403 1.418 1.625 1.625 1.666 \n", "4 0 1.4 1.33 1.3665 1.403 1.201 1.201 1.201 1.201 1.201 \n", "\n", "[5 rows x 64 columns]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "gpi_data = pd.read_excel('GPI.xlsx')\n", "\n", "print(gpi_data.head())" ] }, { "cell_type": "markdown", "id": "ead66db4", "metadata": {}, "source": [ "## Crear la variable dummy para el año 2021" ] }, { "cell_type": "code", "execution_count": 42, "id": "b3e84bb1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Country Name 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp \\\n", "0 Aruba -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", "1 Afghanistan -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", "2 Angola -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", "3 Albania -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", "4 Andorra -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", "\n", " 1966_gdp 1967_gdp 1968_gdp ... 2014 2015 2016 2017 2018 2019 \\\n", "0 -0.202705 -0.202662 -0.203255 ... NaN NaN NaN NaN NaN NaN \n", "1 -0.202705 -0.202662 -0.203255 ... 3.0 3.23 3.4440 3.658 3.623 3.674 \n", "2 -0.202705 -0.202662 -0.203255 ... 1.8 1.35 1.4270 1.504 1.403 1.418 \n", "3 -0.202705 -0.202662 -0.203255 ... 1.6 1.33 1.2795 1.229 1.237 1.225 \n", "4 -0.202705 -0.202662 -0.203255 ... NaN NaN NaN NaN NaN NaN \n", "\n", " 2020 2021 2022 GPI_dummy \n", "0 NaN NaN NaN NaN \n", "1 3.641 3.641 3.650 1.0 \n", "2 1.625 1.625 1.666 0.0 \n", "3 1.403 1.403 1.403 0.0 \n", "4 NaN NaN NaN NaN \n", "\n", "[5 rows x 441 columns]\n" ] } ], "source": [ "# Crear una variable dummy donde el GPI del año 2021 mayor a 2.5 es 1, de lo contrario 0\n", "gpi_data['GPI_dummy'] = (gpi_data[2021] > 2.5).astype(int)\n", "\n", "# Fusionar con df_countries\n", "# Asegúrate de que 'Country' en df_countries y 'Country Name' en gpi_data coincidan\n", "df_countries = df_countries.merge(gpi_data[['Country Name', 'GPI_dummy']], left_on='Country Name', right_on='Country Name', how='left')\n", "\n", "# Verificar que la unión se haya hecho correctamente\n", "print(df_countries.head())" ] }, { "cell_type": "markdown", "id": "991e793f", "metadata": {}, "source": [ "## Reajustar el Modelo con la Nueva Variable" ] }, { "cell_type": "code", "execution_count": 43, "id": "fa212f4b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Country Name 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp \\\n", "1 Afghanistan -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "2 Angola -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "3 Albania -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "6 United Arab Emirates -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "7 Argentina -0.098248 -0.095184 -0.101961 -0.113989 -0.110743 \n", "\n", " 1965_gdp 1966_gdp 1967_gdp 1968_gdp ... 2014 2015 2016 2017 \\\n", "1 -0.202832 -0.202705 -0.202662 -0.203255 ... 3.0 3.23 3.4440 3.658 \n", "2 -0.202832 -0.202705 -0.202662 -0.203255 ... 1.8 1.35 1.4270 1.504 \n", "3 -0.202832 -0.202705 -0.202662 -0.203255 ... 1.6 1.33 1.2795 1.229 \n", "6 -0.202832 -0.202705 -0.202662 -0.203255 ... 1.4 1.33 1.4045 1.479 \n", "7 -0.103947 -0.110039 -0.109995 -0.112002 ... 1.4 1.33 1.3665 1.403 \n", "\n", " 2018 2019 2020 2021 2022 GPI_dummy \n", "1 3.623 3.674 3.641 3.641 3.650 1.0 \n", "2 1.403 1.418 1.625 1.625 1.666 0.0 \n", "3 1.237 1.225 1.403 1.403 1.403 0.0 \n", "6 1.580 1.604 1.211 1.464 1.581 0.0 \n", "7 1.201 1.201 1.201 1.201 1.201 0.0 \n", "\n", "[5 rows x 441 columns]\n" ] } ], "source": [ "import numpy as np\n", "\n", "# Revisar si hay valores infinitos y reemplazarlos con NaN\n", "df_countries.replace([np.inf, -np.inf], np.nan, inplace=True)\n", "\n", "# Eliminar cualquier fila que tenga algún NaN\n", "df_countries.dropna(inplace=True)\n", "\n", "for column in df_countries.columns:\n", " if df_countries[column].isnull().any():\n", " df_countries[column].fillna(df_countries[column].mean(), inplace=True)\n", "\n", "# Verificar el DataFrame después de la limpieza\n", "print(df_countries.head())" ] }, { "cell_type": "code", "execution_count": 44, "id": "daa7f34b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Valores faltantes en X: const 0\n", "2020_gov 0\n", "2020 0\n", "2020_con 0\n", "2020_trade 0\n", "GPI_dummy 0\n", "dtype: int64\n", "Valores faltantes en y: 0\n", " Robust linear Model Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 No. Observations: 160\n", "Model: RLM Df Residuals: 154\n", "Method: IRLS Df Model: 5\n", "Norm: HuberT \n", "Scale Est.: mad \n", "Cov Type: H1 \n", "Date: Sun, 12 May 2024 \n", "Time: 21:39:52 \n", "No. Iterations: 50 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.1005 0.001 -152.932 0.000 -0.102 -0.099\n", "2020_gov 1.1192 0.002 730.681 0.000 1.116 1.122\n", "2020 2.8975 0.002 1233.770 0.000 2.893 2.902\n", "2020_con 0.8792 0.002 575.995 0.000 0.876 0.882\n", "2020_trade 0.0237 0.001 21.429 0.000 0.022 0.026\n", "GPI_dummy 0.0007 0.001 0.519 0.604 -0.002 0.003\n", "==============================================================================\n", "\n", "If the model instance has been used for another fit with different fit parameters, then the fit options might not be the correct ones anymore .\n" ] } ], "source": [ "# Preparando los datos para el modelo\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade', 'GPI_dummy']]\n", "X = sm.add_constant(X) # Añadir una constante\n", "y = df_countries['GDP_calculated_2021'] # La variable objetivo\n", "\n", "# Asegurarse de que no hay valores faltantes\n", "print(\"Valores faltantes en X:\", X.isnull().sum())\n", "print(\"Valores faltantes en y:\", y.isnull().sum())\n", "\n", "# Ajustar el modelo de regresión robusta\n", "model_robust = sm.RLM(y, X).fit()\n", "print(model_robust.summary())" ] }, { "cell_type": "markdown", "id": "4083f56b", "metadata": {}, "source": [ "### Análisis de los Resultados del Modelo\n", "**Coeficientes y Significancia**:\n", "- Los coeficientes de 2020_gov, 2020, y 2020_con son significativos y sus intervalos de confianza no incluyen el cero, lo que indica que son predictores relevantes del GDP_calculated_2021.\n", "- El coeficiente para 2020_trade también es significativo y positivo, aunque el efecto es menor comparado con los otros predictores.\n", "- La variable GPI_dummy tiene un coeficiente muy pequeño y no es estadísticamente significativa (p-value = 0.604). Esto sugiere que la presencia de un índice de paz global alto o bajo no tiene un efecto significativo en el PIB calculado para 2021, al menos no en el modelo actual con las otras variables controladas.\n", "\n", "**Interpretación Económica**:\n", "- Las variables representativas del año 2020 (2020_gov, 2020, 2020_con, 2020_trade) tienen un fuerte impacto en el PIB calculado de 2021, lo que puede reflejar cómo los eventos económicos o las políticas del año anterior afectaron el desempeño económico en 2021.\n", "- La falta de impacto significativo de GPI_dummy puede ser un indicativo de que los efectos del índice de paz son más complejos o que otros factores no considerados en el modelo están moderando o enmascarando este efecto." ] }, { "cell_type": "markdown", "id": "5e52112d", "metadata": {}, "source": [ "### Examinar Otras Variables\n", "Incuiré las variables **Inflación** e **Interés** que pueden tener un impacto en el PIB calculado." ] }, { "cell_type": "code", "execution_count": 45, "id": "4136b3dd", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Country Name Country Code 1960 1961 1962 1963 1964 \\\n", "0 Aruba ABW NaN NaN NaN NaN NaN \n", "1 Africa Eastern and Southern AFE NaN NaN NaN NaN NaN \n", "2 Afghanistan AFG NaN NaN NaN NaN NaN \n", "3 Africa Western and Central AFW NaN NaN NaN NaN NaN \n", "4 Angola AGO NaN NaN NaN NaN NaN \n", "\n", " 1965 1966 1967 ... 2013 2014 2015 2016 \\\n", "0 NaN NaN NaN ... 10.709708 3.213869 0.157925 7.982851 \n", "1 NaN NaN NaN ... NaN NaN NaN NaN \n", "2 NaN NaN NaN ... 9.784496 14.351689 12.252548 17.583938 \n", "3 NaN NaN NaN ... NaN NaN NaN NaN \n", "4 NaN NaN NaN ... 12.610802 12.380530 21.144182 -4.922063 \n", "\n", " 2017 2018 2019 2020 2021 2022 \n", "0 9.789287 2.437682 -0.371564 7.738755 11.988410 2.598476 \n", "1 NaN NaN NaN NaN NaN NaN \n", "2 12.141178 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 -5.552638 -5.844003 0.090919 8.028657 -13.989372 3.277431 \n", "\n", "[5 rows x 65 columns]\n", " Country Name Country Code 1960 1961 1962 1963 1964 \\\n", "0 Aruba ABW NaN NaN NaN NaN NaN \n", "1 Africa Eastern and Southern AFE NaN NaN NaN NaN NaN \n", "2 Afghanistan AFG NaN NaN NaN NaN NaN \n", "3 Africa Western and Central AFW NaN NaN NaN NaN NaN \n", "4 Angola AGO NaN NaN NaN NaN NaN \n", "\n", " 1965 1966 1967 ... 2013 2014 2015 2016 2017 \\\n", "0 NaN NaN NaN ... -2.372065 0.421441 0.474764 -0.931196 -1.028282 \n", "1 NaN NaN NaN ... 5.750981 5.370290 5.245878 6.571396 6.399343 \n", "2 NaN NaN NaN ... 7.385772 4.673996 -0.661709 4.383892 4.975952 \n", "3 NaN NaN NaN ... 2.439201 1.768436 2.130817 1.487416 1.764635 \n", "4 NaN NaN NaN ... 8.777814 7.280387 9.353840 30.698958 29.842578 \n", "\n", " 2018 2019 2020 2021 2022 \n", "0 3.626041 4.257462 NaN NaN NaN \n", "1 4.720811 4.653665 7.321106 6.824727 10.773751 \n", "2 0.626149 2.302373 NaN NaN NaN \n", "3 1.784050 1.760112 2.437609 3.653533 7.967574 \n", "4 19.630594 17.079704 22.271564 25.754266 NaN \n", "\n", "[5 rows x 65 columns]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Cargar datos de tasas de interés\n", "interest_data = pd.read_excel('Interest.xlsx')\n", "# Cargar datos de inflación\n", "inflation_data = pd.read_excel('Inflation.xlsx')\n", "\n", "print(interest_data.head())\n", "print(inflation_data.head())" ] }, { "cell_type": "code", "execution_count": 46, "id": "1db32d37", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Country Name 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp \\\n", "0 Afghanistan -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "1 Angola -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "2 Albania -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "3 United Arab Emirates -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 \n", "4 Argentina -0.098248 -0.095184 -0.101961 -0.113989 -0.110743 \n", "\n", " 1965_gdp 1966_gdp 1967_gdp 1968_gdp ... 2013 2014 2015 \\\n", "0 -0.202832 -0.202705 -0.202662 -0.203255 ... 7.385772 4.673996 -0.661709 \n", "1 -0.202832 -0.202705 -0.202662 -0.203255 ... 8.777814 7.280387 9.353840 \n", "2 -0.202832 -0.202705 -0.202662 -0.203255 ... 1.937621 1.625865 1.896174 \n", "3 -0.202832 -0.202705 -0.202662 -0.203255 ... 1.101118 2.346269 4.069966 \n", "4 -0.103947 -0.110039 -0.109995 -0.112002 ... NaN NaN NaN \n", "\n", " 2016 2017 2018 2019 2020 2021 2022 \n", "0 4.383892 4.975952 0.626149 2.302373 NaN NaN NaN \n", "1 30.698958 29.842578 19.630594 17.079704 22.271564 25.754266 NaN \n", "2 1.275432 1.986661 2.028060 1.411091 1.620887 2.041472 6.725203 \n", "3 1.617488 1.966826 3.068634 -1.931081 -2.079403 -0.013860 4.827889 \n", "4 NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[5 rows x 569 columns]\n" ] } ], "source": [ "# Unir df_countries con interest_data e inflation_data\n", "df_countries = df_countries.merge(interest_data, on='Country Name', how='left')\n", "df_countries = df_countries.merge(inflation_data, on='Country Name', how='left')\n", "\n", "print(df_countries.head())" ] }, { "cell_type": "markdown", "id": "36ebe001", "metadata": {}, "source": [ "### Interacciones y No Linearidades\n", "Exploraré la posibilidad de interacciones entre las variables y la no linealidades para capturar mejor las relaciones complejas." ] }, { "cell_type": "code", "execution_count": 47, "id": "7531201a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Columnas en interest_data: Index(['Country Name', 'Country Code', '1960', '1961', '1962', '1963', '1964',\n", " '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973',\n", " '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982',\n", " '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991',\n", " '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000',\n", " '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009',\n", " '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',\n", " '2019', '2020', '2021', '2022'],\n", " dtype='object')\n", "Columnas en inflation_data: Index(['Country Name', 'Country Code', '1960', '1961', '1962', '1963', '1964',\n", " '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973',\n", " '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982',\n", " '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991',\n", " '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000',\n", " '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009',\n", " '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',\n", " '2019', '2020', '2021', '2022'],\n", " dtype='object')\n" ] } ], "source": [ "# Imprimir los nombres de las columnas de interest_data e inflation_data para verificar\n", "print(\"Columnas en interest_data:\", interest_data.columns)\n", "print(\"Columnas en inflation_data:\", inflation_data.columns)" ] }, { "cell_type": "code", "execution_count": 48, "id": "ef82544e", "metadata": {}, "outputs": [], "source": [ "# Renombrar columnas si es necesario\n", "interest_data.rename(columns={'Rate': 'Interest Rate'}, inplace=True)\n", "inflation_data.rename(columns={'Rate': 'Inflation Rate'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 49, "id": "e8a9e3ca", "metadata": {}, "outputs": [], "source": [ "# Especificar Sufijos al Fusionar\n", "# Unir df_countries con interest_data\n", "df_countries = df_countries.merge(interest_data, on='Country Name', how='left', suffixes=('', '_interest'))\n", "\n", "# Unir el resultado con inflation_data\n", "df_countries = df_countries.merge(inflation_data, on='Country Name', how='left', suffixes=('', '_inflation'))" ] }, { "cell_type": "code", "execution_count": 50, "id": "8d6bc0b2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Robust linear Model Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 No. Observations: 77\n", "Model: RLM Df Residuals: 67\n", "Method: IRLS Df Model: 9\n", "Norm: HuberT \n", "Scale Est.: mad \n", "Cov Type: H1 \n", "Date: Sun, 12 May 2024 \n", "Time: 21:40:55 \n", "No. Iterations: 38 \n", "=============================================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------------\n", "const -0.5943 0.006 -91.706 0.000 -0.607 -0.582\n", "2020_gov 1.4269 0.026 55.580 0.000 1.377 1.477\n", "2020 -1.583e-06 0.000 -0.008 0.994 -0.000 0.000\n", "2020_con 0.7237 0.018 40.390 0.000 0.689 0.759\n", "2020_trade 0.3482 0.006 55.147 0.000 0.336 0.361\n", "GPI_dummy 0.4997 0.035 14.455 0.000 0.432 0.567\n", "Interest Rate 2021 -0.0004 0.000 -0.911 0.362 -0.001 0.000\n", "Inflation Rate 2021 -0.0003 0.001 -0.302 0.763 -0.003 0.002\n", "interest_gpi_interaction 0.0226 0.003 8.717 0.000 0.017 0.028\n", "inflation_gpi_interaction -0.0291 0.003 -8.767 0.000 -0.036 -0.023\n", "=============================================================================================\n", "\n", "If the model instance has been used for another fit with different fit parameters, then the fit options might not be the correct ones anymore .\n" ] } ], "source": [ "# Seleccionar las columnas de interés y de inflación del año 2021\n", "df_countries['Interest Rate 2021'] = df_countries['2021_interest']\n", "df_countries['Inflation Rate 2021'] = df_countries['2021_inflation']\n", "\n", "# Crear interacciones entre la tasa de interés y GPI, y la tasa de inflación y GPI\n", "df_countries['interest_gpi_interaction'] = df_countries['Interest Rate 2021'] * df_countries['GPI_dummy']\n", "df_countries['inflation_gpi_interaction'] = df_countries['Inflation Rate 2021'] * df_countries['GPI_dummy']\n", "\n", "# Preparar los datos para el modelo\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade', 'GPI_dummy', 'Interest Rate 2021', 'Inflation Rate 2021', 'interest_gpi_interaction', 'inflation_gpi_interaction']]\n", "X = sm.add_constant(X) # Añadir una constante\n", "y = df_countries['GDP_calculated_2021'] # La variable objetivo\n", "\n", "# Asegurarte de que no hay NaNs en X o y\n", "X.dropna(inplace=True)\n", "y = y[X.index] # Asegurar que y solo contiene índices de X\n", "\n", "# Ajustar el modelo\n", "model_robust = sm.RLM(y, X).fit()\n", "print(model_robust.summary())" ] }, { "cell_type": "markdown", "id": "d949419b", "metadata": {}, "source": [ "### Análisis de Subconjuntos\n", "Analizar subconjuntos de países, para ver si el efecto del índice de paz varía significativamente entre estos grupos.\n", "\n", "Los países con un índice GPI más alto (menos pacíficos) tienen un PIB más alto en este modelo, lo cual es interesante y podría merecer una investigación más profunda sobre la naturaleza de esta relación', te puedo dar como ejemplo que un país como Estados Unidos con un alto PBI estan surguiendo violencia internas por diversos motivos (raciales, homofóbica, contra inmigrantes, creencias religiosas, posiciones nacionalistas, feminicidio), mientras que Ruanda ha mejorado en tres décadas en donde la reconciliación entre tribus a hecho que sea un lugar relativamente pacífico y con un PBI no destacable.\n", "Para analizar subconjuntos de países para ver si el efecto del índice de paz varía significativamente entre estos grupos, suguiero los siguiente:\n", "- **grupo_economías_grandes: 'Germany', 'France', 'United State', 'Switzerland', 'Sweden'**\n", "- **grupo economías_pequeñas: 'Peru', 'Colombia', 'Estonia', 'Uruguay', 'Portugal'**" ] }, { "cell_type": "code", "execution_count": 51, "id": "3636d331", "metadata": {}, "outputs": [], "source": [ "# Definir los grupos de países\n", "grupo_economias_grandes = ['Germany', 'France', 'United States', 'Switzerland', 'Sweden']\n", "grupo_economias_pequeñas = ['Peru', 'Colombia', 'Estonia', 'Uruguay', 'Portugal']\n", "\n", "# Asegúrate de que los nombres de los países estén correctamente escritos como aparecen en tu DataFrame" ] }, { "cell_type": "code", "execution_count": 52, "id": "d6c691de", "metadata": {}, "outputs": [], "source": [ "# Filtrar los DataFrames por grupo\n", "df_grandes = df_countries[df_countries['Country Name'].isin(grupo_economias_grandes)]\n", "df_pequenas = df_countries[df_countries['Country Name'].isin(grupo_economias_pequeñas)]" ] }, { "cell_type": "code", "execution_count": 53, "id": "c12c047a", "metadata": {}, "outputs": [], "source": [ "# Eliminar filas con valores NaN en el DataFrame para economías grandes\n", "df_grandes = df_grandes.dropna(subset=['2020_gov', '2020', '2020_con', '2020_trade', 'GPI_dummy', 'Interest Rate 2021', 'Inflation Rate 2021', 'interest_gpi_interaction', 'inflation_gpi_interaction', 'GDP_calculated_2021'])\n", "\n", "# Eliminar filas con valores NaN en el DataFrame para economías pequeñas\n", "df_pequenas = df_pequenas.dropna(subset=['2020_gov', '2020', '2020_con', '2020_trade', 'GPI_dummy', 'Interest Rate 2021', 'Inflation Rate 2021', 'interest_gpi_interaction', 'inflation_gpi_interaction', 'GDP_calculated_2021'])" ] }, { "cell_type": "code", "execution_count": 54, "id": "f6ef8587", "metadata": {}, "outputs": [], "source": [ "# Función para imputar valores faltantes con la media\n", "def imputar_con_media(df):\n", " for column in ['2020_gov', '2020', '2020_con', '2020_trade', 'GPI_dummy', 'Interest Rate 2021', 'Inflation Rate 2021', 'interest_gpi_interaction', 'inflation_gpi_interaction']:\n", " if df[column].isnull().any():\n", " df[column].fillna(df[column].mean(), inplace=True)\n", " return df\n", "\n", "# Imputar valores faltantes en ambos DataFrames\n", "df_grandes = imputar_con_media(df_grandes)\n", "df_pequenas = imputar_con_media(df_pequenas)" ] }, { "cell_type": "code", "execution_count": null, "id": "faab2951", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "74bec184", "metadata": {}, "source": [ "\n", "### Validación del Modelo\n", "Finalmente, valida el modelo utilizando un conjunto de datos de prueba o mediante técnicas de validación cruzada para asegurar que el modelo generaliza bien a nuevos datos." ] }, { "cell_type": "code", "execution_count": 55, "id": "8ed92637", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold\n", "from sklearn.metrics import mean_squared_error\n", "import numpy as np\n", "import statsmodels.api as sm\n", "\n", "# Supongamos que df es tu DataFrame y ya está limpio y listo para usar\n", "X = df_countries[['2020_gov', '2020', '2020_con', '2020_trade', 'Interest Rate 2021', 'Inflation Rate 2021', 'interest_gpi_interaction', 'inflation_gpi_interaction']]\n", "y = df_countries['GDP_calculated_2021']\n", "\n", "# Añadir una constante a X\n", "X = sm.add_constant(X)" ] }, { "cell_type": "code", "execution_count": null, "id": "9768bfdd", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 56, "id": "3b00e818", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([['Afghanistan', -0.2018738832349514, -0.2009817132062104, ...,\n", " nan, nan, nan],\n", " ['Angola', -0.2018738832349514, -0.2009817132062104, ...,\n", " 25.7542656453085, -0.0, 0.0],\n", " ['Albania', -0.2018738832349514, -0.2009817132062104, ...,\n", " 2.04147163139549, 0.0, 0.0],\n", " ...,\n", " ['South Africa', -0.1696027393539647, -0.1684367115660035, ...,\n", " 4.61167217803206, 0.0, 0.0],\n", " ['Zambia', -0.2214833790799845, -0.2203719098604074, ...,\n", " 22.0207676245778, nan, 0.0],\n", " ['Zimbabwe', -0.2209943299706016, -0.2197217042515599, ...,\n", " 98.5461050920624, -0.0, 0.0]], dtype=object)" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_countries.values[:376]" ] }, { "cell_type": "code", "execution_count": 57, "id": "1a49bd46", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Country Name 0\n", "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", " ..\n", "2022_inflation 20\n", "Interest Rate 2021 78\n", "Inflation Rate 2021 17\n", "interest_gpi_interaction 78\n", "inflation_gpi_interaction 17\n", "Length: 701, dtype: int64" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_countries.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 58, "id": "0be90489", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Country Name 0\n", "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", " ..\n", "2022_inflation 20\n", "Interest Rate 2021 78\n", "Inflation Rate 2021 17\n", "interest_gpi_interaction 78\n", "inflation_gpi_interaction 17\n", "Length: 701, dtype: int64" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_countries.iloc[:376].isnull().sum()" ] }, { "cell_type": "code", "execution_count": 59, "id": "d6c2aacf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Country Name 0\n", "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", " ..\n", "2022_inflation 20\n", "Interest Rate 2021 78\n", "Inflation Rate 2021 17\n", "interest_gpi_interaction 78\n", "inflation_gpi_interaction 17\n", "Length: 701, dtype: int64\n", "Country Name object\n", "1960_gdp float64\n", "1961_gdp float64\n", "1962_gdp float64\n", "1963_gdp float64\n", " ... \n", "2022_inflation float64\n", "Interest Rate 2021 float64\n", "Inflation Rate 2021 float64\n", "interest_gpi_interaction float64\n", "inflation_gpi_interaction float64\n", "Length: 701, dtype: object\n" ] } ], "source": [ "# Verificar NaNs en las primeras 376 filas y los tipos de datos\n", "print(df_countries.iloc[:376].isnull().sum())\n", "print(df_countries.iloc[:376].dtypes)" ] }, { "cell_type": "code", "execution_count": null, "id": "8387b391", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }