{ "cells": [ { "cell_type": "markdown", "id": "5c8decec", "metadata": {}, "source": [ "# Propósito del Modelo\n", "El objetivo es hacer análisis que involucran múltiples variables de diferentes bases de datos combinadas." ] }, { "cell_type": "code", "execution_count": 1, "id": "ff4e01c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "( 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.201874 -0.200982 -0.200327 \n", " Africa Eastern and Southern AFE -0.094914 -0.098259 -0.094392 \n", " Afghanistan AFG -0.201874 -0.200982 -0.200327 \n", " Africa Western and Central AFW -0.135881 -0.136426 -0.136892 \n", " Angola AGO -0.201874 -0.200982 -0.200327 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.199863 -0.200426 -0.202832 \n", " Africa Eastern and Southern AFE -0.094446 -0.097204 -0.095992 \n", " Afghanistan AFG -0.199863 -0.200426 -0.202832 \n", " Africa Western and Central AFW -0.135198 -0.136542 -0.136252 \n", " Angola AGO -0.199863 -0.200426 -0.202832 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.202705 -0.202662 -0.203255 \n", " Africa Eastern and Southern AFE -0.097136 -0.094766 -0.097880 \n", " Afghanistan AFG -0.202705 -0.202662 -0.203255 \n", " Africa Western and Central AFW -0.141240 -0.150569 -0.154342 \n", " Angola AGO -0.202705 -0.202662 -0.203255 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.204786 ... -0.288371 -0.289320 \n", " Africa Eastern and Southern AFE -0.099362 ... -0.173739 -0.173601 \n", " Afghanistan AFG -0.204786 ... -0.286345 -0.287284 \n", " Africa Western and Central AFW -0.149334 ... -0.195458 -0.193785 \n", " Angola AGO -0.204786 ... -0.277472 -0.278209 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.289823 -0.290696 -0.291427 \n", " Africa Eastern and Southern AFE -0.174254 -0.175796 -0.177555 \n", " Afghanistan AFG -0.287813 -0.288695 -0.289457 \n", " Africa Western and Central AFW -0.194550 -0.197907 -0.199728 \n", " Angola AGO -0.278940 -0.280400 -0.281517 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.292036 -0.292556 -0.292998 \n", " Africa Eastern and Southern AFE -0.179122 -0.180347 -0.180619 \n", " Afghanistan AFG -0.290112 -0.290586 -0.290938 \n", " Africa Western and Central AFW -0.200799 -0.200851 -0.199375 \n", " Angola AGO -0.282588 -0.283415 -0.284049 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.293201 -0.294042 \n", " Africa Eastern and Southern AFE -0.183041 -0.183463 \n", " Afghanistan AFG -0.291793 -0.287261 \n", " Africa Western and Central AFW -0.201723 -0.201925 \n", " Angola AGO -0.284755 -0.285617 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.084868 -0.08484 -0.083201 \n", " Africa Eastern and Southern AFE -0.084868 -0.08484 -0.083201 \n", " Afghanistan AFG -0.084868 -0.08484 -0.083201 \n", " Africa Western and Central AFW -0.084868 -0.08484 -0.083201 \n", " Angola AGO -0.084868 -0.08484 -0.083201 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.082048 -0.080368 -0.074607 \n", " Africa Eastern and Southern AFE -0.082048 -0.080368 -0.074607 \n", " Afghanistan AFG -0.082048 -0.080368 -0.074607 \n", " Africa Western and Central AFW -0.082048 -0.080368 -0.074607 \n", " Angola AGO -0.082048 -0.080368 -0.074607 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.075705 -0.073737 -0.072911 \n", " Africa Eastern and Southern AFE -0.075705 -0.073737 -0.072911 \n", " Afghanistan AFG -0.075705 -0.073737 -0.072911 \n", " Africa Western and Central AFW -0.075705 -0.073737 -0.072911 \n", " Angola AGO -0.075705 -0.073737 -0.072911 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.071835 ... -0.105875 -0.107196 \n", " Africa Eastern and Southern AFE -0.071835 ... -0.105875 -0.107196 \n", " Afghanistan AFG -0.071835 ... -0.105875 -0.107196 \n", " Africa Western and Central AFW -0.071835 ... -0.105875 -0.107196 \n", " Angola AGO -0.071835 ... -0.104616 -0.106276 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.105721 -0.104515 -0.105551 \n", " Africa Eastern and Southern AFE -0.105721 -0.104515 -0.105551 \n", " Afghanistan AFG -0.105721 -0.104515 -0.105551 \n", " Africa Western and Central AFW -0.105721 -0.104515 -0.105551 \n", " Angola AGO -0.105172 -0.104014 -0.105000 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.107831 -0.111312 -0.111963 \n", " Africa Eastern and Southern AFE -0.107831 -0.111312 -0.111963 \n", " Afghanistan AFG -0.107831 -0.111312 -0.111067 \n", " Africa Western and Central AFW -0.107831 -0.111312 -0.111963 \n", " Angola AGO -0.107297 -0.110756 -0.111903 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.110932 -0.111682 \n", " Africa Eastern and Southern AFE -0.110932 -0.111682 \n", " Afghanistan AFG -0.110692 -0.111682 \n", " Africa Western and Central AFW -0.110932 -0.111682 \n", " Angola AGO -0.111045 -0.111847 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.146825 -0.158595 -0.153408 \n", " Africa Eastern and Southern AFE -0.146825 -0.158595 -0.153408 \n", " Afghanistan AFG -0.146825 -0.158595 -0.153408 \n", " Africa Western and Central AFW -0.146825 -0.158595 -0.153408 \n", " Angola AGO -0.146825 -0.158595 -0.153408 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.158273 -0.158734 -0.162173 \n", " Africa Eastern and Southern AFE -0.158273 -0.158734 -0.162173 \n", " Afghanistan AFG -0.158273 -0.158734 -0.162173 \n", " Africa Western and Central AFW -0.158273 -0.158734 -0.162173 \n", " Angola AGO -0.158273 -0.158734 -0.162173 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.158459 -0.162375 -0.1682 \n", " Africa Eastern and Southern AFE -0.158459 -0.162375 -0.1682 \n", " Afghanistan AFG -0.158459 -0.162375 -0.1682 \n", " Africa Western and Central AFW -0.158459 -0.162375 -0.1682 \n", " Angola AGO -0.158459 -0.162375 -0.1682 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.184395 ... -0.203738 -0.202930 \n", " Africa Eastern and Southern AFE -0.184395 ... -0.071547 -0.081470 \n", " Afghanistan AFG -0.184395 ... -0.203738 -0.202930 \n", " Africa Western and Central AFW -0.184395 ... -0.203738 -0.202930 \n", " Angola AGO -0.184395 ... -0.194796 -0.193997 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.285333 -0.202682 -0.202571 \n", " Africa Eastern and Southern AFE -0.195116 -0.092256 -0.090919 \n", " Afghanistan AFG -0.285333 -0.202682 -0.202571 \n", " Africa Western and Central AFW -0.285333 -0.202682 -0.202571 \n", " Angola AGO -0.280513 -0.198833 -0.198563 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.202676 -0.202183 -0.201677 \n", " Africa Eastern and Southern AFE -0.093306 -0.094302 -0.094236 \n", " Afghanistan AFG -0.202676 -0.202183 -0.201677 \n", " Africa Western and Central AFW -0.202676 -0.202183 -0.201677 \n", " Angola AGO -0.200401 -0.201239 -0.199575 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.201440 -0.201034 \n", " Africa Eastern and Southern AFE -0.093547 -0.090401 \n", " Afghanistan AFG -0.201440 -0.201034 \n", " Africa Western and Central AFW -0.201440 -0.201034 \n", " Angola AGO -0.201274 -0.203768 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.090809 -0.091864 -0.09344 \n", " Africa Eastern and Southern AFE -0.090809 -0.091864 -0.09344 \n", " Afghanistan AFG -0.090809 -0.091864 -0.09344 \n", " Africa Western and Central AFW -0.090809 -0.091864 -0.09344 \n", " Angola AGO -0.090809 -0.091864 -0.09344 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.093163 -0.094031 -0.092187 \n", " Africa Eastern and Southern AFE -0.093163 -0.094031 -0.092187 \n", " Afghanistan AFG -0.093163 -0.094031 -0.092187 \n", " Africa Western and Central AFW -0.093163 -0.094031 -0.092187 \n", " Angola AGO -0.093163 -0.094031 -0.092187 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.090953 -0.089691 -0.090635 \n", " Africa Eastern and Southern AFE -0.090953 -0.089691 -0.090635 \n", " Afghanistan AFG -0.090953 -0.089691 -0.090635 \n", " Africa Western and Central AFW -0.090953 -0.089691 -0.090635 \n", " Angola AGO -0.090953 -0.089691 -0.090635 \n", " \n", " 1969 ... 2013 2014 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.089882 ... -0.113248 -0.114293 \n", " Africa Eastern and Southern AFE -0.089882 ... -0.113248 -0.114293 \n", " Afghanistan AFG -0.089882 ... -0.113248 -0.114293 \n", " Africa Western and Central AFW -0.089882 ... -0.113248 -0.114293 \n", " Angola AGO -0.089882 ... -0.112900 -0.113945 \n", " \n", " 2015 2016 2017 \\\n", " Country Name Country Code \n", " Aruba ABW -0.115194 -0.114932 -0.115308 \n", " Africa Eastern and Southern AFE -0.115194 -0.114932 -0.115308 \n", " Afghanistan AFG -0.115194 -0.114932 -0.115308 \n", " Africa Western and Central AFW -0.115194 -0.114932 -0.115308 \n", " Angola AGO -0.114668 -0.114465 -0.114863 \n", " \n", " 2018 2019 2020 \\\n", " Country Name Country Code \n", " Aruba ABW -0.116877 -0.119049 -0.118598 \n", " Africa Eastern and Southern AFE -0.116877 -0.119049 -0.118598 \n", " Afghanistan AFG -0.116877 -0.119049 -0.117498 \n", " Africa Western and Central AFW -0.116877 -0.119049 -0.118598 \n", " Angola AGO -0.116601 -0.118793 -0.118486 \n", " \n", " 2021 2022 \n", " Country Name Country Code \n", " Aruba ABW -0.118736 -0.117705 \n", " Africa Eastern and Southern AFE -0.118736 -0.117705 \n", " Afghanistan AFG -0.118341 -0.117705 \n", " Africa Western and Central AFW -0.118736 -0.117705 \n", " Angola AGO -0.118741 -0.118102 \n", " \n", " [5 rows x 63 columns],\n", " 1960 1961 1962 \\\n", " Country Name Country Code \n", " Aruba ABW -0.162675 -0.152507 -0.175345 \n", " Africa Eastern and Southern AFE 2.381755 2.202534 2.959211 \n", " Afghanistan AFG -0.162675 -0.152507 -0.175345 \n", " Africa Western and Central AFW -0.162675 -0.152507 -0.175345 \n", " Angola AGO -0.162675 -0.109791 -0.142530 \n", " \n", " 1963 1964 1965 \\\n", " Country Name Country Code \n", " Aruba ABW -0.199606 -0.207111 -0.257548 \n", " Africa Eastern and Southern AFE 2.810270 2.750210 3.317384 \n", " Afghanistan AFG -0.199606 -0.207111 -0.257548 \n", " Africa Western and Central AFW -0.199606 -0.207111 -0.257548 \n", " Angola AGO -0.165347 -0.144329 -0.250614 \n", " \n", " 1966 1967 1968 \\\n", " Country Name Country Code \n", " Aruba ABW -0.288931 -0.277482 -0.278466 \n", " Africa Eastern and Southern AFE 4.608648 3.940742 3.916744 \n", " Afghanistan AFG -0.288931 -0.277482 -0.278466 \n", " Africa Western and Central AFW -0.288931 -0.277482 0.066949 \n", " Angola AGO -0.261803 -0.277482 -0.278466 \n", " \n", " 1969 ... 2011 2012 \\\n", " Country Name Country Code ... \n", " Aruba ABW -0.270550 ... -0.325080 -0.311868 \n", " Africa Eastern and Southern AFE 4.802118 ... -0.325080 -0.311868 \n", " Afghanistan AFG -0.270550 ... -0.325080 -0.311868 \n", " Africa Western and Central AFW 0.209348 ... 0.935966 0.632917 \n", " Angola AGO -0.270550 ... 0.266913 0.170396 \n", " \n", " 2013 2014 2015 \\\n", " Country Name Country Code \n", " Aruba ABW -0.306099 -0.309704 -0.250354 \n", " Africa Eastern and Southern AFE -0.306099 -0.309704 -0.250354 \n", " Afghanistan AFG -0.306099 -0.309704 -0.250354 \n", " Africa Western and Central AFW 0.353438 0.164271 -0.175233 \n", " Angola AGO 0.224552 0.035088 -0.153382 \n", " \n", " 2016 2017 2018 \\\n", " Country Name Country Code \n", " Aruba ABW -0.252749 -0.257753 -0.291725 \n", " Africa Eastern and Southern AFE -0.252749 -0.257753 -0.291725 \n", " Afghanistan AFG -0.252749 -0.257753 -0.291725 \n", " Africa Western and Central AFW -0.252749 -0.133328 0.042448 \n", " Angola AGO -0.115071 -0.036075 0.042644 \n", " \n", " 2019 2020 \n", " Country Name Country Code \n", " Aruba ABW -0.281673 -0.251303 \n", " Africa Eastern and Southern AFE -0.281673 -0.251303 \n", " Afghanistan AFG -0.281673 -0.251303 \n", " Africa Western and Central AFW -0.051936 -0.239934 \n", " Angola AGO -0.088127 -0.217208 \n", " \n", " [5 rows x 61 columns])" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.formula.api as smf\n", "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "main_gdp = pd.read_csv('main_gdp.csv', index_col=['Country Name', 'Country Code'])\n", "main_government = pd.read_csv('main_government.csv', index_col=['Country Name', 'Country Code'])\n", "main_investments = pd.read_csv('main_investments.csv', index_col=['Country Name', 'Country Code'])\n", "main_consumption = pd.read_csv('main_consumption.csv', index_col=['Country Name', 'Country Code'])\n", "main_trade = pd.read_csv('main_trade.csv', index_col=['Country Name', 'Country Code'])\n", "\n", "(main_gdp.head(), main_government.head(), main_investments.head(), main_consumption.head(), main_trade.head())" ] }, { "cell_type": "markdown", "id": "5c1b62dd", "metadata": {}, "source": [ "#### Unir múltiples DataFrames al mismo tiempo con sufijos especificados" ] }, { "cell_type": "code", "execution_count": 2, "id": "f35b32d2", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GDP_calculated_1960GDP_calculated_1961GDP_calculated_1962GDP_calculated_1963GDP_calculated_1964GDP_calculated_1965GDP_calculated_1966GDP_calculated_1967GDP_calculated_1968GDP_calculated_1969...GDP_calculated_2013GDP_calculated_2014GDP_calculated_2015GDP_calculated_2016GDP_calculated_2017GDP_calculated_2018GDP_calculated_2019GDP_calculated_2020GDP_calculated_2021GDP_calculated_2022
Country NameCountry Code
ArubaABW-0.687051-0.688787-0.705721-0.732952-0.740670-0.789346-0.816753-0.805947-0.813468-0.821448...-1.017331-1.023442-1.046426-0.965575-0.972609-1.011146-1.006774-0.976539-0.925748-0.925497
Africa Eastern and SouthernAFE1.9643401.7689772.5347702.3823402.3198732.8924254.1863963.5201733.4871174.356643...-0.770508-0.786264-0.840639-0.740249-0.747085-0.788861-0.786684-0.756719-0.599803-0.593651
AfghanistanAFG-0.687051-0.688787-0.705721-0.732952-0.740670-0.789346-0.816753-0.805947-0.813468-0.821448...-1.015306-1.021407-1.044415-0.963573-0.970639-1.009221-1.004804-0.972484-0.923705-0.918717
Africa Western and CentralAFW-0.621058-0.624231-0.642287-0.668287-0.676786-0.722767-0.755287-0.753854-0.419140-0.286099...-0.264882-0.453932-0.876033-0.872786-0.756485-0.585735-0.685331-0.871547-0.834270-0.833380
AngolaAGO-0.687051-0.646070-0.672906-0.698693-0.677888-0.782412-0.789625-0.805947-0.813468-0.821448...-0.465233-0.657339-0.932675-0.812782-0.736018-0.664243-0.802329-0.931221-0.917089-0.923103
\n", "

5 rows × 63 columns

\n", "
" ], "text/plain": [ " GDP_calculated_1960 \\\n", "Country Name Country Code \n", "Aruba ABW -0.687051 \n", "Africa Eastern and Southern AFE 1.964340 \n", "Afghanistan AFG -0.687051 \n", "Africa Western and Central AFW -0.621058 \n", "Angola AGO -0.687051 \n", "\n", " GDP_calculated_1961 \\\n", "Country Name Country Code \n", "Aruba ABW -0.688787 \n", "Africa Eastern and Southern AFE 1.768977 \n", "Afghanistan AFG -0.688787 \n", "Africa Western and Central AFW -0.624231 \n", "Angola AGO -0.646070 \n", "\n", " GDP_calculated_1962 \\\n", "Country Name Country Code \n", "Aruba ABW -0.705721 \n", "Africa Eastern and Southern AFE 2.534770 \n", "Afghanistan AFG -0.705721 \n", "Africa Western and Central AFW -0.642287 \n", "Angola AGO -0.672906 \n", "\n", " GDP_calculated_1963 \\\n", "Country Name Country Code \n", "Aruba ABW -0.732952 \n", "Africa Eastern and Southern AFE 2.382340 \n", "Afghanistan AFG -0.732952 \n", "Africa Western and Central AFW -0.668287 \n", "Angola AGO -0.698693 \n", "\n", " GDP_calculated_1964 \\\n", "Country Name Country Code \n", "Aruba ABW -0.740670 \n", "Africa Eastern and Southern AFE 2.319873 \n", "Afghanistan AFG -0.740670 \n", "Africa Western and Central AFW -0.676786 \n", "Angola AGO -0.677888 \n", "\n", " GDP_calculated_1965 \\\n", "Country Name Country Code \n", "Aruba ABW -0.789346 \n", "Africa Eastern and Southern AFE 2.892425 \n", "Afghanistan AFG -0.789346 \n", "Africa Western and Central AFW -0.722767 \n", "Angola AGO -0.782412 \n", "\n", " GDP_calculated_1966 \\\n", "Country Name Country Code \n", "Aruba ABW -0.816753 \n", "Africa Eastern and Southern AFE 4.186396 \n", "Afghanistan AFG -0.816753 \n", "Africa Western and Central AFW -0.755287 \n", "Angola AGO -0.789625 \n", "\n", " GDP_calculated_1967 \\\n", "Country Name Country Code \n", "Aruba ABW -0.805947 \n", "Africa Eastern and Southern AFE 3.520173 \n", "Afghanistan AFG -0.805947 \n", "Africa Western and Central AFW -0.753854 \n", "Angola AGO -0.805947 \n", "\n", " GDP_calculated_1968 \\\n", "Country Name Country Code \n", "Aruba ABW -0.813468 \n", "Africa Eastern and Southern AFE 3.487117 \n", "Afghanistan AFG -0.813468 \n", "Africa Western and Central AFW -0.419140 \n", "Angola AGO -0.813468 \n", "\n", " GDP_calculated_1969 ... \\\n", "Country Name Country Code ... \n", "Aruba ABW -0.821448 ... \n", "Africa Eastern and Southern AFE 4.356643 ... \n", "Afghanistan AFG -0.821448 ... \n", "Africa Western and Central AFW -0.286099 ... \n", "Angola AGO -0.821448 ... \n", "\n", " GDP_calculated_2013 \\\n", "Country Name Country Code \n", "Aruba ABW -1.017331 \n", "Africa Eastern and Southern AFE -0.770508 \n", "Afghanistan AFG -1.015306 \n", "Africa Western and Central AFW -0.264882 \n", "Angola AGO -0.465233 \n", "\n", " GDP_calculated_2014 \\\n", "Country Name Country Code \n", "Aruba ABW -1.023442 \n", "Africa Eastern and Southern AFE -0.786264 \n", "Afghanistan AFG -1.021407 \n", "Africa Western and Central AFW -0.453932 \n", "Angola AGO -0.657339 \n", "\n", " GDP_calculated_2015 \\\n", "Country Name Country Code \n", "Aruba ABW -1.046426 \n", "Africa Eastern and Southern AFE -0.840639 \n", "Afghanistan AFG -1.044415 \n", "Africa Western and Central AFW -0.876033 \n", "Angola AGO -0.932675 \n", "\n", " GDP_calculated_2016 \\\n", "Country Name Country Code \n", "Aruba ABW -0.965575 \n", "Africa Eastern and Southern AFE -0.740249 \n", "Afghanistan AFG -0.963573 \n", "Africa Western and Central AFW -0.872786 \n", "Angola AGO -0.812782 \n", "\n", " GDP_calculated_2017 \\\n", "Country Name Country Code \n", "Aruba ABW -0.972609 \n", "Africa Eastern and Southern AFE -0.747085 \n", "Afghanistan AFG -0.970639 \n", "Africa Western and Central AFW -0.756485 \n", "Angola AGO -0.736018 \n", "\n", " GDP_calculated_2018 \\\n", "Country Name Country Code \n", "Aruba ABW -1.011146 \n", "Africa Eastern and Southern AFE -0.788861 \n", "Afghanistan AFG -1.009221 \n", "Africa Western and Central AFW -0.585735 \n", "Angola AGO -0.664243 \n", "\n", " GDP_calculated_2019 \\\n", "Country Name Country Code \n", "Aruba ABW -1.006774 \n", "Africa Eastern and Southern AFE -0.786684 \n", "Afghanistan AFG -1.004804 \n", "Africa Western and Central AFW -0.685331 \n", "Angola AGO -0.802329 \n", "\n", " GDP_calculated_2020 \\\n", "Country Name Country Code \n", "Aruba ABW -0.976539 \n", "Africa Eastern and Southern AFE -0.756719 \n", "Afghanistan AFG -0.972484 \n", "Africa Western and Central AFW -0.871547 \n", "Angola AGO -0.931221 \n", "\n", " GDP_calculated_2021 \\\n", "Country Name Country Code \n", "Aruba ABW -0.925748 \n", "Africa Eastern and Southern AFE -0.599803 \n", "Afghanistan AFG -0.923705 \n", "Africa Western and Central AFW -0.834270 \n", "Angola AGO -0.917089 \n", "\n", " GDP_calculated_2022 \n", "Country Name Country Code \n", "Aruba ABW -0.925497 \n", "Africa Eastern and Southern AFE -0.593651 \n", "Afghanistan AFG -0.918717 \n", "Africa Western and Central AFW -0.833380 \n", "Angola AGO -0.923103 \n", "\n", "[5 rows x 63 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Unir los DataFrames uno por uno\n", "df_combined = main_gdp.join(main_government, lsuffix='_gdp', rsuffix='_gov')\n", "df_combined = df_combined.join(main_investments, rsuffix='_inv')\n", "df_combined = df_combined.join(main_consumption, rsuffix='_con')\n", "df_combined = df_combined.join(main_trade, rsuffix='_trade')\n", "\n", "\n", "# Corregir la fórmula para calcular el GDP correctamente usando los sufijos adecuados para cada año\n", "for year in range(1960, 2023):\n", " gdp_col = f'{year}_gdp' if f'{year}_gdp' in df_combined.columns else str(year)\n", " gov_col = f'{year}_gov' if f'{year}_gov' in df_combined.columns else str(year)\n", " inv_col = f'{year}_inv' if f'{year}_inv' in df_combined.columns else str(year)\n", " con_col = f'{year}_con' if f'{year}_con' in df_combined.columns else str(year)\n", " trade_col = f'{year}_trade' if f'{year}_trade' in df_combined.columns else str(year)\n", "\n", " df_combined[f'GDP_calculated_{year}'] = (df_combined[gdp_col] +\n", " df_combined[gov_col] +\n", " df_combined[inv_col] +\n", " df_combined[con_col] +\n", " df_combined[trade_col])\n", "\n", "df_combined[[f'GDP_calculated_{year}' for year in range(1960, 2023)]].head()" ] }, { "cell_type": "code", "execution_count": 3, "id": "8d4ef2f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "MultiIndex: 239 entries, ('Aruba', 'ABW') to ('Zimbabwe', 'ZWE')\n", "Columns: 376 entries, 1960_gdp to GDP_calculated_2022\n", "dtypes: float64(376)\n", "memory usage: 731.2+ KB\n" ] }, { "data": { "text/plain": [ "( 1960_gdp 1961_gdp 1962_gdp 1963_gdp 1964_gdp 1965_gdp \\\n", " count 239.000000 239.000000 239.000000 239.000000 239.000000 239.000000 \n", " mean -0.081045 -0.080586 -0.079957 -0.079715 -0.080026 -0.081028 \n", " std 0.811894 0.811040 0.811016 0.810860 0.810938 0.811302 \n", " min -0.224510 -0.223332 -0.222338 -0.222336 -0.222801 -0.221246 \n", " 25% -0.202029 -0.205308 -0.204461 -0.204545 -0.204212 -0.207263 \n", " 50% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " 75% -0.201874 -0.200982 -0.200327 -0.199863 -0.200426 -0.202832 \n", " max 8.925325 8.916576 8.903261 8.903658 8.909846 8.905935 \n", " \n", " 1966_gdp 1967_gdp 1968_gdp 1969_gdp ... \\\n", " count 239.000000 239.000000 239.000000 239.000000 ... \n", " mean -0.080735 -0.080437 -0.081089 -0.082057 ... \n", " std 0.811470 0.810851 0.810718 0.810380 ... \n", " min -0.220064 -0.219031 -0.219934 -0.221195 ... \n", " 25% -0.208238 -0.209050 -0.210544 -0.212014 ... \n", " 50% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " 75% -0.202705 -0.202662 -0.203255 -0.204786 ... \n", " max 8.893228 8.892912 8.894431 8.920468 ... \n", " \n", " GDP_calculated_2013 GDP_calculated_2014 GDP_calculated_2015 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.213543 -0.243189 -0.328689 \n", " std 2.892561 2.864930 2.860354 \n", " min -1.030556 -1.036704 -1.056181 \n", " 25% -1.017694 -1.023788 -1.047886 \n", " 50% -1.010545 -1.016673 -1.039728 \n", " 75% -0.773237 -0.793074 -0.879409 \n", " max 28.144082 27.835159 27.572665 \n", " \n", " GDP_calculated_2016 GDP_calculated_2017 GDP_calculated_2018 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.247591 -0.239064 -0.249890 \n", " std 2.856636 2.864796 2.849596 \n", " min -0.978107 -0.985575 -1.024731 \n", " 25% -0.965917 -0.973220 -1.012258 \n", " 50% -0.959720 -0.966302 -1.004253 \n", " 75% -0.808664 -0.790391 -0.808545 \n", " max 27.837297 27.701692 27.367289 \n", " \n", " GDP_calculated_2019 GDP_calculated_2020 GDP_calculated_2021 \\\n", " count 239.000000 239.000000 239.000000 \n", " mean -0.257321 -0.264348 -0.200992 \n", " std 2.831007 2.824620 3.298784 \n", " min -1.021194 -0.990254 -0.954674 \n", " 25% -1.007834 -0.976800 -0.933824 \n", " 50% -0.999458 -0.970490 -0.921164 \n", " 75% -0.807603 -0.831460 -0.816629 \n", " max 26.262588 26.264463 31.407289 \n", " \n", " GDP_calculated_2022 \n", " count 239.000000 \n", " mean -0.202149 \n", " std 3.301215 \n", " min -0.962321 \n", " 25% -0.936761 \n", " 50% -0.918836 \n", " 75% -0.826804 \n", " max 31.527472 \n", " \n", " [8 rows x 376 columns],\n", " None,\n", " (239, 376),\n", " Index(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp', '1965_gdp',\n", " '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " ...\n", " 'GDP_calculated_2013', 'GDP_calculated_2014', 'GDP_calculated_2015',\n", " 'GDP_calculated_2016', 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020', 'GDP_calculated_2021',\n", " 'GDP_calculated_2022'],\n", " dtype='object', length=376),\n", " MultiIndex([( 'Aruba', 'ABW'),\n", " ('Africa Eastern and Southern', 'AFE'),\n", " ( 'Afghanistan', 'AFG'),\n", " ( 'Africa Western and Central', 'AFW'),\n", " ( 'Angola', 'AGO'),\n", " ( 'Albania', 'ALB'),\n", " ( 'Andorra', 'AND'),\n", " ( 'Arab World', 'ARB'),\n", " ( 'United Arab Emirates', 'ARE'),\n", " ( 'Argentina', 'ARG'),\n", " ...\n", " ( 'Virgin Islands (U.S.)', 'VIR'),\n", " ( 'Viet Nam', 'VNM'),\n", " ( 'Vanuatu', 'VUT'),\n", " ( 'World', 'WLD'),\n", " ( 'Samoa', 'WSM'),\n", " ( 'Kosovo', 'XKX'),\n", " ( 'Yemen, Rep.', 'YEM'),\n", " ( 'South Africa', 'ZAF'),\n", " ( 'Zambia', 'ZMB'),\n", " ( 'Zimbabwe', 'ZWE')],\n", " names=['Country Name', 'Country Code'], length=239),\n", " 1960_gdp float64\n", " 1961_gdp float64\n", " 1962_gdp float64\n", " 1963_gdp float64\n", " 1964_gdp float64\n", " ... \n", " GDP_calculated_2018 float64\n", " GDP_calculated_2019 float64\n", " GDP_calculated_2020 float64\n", " GDP_calculated_2021 float64\n", " GDP_calculated_2022 float64\n", " Length: 376, dtype: object,\n", " 1960_gdp 0\n", " 1961_gdp 0\n", " 1962_gdp 0\n", " 1963_gdp 0\n", " 1964_gdp 0\n", " ..\n", " GDP_calculated_2018 0\n", " GDP_calculated_2019 0\n", " GDP_calculated_2020 0\n", " GDP_calculated_2021 0\n", " GDP_calculated_2022 0\n", " Length: 376, dtype: int64)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_combined.describe(), df_combined.info(), df_combined.shape, df_combined.columns, df_combined.index, df_combined.dtypes, df_combined.isnull().sum()" ] }, { "cell_type": "markdown", "id": "3646af4e", "metadata": {}, "source": [ "## Prueba ADF " ] }, { "cell_type": "code", "execution_count": 4, "id": "8e6598de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Augmented Dickey-Fuller Test on \"GDP_calculated_2021\" \n", " -----------------------------------------------\n", " Null Hypothesis: Data has unit root. Non-Stationary.\n", " Significance Level = 0.05\n", " Test Statistic = -15.8577\n", " No. Lags Chosen = 0\n", " Critical value 1% = -3.458\n", " Critical value 5% = -2.874\n", " Critical value 10% = -2.573\n", " => P-Value = 0.0. Rejecting Null Hypothesis.\n", " => Series is Stationary.\n" ] } ], "source": [ "from statsmodels.tsa.stattools import adfuller\n", "\n", "def test_stationarity(series, signif=0.05, name='', verbose=False):\n", " r = adfuller(series, autolag='AIC')\n", " output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4), 'n_lags': r[2], 'n_obs': r[3]}\n", " p_value = output['pvalue'] \n", " def adjust(val, length= 6): return str(val).ljust(length)\n", "\n", " # Imprimir Resultados de la Prueba\n", " if verbose:\n", " print(f' Augmented Dickey-Fuller Test on \"{name}\"', \"\\n \", '-'*47)\n", " print(f' Null Hypothesis: Data has unit root. Non-Stationary.')\n", " print(f' Significance Level = {signif}')\n", " print(f' Test Statistic = {output[\"test_statistic\"]}')\n", " print(f' No. Lags Chosen = {output[\"n_lags\"]}')\n", "\n", " for key, val in r[4].items():\n", " print(f' Critical value {adjust(key)} = {round(val, 3)}')\n", "\n", " if p_value <= signif:\n", " print(f\" => P-Value = {p_value}. Rejecting Null Hypothesis.\")\n", " print(f\" => Series is Stationary.\")\n", " else:\n", " print(f\" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.\")\n", " print(f\" => Series is Non-Stationary.\")\n", " \n", " return output\n", "\n", "# Aplicar al año 2021\n", "series = df_combined['GDP_calculated_2021']\n", "result = test_stationarity(series, name='GDP_calculated_2021', verbose=True)" ] }, { "cell_type": "markdown", "id": "28192e55", "metadata": {}, "source": [ "## Análisis de Componentes Temporales" ] }, { "cell_type": "code", "execution_count": 5, "id": "98bab1cc", "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "def decompose_time_series(series, title):\n", " if len(series) > 2: \n", " decomposition = sm.tsa.seasonal_decompose(series, model='additive', period=1) \n", " # Crear gráficos de los componentes\n", " fig, ax = plt.subplots(4, 1, figsize=(16, 10), sharex=True)\n", " series.plot(ax=ax[0], color='b', title=title)\n", " ax[0].set_ylabel('Original')\n", " decomposition.trend.plot(ax=ax[1], color='r')\n", " ax[1].set_ylabel('Trend')\n", " decomposition.seasonal.plot(ax=ax[2], color='g')\n", " ax[2].set_ylabel('Seasonal')\n", " decomposition.resid.plot(ax=ax[3], color='k')\n", " ax[3].set_ylabel('Residual')\n", " plt.tight_layout()\n", " plt.show()\n", " else:\n", " print(f\"Not enough data to decompose {title}\")\n", "\n", "decompose_time_series(df_combined['GDP_calculated_2021'], 'Countries GDP 2021')" ] }, { "cell_type": "code", "execution_count": null, "id": "602d60bb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "b574d8d2", "metadata": {}, "source": [ "### Homocedasticidad" ] }, { "cell_type": "code", "execution_count": 6, "id": "f976fa64", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n", "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n", "C:\\Users\\Oscar Murgueytio\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\patsy\\util.py:672: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", " return _pandas_is_categorical_dtype(dt)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "p-value de la prueba de Breusch-Pagan: 0.00017441609247072733\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import statsmodels.api as sm\n", "from statsmodels.stats.diagnostic import het_breuschpagan\n", "from statsmodels.formula.api import ols\n", "import matplotlib.pyplot as plt\n", "\n", "modelo = 'GDP_calculated_2022 ~ GDP_calculated_2021'\n", "\n", "model = ols(modelo, data=df_combined).fit()\n", "\n", "residuos = model.resid\n", "\n", "# Prueba de Breusch-Pagan\n", "_, pvalue, _, _ = het_breuschpagan(residuos, model.model.exog)\n", "print(f\"p-value de la prueba de Breusch-Pagan: {pvalue}\")\n", "\n", "plt.figure(figsize=(20, 8))\n", "plt.scatter(model.fittedvalues, residuos)\n", "plt.axhline(0, color='red', linestyle='--')\n", "plt.title('Residuos vs Valores Ajustados')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.show()\n", "\n", "# Si el p-value es menor a 0.05, existe evidencia de heterocedasticidad." ] }, { "cell_type": "markdown", "id": "ac981b36", "metadata": {}, "source": [ "### Weighted Least Squares (WLS)" ] }, { "cell_type": "code", "execution_count": 7, "id": "ab351c19", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " WLS Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 R-squared: 1.000\n", "Model: WLS Adj. R-squared: 1.000\n", "Method: Least Squares F-statistic: 2.752e+06\n", "Date: Sun, 12 May 2024 Prob (F-statistic): 0.00\n", "Time: 20:58:08 Log-Likelihood: 733.57\n", "No. Observations: 239 AIC: -1457.\n", "Df Residuals: 234 BIC: -1440.\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0637 0.004 -16.682 0.000 -0.071 -0.056\n", "2020_gov 1.1296 0.009 122.174 0.000 1.111 1.148\n", "2020 2.8250 0.005 579.645 0.000 2.815 2.835\n", "2020_con 0.8689 0.007 128.030 0.000 0.856 0.882\n", "2020_trade 0.2523 0.016 15.870 0.000 0.221 0.284\n", "==============================================================================\n", "Omnibus: 48.340 Durbin-Watson: 1.650\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 396.541\n", "Skew: -0.460 Prob(JB): 7.80e-87\n", "Kurtosis: 9.243 Cond. No. 72.8\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "import statsmodels.api as sm\n", "\n", "X = df_combined[['2020_gov', '2020', '2020_con', '2020_trade']] # G + I + C + T\n", "y = df_combined['GDP_calculated_2021'] \n", "\n", "# Agregar una constante al modelo\n", "X = sm.add_constant(X)\n", "\n", "# Modelo OLS para obtener los residuos\n", "model_ols = sm.OLS(y, X).fit()\n", "residuos = model_ols.resid\n", "\n", "# Calcular los pesos como el inverso de los residuos al cuadrado\n", "pesos = 1.0 / (residuos ** 2)\n", "\n", "# Aplicar WLS con los pesos obtenidos\n", "model_wls = sm.WLS(y, X, weights=pesos).fit()\n", "\n", "print(model_wls.summary())" ] }, { "cell_type": "markdown", "id": "4740eaf2", "metadata": {}, "source": [ "### Normalidad de los Residuos" ] }, { "cell_type": "code", "execution_count": 8, "id": "715a37f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shapiro-Wilk Test:\n", "Statistic: 0.2418427268818486\n", "p-value: 4.245462941888389e-30\n" ] } ], "source": [ "import scipy.stats as stats\n", "\n", "residuos = model_wls.resid\n", "\n", "# Prueba de Shapiro-Wilk\n", "shapiro_test = stats.shapiro(residuos)\n", "\n", "print(\"Shapiro-Wilk Test:\")\n", "print(\"Statistic:\", shapiro_test.statistic)\n", "print(\"p-value:\", shapiro_test.pvalue)" ] }, { "cell_type": "markdown", "id": "21ce23e6", "metadata": {}, "source": [ "### Ausencia de Multicolinealidad" ] }, { "cell_type": "code", "execution_count": 9, "id": "f7dd008c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " feature VIF\n", "0 const 1.037174\n", "1 2020_gov 16.410593\n", "2 2020 1.004569\n", "3 2020_con 16.409976\n", "4 2020_trade 1.004691\n" ] } ], "source": [ "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", "\n", "# VIF para cada variable en el modelo\n", "vif_data = pd.DataFrame()\n", "vif_data[\"feature\"] = X.columns\n", "vif_data[\"VIF\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", "\n", "print(vif_data)" ] }, { "cell_type": "markdown", "id": "0fed85b3", "metadata": {}, "source": [ "### Regresión Robusta" ] }, { "cell_type": "code", "execution_count": 10, "id": "d3cb156e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Robust linear Model Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 No. Observations: 239\n", "Model: RLM Df Residuals: 234\n", "Method: IRLS Df Model: 4\n", "Norm: HuberT \n", "Scale Est.: mad \n", "Cov Type: H1 \n", "Date: Sun, 12 May 2024 \n", "Time: 20:59:27 \n", "No. Iterations: 50 \n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0776 0.001 -153.867 0.000 -0.079 -0.077\n", "2020_gov 1.1230 0.002 590.301 0.000 1.119 1.127\n", "2020 2.8207 0.001 5098.021 0.000 2.820 2.822\n", "2020_con 0.8751 0.002 460.001 0.000 0.871 0.879\n", "2020_trade 0.1813 0.001 229.744 0.000 0.180 0.183\n", "==============================================================================\n", "\n", "If the model instance has been used for another fit with different fit parameters, then the fit options might not be the correct ones anymore .\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import statsmodels.api as sm\n", "\n", "X = df_combined[['2020_gov', '2020', '2020_con', '2020_trade']] # Predictores \n", "y = df_combined['GDP_calculated_2021'] # La variable objetivo\n", "\n", "# Agrega una constante al modelo\n", "X = sm.add_constant(X)\n", "\n", "# Crear y ajustar un modelo de regresión lineal robusta\n", "model_robust = sm.RLM(y, X).fit()\n", "\n", "# Resumen del modelo\n", "print(model_robust.summary())\n", "\n", "# Visualizar los residuos y verificar la mejora\n", "import matplotlib.pyplot as plt\n", "\n", "plt.figure(figsize=(20, 8))\n", "plt.scatter(model_robust.fittedvalues, model_robust.resid)\n", "plt.axhline(y=0, color='r', linestyle='--')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.title('Residuos vs Valores Ajustados en Regresión Robusta')\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "c622c224", "metadata": {}, "source": [ "### Diagnóstico Visual de los Residuos" ] }, { "cell_type": "code", "execution_count": 11, "id": "651dcb0e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shapiro-Wilk Test - Statistic: 0.33540911798629547, p-value: 1.3335239077225291e-28\n", "Breusch-Pagan Test - p-value: 4.210590527338177e-14\n", " feature VIF\n", "0 const 1.037174\n", "1 2020_gov 16.410593\n", "2 2020 1.004569\n", "3 2020_con 16.409976\n", "4 2020_trade 1.004691\n", " OLS Regression Results \n", "===============================================================================\n", "Dep. Variable: GDP_calculated_2021 R-squared: 0.997\n", "Model: OLS Adj. R-squared: 0.997\n", "Method: Least Squares F-statistic: 1.725e+04\n", "Date: Sun, 12 May 2024 Prob (F-statistic): 8.71e-288\n", "Time: 21:01:57 Log-Likelihood: 56.088\n", "No. Observations: 239 AIC: -102.2\n", "Df Residuals: 234 BIC: -84.79\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.0314 0.013 -2.464 0.014 -0.056 -0.006\n", "2020_gov 1.1131 0.048 23.157 0.000 1.018 1.208\n", "2020 2.8144 0.014 201.307 0.000 2.787 2.842\n", "2020_con 0.8821 0.048 18.350 0.000 0.787 0.977\n", "2020_trade 0.4075 0.020 20.440 0.000 0.368 0.447\n", "==============================================================================\n", "Omnibus: 269.675 Durbin-Watson: 2.034\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 47503.892\n", "Skew: 4.021 Prob(JB): 0.00\n", "Kurtosis: 71.597 Cond. No. 7.98\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import statsmodels.api as sm\n", "from scipy.stats import shapiro\n", "import statsmodels.stats.diagnostic as sms\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", "\n", "# Prueba de Normalidad de los Residuos con Shapiro-Wilk\n", "stat, p_value = shapiro(model_ols.resid)\n", "print(f'Shapiro-Wilk Test - Statistic: {stat}, p-value: {p_value}')\n", "\n", "# Verificación de Homocedasticidad con Breusch-Pagan\n", "_, p_value, _, _ = sms.het_breuschpagan(model_ols.resid, model_ols.model.exog)\n", "print(f'Breusch-Pagan Test - p-value: {p_value}')\n", "\n", "# Calculando VIF para cada predictor\n", "vif_data = pd.DataFrame()\n", "vif_data[\"feature\"] = X.columns\n", "vif_data[\"VIF\"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n", "print(vif_data)\n", "\n", "# Resumen del modelo para ajuste\n", "print(model_ols.summary())\n", "\n", "# Diagnóstico visual de residuos\n", "plt.figure(figsize=(20, 8))\n", "plt.scatter(model_ols.fittedvalues, model_ols.resid)\n", "plt.axhline(y=0, color='red', linestyle='--')\n", "plt.xlabel('Valores Ajustados')\n", "plt.ylabel('Residuos')\n", "plt.title('Residuos vs Valores Ajustados')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "9aa78e11", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "6e06a067", "metadata": {}, "source": [ "## Guardar los DataFrames como CSV" ] }, { "cell_type": "code", "execution_count": 12, "id": "def5e36d", "metadata": {}, "outputs": [], "source": [ "# Guardar el DataFrame como CSV\n", "df_combined.to_csv('df_combined.csv', index=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "3cf71b29", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 13, "id": "be01121b", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array(['1960_gdp', '1961_gdp', '1962_gdp', '1963_gdp', '1964_gdp',\n", " '1965_gdp', '1966_gdp', '1967_gdp', '1968_gdp', '1969_gdp',\n", " '1970_gdp', '1971_gdp', '1972_gdp', '1973_gdp', '1974_gdp',\n", " '1975_gdp', '1976_gdp', '1977_gdp', '1978_gdp', '1979_gdp',\n", " '1980_gdp', '1981_gdp', '1982_gdp', '1983_gdp', '1984_gdp',\n", " '1985_gdp', '1986_gdp', '1987_gdp', '1988_gdp', '1989_gdp',\n", " '1990_gdp', '1991_gdp', '1992_gdp', '1993_gdp', '1994_gdp',\n", " '1995_gdp', '1996_gdp', '1997_gdp', '1998_gdp', '1999_gdp',\n", " '2000_gdp', '2001_gdp', '2002_gdp', '2003_gdp', '2004_gdp',\n", " '2005_gdp', '2006_gdp', '2007_gdp', '2008_gdp', '2009_gdp',\n", " '2010_gdp', '2011_gdp', '2012_gdp', '2013_gdp', '2014_gdp',\n", " '2015_gdp', '2016_gdp', '2017_gdp', '2018_gdp', '2019_gdp',\n", " '2020_gdp', '2021_gdp', '2022_gdp', '1960_gov', '1961_gov',\n", " '1962_gov', '1963_gov', '1964_gov', '1965_gov', '1966_gov',\n", " '1967_gov', '1968_gov', '1969_gov', '1970_gov', '1971_gov',\n", " '1972_gov', '1973_gov', '1974_gov', '1975_gov', '1976_gov',\n", " '1977_gov', '1978_gov', '1979_gov', '1980_gov', '1981_gov',\n", " '1982_gov', '1983_gov', '1984_gov', '1985_gov', '1986_gov',\n", " '1987_gov', '1988_gov', '1989_gov', '1990_gov', '1991_gov',\n", " '1992_gov', '1993_gov', '1994_gov', '1995_gov', '1996_gov',\n", " '1997_gov', '1998_gov', '1999_gov', '2000_gov', '2001_gov',\n", " '2002_gov', '2003_gov', '2004_gov', '2005_gov', '2006_gov',\n", " '2007_gov', '2008_gov', '2009_gov', '2010_gov', '2011_gov',\n", " '2012_gov', '2013_gov', '2014_gov', '2015_gov', '2016_gov',\n", " '2017_gov', '2018_gov', '2019_gov', '2020_gov', '2021_gov',\n", " '2022_gov', '1960', '1961', '1962', '1963', '1964', '1965', '1966',\n", " '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',\n", " '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982',\n", " '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990',\n", " '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',\n", " '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006',\n", " '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',\n", " '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022',\n", " '1960_con', '1961_con', '1962_con', '1963_con', '1964_con',\n", " '1965_con', '1966_con', '1967_con', '1968_con', '1969_con',\n", " '1970_con', '1971_con', '1972_con', '1973_con', '1974_con',\n", " '1975_con', '1976_con', '1977_con', '1978_con', '1979_con',\n", " '1980_con', '1981_con', '1982_con', '1983_con', '1984_con',\n", " '1985_con', '1986_con', '1987_con', '1988_con', '1989_con',\n", " '1990_con', '1991_con', '1992_con', '1993_con', '1994_con',\n", " '1995_con', '1996_con', '1997_con', '1998_con', '1999_con',\n", " '2000_con', '2001_con', '2002_con', '2003_con', '2004_con',\n", " '2005_con', '2006_con', '2007_con', '2008_con', '2009_con',\n", " '2010_con', '2011_con', '2012_con', '2013_con', '2014_con',\n", " '2015_con', '2016_con', '2017_con', '2018_con', '2019_con',\n", " '2020_con', '2021_con', '2022_con', '1960_trade', '1961_trade',\n", " '1962_trade', '1963_trade', '1964_trade', '1965_trade',\n", " '1966_trade', '1967_trade', '1968_trade', '1969_trade',\n", " '1970_trade', '1971_trade', '1972_trade', '1973_trade',\n", " '1974_trade', '1975_trade', '1976_trade', '1977_trade',\n", " '1978_trade', '1979_trade', '1980_trade', '1981_trade',\n", " '1982_trade', '1983_trade', '1984_trade', '1985_trade',\n", " '1986_trade', '1987_trade', '1988_trade', '1989_trade',\n", " '1990_trade', '1991_trade', '1992_trade', '1993_trade',\n", " '1994_trade', '1995_trade', '1996_trade', '1997_trade',\n", " '1998_trade', '1999_trade', '2000_trade', '2001_trade',\n", " '2002_trade', '2003_trade', '2004_trade', '2005_trade',\n", " '2006_trade', '2007_trade', '2008_trade', '2009_trade',\n", " '2010_trade', '2011_trade', '2012_trade', '2013_trade',\n", " '2014_trade', '2015_trade', '2016_trade', '2017_trade',\n", " '2018_trade', '2019_trade', '2020_trade', 'GDP_calculated_1960',\n", " 'GDP_calculated_1961', 'GDP_calculated_1962',\n", " 'GDP_calculated_1963', 'GDP_calculated_1964',\n", " 'GDP_calculated_1965', 'GDP_calculated_1966',\n", " 'GDP_calculated_1967', 'GDP_calculated_1968',\n", " 'GDP_calculated_1969', 'GDP_calculated_1970',\n", " 'GDP_calculated_1971', 'GDP_calculated_1972',\n", " 'GDP_calculated_1973', 'GDP_calculated_1974',\n", " 'GDP_calculated_1975', 'GDP_calculated_1976',\n", " 'GDP_calculated_1977', 'GDP_calculated_1978',\n", " 'GDP_calculated_1979', 'GDP_calculated_1980',\n", " 'GDP_calculated_1981', 'GDP_calculated_1982',\n", " 'GDP_calculated_1983', 'GDP_calculated_1984',\n", " 'GDP_calculated_1985', 'GDP_calculated_1986',\n", " 'GDP_calculated_1987', 'GDP_calculated_1988',\n", " 'GDP_calculated_1989', 'GDP_calculated_1990',\n", " 'GDP_calculated_1991', 'GDP_calculated_1992',\n", " 'GDP_calculated_1993', 'GDP_calculated_1994',\n", " 'GDP_calculated_1995', 'GDP_calculated_1996',\n", " 'GDP_calculated_1997', 'GDP_calculated_1998',\n", " 'GDP_calculated_1999', 'GDP_calculated_2000',\n", " 'GDP_calculated_2001', 'GDP_calculated_2002',\n", " 'GDP_calculated_2003', 'GDP_calculated_2004',\n", " 'GDP_calculated_2005', 'GDP_calculated_2006',\n", " 'GDP_calculated_2007', 'GDP_calculated_2008',\n", " 'GDP_calculated_2009', 'GDP_calculated_2010',\n", " 'GDP_calculated_2011', 'GDP_calculated_2012',\n", " 'GDP_calculated_2013', 'GDP_calculated_2014',\n", " 'GDP_calculated_2015', 'GDP_calculated_2016',\n", " 'GDP_calculated_2017', 'GDP_calculated_2018',\n", " 'GDP_calculated_2019', 'GDP_calculated_2020',\n", " 'GDP_calculated_2021', 'GDP_calculated_2022'], dtype=object)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Detalles de las columnas\n", "df_combined.columns.values[:376]" ] }, { "cell_type": "code", "execution_count": 14, "id": "3b00e818", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[-0.20187388, -0.20098171, -0.20032654, ..., -0.97653872,\n", " -0.92574843, -0.92549705],\n", " [-0.09491353, -0.09825918, -0.09439167, ..., -0.75671896,\n", " -0.59980343, -0.59365105],\n", " [-0.20187388, -0.20098171, -0.20032654, ..., -0.97248442,\n", " -0.9237053 , -0.9187167 ],\n", " ...,\n", " [-0.16960274, -0.16843671, -0.16711353, ..., -0.79129927,\n", " -0.85280902, -0.85958157],\n", " [-0.22148338, -0.22037191, -0.21961394, ..., -0.95118224,\n", " -0.92343036, -0.92314575],\n", " [-0.22099433, -0.2197217 , -0.21887512, ..., -0.98686413,\n", " -0.95003866, -0.95714594]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_combined.values[:376]" ] }, { "cell_type": "code", "execution_count": 15, "id": "1a49bd46", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", "1964_gdp 0\n", " ..\n", "GDP_calculated_2018 0\n", "GDP_calculated_2019 0\n", "GDP_calculated_2020 0\n", "GDP_calculated_2021 0\n", "GDP_calculated_2022 0\n", "Length: 376, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_combined.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 16, "id": "0be90489", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", "1964_gdp 0\n", " ..\n", "GDP_calculated_2018 0\n", "GDP_calculated_2019 0\n", "GDP_calculated_2020 0\n", "GDP_calculated_2021 0\n", "GDP_calculated_2022 0\n", "Length: 376, dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_combined.iloc[:376].isnull().sum()" ] }, { "cell_type": "code", "execution_count": 17, "id": "d6c2aacf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1960_gdp 0\n", "1961_gdp 0\n", "1962_gdp 0\n", "1963_gdp 0\n", "1964_gdp 0\n", " ..\n", "GDP_calculated_2018 0\n", "GDP_calculated_2019 0\n", "GDP_calculated_2020 0\n", "GDP_calculated_2021 0\n", "GDP_calculated_2022 0\n", "Length: 376, dtype: int64\n", "1960_gdp float64\n", "1961_gdp float64\n", "1962_gdp float64\n", "1963_gdp float64\n", "1964_gdp float64\n", " ... \n", "GDP_calculated_2018 float64\n", "GDP_calculated_2019 float64\n", "GDP_calculated_2020 float64\n", "GDP_calculated_2021 float64\n", "GDP_calculated_2022 float64\n", "Length: 376, dtype: object\n" ] } ], "source": [ "# Verificar NaNs en las primeras 376 filas y los tipos de datos\n", "print(df_combined.iloc[:376].isnull().sum())\n", "print(df_combined.iloc[:376].dtypes)" ] }, { "cell_type": "code", "execution_count": null, "id": "8387b391", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }