diff --git a/python/3_metric comparison.ipynb b/python/3_metric comparison.ipynb new file mode 100644 index 0000000..6fe0bfc --- /dev/null +++ b/python/3_metric comparison.ipynb @@ -0,0 +1,2469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import statsmodels.formula.api as smf\n", + "import scipy.io as sio\n", + "from scipy.stats import entropy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load region-product data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearregionprodexportregionsumprodsumyearsumrcabinrcapivec_ppivec_mpivec_cpcipci_mpci_ppci_cpcil_mpcil_ppcil_c
01962ABW0010.0000000.000000e+006.782654e+081.234760e+110.00000000.0083950.0027450.003898-0.058055-2.307477-0.919122-1.095074-0.006335-0.007716-0.004269
11962AFG0010.0000008.181017e+076.782654e+081.234760e+110.00000000.0083950.0027450.003898-0.058055-2.307477-0.919122-1.095074-0.006335-0.007716-0.004269
21962AGO0019682.8486331.311828e+086.782654e+081.234760e+110.01343700.0083950.0027450.003898-0.058055-2.307477-0.919122-1.095074-0.006335-0.007716-0.004269
31962AIA0010.0000000.000000e+006.782654e+081.234760e+110.00000000.0083950.0027450.003898-0.058055-2.307477-0.919122-1.095074-0.006335-0.007716-0.004269
41962ALB0010.0000002.301677e+066.782654e+081.234760e+110.00000000.0083950.0027450.003898-0.058055-2.307477-0.919122-1.095074-0.006335-0.007716-0.004269
\n", + "
" + ], + "text/plain": [ + " year region prod export regionsum prodsum yearsum \\\n", + "0 1962 ABW 001 0.000000 0.000000e+00 6.782654e+08 1.234760e+11 \n", + "1 1962 AFG 001 0.000000 8.181017e+07 6.782654e+08 1.234760e+11 \n", + "2 1962 AGO 001 9682.848633 1.311828e+08 6.782654e+08 1.234760e+11 \n", + "3 1962 AIA 001 0.000000 0.000000e+00 6.782654e+08 1.234760e+11 \n", + "4 1962 ALB 001 0.000000 2.301677e+06 6.782654e+08 1.234760e+11 \n", + "\n", + " rca binrca pivec_p pivec_m pivec_c pci pci_m \\\n", + "0 0.000000 0 0.008395 0.002745 0.003898 -0.058055 -2.307477 \n", + "1 0.000000 0 0.008395 0.002745 0.003898 -0.058055 -2.307477 \n", + "2 0.013437 0 0.008395 0.002745 0.003898 -0.058055 -2.307477 \n", + "3 0.000000 0 0.008395 0.002745 0.003898 -0.058055 -2.307477 \n", + "4 0.000000 0 0.008395 0.002745 0.003898 -0.058055 -2.307477 \n", + "\n", + " pci_p pci_c pcil_m pcil_p pcil_c \n", + "0 -0.919122 -1.095074 -0.006335 -0.007716 -0.004269 \n", + "1 -0.919122 -1.095074 -0.006335 -0.007716 -0.004269 \n", + "2 -0.919122 -1.095074 -0.006335 -0.007716 -0.004269 \n", + "3 -0.919122 -1.095074 -0.006335 -0.007716 -0.004269 \n", + "4 -0.919122 -1.095074 -0.006335 -0.007716 -0.004269 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trade = pd.read_parquet('data/cleaned.parquet')\n", + "trade.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "R0 = 0.115" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 3.335355e+06\n", + "mean 3.586094e-01\n", + "std 2.806920e-01\n", + "min 0.000000e+00\n", + "25% 1.172252e-01\n", + "50% 3.212149e-01\n", + "75% 5.881455e-01\n", + "max 1.005534e+00\n", + "Name: avgrca_p, dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# transform RCA with R0 and 
generate different projections\n", + "df = trade.assign(llrca = np.log(1+trade.rca/R0)/np.log(1+1/R0))\n", + "df['bin'] = df['export']>0\n", + "df['avgrca_part_p'] = df.llrca*df.pivec_p\n", + "df['avgrca_part_m'] = df.llrca*df.pivec_m\n", + "df['avgrca_part_c'] = df.llrca*df.pivec_c\n", + "# per-(year, region) totals; the 'sum' alias replaces the builtin sum, whose use in transform is deprecated by pandas\n", + "df['avgrca_p'] = df.groupby(['year','region'])['avgrca_part_p'].transform('sum')\n", + "df['avgrca_m'] = df.groupby(['year','region'])['avgrca_part_m'].transform('sum')\n", + "df['avgrca_c'] = df.groupby(['year','region'])['avgrca_part_c'].transform('sum')\n", + "df['diversity'] = df.groupby(['year','region'])['binrca'].transform('sum')\n", + "df.avgrca_p.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearregionprodexportregionsumprodsumyearsumrcabinrcapivec_p...rct_prct_mrct_crct_demean_prct_demean_mrct_demean_cproj_pproj_mproj_ceci_part
01962ABW0010.0000000.000000e+006.782654e+081.234760e+110.00000000.008395...0.0000000.0000000.000000-0.766895-1.179861-1.0665100.0000000.0000000.0000000.0
11962AFG0010.0000008.181017e+076.782654e+081.234760e+110.00000000.008395...0.0000000.0000000.000000-0.766895-1.179861-1.066510-0.000000-0.000000-0.000000-0.0
21962AGO0019682.8486331.311828e+086.782654e+081.234760e+110.01343700.008395...0.1176280.2688550.202484-0.649267-0.911006-0.864026-0.000908-0.001703-0.000864-0.0
31962AIA0010.0000000.000000e+006.782654e+081.234760e+110.00000000.008395...0.0000000.0000000.000000-0.766895-1.179861-1.0665100.0000000.0000000.0000000.0
41962ALB0010.0000002.301677e+066.782654e+081.234760e+110.00000000.008395...0.0000000.0000000.000000-0.766895-1.179861-1.066510-0.000000-0.000000-0.000000-0.0
\n", + "

5 rows × 38 columns

\n", + "
" + ], + "text/plain": [ + " year region prod export regionsum prodsum yearsum \\\n", + "0 1962 ABW 001 0.000000 0.000000e+00 6.782654e+08 1.234760e+11 \n", + "1 1962 AFG 001 0.000000 8.181017e+07 6.782654e+08 1.234760e+11 \n", + "2 1962 AGO 001 9682.848633 1.311828e+08 6.782654e+08 1.234760e+11 \n", + "3 1962 AIA 001 0.000000 0.000000e+00 6.782654e+08 1.234760e+11 \n", + "4 1962 ALB 001 0.000000 2.301677e+06 6.782654e+08 1.234760e+11 \n", + "\n", + " rca binrca pivec_p ... rct_p rct_m rct_c \\\n", + "0 0.000000 0 0.008395 ... 0.000000 0.000000 0.000000 \n", + "1 0.000000 0 0.008395 ... 0.000000 0.000000 0.000000 \n", + "2 0.013437 0 0.008395 ... 0.117628 0.268855 0.202484 \n", + "3 0.000000 0 0.008395 ... 0.000000 0.000000 0.000000 \n", + "4 0.000000 0 0.008395 ... 0.000000 0.000000 0.000000 \n", + "\n", + " rct_demean_p rct_demean_m rct_demean_c proj_p proj_m proj_c \\\n", + "0 -0.766895 -1.179861 -1.066510 0.000000 0.000000 0.000000 \n", + "1 -0.766895 -1.179861 -1.066510 -0.000000 -0.000000 -0.000000 \n", + "2 -0.649267 -0.911006 -0.864026 -0.000908 -0.001703 -0.000864 \n", + "3 -0.766895 -1.179861 -1.066510 0.000000 0.000000 0.000000 \n", + "4 -0.766895 -1.179861 -1.066510 -0.000000 -0.000000 -0.000000 \n", + "\n", + " eci_part \n", + "0 0.0 \n", + "1 -0.0 \n", + "2 -0.0 \n", + "3 0.0 \n", + "4 -0.0 \n", + "\n", + "[5 rows x 38 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['rct_p'] = np.where(df.avgrca_p>0,df.llrca/df.avgrca_p,0)\n", + "df['rct_m'] = np.where(df.avgrca_m>0,df.llrca/df.avgrca_m,0)\n", + "df['rct_c'] = np.where(df.avgrca_c>0,df.llrca/df.avgrca_c,0)\n", + "df['rct_demean_p'] = df.rct_p-df.groupby('prod').rct_p.transform('mean')\n", + "df['rct_demean_m'] = df.rct_m-df.groupby('prod').rct_m.transform('mean')\n", + "df['rct_demean_c'] = df.rct_c-df.groupby('prod').rct_c.transform('mean')\n", + "df['proj_p'] = np.where(df.avgrca_p>0, df.pci_p * df.llrca * df.pivec_p / 
df.avgrca_p,0)\n", + "df['proj_m'] = np.where(df.avgrca_m>0, df.pci_m * df.llrca * df.pivec_m / df.avgrca_m,0)\n", + "df['proj_c'] = np.where(df.avgrca_c>0, df.pci_c * df.llrca * df.pivec_c / df.avgrca_c,0)\n", + "df['eci_part'] = np.where(df.diversity>0,df.pci * df.binrca/df.diversity,0)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearregionavgrca_pavgrca_mavgrca_cproj_pproj_mproj_cecibin
01962ABW0.0000000.0000000.0000000.0000000.0000000.0000000.0000000
11962AFG0.2294410.0752560.101934-1.007255-1.725521-1.162990-0.06701539
21962AGO0.4135600.1809380.240246-0.870960-1.279847-0.896124-0.060397134
31962AIA0.0000000.0000000.0000000.0000000.0000000.0000000.0000000
41962ALB0.2119570.0757340.103582-0.996652-1.301794-0.791606-0.07911924
\n", + "
" + ], + "text/plain": [ + " year region avgrca_p avgrca_m avgrca_c proj_p proj_m proj_c \\\n", + "0 1962 ABW 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "1 1962 AFG 0.229441 0.075256 0.101934 -1.007255 -1.725521 -1.162990 \n", + "2 1962 AGO 0.413560 0.180938 0.240246 -0.870960 -1.279847 -0.896124 \n", + "3 1962 AIA 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "4 1962 ALB 0.211957 0.075734 0.103582 -0.996652 -1.301794 -0.791606 \n", + "\n", + " eci bin \n", + "0 0.000000 0 \n", + "1 -0.067015 39 \n", + "2 -0.060397 134 \n", + "3 0.000000 0 \n", + "4 -0.079119 24 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cntryagg = df.groupby(['year','region'])[['avgrca_part_p','avgrca_part_m','avgrca_part_c','proj_p','proj_m','proj_c','eci_part','bin']].sum().reset_index().rename(columns={'avgrca_part_p':'avgrca_p','avgrca_part_m':'avgrca_m','avgrca_part_c':'avgrca_c','eci_part':'eci'})\n", + "cntryagg.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## calculate metrics for 2016" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### prepare mcp of 2016" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(249, 235)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcpdf = trade[trade.year==2016].pivot(index='region',columns='prod',values='binrca').fillna(0)\n", + "mcpdf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(233, 235)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mcp = mcpdf.values\n", + "ubiquity = mcp.sum(axis=0)\n", + "diversity = mcp.sum(axis=1)\n", + "ubiquity.min(),diversity.min()\n", + "mcp = mcp[diversity>0,:]\n", 
+ "mcp.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "kp = mcp.sum(axis=0)\n", + "kc = mcp.sum(axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### fitness" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "qp = np.ones(mcp.shape[1])\n", + "fc = np.ones(mcp.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(20):\n", + " fc_t = mcp @ qp\n", + " qp_t = 1/(mcp.T @ (1/fc))\n", + " fc = fc_t / fc_t.mean()\n", + " qp = qp_t / qp_t.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ECI" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(233, 233)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mr = np.diag(1/kc) @ mcp @ np.diag(1/kp) @ mcp.T\n", + "mr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "eigvals2,eigvecs2 = np.linalg.eig(mr)\n", + "# np.linalg.eig returns eigenvalues in no guaranteed order: sort the eigenpairs by\n", + "# descending eigenvalue so that column 1 really is the second-largest eigenvector\n", + "order2 = np.argsort(-np.real(eigvals2))\n", + "eigvals2 = np.real(eigvals2[order2])\n", + "eigvecs2 = np.real(eigvecs2[:,order2])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "eci = np.sign(np.corrcoef(kc,eigvecs2[:,1])[0,1])*eigvecs2[:,1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### genepy" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "kp_1 = (np.diag(1/kc) @ mcp).sum(axis=0)\n", + "wcp = np.diag(1/kc) @ mcp @ np.diag(1/kp_1)\n", + "ncc = wcp @ wcp.T\n", + "np.fill_diagonal(ncc, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "eigvals,eigvecs = np.linalg.eig(ncc)\n", + "# sort eigenpairs by descending eigenvalue before taking the two leading ones;\n", + "# the order returned by np.linalg.eig is not guaranteed\n", + "order = np.argsort(-np.real(eigvals))\n", + "eigvals = eigvals[order]\n", + "eigvecs = eigvecs[:,order]\n",
+ "xc1 = np.absolute(eigvecs[:,0])\n", + "xc2 = eigvecs[:,1]\n", + "lambda1 = eigvals[0]\n", + "lambda2 = eigvals[1]\n", + "genepy = np.square(lambda1*np.square(xc1)+lambda2*np.square(xc2))+2*(lambda1**2*np.square(xc1)+lambda2**2*np.square(xc2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### production ability" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from girth import twopl_mml\n", + "estimates = twopl_mml(mcp.T)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['Discrimination', 'Difficulty', 'Ability', 'LatentPDF', 'AIC', 'BIC'])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "estimates.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## fixed effects" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionprodexportrcaregionsumprodsum
13446ABW00196504.7968750.2677352.851261e+081.843904e+10
13447AFG00198623.3203120.1028607.584507e+081.843904e+10
13448AGO0011975.1716310.0000582.698029e+101.843904e+10
13450ALB00172021.7265620.0275572.067362e+091.843904e+10
13451AND00130757.8964840.3030688.028031e+071.843904e+10
\n", + "
" + ], + "text/plain": [ + " region prod export rca regionsum prodsum\n", + "13446 ABW 001 96504.796875 0.267735 2.851261e+08 1.843904e+10\n", + "13447 AFG 001 98623.320312 0.102860 7.584507e+08 1.843904e+10\n", + "13448 AGO 001 1975.171631 0.000058 2.698029e+10 1.843904e+10\n", + "13450 ALB 001 72021.726562 0.027557 2.067362e+09 1.843904e+10\n", + "13451 AND 001 30757.896484 0.303068 8.028031e+07 1.843904e+10" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fedf = trade[(trade.year==2016)&(trade['export']>0)][['region','prod','export','rca','regionsum','prodsum']].copy()\n", + "fedf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionprodexportrcaregionsumprodsumycpregionshareprodshare
13446ABW00196504.7968750.2677352.851261e+081.843904e+10-0.4414693.384636e-045.233722e-06
13447AFG00198623.3203120.1028607.584507e+081.843904e+10-0.8638581.300326e-045.348616e-06
13448AGO0011975.1716310.0000582.698029e+101.843904e+10-2.2779537.320795e-081.071190e-07
13450ALB00172021.7265620.0275572.067362e+091.843904e+10-1.2861053.483751e-053.905937e-06
13451AND00130757.8964840.3030688.028031e+071.843904e+10-0.3774223.831313e-041.668086e-06
\n", + "
" + ], + "text/plain": [ + " region prod export rca regionsum prodsum \\\n", + "13446 ABW 001 96504.796875 0.267735 2.851261e+08 1.843904e+10 \n", + "13447 AFG 001 98623.320312 0.102860 7.584507e+08 1.843904e+10 \n", + "13448 AGO 001 1975.171631 0.000058 2.698029e+10 1.843904e+10 \n", + "13450 ALB 001 72021.726562 0.027557 2.067362e+09 1.843904e+10 \n", + "13451 AND 001 30757.896484 0.303068 8.028031e+07 1.843904e+10 \n", + "\n", + " ycp regionshare prodshare \n", + "13446 -0.441469 3.384636e-04 5.233722e-06 \n", + "13447 -0.863858 1.300326e-04 5.348616e-06 \n", + "13448 -2.277953 7.320795e-08 1.071190e-07 \n", + "13450 -1.286105 3.483751e-05 3.905937e-06 \n", + "13451 -0.377422 3.831313e-04 1.668086e-06 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fedf['ycp'] = -np.log(-np.log(fedf.rca/(fedf.rca+1)))\n", + "fedf['regionshare'] = fedf.export/fedf.regionsum\n", + "fedf['prodshare'] = fedf.export/fedf.prodsum\n", + "fedf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "res = smf.ols(formula='ycp ~ region+prod', data=fedf).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexfevar
0region[T.AFG]-0.343794AFG
1region[T.AGO]-1.024346AGO
2region[T.AIA]0.638461AIA
3region[T.ALB]0.012500ALB
4region[T.AND]0.205859AND
\n", + "
" + ], + "text/plain": [ + " index fe var\n", + "0 region[T.AFG] -0.343794 AFG\n", + "1 region[T.AGO] -1.024346 AGO\n", + "2 region[T.AIA] 0.638461 AIA\n", + "3 region[T.ALB] 0.012500 ALB\n", + "4 region[T.AND] 0.205859 AND" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fecoefdf = pd.DataFrame({'fe':res.params[1:]}).reset_index()\n", + "fecoefdf['var'] = fecoefdf['index'].str[-4:-1]\n", + "fecoefdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionfe
0ABW0.000000
1AFG-0.343794
2AGO-1.024346
3ALB0.012500
4AND0.205859
\n", + "
" + ], + "text/plain": [ + " region fe\n", + "0 ABW 0.000000\n", + "1 AFG -0.343794\n", + "2 AGO -1.024346\n", + "3 ALB 0.012500\n", + "4 AND 0.205859" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gamma_c = fedf[['region']].drop_duplicates().merge(fecoefdf[['var','fe']].rename(columns={'var':'region'}),how='left').fillna(0)\n", + "gamma_c.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### entropy" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionprodexport
13446ABW00196504.796875
13447AFG00198623.320312
13448AGO0011975.171631
13450ALB00172021.726562
13451AND00130757.896484
\n", + "
" + ], + "text/plain": [ + " region prod export\n", + "13446 ABW 001 96504.796875\n", + "13447 AFG 001 98623.320312\n", + "13448 AGO 001 1975.171631\n", + "13450 ALB 001 72021.726562\n", + "13451 AND 001 30757.896484" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmpdf = fedf[['region','prod','export']].copy()\n", + "tmpdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(233, 235)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmpdf.region.nunique(),tmpdf['prod'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionprodexporthchpxcpycpxcprycpr
13446ABW00196504.7968753.2100663.35685202099.239581217089.3866043.205398e-048.516327e-06
13447AFG00198623.3203122.4471903.35685206535.827084297092.4322991.210198e-041.165481e-05
13448AGO0011975.1716310.6608313.356854136.3817929478.3647436.347788e-083.718323e-07
13450ALB00172021.7265623.5208993.35685150827.074332139627.5644053.080489e-055.477532e-06
13451AND00130757.8964843.5302593.3568564412.83430459342.0366463.102073e-042.327964e-06
\n", + "
" + ], + "text/plain": [ + " region prod export hc hp xcp \\\n", + "13446 ABW 001 96504.796875 3.210066 3.35685 202099.239581 \n", + "13447 AFG 001 98623.320312 2.447190 3.35685 206535.827084 \n", + "13448 AGO 001 1975.171631 0.660831 3.35685 4136.381792 \n", + "13450 ALB 001 72021.726562 3.520899 3.35685 150827.074332 \n", + "13451 AND 001 30757.896484 3.530259 3.35685 64412.834304 \n", + "\n", + " ycp xcpr ycpr \n", + "13446 217089.386604 3.205398e-04 8.516327e-06 \n", + "13447 297092.432299 1.210198e-04 1.165481e-05 \n", + "13448 9478.364743 6.347788e-08 3.718323e-07 \n", + "13450 139627.564405 3.080489e-05 5.477532e-06 \n", + "13451 59342.036646 3.102073e-04 2.327964e-06 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# log(number of regions) and log(number of products) bound the two entropies from above;\n", + "# derive the counts from the data instead of hard-coding 233 / 235\n", + "n_region = tmpdf['region'].nunique()\n", + "n_prod = tmpdf['prod'].nunique()\n", + "tmpdf['hc'] = tmpdf.groupby('region')['export'].transform(entropy)\n", + "tmpdf['hp'] = tmpdf.groupby('prod')['export'].transform(entropy)\n", + "tmpdf['xcp'] = tmpdf.export*(np.log(n_region) - tmpdf.hp)\n", + "tmpdf['ycp'] = tmpdf.export*(np.log(n_prod) - tmpdf.hc)\n", + "tmpdf['xcpr'] = tmpdf.xcp/tmpdf.groupby('region')['xcp'].transform('sum')\n", + "tmpdf['ycpr'] = tmpdf.ycp/tmpdf.groupby('prod')['ycp'].transform('sum')\n", + "tmpdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# repeat the update, feeding the normalised weights back into the entropies\n", + "for i in range(25):\n", + " tmpdf['hc'] = tmpdf.groupby('region')['xcpr'].transform(entropy)\n", + " tmpdf['hp'] = tmpdf.groupby('prod')['ycpr'].transform(entropy)\n", + " tmpdf['xcp'] = tmpdf.export*(np.log(n_region) - tmpdf.hp)\n", + " tmpdf['ycp'] = tmpdf.export*(np.log(n_prod) - tmpdf.hc)\n", + " tmpdf['xcpr'] = tmpdf.xcp/tmpdf.groupby('region')['xcp'].transform('sum')\n", + " tmpdf['ycpr'] = tmpdf.ycp/tmpdf.groupby('prod')['ycp'].transform('sum')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionprodexporthchpxcpycpxcprycpr
13446ABW00196504.7968753.3267463.456288192503.017473205829.2078773.184944e-047.666431e-06
13447AFG00198623.3203122.5974913.456288196728.943723282269.2512691.268189e-041.051356e-05
13448AGO0011975.1716310.6072313.4562883939.9751239584.2327245.567062e-083.569797e-07
13450ALB00172021.7265623.4130153.456288143665.394218147397.5655962.989973e-055.490053e-06
13451AND00130757.8964843.4416013.45628861354.33756862068.9645832.951840e-042.311856e-06
\n", + "
" + ], + "text/plain": [ + " region prod export hc hp xcp \\\n", + "13446 ABW 001 96504.796875 3.326746 3.456288 192503.017473 \n", + "13447 AFG 001 98623.320312 2.597491 3.456288 196728.943723 \n", + "13448 AGO 001 1975.171631 0.607231 3.456288 3939.975123 \n", + "13450 ALB 001 72021.726562 3.413015 3.456288 143665.394218 \n", + "13451 AND 001 30757.896484 3.441601 3.456288 61354.337568 \n", + "\n", + " ycp xcpr ycpr \n", + "13446 205829.207877 3.184944e-04 7.666431e-06 \n", + "13447 282269.251269 1.268189e-04 1.051356e-05 \n", + "13448 9584.232724 5.567062e-08 3.569797e-07 \n", + "13450 147397.565596 2.989973e-05 5.490053e-06 \n", + "13451 62068.964583 2.951840e-04 2.311856e-06 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmpdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionhc
13446ABW3.326746
13447AFG2.597491
13448AGO0.607231
70221AIA2.766955
13450ALB3.413015
\n", + "
" + ], + "text/plain": [ + " region hc\n", + "13446 ABW 3.326746\n", + "13447 AFG 2.597491\n", + "13448 AGO 0.607231\n", + "70221 AIA 2.766955\n", + "13450 ALB 3.413015" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "regiondf = tmpdf[['region','hc']].drop_duplicates().sort_values('region')\n", + "regiondf.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## combine metrics into a result dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(233, 8)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resdf = pd.DataFrame(\n", + " {\n", + " \"fitness2016\": fc,\n", + " \"eci2016\": eci,\n", + " \"kc\": kc,\n", + " \"xc1\": xc1,\n", + " \"xc2\": xc2,\n", + " \"genepy\": genepy,\n", + " \"ability\": estimates[\"Ability\"],\n", + " },\n", + " index=mcpdf.index[diversity > 0],\n", + ").reset_index()\n", + "resdf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
regionfitness2016eci2016kcxc1xc2genepyabilityfehcavgrca_pavgrca_mavgrca_cproj_pproj_mproj_cecibin
0ABW1.0040320.024499330.0781760.0217910.0150890.3131060.0000003.3267460.4584870.4754850.4665180.1059680.1172980.1527330.021736192
1AFG0.365737-0.096063250.042223-0.1114360.007655-0.452356-0.3437942.5974910.5116070.2346320.286582-0.815917-1.072287-0.849485-0.050744164
2AGO0.070533-0.10099570.030913-0.0707800.003656-1.378027-1.0243460.6072310.1601690.1169780.135088-0.437043-0.387758-0.384177-0.039769198
3AIA0.6044280.060996240.0649130.0258580.0104840.2215000.6384612.7669550.2811580.3049790.2660340.2370160.1577590.3691430.03713067
4ALB0.733577-0.039531470.045679-0.0604420.0060660.6504430.0125003.4130150.6599420.5312380.513222-0.114121-0.304722-0.2245670.001374206
\n", + "
" + ], + "text/plain": [ + " region fitness2016 eci2016 kc xc1 xc2 genepy ability \\\n", + "0 ABW 1.004032 0.024499 33 0.078176 0.021791 0.015089 0.313106 \n", + "1 AFG 0.365737 -0.096063 25 0.042223 -0.111436 0.007655 -0.452356 \n", + "2 AGO 0.070533 -0.100995 7 0.030913 -0.070780 0.003656 -1.378027 \n", + "3 AIA 0.604428 0.060996 24 0.064913 0.025858 0.010484 0.221500 \n", + "4 ALB 0.733577 -0.039531 47 0.045679 -0.060442 0.006066 0.650443 \n", + "\n", + " fe hc avgrca_p avgrca_m avgrca_c proj_p proj_m \\\n", + "0 0.000000 3.326746 0.458487 0.475485 0.466518 0.105968 0.117298 \n", + "1 -0.343794 2.597491 0.511607 0.234632 0.286582 -0.815917 -1.072287 \n", + "2 -1.024346 0.607231 0.160169 0.116978 0.135088 -0.437043 -0.387758 \n", + "3 0.638461 2.766955 0.281158 0.304979 0.266034 0.237016 0.157759 \n", + "4 0.012500 3.413015 0.659942 0.531238 0.513222 -0.114121 -0.304722 \n", + "\n", + " proj_c eci bin \n", + "0 0.152733 0.021736 192 \n", + "1 -0.849485 -0.050744 164 \n", + "2 -0.384177 -0.039769 198 \n", + "3 0.369143 0.037130 67 \n", + "4 -0.224567 0.001374 206 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resdf = resdf.merge(gamma_c).merge(regiondf).merge(cntryagg[(cntryagg.year==2016)&(cntryagg.bin>0)].drop(columns=['year']), how='left')\n", + "resdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kcfitness2016hcabilityxc1avgrca_pavgrca_mavgrca_cbineci2016ecixc2proj_pproj_mproj_cgenepyfe
kc1.0000000.9089070.9227990.9107200.7164080.9317410.9672850.9695240.6980280.4200540.4704440.2254760.4065050.3974270.3784870.6253090.324606
fitness20160.9089071.0000000.7943030.8027750.8661330.7444360.8869490.8685110.5510760.5641060.5836910.5006410.5048740.4811090.4996100.8451890.358421
hc0.9227990.7943031.0000000.9588690.7222830.9257220.9428710.9439460.6880220.4904950.5435430.2212560.4980290.4781790.4575010.5935820.354024
ability0.9107200.8027750.9588691.0000000.7794770.8954950.9273370.9241930.6840270.5515580.6070990.2576790.5425580.5366440.5174180.6448470.362611
xc10.7164080.8661330.7222830.7794771.0000000.5640920.7613270.7265980.3963210.8016050.8036420.6842780.7233490.6999430.7424450.9478700.485798
avgrca_p0.9317410.7444360.9257220.8954950.5640921.0000000.9375450.9614120.8080570.2503760.305705-0.0065030.2613370.2748730.2264140.4540050.212463
avgrca_m0.9672850.8869490.9428710.9273370.7613270.9375451.0000000.9955040.7495860.4740070.5387530.2646130.4988640.4921370.4721840.6550150.314116
avgrca_c0.9695240.8685110.9439460.9241930.7265980.9614120.9955041.0000000.7783430.4178030.4787390.1996340.4388480.4430480.4133910.6232740.288118
bin0.6980280.5510760.6880220.6840270.3963210.8080570.7495860.7783431.000000-0.0123180.112200-0.1666320.1527880.2081700.0959900.348256-0.207158
eci20160.4200540.5641060.4904950.5515580.8016050.2503760.4740070.417803-0.0123181.0000000.9520460.8094810.8567960.8043230.8769130.6964040.680923
eci0.4704440.5836910.5435430.6070990.8036420.3057050.5387530.4787390.1122000.9520461.0000000.7406520.9207740.8758720.9136960.6765500.597208
xc20.2254760.5006410.2212560.2576790.684278-0.0065030.2646130.199634-0.1666320.8094810.7406521.0000000.6430150.6058850.7302210.6949240.492865
proj_p0.4065050.5048740.4980290.5425580.7233490.2613370.4988640.4388480.1527880.8567960.9207740.6430151.0000000.9033560.9177820.6005770.468510
proj_m0.3974270.4811090.4781790.5366440.6999430.2748730.4921370.4430480.2081700.8043230.8758720.6058850.9033561.0000000.9552760.5683720.443293
proj_c0.3784870.4996100.4575010.5174180.7424450.2264140.4721840.4133910.0959900.8769130.9136960.7302210.9177820.9552761.0000000.6230070.539185
genepy0.6253090.8451890.5935820.6448470.9478700.4540050.6550150.6232740.3482560.6964040.6765500.6949240.6005770.5683720.6230071.0000000.381975
fe0.3246060.3584210.3540240.3626110.4857980.2124630.3141160.288118-0.2071580.6809230.5972080.4928650.4685100.4432930.5391850.3819751.000000
\n", + "
" + ], + "text/plain": [ + " kc fitness2016 hc ability xc1 avgrca_p \\\n", + "kc 1.000000 0.908907 0.922799 0.910720 0.716408 0.931741 \n", + "fitness2016 0.908907 1.000000 0.794303 0.802775 0.866133 0.744436 \n", + "hc 0.922799 0.794303 1.000000 0.958869 0.722283 0.925722 \n", + "ability 0.910720 0.802775 0.958869 1.000000 0.779477 0.895495 \n", + "xc1 0.716408 0.866133 0.722283 0.779477 1.000000 0.564092 \n", + "avgrca_p 0.931741 0.744436 0.925722 0.895495 0.564092 1.000000 \n", + "avgrca_m 0.967285 0.886949 0.942871 0.927337 0.761327 0.937545 \n", + "avgrca_c 0.969524 0.868511 0.943946 0.924193 0.726598 0.961412 \n", + "bin 0.698028 0.551076 0.688022 0.684027 0.396321 0.808057 \n", + "eci2016 0.420054 0.564106 0.490495 0.551558 0.801605 0.250376 \n", + "eci 0.470444 0.583691 0.543543 0.607099 0.803642 0.305705 \n", + "xc2 0.225476 0.500641 0.221256 0.257679 0.684278 -0.006503 \n", + "proj_p 0.406505 0.504874 0.498029 0.542558 0.723349 0.261337 \n", + "proj_m 0.397427 0.481109 0.478179 0.536644 0.699943 0.274873 \n", + "proj_c 0.378487 0.499610 0.457501 0.517418 0.742445 0.226414 \n", + "genepy 0.625309 0.845189 0.593582 0.644847 0.947870 0.454005 \n", + "fe 0.324606 0.358421 0.354024 0.362611 0.485798 0.212463 \n", + "\n", + " avgrca_m avgrca_c bin eci2016 eci xc2 \\\n", + "kc 0.967285 0.969524 0.698028 0.420054 0.470444 0.225476 \n", + "fitness2016 0.886949 0.868511 0.551076 0.564106 0.583691 0.500641 \n", + "hc 0.942871 0.943946 0.688022 0.490495 0.543543 0.221256 \n", + "ability 0.927337 0.924193 0.684027 0.551558 0.607099 0.257679 \n", + "xc1 0.761327 0.726598 0.396321 0.801605 0.803642 0.684278 \n", + "avgrca_p 0.937545 0.961412 0.808057 0.250376 0.305705 -0.006503 \n", + "avgrca_m 1.000000 0.995504 0.749586 0.474007 0.538753 0.264613 \n", + "avgrca_c 0.995504 1.000000 0.778343 0.417803 0.478739 0.199634 \n", + "bin 0.749586 0.778343 1.000000 -0.012318 0.112200 -0.166632 \n", + "eci2016 0.474007 0.417803 -0.012318 1.000000 0.952046 0.809481 \n", + "eci 
0.538753 0.478739 0.112200 0.952046 1.000000 0.740652 \n", + "xc2 0.264613 0.199634 -0.166632 0.809481 0.740652 1.000000 \n", + "proj_p 0.498864 0.438848 0.152788 0.856796 0.920774 0.643015 \n", + "proj_m 0.492137 0.443048 0.208170 0.804323 0.875872 0.605885 \n", + "proj_c 0.472184 0.413391 0.095990 0.876913 0.913696 0.730221 \n", + "genepy 0.655015 0.623274 0.348256 0.696404 0.676550 0.694924 \n", + "fe 0.314116 0.288118 -0.207158 0.680923 0.597208 0.492865 \n", + "\n", + " proj_p proj_m proj_c genepy fe \n", + "kc 0.406505 0.397427 0.378487 0.625309 0.324606 \n", + "fitness2016 0.504874 0.481109 0.499610 0.845189 0.358421 \n", + "hc 0.498029 0.478179 0.457501 0.593582 0.354024 \n", + "ability 0.542558 0.536644 0.517418 0.644847 0.362611 \n", + "xc1 0.723349 0.699943 0.742445 0.947870 0.485798 \n", + "avgrca_p 0.261337 0.274873 0.226414 0.454005 0.212463 \n", + "avgrca_m 0.498864 0.492137 0.472184 0.655015 0.314116 \n", + "avgrca_c 0.438848 0.443048 0.413391 0.623274 0.288118 \n", + "bin 0.152788 0.208170 0.095990 0.348256 -0.207158 \n", + "eci2016 0.856796 0.804323 0.876913 0.696404 0.680923 \n", + "eci 0.920774 0.875872 0.913696 0.676550 0.597208 \n", + "xc2 0.643015 0.605885 0.730221 0.694924 0.492865 \n", + "proj_p 1.000000 0.903356 0.917782 0.600577 0.468510 \n", + "proj_m 0.903356 1.000000 0.955276 0.568372 0.443293 \n", + "proj_c 0.917782 0.955276 1.000000 0.623007 0.539185 \n", + "genepy 0.600577 0.568372 0.623007 1.000000 0.381975 \n", + "fe 0.468510 0.443293 0.539185 0.381975 1.000000 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resdf[['kc','fitness2016','hc','ability','xc1','avgrca_p','avgrca_m','avgrca_c','bin','eci2016','eci','xc2','proj_p','proj_m','proj_c','genepy','fe']].corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['region', 'fitness2016', 'eci2016', 'kc', 'xc1', 'xc2', 
'genepy',\n", + " 'ability', 'fe', 'hc', 'avgrca_p', 'avgrca_m', 'avgrca_c', 'proj_p',\n", + " 'proj_m', 'proj_c', 'eci', 'bin'],\n", + " dtype='object')" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resdf.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "sio.savemat('metric2016.mat', {'struct1':resdf.to_dict(\"list\")})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}