Source code for ogusa.transfer_distribution

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from ogusa.utils import MVKDE
from ogusa.constants import CODE_PATH


def _save_current_figure(path):
    """Save the current matplotlib figure to ``path`` and then close it."""
    plt.savefig(path)
    # Each DataFrame.plot() call opens a new figure; close it once written so
    # repeated calls do not accumulate open figures in memory.
    plt.close()


def get_transfer_matrix(
    J=7,
    lambdas=np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01]),
    data_path=None,
    output_path=None,
):
    """
    Compute the matrix representing the distribution of aggregate
    government transfers by age and lifetime income group.

    The share of ``sum_transfers`` falling in each (age, lifetime income
    group) cell is tabulated from PSID observations with
    ``year_data >= 1988`` and then smoothed with ``MVKDE``, which is called
    with fixed dimensions of 80 ages and 7 lifetime income groups.

    Args:
        J (int): number of lifetime income groups
        lambdas (Numpy array): length J array of lifetime income
            group proportions
        data_path (str): path to PSID data; if None, the
            ``psid_lifetime_income.csv.gz`` file under ``CODE_PATH`` is read
        output_path (str): path to save output plots and data; if None,
            nothing is written to disk

    Returns:
        kde_matrix (Numpy array): array that represents the smoothed
            distribution of proportions going to each (s,j).  When
            ``J == 10`` and the first six entries of ``lambdas`` equal
            ``[0.25, 0.25, 0.2, 0.1, 0.1, 0.09]``, the result is 80 x 10:
            the first six columns are kept and the combined mass of the
            remaining columns is split across the last four groups in
            proportion to ``lambdas[6:]``.  In every other case the output
            of ``MVKDE`` is returned unchanged (it is not reshaped to J
            columns).

    """
    # Read in PSID data
    if data_path is None:
        # Read data file shipped with OG-USA package
        df = pd.read_csv(
            os.path.join(CODE_PATH, "psid_lifetime_income.csv.gz")
        )
    else:
        # This is the case when running this from a branch of the OG-USA repo
        df = pd.read_csv(data_path)

    # Build the two candidate transfer measures from the raw columns
    df["total_transfers"] = (
        df["head_and_spouse_transfer_income"]
        + df["other_familyunit_transfer_income"]
    )
    # df["other_familyunit_ssi_prior_year"] is deliberately left out of the
    # sum: don't include SSI since OG-USA models it separately
    df["sum_transfers"] = (
        df["head_other_welfare_prior_year"]
        + df["spouse_other_welfare_prior_year"]
        + df["other_familyunit_other_welfare_prior_year"]
        + df["head_unemp_inc_prior_year"]
        + df["spouse_unemp_inc_prior_year"]
        + df["other_familyunit_unemp_inc_prior_year"]
    )

    # All age / lifetime-income tabulations use the same 1988+ subsample,
    # so filter once and reuse it
    recent = df[df["year_data"] >= 1988]

    if output_path is not None:
        # Create plot path directory if it doesn't already exist
        os.makedirs(output_path, exist_ok=True)

        # Mean transfers by year (computed once, plotted for both measures)
        yearly_means = df.groupby("year_data").mean(numeric_only=True)
        yearly_means.plot(y="total_transfers")
        _save_current_figure(
            os.path.join(output_path, "total_transfers_year.png")
        )
        yearly_means.plot(y="sum_transfers")
        _save_current_figure(
            os.path.join(output_path, "sum_transfers_year.png")
        )
        # note that the sum of transfer categories is much lower than the
        # "total transfers" variable.  The transfers variable goes more to
        # high income and old, even though it says it excludes social
        # security.  Because of this, we'll use the "sum transfers" variable

        # Mean total_transfers by age
        # line plot
        recent.groupby("age").mean(numeric_only=True).plot(
            y="total_transfers"
        )
        _save_current_figure(
            os.path.join(output_path, "total_transfers_age.png")
        )

        # Mean total_transfers by lifetime income group
        # bar plot
        recent.groupby("li_group").mean(numeric_only=True).plot.bar(
            y="total_transfers"
        )
        _save_current_figure(
            os.path.join(output_path, "total_transfers_li.png")
        )

        # lifecycle plots with line for each ability type
        for column in ("total_transfers", "sum_transfers"):
            pd.pivot_table(
                recent,
                values=column,
                index="age",
                columns="li_group",
                aggfunc="mean",
            ).plot(legend=True)
            _save_current_figure(
                os.path.join(output_path, column + "_age_li.png")
            )

    # Matrix Fraction of sum_transfers in a year by age and lifetime_inc
    transfers_matrix = pd.pivot_table(
        recent,
        values="sum_transfers",
        index="age",
        columns="li_group",
        aggfunc="sum",
    )
    # replace NaN with zero
    transfers_matrix.fillna(value=0, inplace=True)
    # Normalize so that all cells sum to one
    transfers_matrix = transfers_matrix / transfers_matrix.sum().sum()

    # estimate kernel density of transfers
    if output_path is not None:
        filename = os.path.join(output_path, "sum_transfers_kde.png")
    else:
        filename = None
    kde_matrix = MVKDE(
        80,
        7,
        transfers_matrix.to_numpy(),
        filename=filename,
        plot=(output_path is not None),
        bandwidth=0.5,
    )

    if J == 10:
        # Accept any array-like so that slicing and .sum() below work
        lambdas = np.asarray(lambdas)
        if np.array_equal(
            np.squeeze(lambdas[:6]),
            np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09]),
        ):
            # Keep the first six groups as estimated and spread the mass of
            # the remaining column(s) over the last four groups in
            # proportion to their population shares
            top_shares = np.reshape(lambdas[6:], (1, 4))
            top_mass = kde_matrix[:, 6:].sum(axis=1).reshape(80, 1)
            kde_matrix_new = np.zeros((80, J))
            kde_matrix_new[:, :6] = kde_matrix[:, :6]
            # (80, 1) * (1, 4) broadcasts to (80, 4)
            kde_matrix_new[:, 6:] = top_mass * top_shares / lambdas[6:].sum()
            kde_matrix = kde_matrix_new

    if output_path is not None:
        np.savetxt(
            os.path.join(output_path, "sum_transfers_kde.csv"),
            kde_matrix,
            delimiter=",",
        )

    return kde_matrix