# Source code for ogusa.deterministic_profiles

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from linearmodels import PanelOLS
from ogusa.constants import CODE_PATH


def estimate_profiles(data_path=None, output_path=None):
    """
    Estimate deterministic lifecycle profiles of hourly earnings.

    Follows the methodology of Fullerton and Rogers (1993): for each
    lifetime income group, log hourly earnings (``ln_earn_rate``) are
    regressed on a constant and a cubic polynomial in age (``age``,
    ``age2``, ``age3``) with entity-clustered standard errors.

    The input data must contain one indicator column per lifetime
    income group (named "0-25", "26-50", "51-70", "71-80", "81-90",
    "91-99", "100"; rows with value 1 belong to the group) plus the
    columns ``ln_earn_rate``, ``age``, ``age2`` and ``age3``. When
    ``output_path`` is given, the columns ``li_group``,
    ``earnhours_hh`` and ``married`` are also required for the plots.

    Args:
        data_path (str): path to a CSV file of PSID data; if None, the
            ``psid_lifetime_income.csv.gz`` file under ``CODE_PATH`` is
            read instead
        output_path (str): directory to save CSV tables and PNG figures
            to; if None, nothing is written to disk

    Returns:
        reg_results (Pandas DataFrame): regression model coefficients
            for lifetime earnings profiles, one column per lifetime
            income group plus a "Names" column labelling the rows
            (coefficient rows alternate with their standard errors,
            followed by R-squared and the number of observations)

    """
    # Read in PSID data
    if data_path is None:
        # Read data file shipped with OG-USA package
        df = pd.read_csv(
            os.path.join(CODE_PATH, "psid_lifetime_income.csv.gz")
        )
    else:
        # This is the case when running this from a branch of the OG-USA repo
        df = pd.read_csv(data_path)
    # NOTE(review): PanelOLS expects a 2-level (entity, time) MultiIndex,
    # while read_csv without index_col yields a flat index -- confirm the
    # input files are indexed as the estimator requires.

    # Regressor column names in the data, paired with the labels used
    # for them in the long-format results table.
    regressors = ["ones", "age", "age2", "age3"]
    long_labels = ["Constant", "Age", "Age^2", "Age^3"]

    # Wide-format table: one column per income group; each coefficient
    # row is followed by an unlabelled row holding its standard error.
    model_results = {
        "Names": [
            "Constant",
            "",
            "Head Age",
            "",
            "Head Age^2",
            "",
            "Head Age^3",
            "",
            "R-Squared",
            "Observations",
        ]
    }
    cats_pct = ["0-25", "26-50", "51-70", "71-80", "81-90", "91-99", "100"]
    # Long-format table: two rows per income group (estimates, then
    # standard errors) and one column per regressor.
    long_model_results = {
        "Lifetime Income Group": [],
        "Constant": [],
        "Age": [],
        "Age^2": [],
        "Age^3": [],
        "Observations": [],
    }
    for group in cats_pct:
        data = df[df[group] == 1].copy()
        data["ones"] = np.ones(len(data.index))
        mod = PanelOLS(data.ln_earn_rate, data[regressors])
        res = mod.fit(cov_type="clustered", cluster_entity=True)

        # Save model results to dictionary
        wide_column = []
        for name in regressors:
            wide_column.extend([res.params[name], res.std_errors[name]])
        wide_column.extend([res.rsquared, res.nobs])
        model_results[group] = wide_column

        long_model_results["Lifetime Income Group"].extend([group, ""])
        for label, name in zip(long_labels, regressors):
            long_model_results[label].extend(
                [res.params[name], res.std_errors[name]]
            )
        long_model_results["Observations"].extend([res.nobs, ""])

    reg_results = pd.DataFrame.from_dict(model_results)

    if output_path is not None:
        # Create directory if it doesn't already exist
        os.makedirs(output_path, exist_ok=True)
        reg_results.to_csv(
            os.path.join(output_path, "DeterministicProfileRegResults.csv")
        )
        # The long-format file must come from long_model_results;
        # building it from model_results would just duplicate the wide
        # table under a different file name.
        long_reg_results = pd.DataFrame.from_dict(long_model_results)
        long_reg_results.to_csv(
            os.path.join(
                output_path, "DeterministicProfileRegResults_long.csv"
            )
        )

        # Plot lifecycles of hourly earnings from processes estimated above
        age_vec = np.arange(20, 81, step=1)
        fig, ax = plt.subplots()
        for group in cats_pct:
            # Entries 0, 2, 4, 6 of each wide column are the point
            # estimates (odd entries are the standard errors).
            earn_profile = (
                model_results[group][0]
                + model_results[group][2] * age_vec
                + model_results[group][4] * age_vec**2
                + model_results[group][6] * age_vec**3
            )
            ax.plot(age_vec, earn_profile, label=group)
        ax.set_title(
            "Estimated Lifecycle Earnings Profiles by Lifetime Income Group"
        )
        ax.legend()
        fig.savefig(
            os.path.join(output_path, "lifecycle_earnings_profiles.png")
        )
        # Close each figure once saved so repeated calls do not
        # accumulate open matplotlib figures.
        plt.close(fig)

        # Plot of lifecycles of hourly earnings from processes from data
        ax = pd.pivot_table(
            df,
            values="ln_earn_rate",
            index="age",
            columns="li_group",
            aggfunc="mean",
        ).plot(legend=True)
        ax.set_title(
            "Empirical Lifecycle Earnings Profiles by Lifetime Income Group"
        )
        fig = ax.get_figure()
        fig.savefig(
            os.path.join(output_path, "lifecycle_earnings_profiles_data.png")
        )
        plt.close(fig)

        # Plot of lifecycle profiles of hours by lifetime income group
        # create variable from fraction of time endowment work
        # (presumably 24 hours x 5 days x 50 weeks per adult, with
        # married + 1 adults in the household -- TODO confirm)
        df["labor_supply"] = df["earnhours_hh"] / (
            24 * 5 * (df["married"] + 1) * 50
        )
        ax = pd.pivot_table(
            df,
            values="labor_supply",
            index="age",
            columns="li_group",
            aggfunc="mean",
        ).plot(legend=True)
        ax.set_title("Lifecycle Profiles of Hours by Lifetime Income Group")
        fig = ax.get_figure()
        fig.savefig(os.path.join(output_path, "lifecycle_laborsupply.png"))
        plt.close(fig)

    return reg_results