Source code for ogcore.parameter_plots

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.ticker as mticker
from ogcore.constants import GROUP_LABELS
from ogcore import utils, txfunc
from ogcore.constants import DEFAULT_START_YEAR, VAR_LABELS


def plot_imm_rates(
    imm_rates,
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plot immigration rates by age from the data.

    Args:
        imm_rates (NumPy array): immigration rates; the first axis is
            indexed by year (row 0 is start_year), the second axis is
            drawn along the age axis
        start_year (int): first year of data
        years_to_plot (list): list of years to plot
        include_title (bool): whether to include a title in the plot
        source (str): data source for immigration rates
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of immigration rates
            (only returned when no path is given)

    """
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row 0 of imm_rates is start_year, so year y sits
        # y - start_year rows in.  The reverse difference goes negative
        # for any later year and silently wraps to the end of the array.
        i = y - start_year
        plt.plot(imm_rates[i, :], c="blue", label="Year " + str(y))
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Immigration rate $i_{s}$")
    plt.legend(loc="upper left")
    plt.text(
        -5,
        -0.05,
        "Source: " + source,
        fontsize=9,
    )
    # Add the title before tight_layout so the layout leaves room for it
    if include_title:
        plt.title("Immigration Rates")
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    # Save or return figure
    if path:
        output_path = os.path.join(path, "imm_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig
def plot_mort_rates(
    p_list,
    labels=[""],
    years=[DEFAULT_START_YEAR],
    survival_rates=False,
    include_title=False,
    path=None,
):
    """
    Plot mortality rates (or cumulative survival rates) by age from one
    or more OG-Core parameterizations.

    Args:
        p_list (list): list of parameters objects; the first one sets
            the age axis and the year-to-row offset
        labels (list): legend label prefix for each entry of p_list
        years (list): calendar years to plot
        survival_rates (bool): whether to plot cumulative survival
            rates instead of mortality rates
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of mortality rates (only
            returned when path is None)

    """
    base = p_list[0]
    ages = np.linspace(base.E, base.E + base.S, base.S)
    fig, ax = plt.subplots()
    for year in years:
        t = year - base.start_year
        for idx, params in enumerate(p_list):
            series = params.rho[t, :]
            if survival_rates:
                # Probability of surviving through each age
                series = np.cumprod(1 - series)
            plt.plot(ages, series, label=labels[idx] + " " + str(year))
    plt.xlabel(r"Age $s$ (model periods)")
    if survival_rates:
        plt.ylabel(r"Cumulative Survival Rates")
        plt.legend(loc="lower left")
        title = "Survival Rates"
        file_stem = "survival_rates"
    else:
        plt.ylabel(r"Mortality Rates $\rho_{s}$")
        plt.legend(loc="upper left")
        title = "Mortality Rates"
        file_stem = "mortality_rates"
    # Pin the current tick positions, then relabel them as percentages
    tick_positions = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_positions))
    ax.set_yticklabels(["{:,.0%}".format(tick) for tick in tick_positions])
    if include_title:
        plt.title(title)
    if path is not None:
        plt.savefig(os.path.join(path, file_stem), dpi=300)
        return None
    return fig
def plot_pop_growth(
    p,
    start_year=DEFAULT_START_YEAR,
    num_years_to_plot=150,
    include_title=False,
    path=None,
):
    """
    Plot population growth rates over a window of calendar years.

    Args:
        p (OG-Core Specifications class): parameters object
        start_year (integer): year to begin plotting
        num_years_to_plot (integer): number of years to plot
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of population growth rates
            (only returned when path is None)

    """
    assert isinstance(start_year, int)
    assert isinstance(num_years_to_plot, int)
    calendar_years = np.arange(start_year, start_year + num_years_to_plot)
    # Offset of the first plotted year within p.g_n
    first = start_year - p.start_year
    last = first + num_years_to_plot
    fig, ax = plt.subplots()
    plt.plot(calendar_years, p.g_n[first:last])
    plt.xlabel(r"Year $t$")
    plt.ylabel(r"Population Growth Rate $g_{n, t}$")
    # Pin the current tick positions, then relabel them as percentages
    tick_positions = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_positions))
    ax.set_yticklabels(["{:,.2%}".format(tick) for tick in tick_positions])
    if include_title:
        plt.title("Population Growth Rates")
    if path is not None:
        plt.savefig(os.path.join(path, "pop_growth_rates"), dpi=300)
        return None
    return fig
def plot_population(p, years_to_plot=["SS"], include_title=False, path=None):
    """
    Plot the population distribution over age for selected years.

    Args:
        p (OG-Core Specifications class): parameters object
        years_to_plot (list): years to plot; each entry is an int no
            earlier than p.start_year, or 'SS' for the steady-state
            distribution
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of population distribution
            (only returned when path is None)

    """
    # Validate every requested year before drawing anything
    for entry in years_to_plot:
        assert isinstance(entry, int) | (entry == "SS")
        if isinstance(entry, int):
            assert entry >= p.start_year
    ages = np.arange(p.E, p.S + p.E)
    fig, ax = plt.subplots()
    for entry in years_to_plot:
        if entry == "SS":
            distribution = p.omega_SS
        else:
            distribution = p.omega[entry - p.start_year, :]
        plt.plot(ages, distribution, label=str(entry) + " pop.")
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Pop. dist'n $\omega_{s}$")
    plt.legend(loc="lower left")
    if include_title:
        plt.title("Population Distribution by Year")
    if path is not None:
        plt.savefig(os.path.join(path, "pop_distribution"), dpi=300)
        return None
    return fig
def plot_ability_profiles(
    p, p2=None, t=None, log_scale=False, include_title=False, path=None
):
    """
    Plot earnings ability profiles by age for each ability group.

    Args:
        p (OG-Core Specifications class): parameters object
        p2 (OG-Core Specifications class): optional second parameters
            object, drawn with dashed lines using the same number of
            groups and the same labels as p
        t (int): model period for year, if None, then the last period
            of the ability matrix is plotted
        log_scale (bool): whether to plot in log points
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of earnings ability
            profiles (only returned when path is None)

    """
    if t is None:
        t = -1
    ages = np.arange(p.starting_age, p.starting_age + p.S)
    fig, ax = plt.subplots()
    # One color per ability group, sampled evenly from the colormap
    cm = plt.get_cmap("coolwarm")
    ax.set_prop_cycle(color=[cm(1.0 * i / p.J) for i in range(p.J)])

    def scale(values):
        return np.log(values) if log_scale else values

    # Each entry: (parameters object, extra line keyword arguments)
    series = [(p, {})]
    if p2 is not None:
        series.append((p2, {"linestyle": "--"}))
    for params, line_kwargs in series:
        for j in range(p.J):
            plt.plot(
                ages,
                scale(params.e[t, :, j]),
                label=GROUP_LABELS[p.J][j],
                **line_kwargs,
            )
    plt.xlabel(r"Age")
    plt.ylabel(r"ln(Earnings ability)" if log_scale else r"Earnings ability")
    plt.legend(loc=9, bbox_to_anchor=(0.5, -0.15), ncols=5)
    if include_title:
        plt.title("Lifecycle Profiles of Effective Labor Units")
    if path is not None:
        fig_path = os.path.join(path, "ability_profiles")
        plt.savefig(fig_path, bbox_inches="tight", dpi=300)
        return None
    return fig
def plot_elliptical_u(p, plot_MU=True, include_title=False, path=None):
    """
    Plot the elliptical disutility of labor against the constant Frisch
    elasticity (CFE) form, in marginal terms or in levels.

    Args:
        p (OG-Core Specifications class): parameters object
        plot_MU (boolean): whether plot marginal utility or utility in
            levels
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of elliptical vs CFE utility
            (only returned when path is None)

    """
    theta = 1 / p.frisch
    N = 101
    n_grid = np.linspace(0.01, 0.8, num=N)
    # Labor supply as a share of the time endowment
    n_rel = n_grid / p.ltilde
    if plot_MU:
        CFE = (1.0 / p.ltilde) * (n_rel**theta)
        curvature = (1.0 - n_rel**p.upsilon) ** ((1.0 / p.upsilon) - 1.0)
        slope = n_rel ** (p.upsilon - 1.0)
        ellipse = 1.0 * p.b_ellipse * (1.0 / p.ltilde) * curvature * slope
    else:
        CFE = (n_rel ** (1 + theta)) / (1 + theta)
        k = 1.0  # we don't estimate k, so not in parameters
        ellipse = (
            p.b_ellipse * ((1 - (n_rel**p.upsilon)) ** (1 / p.upsilon)) + k
        )
    fig, ax = plt.subplots()
    plt.plot(n_grid, CFE, label="Constant Frisch elasticity")
    plt.plot(n_grid, ellipse, label="Elliptical disutility")
    if plot_MU:
        title_text = "Marginal Utility of CFE and Elliptical"
        y_text = r"Marginal disutility"
    else:
        title_text = "Constant Frisch Elasticity vs. Elliptical Utility"
        y_text = r"Disutility"
    if include_title:
        plt.title(title_text)
    plt.xlabel(r"Labor Supply $n_{j,s,t}$")
    plt.ylabel(y_text)
    plt.legend(loc="upper left")
    plt.grid(color="gray", linestyle=":", linewidth=1, alpha=0.5)
    if path is not None:
        plt.savefig(os.path.join(path, "ellipse_v_CFE"), dpi=300)
        return None
    return fig
def plot_chi_n(
    p_list,
    labels=[""],
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    path=None,
):
    """
    Plot the chi_n parameters by age for each parameterization and
    year. Line labels are set, but no legend is drawn here.

    Args:
        p_list (list): parameters objects; the first one sets the age
            axis
        labels (list): label prefix for each entry of p_list
        years_to_plot (list): list of years to plot
        include_title (boolean): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of chi_n parameters (only
            returned when path is None)

    """
    base = p_list[0]
    ages = np.linspace(base.starting_age, base.ending_age, base.S)
    fig, ax = plt.subplots()
    for year in years_to_plot:
        for idx, params in enumerate(p_list):
            # Each parameters object uses its own start year as row 0
            row = params.chi_n[year - params.start_year, :]
            plt.plot(ages, row, label=labels[idx] + " " + str(year))
    if include_title:
        plt.title("Utility Weight on the Disutility of Labor Supply")
    plt.xlabel("Age, $s$")
    plt.ylabel(r"$\chi^{n}_{s}$")
    if path is not None:
        plt.savefig(os.path.join(path, "chi_n_values"), dpi=300)
        return None
    return fig
def plot_fert_rates(
    fert_rates_list,
    labels=[""],
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plot fertility rates by age from the data.

    Args:
        fert_rates_list (list): list of Numpy arrays of fertility rates
            for each model period and age (row 0 is start_year)
        labels (list): list of labels for the legend, one per array in
            fert_rates_list
        start_year (int): first year of data
        years_to_plot (list): list of years to plot
        include_title (bool): whether to include a title in the plot
        source (str): data source for fertility rates
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of fertility rates (only
            returned when no path is given)

    """
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row 0 is start_year, so year y sits y - start_year rows in.
        # The row index needs its own name: reusing the enumerate
        # index would select the row by series number, not by year.
        t = y - start_year
        for i, fert_rates in enumerate(fert_rates_list):
            plt.plot(fert_rates[t, :], label=labels[i] + " " + str(y))
    if include_title:
        plt.title("Fertility rates by age ($f_{s}$)", fontsize=20)
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Fertility rate $f_{s}$")
    plt.legend(loc="upper right")
    plt.text(
        -5,
        -0.023,
        "Source: " + source,
        fontsize=9,
    )
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    # Save or return figure
    if path:
        output_path = os.path.join(path, "fert_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig
def plot_mort_rates_data(
    mort_rates,
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plot mortality rates by age from the data.

    Args:
        mort_rates (array_like): mortality rates; the first axis is
            indexed by year (row 0 is start_year), the second axis is
            drawn along the age axis
        start_year (int): first year of data
        years_to_plot (list): list of years to plot
        source (str): data source for mortality rates
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of mortality rates (only
            returned when no path is given)

    """
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row 0 of mort_rates is start_year, so year y sits
        # y - start_year rows in.  The reverse difference goes negative
        # for any later year and silently wraps to the end of the array.
        i = y - start_year
        plt.plot(mort_rates[i, :], c="blue", label="Year " + str(y))
    plt.xlabel(r"Age $s$")
    # \rho needs its backslash to render as the Greek letter in mathtext
    plt.ylabel(r"Mortality rate $\rho_{s}$")
    plt.legend(loc="upper left")
    plt.text(
        -5,
        -0.223,
        "Source: " + source,
        fontsize=9,
    )
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    # Save or return figure
    if path:
        output_path = os.path.join(path, "mort_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig
def plot_g_n(p_list, label_list=[""], include_title=False, path=None):
    """
    Plot population growth rates over the model time path for one or
    more OG-Core parameterizations.

    Args:
        p_list (list): list of OG-Core Specifications objects; the
            first one sets the year axis
        label_list (list): list of labels for the legend; the legend
            is skipped when the first label is an empty string
        include_title (bool): whether to include a title in the plot
        path (string): directory to save figure to

    Returns:
        fig (Matplotlib plot object): plot of population growth rates
            (only returned when path is None)

    """
    base = p_list[0]
    calendar_years = np.arange(base.start_year, base.start_year + base.T)
    fig, ax = plt.subplots()
    for idx, params in enumerate(p_list):
        plt.plot(calendar_years, params.g_n[: params.T], label=label_list[idx])
    plt.xlabel(r"Year $s$ (model periods)")
    plt.ylabel(r"Population Growth Rate $g_{n,t}$")
    if label_list[0] != "":
        plt.legend(loc="upper right")
    # Pin the current tick positions, then relabel them as percentages
    tick_positions = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_positions))
    ax.set_yticklabels(["{:,.0%}".format(tick) for tick in tick_positions])
    if include_title:
        plt.title("Population Growth Rates")
    if path is not None:
        plt.savefig(os.path.join(path, "pop_growth_rates"), dpi=300)
        return None
    return fig
def plot_omega_fixed(age_per_EpS, omega_SS_orig, omega_SSfx, E, S, path=None):
    """
    Compare the steady-state population distribution before and after
    the adjustment to immigration rates.

    Args:
        age_per_EpS (array_like): list of ages over which to plot
            population distribution
        omega_SS_orig (Numpy array): population distribution in SS
            without adjustment to immigration rates
        omega_SSfx (Numpy array): population distribution in SS after
            adjustment to immigration rates
        E (int): age at which household becomes economically active
        S (int): number of years which household is economically active
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of SS population
            distribution before and after adjustment to immigration
            rates (only returned when no path is given)

    """
    fig, ax = plt.subplots()
    curves = (
        (omega_SS_orig, "Original Dist'n"),
        (omega_SSfx, "Fixed Dist'n"),
    )
    for values, legend_label in curves:
        plt.plot(age_per_EpS, values, label=legend_label)
    plt.title("Original steady-state population distribution vs. fixed")
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Pop. dist'n $\omega_{s}$")
    plt.xlim((0, E + S + 1))
    plt.legend(loc="upper right")
    # Save or return figure
    if not path:
        return fig
    plt.savefig(os.path.join(path, "OrigVsFixSSpop"), dpi=300)
    plt.close()
def plot_imm_fixed(
    age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, path=None
):
    """
    Compare the original immigration rates by age with the adjusted
    immigration rates.

    Args:
        age_per_EpS (array_like): list of ages over which to plot
            immigration rates
        imm_rates_orig (Numpy array): immigration rates by age
        imm_rates_adj (Numpy array): adjusted immigration rates by age
        E (int): age at which household becomes economically active
        S (int): number of years which household is economically active
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of original and adjusted
            immigration rates (only returned when no path is given)

    """
    fig, ax = plt.subplots()
    curves = (
        (imm_rates_orig, "Original Imm. Rates"),
        (imm_rates_adj, "Adj. Imm. Rates"),
    )
    for values, legend_label in curves:
        plt.plot(age_per_EpS, values, label=legend_label)
    plt.title("Original immigration rates vs. adjusted")
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Imm. rates $i_{s}$")
    plt.xlim((0, E + S + 1))
    plt.legend(loc="upper center")
    # Save or return figure
    if not path:
        return fig
    plt.savefig(os.path.join(path, "OrigVsAdjImm"), dpi=300)
    plt.close()
def plot_population_path(
    age_per_EpS,
    omega_path_lev,
    omega_SSfx,
    start_year,
    year1,
    year2,
    S,
    path=None,
):
    """
    Plot the distribution of the population over age at several points
    along the time path, together with the adjusted steady state.

    Args:
        age_per_EpS (array_like): list of ages over which to plot
            population distribution
        omega_path_lev (Numpy array): number of households by age over
            the transition path (row 0 is start_year)
        omega_SSfx (Numpy array): population distribution by age in
            the SS, plotted as given
        start_year (int): first year of data (so can get index of year1
            and year2)
        year1 (int): first year of data to plot
        year2 (int): second year of data to plot
        S (int): number of years which household is economically
            active; periods int(0.5 * S) and int(S) are also plotted
        path (str): path to save figure to, if None then figure is
            returned

    Returns:
        fig (Matplotlib plot object): plot of population distribution
            at points along the time path (only returned when no path
            is given)

    """
    fig, ax = plt.subplots()
    half_S = int(0.5 * S)
    full_S = int(S)
    # (row of omega_path_lev, legend label).  Row 0 is start_year, so a
    # calendar year maps to row year - start_year; the reverse
    # difference would wrap around to the end of the path.
    snapshots = [
        (year1 - start_year, str(year1) + " pop."),
        (year2 - start_year, str(year2) + " pop."),
        (half_S, "T=" + str(half_S) + " pop."),
        (full_S, "T=" + str(full_S) + " pop."),
    ]
    for row, legend_label in snapshots:
        levels = omega_path_lev[row, :]
        # Normalize levels to shares so every curve is a distribution
        plt.plot(age_per_EpS, levels / levels.sum(), label=legend_label)
    plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.")
    plt.title("Population distribution at points in time path")
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Pop. dist'n $\omega_{s}$")
    plt.legend(loc="lower left")
    # Save or return figure
    if path:
        output_path = os.path.join(path, "PopDistPath")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        return fig
def _save_3d_scatter(inc_lab, inc_cap, rates, zlabel, title, fullpath):
    """Scatter one tax-rate series over labor and capital income and save it."""
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(inc_lab, inc_cap, rates, c="r", marker="o")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel(zlabel)
    plt.title(title)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()


def gen_3Dscatters_hist(df, s, t, output_dir):
    """
    Create 3-D scatterplots and corresponding 3D histogram of ETR, MTRx,
    and MTRy as functions of labor income and capital income with
    truncated data in the income dimension.

    Four PNG files are written to output_dir: the ETR scatter, the
    income histogram, the MTRx scatter and the MTRy scatter.

    Args:
        df (Pandas DataFrame): tax rate data; the columns read here are
            total_labinc, total_capinc, etr, mtr_labinc and mtr_capinc
        s (int): age of individual (used in titles and file names)
        t (int): year of analysis (used in titles and file names)
        output_dir (str): output directory for saving plot files

    Returns:
        None

    """
    from ogcore.txfunc import MAX_INC_GRAPH, MIN_INC_GRAPH

    # Truncate the data to the income range used for graphing
    df_trnc = df[
        (df["total_labinc"] > MIN_INC_GRAPH)
        & (df["total_labinc"] < MAX_INC_GRAPH)
        & (df["total_capinc"] > MIN_INC_GRAPH)
        & (df["total_capinc"] < MAX_INC_GRAPH)
    ]
    inc_lab = df_trnc["total_labinc"]
    inc_cap = df_trnc["total_capinc"]
    age_year = "Age=" + str(s) + ", Year=" + str(t)

    # Plot 3D scatterplot of ETR data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["etr"],
        "ETR",
        "ETR, Lab. Inc., and Cap. Inc., " + age_year,
        os.path.join(
            output_dir, "ETR_age_" + str(s) + "_Year_" + str(t) + "_data.png"
        ),
    )

    # Plot 3D histogram for all data
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    bin_num = 30
    hist, xedges, yedges = np.histogram2d(inc_lab, inc_cap, bins=bin_num)
    hist = hist / hist.sum()
    # Bins are equally spaced, so a single width per axis is enough
    dx = xedges[1] - xedges[0]
    dy = yedges[1] - yedges[0]
    x_midp = xedges[:-1] + 0.5 * dx
    y_midp = yedges[:-1] + 0.5 * dy
    ypos, xpos = np.meshgrid(y_midp, x_midp)
    xpos = xpos.flatten()
    ypos = ypos.flatten()
    dz = hist.flatten()
    zpos = np.zeros_like(dz)
    ax.bar3d(xpos, ypos, zpos, dx, dy, dz, color="b", zsort="average")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel("Percent of obs.")
    plt.title("Histogram by lab. inc., and cap. inc., " + age_year)
    filename = "Hist_Age_" + str(s) + "_Year_" + str(t) + ".png"
    fullpath = os.path.join(output_dir, filename)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()

    # Plot 3D scatterplot of MTRx data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["mtr_labinc"],
        "Marginal Tax Rate (Labor Inc.)",
        "MTR Labor Income, Lab. Inc., and Cap. Inc., " + age_year,
        os.path.join(
            output_dir, "MTRx_Age_" + str(s) + "_Year_" + str(t) + "_data.png"
        ),
    )

    # Plot 3D scatterplot of MTRy data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["mtr_capinc"],
        "Marginal Tax Rate (Capital Inc.)",
        "MTR Capital Income, Lab. Inc., and Cap. Inc., " + age_year,
        os.path.join(
            output_dir, "MTRy_Age_" + str(s) + "_Year_" + str(t) + "_data.png"
        ),
    )
def _save_txfunc_comparison(
    X,
    Y,
    txrates,
    tx_label,
    title,
    params_to_plot,
    tax_func_type,
    rate_type,
    cmap,
    fullpath,
):
    """Scatter observed rates, overlay the fitted tax surface, and save."""
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(X, Y, txrates, c="r", marker="o")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel(tx_label)
    plt.title(title)
    # Log-spaced income grid from 5 up to the largest observed value
    gridpts = 50
    X_vec = np.exp(np.linspace(np.log(5), np.log(X.max()), gridpts))
    Y_vec = np.exp(np.linspace(np.log(5), np.log(Y.max()), gridpts))
    X_grid, Y_grid = np.meshgrid(X_vec, Y_vec)
    txrate_grid = txfunc.get_tax_rates(
        params_to_plot,
        X_grid,
        Y_grid,
        None,
        tax_func_type,
        rate_type,
        for_estimation=False,
    )
    ax.plot_surface(X_grid, Y_grid, txrate_grid, cmap=cmap, linewidth=0)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()


def txfunc_graph(
    s,
    t,
    df,
    X,
    Y,
    txrates,
    rate_type,
    tax_func_type,
    params_to_plot,
    output_dir,
):
    """
    Create 3D plots of the fitted tax function against the data: one
    over the full income domain of X and Y, and one over df truncated
    to incomes strictly between 5 and 800000.

    Args:
        s (int): age of individual (used in titles and file names)
        t (int): year of analysis (used in titles and file names)
        df (Pandas DataFrame): tax rate data; the columns read here are
            total_labinc, total_capinc and the rate column that
            corresponds to rate_type
        X (Pandas DataSeries): labor income
        Y (Pandas DataSeries): capital income
        txrates (Pandas DataSeries): tax rates from the data
        rate_type (str): type of tax rate: mtrx, mtry, etr
        tax_func_type (str): functional form of tax functions
        params_to_plot (array_like or function): tax function
            parameters or nonparametric function
        output_dir (str): output directory for saving plot files

    Returns:
        None

    Raises:
        ValueError: if rate_type is not one of etr, mtrx, mtry

    """
    # rate_type -> (axis/file label, column of df holding that rate)
    rate_specs = {
        "etr": ("ETR", "etr"),
        "mtrx": ("MTRx", "mtr_labinc"),
        "mtry": ("MTRy", "mtr_capinc"),
    }
    if rate_type not in rate_specs:
        # Fail up front rather than on an unbound label part-way through
        raise ValueError(
            "rate_type must be one of 'etr', 'mtrx', 'mtry'; got "
            + repr(rate_type)
        )
    tx_label, rate_col = rate_specs[rate_type]
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; the colormap
    # registry is the supported lookup
    cmap1 = matplotlib.colormaps["summer"]
    age_year = "Age=" + str(s) + ", Year=" + str(t)

    # Make comparison plot with full income domains
    filename = tx_label + "_age_" + str(s) + "_Year_" + str(t) + "_vsPred.png"
    _save_txfunc_comparison(
        X,
        Y,
        txrates,
        tx_label,
        tx_label + " vs. Predicted " + tx_label + ": " + age_year,
        params_to_plot,
        tax_func_type,
        rate_type,
        cmap1,
        os.path.join(output_dir, filename),
    )

    # Make comparison plot with truncated income domains
    df_trnc_gph = df[
        (df["total_labinc"] > 5)
        & (df["total_labinc"] < 800000)
        & (df["total_capinc"] > 5)
        & (df["total_capinc"] < 800000)
    ]
    filename = (
        tx_label + "trunc_age_" + str(s) + "_Year_" + str(t) + "_vsPred.png"
    )
    _save_txfunc_comparison(
        df_trnc_gph["total_labinc"],
        df_trnc_gph["total_capinc"],
        df_trnc_gph[rate_col],
        tx_label,
        "Truncated " + tx_label + ", Lab. Inc., and Cap. Inc., " + age_year,
        params_to_plot,
        tax_func_type,
        rate_type,
        cmap1,
        os.path.join(output_dir, filename),
    )
def txfunc_sse_plot(age_vec, sse_mat, start_year, varstr, output_dir, round):
    """
    Plot the sum of squared errors of the tax functions over age, one
    line per row of sse_mat, and save the figure.

    Args:
        age_vec (numpy array): vector of ages
        sse_mat (Numpy array): SSE for each estimated tax function, one
            row per year and one column per age
        start_year (int): year that row 0 of sse_mat corresponds to
        varstr (str): name of tax function being evaluated
        output_dir (str): directory to save graph to
        round (int): which round of sweeping for outliers (shown in the
            title and the file name)

    Returns:
        None

    """
    fig, ax = plt.subplots()
    n_years = sse_mat.shape[0]
    for offset in range(n_years):
        plt.plot(age_vec, sse_mat[offset, :], label=str(start_year + offset))
    plt.legend(loc="upper left")
    round_tag = str(round)
    plt.title(
        "Sum of Squared Errors by age and Tax Year"
        + " minus outliers (Round "
        + round_tag
        + "): "
        + varstr
    )
    plt.xlabel(r"age $s$")
    plt.ylabel(r"SSE")
    output_path = os.path.join(output_dir, "SSE_" + varstr + "_Round" + round_tag)
    plt.savefig(output_path, bbox_inches="tight", dpi=300)
    plt.close()
def _plot_log_ability_lines(ages, abil_pcts, emat, t, J):
    """Draw one log-ability line per ability group on a new subplot; return its axes."""
    ax = plt.subplot(111)
    # Four line styles plus six markers: enough for at most 10 groups
    linestyles = ("-", "--", "-.", ":")
    markers = ("x", "v", "o", "d", ">", "|")
    pct_lb = 0
    for j in range(J):
        # Legend label is the population percentile range of group j
        this_label = (
            str(int(np.rint(pct_lb)))
            + " - "
            + str(int(np.rint(pct_lb + 100 * abil_pcts[j])))
            + "%"
        )
        pct_lb += 100 * abil_pcts[j]
        if j <= 3:
            ax.plot(
                ages,
                np.log(emat[t, :, j]),
                label=this_label,
                linestyle=linestyles[j],
                color="black",
            )
        else:
            ax.plot(
                ages,
                np.log(emat[t, :, j]),
                label=this_label,
                marker=markers[j - 4],
                color="black",
            )
    ax.axvline(x=80, color="black", linestyle="--")
    # Shrink the axes to leave room for the legend on the right
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    ax.set_xlabel(r"age-$s$")
    ax.set_ylabel(r"log ability $log(e_{j,s})$")
    return ax


def plot_income_data(
    ages, abil_midp, abil_pcts, emat, t=None, path=None, filesuffix=""
):
    """
    Graph the ability matrix in 3D and 2D, in levels and in logs.

    Args:
        ages (Numpy array): ages represented in sample, length S
        abil_midp (Numpy array): midpoints of income percentile bins in
            each ability group
        abil_pcts (Numpy array): percent of population in each lifetime
            income group, length J
        emat (Numpy array): effective labor units by age and lifetime
            income group, size TxSxJ
        t (int): model period for year, if None, then the last period
            of emat is plotted
        path (str): directory to save plot files to; it is created if
            needed. If not given, nothing is saved
        filesuffix (str): suffix to be added to plot files

    Returns:
        ax (Matplotlib axes object): axes of the 2D log ability plot
            when no path is given and J <= 10; None otherwise

    """
    if t is None:
        t = -1
    J = abil_midp.shape[0]
    abil_mesh, age_mesh = np.meshgrid(abil_midp, ages)
    cmap1 = matplotlib.colormaps["summer"]

    if not path:
        # Only 10 distinct line/marker styles exist, so there is
        # nothing to draw (and no axes to return) for more groups
        if J > 10:
            return None
        return _plot_log_ability_lines(ages, abil_pcts, emat, t, J)

    # Make sure that directory is created
    utils.mkdirs(path)
    if J == 1:
        # Plot of 2D, J=1 in levels
        plt.figure()
        plt.plot(ages, emat[t, :, :])
        filename = "ability_2D_lev" + filesuffix
        fullpath = os.path.join(path, filename)
        plt.savefig(fullpath, dpi=300)
        plt.close()

        # Plot of 2D, J=1 in logs
        plt.figure()
        plt.plot(ages, np.log(emat[t, :, :]))
        filename = "ability_2D_log" + filesuffix
        fullpath = os.path.join(path, filename)
        plt.savefig(fullpath, dpi=300)
        plt.close()
    else:
        # Plot of 3D, J>1 in levels
        fig10, ax10 = plt.subplots(subplot_kw={"projection": "3d"})
        ax10.plot_surface(
            age_mesh,
            abil_mesh,
            emat[t, :, :],
            rstride=8,
            cstride=1,
            cmap=cmap1,
        )
        ax10.set_xlabel(r"age-$s$")
        ax10.set_ylabel(r"ability type -$j$")
        ax10.set_zlabel(r"ability $e_{j,s}$")
        filename = "ability_3D_lev" + filesuffix
        fullpath = os.path.join(path, filename)
        plt.savefig(fullpath, dpi=300)
        plt.close()

        # Plot of 3D, J>1 in logs
        fig11, ax11 = plt.subplots(subplot_kw={"projection": "3d"})
        ax11.plot_surface(
            age_mesh,
            abil_mesh,
            np.log(emat[t, :, :]),
            rstride=8,
            cstride=1,
            cmap=cmap1,
        )
        ax11.set_xlabel(r"age-$s$")
        ax11.set_ylabel(r"ability type -$j$")
        ax11.set_zlabel(r"log ability $log(e_{j,s})$")
        filename = "ability_3D_log" + filesuffix
        fullpath = os.path.join(path, filename)
        plt.savefig(fullpath, dpi=300)
        plt.close()

        if J <= 10:  # Restricted because of line and marker types
            # Plot of 2D lines from 3D version in logs
            _plot_log_ability_lines(ages, abil_pcts, emat, t, J)
            filename = "ability_2D_log" + filesuffix
            fullpath = os.path.join(path, filename)
            plt.savefig(fullpath, dpi=300)
            plt.close()
    return None
def _weighted_mean_by_bin(x, cols, w="weight"):
    """Weighted mean of cols within one income bin; 0 if the weights sum to zero."""
    try:
        return pd.Series(np.average(x[cols], weights=x[w], axis=0), cols)
    except ZeroDivisionError:
        return 0


def plot_2D_taxfunc(
    year,
    start_year,
    tax_param_list,
    age=None,
    E=21,  # Age at which agents become economically active in the model
    tax_func_type=["DEP"],
    rate_type="etr",
    over_labinc=True,
    other_inc_val=1000,
    max_inc_amt=1000000,
    data_list=None,
    labels=["1st Functions"],
    title=None,
    path=None,
):
    """
    This function plots OG-Core tax functions in two dimensions.
    The tax rates are plotted over capital or labor income, as
    entered by the user.

    Args:
        year (int): year of policy tax functions represent
        start_year (int): first year tax functions estimated for in
            tax_param_list elements
        tax_param_list (list): list of dictionaries of tax function
            parameters, read as
            params["tfunc_<rate_type>_params_S"][year index][age index]
        age (int): age for tax functions to plot, use None if tax
            function parameters were not age specific
        E (int): age at which agents become economically active; the
            age index into the parameters is age - E
        tax_func_type (list): list of strings in ["DEP",
            "DEP_totalinc", "GS", "linear", "mono", "mono2D"] and
            specifies functional form of tax functions in
            tax_param_list; if shorter than tax_param_list, the first
            entry is used for all of them
        rate_type (str): string that is in ["etr", "mtrx", "mtry"] and
            determines the type of tax rate that is plotted
        over_labinc (bool): indicates that x-axis of the plot is over
            labor income, if False then plot is over capital income
        other_inc_val (scalar): dollar value at which to hold constant
            the amount of income that is not represented on the x-axis
        max_inc_amt (scalar): largest income amount to represent on the
            x-axis of the plot
        data_list (list): list of dictionaries, keyed by the year as a
            string, of DataFrames with data to scatter plot with tax
            functions, needs to be of format output from
            ogcore.get_micro_data.get_data
        labels (list): list of labels for tax function parameters, one
            per entry of tax_param_list
        title (str): title for the plot
        path (str): file path to which to save plot, if None then
            figure returned

    Returns:
        fig (Matplotlib plot object): plot of tax functions (only
            returned when path is None)

    """
    # Check that inputs are valid
    assert isinstance(start_year, int)
    assert isinstance(year, int)
    assert year >= start_year
    # if list of tax function types less than list of params, assume
    # all the same functional form
    if len(tax_func_type) < len(tax_param_list):
        tax_func_type = [tax_func_type[0]] * len(tax_param_list)
    for v in tax_func_type:
        assert v in ["DEP", "DEP_totalinc", "GS", "linear", "mono", "mono2D"]
    assert rate_type in ["etr", "mtrx", "mtry"]
    assert len(tax_param_list) == len(labels)

    # Set age and year to look at
    if age is not None:
        assert isinstance(age, int)
        assert age >= E
        # Note: assumed age is given in E + model periods (but age below
        # is also assumed to be calendar years)
        s = age - E
    else:
        s = 0  # if not age-specific, all ages have the same values
    t = year - start_year

    # create rate_key to correspond to keys in tax func dicts
    rate_key = "tfunc_" + rate_type + "_params_S"

    # Set income range to plot over (min income value hard coded to 5)
    inc_sup = np.exp(np.linspace(np.log(5), np.log(max_inc_amt), 100))
    # Set income value for other income
    inc_fix = other_inc_val

    if over_labinc:
        key1 = "total_labinc"
        X = inc_sup
        Y = inc_fix
    else:
        key1 = "total_capinc"
        X = inc_fix
        Y = inc_sup

    # get tax rates for each point in the income support and plot
    fig, ax = plt.subplots()
    for i, tax_params in enumerate(tax_param_list):
        tax_params = tax_params[rate_key][t][s]
        rates = txfunc.get_tax_rates(
            tax_params,
            X,
            Y,
            None,
            tax_func_type[i],
            rate_type,
            for_estimation=False,
        )
        plt.plot(inc_sup, rates, label=labels[i])

    # plot raw data (if passed)
    if data_list is not None:
        rate_type_dict = {
            "etr": "etr",
            "mtrx": "mtr_labinc",
            "mtry": "mtr_capinc",
        }
        rate_col = rate_type_dict[rate_type]
        # set number of bins to 100 or bins of $1000 dollars.  pd.cut
        # needs a positive integer, so floor at one bin (small
        # max_inc_amt) and cast to int (float max_inc_amt).
        n_bins = max(1, int(min(100, max_inc_amt // 1000)))
        # censor data to range of the plot
        for data in data_list:
            data_to_plot = data[str(year)].copy()
            if age is not None:
                data_to_plot.drop(
                    data_to_plot[data_to_plot["age"] != age].index,
                    inplace=True,
                )
            # other censoring
            data_to_plot.drop(
                data_to_plot[data_to_plot[key1] > max_inc_amt].index,
                inplace=True,
            )
            # other censoring used in txfunc.py
            data_to_plot = txfunc.tax_data_sample(data_to_plot)
            # compute weighted averages of the rate and income by bin
            data_to_plot["inc_bin"] = pd.cut(data_to_plot[key1], n_bins)
            groups = data_to_plot.groupby("inc_bin", observed=True).apply(
                _weighted_mean_by_bin, [rate_col, key1]
            )
            plt.scatter(groups[key1], groups[rate_col], alpha=0.1)

    # add legend, labels, etc to plot
    plt.legend(loc="center right")
    if title:
        plt.title(title)
    if over_labinc:
        plt.xlabel(r"Labor income")
    else:
        plt.xlabel(r"Capital income")
    plt.ylabel(VAR_LABELS[rate_type])
    if path is None:
        return fig
    else:
        plt.savefig(path, dpi=300)