Source code for ogcore.parameter_plots

import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.ticker as mticker
from ogcore.constants import GROUP_LABELS
from ogcore import utils, txfunc
from ogcore.constants import DEFAULT_START_YEAR, VAR_LABELS



[docs]
def plot_imm_rates(
    imm_rates,
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plot immigration rates by age from the data.

    Args:
        imm_rates (NumPy array): immigration rates; row ``t`` holds the
            rates by age for the year ``start_year + t``
        start_year (int): first year of data (row 0 of ``imm_rates``)
        years_to_plot (list): list of years to plot, each must be no
            earlier than ``start_year``
        include_title (bool): whether to include a title in the plot
        source (str): data source for immigration rates
        path (str): path to save figure to, if None then figure
            is returned

    Returns:
        fig (Matplotlib plot object): plot of immigration rates (only
            returned when ``path`` is not given)

    Raises:
        ValueError: if a requested year is earlier than ``start_year``

    """
    # Reject years that precede the data: a negative row offset would
    # silently wrap around and plot a row from the end of the array
    for y in years_to_plot:
        if y < start_year:
            raise ValueError(
                "year " + str(y) + " is before start_year " + str(start_year)
            )
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row offset is the number of years elapsed since start_year
        t = y - start_year
        plt.plot(imm_rates[t, :], c="blue", label="Year " + str(y))
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Immigration rate $i_{s}$")
    plt.legend(loc="upper left")
    # Source note sits below the axes, in data coordinates
    plt.text(
        -5,
        -0.05,
        "Source: " + source,
        fontsize=9,
    )
    # Leave a strip at the bottom of the figure for the source note
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    if include_title:
        plt.title("Immigration Rates")
    # Save or return figure
    if path:
        output_path = os.path.join(path, "imm_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig




[docs]
def plot_mort_rates(
    p_list,
    labels=[""],
    years=[DEFAULT_START_YEAR],
    survival_rates=False,
    include_title=False,
    path=None,
):
    """
    Plot mortality rates by age, or the cumulative survival rates they
    imply, for one or more OG-Core parameterizations.

    Args:
        p_list (list): list of parameters objects; the first one sets
            the age axis and the year-to-row offset
        labels (list): legend label prefix for each entry of ``p_list``
        years (list): years to plot
        survival_rates (bool): plot cumulative survival rates instead
            of mortality rates
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of mortality (or survival)
            rates, returned only when ``path`` is None

    """
    base = p_list[0]
    ages = np.linspace(base.E, base.E + base.S, base.S)
    fig, ax = plt.subplots()
    for year in years:
        row = year - base.start_year
        for idx, params in enumerate(p_list):
            rho_t = params.rho[row, :]
            # Survival to each age is the running product of (1 - rho)
            series = np.cumprod(1 - rho_t) if survival_rates else rho_t
            ax.plot(ages, series, label=labels[idx] + " " + str(year))
    ax.set_xlabel(r"Age $s$ (model periods)")

    # Everything that differs between the two plot flavours
    if survival_rates:
        y_text = r"Cumulative Survival Rates"
        legend_loc = "lower left"
        title = "Survival Rates"
        file_stem = "survival_rates"
    else:
        y_text = r"Mortality Rates $\rho_{s}$"
        legend_loc = "upper left"
        title = "Mortality Rates"
        file_stem = "mortality_rates"
    ax.set_ylabel(y_text)
    ax.legend(loc=legend_loc)

    # Pin the tick positions before relabelling them as percentages
    tick_values = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_values))
    ax.set_yticklabels([format(tick, ",.0%") for tick in tick_values])
    if include_title:
        ax.set_title(title)

    if path is None:
        return fig
    plt.savefig(os.path.join(path, file_stem), dpi=300)




[docs]
def plot_pop_growth(
    p,
    start_year=DEFAULT_START_YEAR,
    num_years_to_plot=150,
    include_title=False,
    path=None,
):
    """
    Plot the population growth rate for a run of consecutive years.

    Args:
        p (OG-Core Specifications class): parameters object
        start_year (integer): first year to plot
        num_years_to_plot (integer): how many years to plot
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of population growth rates,
            returned only when ``path`` is None

    """
    assert isinstance(start_year, int)
    assert isinstance(num_years_to_plot, int)
    # Slice of p.g_n that corresponds to the requested calendar years
    first = start_year - p.start_year
    last = first + num_years_to_plot
    calendar_years = np.arange(start_year, start_year + num_years_to_plot)

    fig, ax = plt.subplots()
    ax.plot(calendar_years, p.g_n[first:last])
    ax.set_xlabel(r"Year $t$")
    ax.set_ylabel(r"Population Growth Rate $g_{n, t}$")

    # Pin the tick positions before relabelling them as percentages
    tick_values = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_values))
    ax.set_yticklabels([format(tick, ",.2%") for tick in tick_values])
    if include_title:
        ax.set_title("Population Growth Rates")

    if path is None:
        return fig
    plt.savefig(os.path.join(path, "pop_growth_rates"), dpi=300)




[docs]
def plot_population(p, years_to_plot=["SS"], include_title=False, path=None):
    """
    Plot the age distribution of the population for selected years.

    Args:
        p (OG-Core Specifications class): parameters object
        years_to_plot (list): years to plot; each entry is either an
            integer year no earlier than ``p.start_year`` or the string
            'SS' for the steady-state distribution
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of population distribution,
            returned only when ``path`` is None

    """
    # Validate every request up front, before any figure is created
    for entry in years_to_plot:
        if isinstance(entry, int):
            assert entry >= p.start_year
        else:
            assert entry == "SS"

    ages = np.arange(p.E, p.S + p.E)
    fig, ax = plt.subplots()
    for entry in years_to_plot:
        if entry == "SS":
            distribution = p.omega_SS
        else:
            distribution = p.omega[entry - p.start_year, :]
        ax.plot(ages, distribution, label=str(entry) + " pop.")
    ax.set_xlabel(r"Age $s$")
    ax.set_ylabel(r"Pop. dist'n $\omega_{s}$")
    ax.legend(loc="lower left")
    if include_title:
        ax.set_title("Population Distribution by Year")

    if path is None:
        return fig
    plt.savefig(os.path.join(path, "pop_distribution"), dpi=300)




[docs]
def plot_ability_profiles(
    p, p2=None, t=None, log_scale=False, include_title=False, path=None
):
    """
    Plot earnings ability over the lifecycle for each ability group.

    Args:
        p (OG-Core Specifications class): parameters object
        p2 (OG-Core Specifications class): optional second parameters
            object; its profiles are overlaid as dashed lines
        t (int): model period to plot; if None the last period of the
            ability matrix is used
        log_scale (bool): whether to plot the natural log of ability
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of earnings ability
            profiles, returned only when ``path`` is None

    """
    period = -1 if t is None else t
    ages = np.arange(p.starting_age, p.starting_age + p.S)
    fig, ax = plt.subplots()
    # One colour per ability group, spread across the colormap
    palette = plt.get_cmap("coolwarm")
    ax.set_prop_cycle(color=[palette(float(k) / p.J) for k in range(p.J)])

    # The first set of profiles keeps the default line style; the
    # optional second set is dashed
    overlays = [(p, {})]
    if p2 is not None:
        overlays.append((p2, {"linestyle": "--"}))
    for params, line_kwargs in overlays:
        for j in range(p.J):
            profile = params.e[period, :, j]
            if log_scale:
                profile = np.log(profile)
            ax.plot(ages, profile, label=GROUP_LABELS[p.J][j], **line_kwargs)

    ax.set_xlabel(r"Age")
    ax.set_ylabel(r"ln(Earnings ability)" if log_scale else r"Earnings ability")
    # Legend goes below the axes
    ax.legend(loc=9, bbox_to_anchor=(0.5, -0.15), ncols=5)
    if include_title:
        ax.set_title("Lifecycle Profiles of Effective Labor Units")

    if path is None:
        return fig
    plt.savefig(
        os.path.join(path, "ability_profiles"), bbox_inches="tight", dpi=300
    )




[docs]
def plot_elliptical_u(p, plot_MU=True, include_title=False, path=None):
    """
    Compare the elliptical disutility of labor with the constant Frisch
    elasticity (CFE) form over a grid of labor supply values.

    Args:
        p (OG-Core Specifications class): parameters object; the
            attributes ``frisch``, ``ltilde``, ``b_ellipse`` and
            ``upsilon`` are read
        plot_MU (boolean): plot marginal disutility if True, otherwise
            plot disutility in levels
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of elliptical vs CFE
            utility, returned only when ``path`` is None

    """
    theta = 1 / p.frisch
    labor = np.linspace(0.01, 0.8, num=101)
    # Labor supply scaled by p.ltilde
    share = labor / p.ltilde

    if plot_MU:
        cfe_curve = (1.0 / p.ltilde) * (share**theta)
        ellipse_curve = (
            1.0
            * p.b_ellipse
            * (1.0 / p.ltilde)
            * ((1.0 - share**p.upsilon) ** ((1.0 / p.upsilon) - 1.0))
            * share ** (p.upsilon - 1.0)
        )
        y_text = r"Marginal disutility"
        title = "Marginal Utility of CFE and Elliptical"
    else:
        cfe_curve = (share ** (1 + theta)) / (1 + theta)
        shift = 1.0  # additive constant, fixed here rather than read from p
        ellipse_curve = (
            p.b_ellipse * ((1 - (share**p.upsilon)) ** (1 / p.upsilon))
            + shift
        )
        y_text = r"Disutility"
        title = "Constant Frisch Elasticity vs. Elliptical Utility"

    fig, ax = plt.subplots()
    ax.plot(labor, cfe_curve, label="Constant Frisch elasticity")
    ax.plot(labor, ellipse_curve, label="Elliptical disutility")
    if include_title:
        ax.set_title(title)
    ax.set_xlabel(r"Labor Supply $n_{j,s,t}$")
    ax.set_ylabel(y_text)
    ax.legend(loc="upper left")
    ax.grid(color="gray", linestyle=":", linewidth=1, alpha=0.5)

    if path is None:
        return fig
    plt.savefig(os.path.join(path, "ellipse_v_CFE"), dpi=300)




[docs]
def plot_chi_n(
    p_list,
    labels=[""],
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    path=None,
):
    """
    Plot the chi_n parameter values by age for one or more
    parameterizations and years.

    Args:
        p_list (list): parameters objects; the first one sets the age
            axis
        labels (list): legend label prefix for each entry of ``p_list``
        years_to_plot (list): list of years to plot
        include_title (boolean): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of chi_n parameters,
            returned only when ``path`` is None

    """
    base = p_list[0]
    ages = np.linspace(base.starting_age, base.ending_age, base.S)
    fig, ax = plt.subplots()
    for year in years_to_plot:
        for idx, params in enumerate(p_list):
            # Each parameterization is offset by its own start year
            row = year - params.start_year
            ax.plot(
                ages,
                params.chi_n[row, :],
                label=labels[idx] + " " + str(year),
            )
    if include_title:
        ax.set_title("Utility Weight on the Disutility of Labor Supply")
    ax.set_xlabel("Age, $s$")
    ax.set_ylabel(r"$\chi^{n}_{s}$")

    if path is None:
        return fig
    plt.savefig(os.path.join(path, "chi_n_values"), dpi=300)




[docs]
def plot_fert_rates(
    fert_rates_list,
    labels=[""],
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    include_title=False,
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plot fertility rates by age from the data.

    Args:
        fert_rates_list (list): list of Numpy arrays of fertility rates
            for each model period and age; row ``t`` of each array holds
            the rates for the year ``start_year + t``
        labels (list): list of labels for the legend, one per entry of
            ``fert_rates_list``
        start_year (int): first year of data (row 0 of each array)
        years_to_plot (list): list of years to plot, each must be no
            earlier than ``start_year``
        include_title (bool): whether to include a title in the plot
        source (str): data source for fertility rates
        path (str): path to save figure to, if None then figure
            is returned

    Returns:
        fig (Matplotlib plot object): plot of fertility rates (only
            returned when ``path`` is not given)

    Raises:
        ValueError: if a requested year is earlier than ``start_year``

    """
    # Reject years that precede the data: a negative row offset would
    # silently wrap around and plot a row from the end of the array
    for y in years_to_plot:
        if y < start_year:
            raise ValueError(
                "year " + str(y) + " is before start_year " + str(start_year)
            )
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row offset for this year; kept in its own name so the
        # enumerate index below cannot overwrite it
        t = y - start_year
        for i, fert_rates in enumerate(fert_rates_list):
            plt.plot(fert_rates[t, :], label=labels[i] + " " + str(y))
    if include_title:
        plt.title("Fertility rates by age ($f_{s}$)", fontsize=20)
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Fertility rate $f_{s}$")
    plt.legend(loc="upper right")
    # Source note sits below the axes, in data coordinates
    plt.text(
        -5,
        -0.023,
        "Source: " + source,
        fontsize=9,
    )
    # Leave a strip at the bottom of the figure for the source note
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    # Save or return figure
    if path:
        output_path = os.path.join(path, "fert_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig




[docs]
def plot_mort_rates_data(
    mort_rates,
    start_year=DEFAULT_START_YEAR,
    years_to_plot=[DEFAULT_START_YEAR],
    source="United Nations, World Population Prospects",
    path=None,
):
    """
    Plots mortality rates by age from the data.

    Args:
        mort_rates (array_like): mortality rates; row ``t`` holds the
            rates by age for the year ``start_year + t``
        start_year (int): first year of data (row 0 of ``mort_rates``)
        years_to_plot (list): list of years to plot, each must be no
            earlier than ``start_year``
        source (str): data source for mortality rates
        path (str): path to save figure to, if None then figure
            is returned

    Returns:
        fig (Matplotlib plot object): plot of mortality rates (only
            returned when ``path`` is not given)

    Raises:
        ValueError: if a requested year is earlier than ``start_year``

    """
    # Reject years that precede the data: a negative row offset would
    # silently wrap around and plot a row from the end of the array
    for y in years_to_plot:
        if y < start_year:
            raise ValueError(
                "year " + str(y) + " is before start_year " + str(start_year)
            )
    fig, ax = plt.subplots()
    for y in years_to_plot:
        # Row offset is the number of years elapsed since start_year
        t = y - start_year
        plt.plot(mort_rates[t, :], c="blue", label="Year " + str(y))
    plt.xlabel(r"Age $s$")
    # The backslash is required for mathtext to render the Greek letter
    plt.ylabel(r"Mortality rate $\rho_{s}$")
    plt.legend(loc="upper left")
    # Source note sits below the axes, in data coordinates
    plt.text(
        -5,
        -0.223,
        "Source: " + source,
        fontsize=9,
    )
    # Leave a strip at the bottom of the figure for the source note
    plt.tight_layout(rect=(0, 0.035, 1, 1))
    # Save or return figure
    if path:
        output_path = os.path.join(path, "mort_rates")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        fig.show()
        return fig




[docs]
def plot_g_n(p_list, label_list=[""], include_title=False, path=None):
    """
    Plot population growth rates over the time path for one or more
    OG-Core parameterizations.

    Args:
        p_list (list): list of OG-Core Specifications objects; the
            first one sets the year axis
        label_list (list): legend label for each entry of ``p_list``;
            the legend is drawn only when the first label is non-empty
        include_title (bool): whether to add a title to the plot
        path (string): directory to save the figure in; if None the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of population growth rates,
            returned only when ``path`` is None

    """
    base = p_list[0]
    calendar_years = np.arange(base.start_year, base.start_year + base.T)
    fig, ax = plt.subplots()
    for idx, params in enumerate(p_list):
        ax.plot(calendar_years, params.g_n[: params.T], label=label_list[idx])
    ax.set_xlabel(r"Year $s$ (model periods)")
    ax.set_ylabel(r"Population Growth Rate $g_{n,t}$")
    if label_list[0] != "":
        ax.legend(loc="upper right")

    # Pin the tick positions before relabelling them as percentages
    tick_values = ax.get_yticks().tolist()
    ax.yaxis.set_major_locator(mticker.FixedLocator(tick_values))
    ax.set_yticklabels([format(tick, ",.0%") for tick in tick_values])
    if include_title:
        ax.set_title("Population Growth Rates")

    if path is None:
        return fig
    plt.savefig(os.path.join(path, "pop_growth_rates"), dpi=300)




[docs]
def plot_omega_fixed(age_per_EpS, omega_SS_orig, omega_SSfx, E, S, path=None):
    """
    Plot two steady-state population distributions against age: the
    original one and the "fixed" one obtained after immigration rates
    are adjusted.

    Args:
        age_per_EpS (array_like): ages over which to plot the
            population distribution
        omega_SS_orig (Numpy array): population distribution in SS
            without adjustment to immigration rates
        omega_SSfx (Numpy array): population distribution in SS
            after adjustment to immigration rates
        E (int): age at which household becomes economically active
        S (int): number of years which household is economically active
        path (str): directory to save the figure in; if not given the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of SS population
            distribution before and after adjustment to immigration
            rates, returned only when ``path`` is not given

    """
    fig, ax = plt.subplots()
    curves = (
        (omega_SS_orig, "Original Dist'n"),
        (omega_SSfx, "Fixed Dist'n"),
    )
    for distribution, legend_text in curves:
        ax.plot(age_per_EpS, distribution, label=legend_text)
    ax.set_title("Original steady-state population distribution vs. fixed")
    ax.set_xlabel(r"Age $s$")
    ax.set_ylabel(r"Pop. dist'n $\omega_{s}$")
    ax.set_xlim((0, E + S + 1))
    ax.legend(loc="upper right")

    # Return the figure unless a save location was supplied
    if not path:
        return fig
    plt.savefig(os.path.join(path, "OrigVsFixSSpop"), dpi=300)
    plt.close()




[docs]
def plot_imm_fixed(
    age_per_EpS, imm_rates_orig, imm_rates_adj, E, S, path=None
):
    """
    Plot two sets of immigration rates against age: the original rates
    and the adjusted rates.

    Args:
        age_per_EpS (array_like): ages over which to plot the rates
        imm_rates_orig (Numpy array): immigration rates by age
        imm_rates_adj (Numpy array): adjusted immigration rates by age
        E (int): age at which household becomes economically active
        S (int): number of years which household is economically active
        path (str): directory to save the figure in; if not given the
            figure is returned instead

    Returns:
        fig (Matplotlib plot object): plot of the original and the
            adjusted immigration rates, returned only when ``path`` is
            not given

    """
    fig, ax = plt.subplots()
    curves = (
        (imm_rates_orig, "Original Imm. Rates"),
        (imm_rates_adj, "Adj. Imm. Rates"),
    )
    for rates, legend_text in curves:
        ax.plot(age_per_EpS, rates, label=legend_text)
    ax.set_title("Original immigration rates vs. adjusted")
    ax.set_xlabel(r"Age $s$")
    ax.set_ylabel(r"Imm. rates $i_{s}$")
    ax.set_xlim((0, E + S + 1))
    ax.legend(loc="upper center")

    # Return the figure unless a save location was supplied
    if not path:
        return fig
    plt.savefig(os.path.join(path, "OrigVsAdjImm"), dpi=300)
    plt.close()




[docs]
def plot_population_path(
    age_per_EpS,
    omega_path_lev,
    omega_SSfx,
    start_year,
    year1,
    year2,
    S,
    path=None,
):
    """
    Plot the distribution of the population over age for various years.

    Five curves are drawn: the normalized distribution in ``year1``, in
    ``year2``, in periods ``int(0.5 * S)`` and ``int(S)`` of the time
    path, and the adjusted steady-state distribution.

    Args:
        age_per_EpS (array_like): list of ages over which to plot
            population distribution
        omega_path_lev (Numpy array): number of households by age
            over the transition path; row ``t`` is the year
            ``start_year + t``
        omega_SSfx (Numpy array): number of households by age
            in the SS
        start_year (int): first year of data (so can get index of year1
            and year2)
        year1 (int): first year of data to plot, no earlier than
            ``start_year``
        year2 (int): second year of data to plot, no earlier than
            ``start_year``
        S (int): number of years which household is economically active
        path (str): path to save figure to, if None then figure
            is returned

    Returns:
        fig (Matplotlib plot object): plot of population distribution
            at points along the time path (only returned when ``path``
            is not given)

    Raises:
        ValueError: if ``year1`` or ``year2`` is earlier than
            ``start_year``

    """
    # Reject years that precede the data: a negative row offset would
    # silently wrap around and plot a row from the end of the path
    for year in (year1, year2):
        if year < start_year:
            raise ValueError(
                "year "
                + str(year)
                + " is before start_year "
                + str(start_year)
            )
    # (row of omega_path_lev, legend label) for each point on the path
    snapshots = [
        (year1 - start_year, str(year1) + " pop."),
        (year2 - start_year, str(year2) + " pop."),
        (int(0.5 * S), "T=" + str(int(0.5 * S)) + " pop."),
        (int(S), "T=" + str(int(S)) + " pop."),
    ]
    fig, ax = plt.subplots()
    for row, label in snapshots:
        levels = omega_path_lev[row, :]
        # Normalize levels to shares so the curves are comparable
        plt.plot(age_per_EpS, levels / levels.sum(), label=label)
    plt.plot(age_per_EpS, omega_SSfx, label="Adj. SS pop.")
    plt.title("Population distribution at points in time path")
    plt.xlabel(r"Age $s$")
    plt.ylabel(r"Pop. dist'n $\omega_{s}$")
    plt.legend(loc="lower left")
    # Save or return figure
    if path:
        output_path = os.path.join(path, "PopDistPath")
        plt.savefig(output_path, dpi=300)
        plt.close()
    else:
        return fig




[docs]
def gen_3Dscatters_hist(df, s, t, output_dir):
    """
    Create 3-D scatterplots and corresponding 3D histogram of ETR, MTRx,
    and MTRy as functions of labor income and capital income with
    truncated data in the income dimension

    Four PNG files are written to ``output_dir``: scatterplots of the
    ETR, the MTR on labor income and the MTR on capital income, and a
    histogram of the share of observations by income bin.

    Args:
        df (Pandas DataFrame): 11 variables with N observations of tax
            rates; the columns ``total_labinc``, ``total_capinc``,
            ``etr``, ``mtr_labinc`` and ``mtr_capinc`` are used
        s (int): age of individual, >= 21
        t (int): year of analysis, >= 2016
        output_dir (str): output directory for saving plot files

    Returns:
        None

    """
    from ogcore.txfunc import MAX_INC_GRAPH, MIN_INC_GRAPH

    # Truncate the data to the open income window in both dimensions
    df_trnc = df[
        (df["total_labinc"] > MIN_INC_GRAPH)
        & (df["total_labinc"] < MAX_INC_GRAPH)
        & (df["total_capinc"] > MIN_INC_GRAPH)
        & (df["total_capinc"] < MAX_INC_GRAPH)
    ]
    inc_lab = df_trnc["total_labinc"]
    inc_cap = df_trnc["total_capinc"]
    age_year = ", Age=" + str(s) + ", Year=" + str(t)
    file_tag = str(s) + "_Year_" + str(t)

    # Plot 3D scatterplot of ETR data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["etr"],
        "ETR",
        "ETR, Lab. Inc., and Cap. Inc." + age_year,
        os.path.join(output_dir, "ETR_age_" + file_tag + "_data.png"),
    )

    # Plot 3D histogram for all data
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    bin_num = 30
    hist, xedges, yedges = np.histogram2d(inc_lab, inc_cap, bins=bin_num)
    hist = hist / hist.sum()
    dx = xedges[1] - xedges[0]
    dy = yedges[1] - yedges[0]
    # bar3d takes the anchor corner of each bar, which then extends by
    # dx and dy, so anchor at the lower bin edges for each bar to cover
    # exactly its own bin. "ij" indexing keeps the flattened positions
    # aligned with hist, whose first axis is the x bin.
    xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
    ax.bar3d(
        xpos.ravel(),
        ypos.ravel(),
        np.zeros(hist.size),
        dx,
        dy,
        hist.ravel(),
        color="b",
        zsort="average",
    )
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel("Percent of obs.")
    plt.title("Histogram by lab. inc., and cap. inc." + age_year)
    fullpath = os.path.join(output_dir, "Hist_Age_" + file_tag + ".png")
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()

    # Plot 3D scatterplot of MTRx data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["mtr_labinc"],
        "Marginal Tax Rate (Labor Inc.)",
        "MTR Labor Income, Lab. Inc., and Cap. Inc." + age_year,
        os.path.join(output_dir, "MTRx_Age_" + file_tag + "_data.png"),
    )

    # Plot 3D scatterplot of MTRy data
    _save_3d_scatter(
        inc_lab,
        inc_cap,
        df_trnc["mtr_capinc"],
        "Marginal Tax Rate (Capital Inc.)",
        "MTR Capital Income, Lab. Inc., and Cap. Inc." + age_year,
        os.path.join(output_dir, "MTRy_Age_" + file_tag + "_data.png"),
    )


def _save_3d_scatter(inc_lab, inc_cap, rates, zlabel, title, fullpath):
    """Scatter ``rates`` over labor and capital income in 3D and save
    the figure to ``fullpath``."""
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(inc_lab, inc_cap, rates, c="r", marker="o")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel(zlabel)
    plt.title(title)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()




[docs]
def txfunc_graph(
    s,
    t,
    df,
    X,
    Y,
    txrates,
    rate_type,
    tax_func_type,
    params_to_plot,
    output_dir,
):
    """
    This function creates 3D plots of the fitted tax function against
    the data: one over the full income domain and one over a truncated
    income domain. Both are saved as PNG files in ``output_dir``.

    Args:
        s (int): age of individual, >= 21
        t (int): year of analysis, >= 2016
        df (Pandas DataFrame): 11 variables with N observations of tax
            rates; the columns ``total_labinc``, ``total_capinc`` and
            the rate column matching ``rate_type`` are used for the
            truncated plot
        X (Pandas DataSeries): labor income
        Y (Pandas DataSeries): capital income
        txrates (Pandas DataSeries): tax rates from the data
        rate_type (str): type of tax rate: mtrx, mtry, etr
        tax_func_type (str): functional form of tax functions
        params_to_plot (array_like or function): tax function parameters or
            nonparametric function
        output_dir (str): output directory for saving plot files

    Returns:
        None

    Raises:
        ValueError: if ``rate_type`` is not one of etr, mtrx, mtry

    """
    # Axis label and DataFrame column for each supported rate type
    rate_info = {
        "etr": ("ETR", "etr"),
        "mtrx": ("MTRx", "mtr_labinc"),
        "mtry": ("MTRy", "mtr_capinc"),
    }
    # Fail before any figure is created rather than part-way through
    if rate_type not in rate_info:
        raise ValueError(
            "rate_type must be one of 'etr', 'mtrx', 'mtry', got "
            + repr(rate_type)
        )
    tx_label, rate_col = rate_info[rate_type]
    # Colormap registry lookup; matplotlib.cm.get_cmap no longer exists
    # in current Matplotlib releases
    cmap1 = matplotlib.colormaps["summer"]
    age_year = "Age=" + str(s) + ", Year=" + str(t)

    # Make comparison plot with full income domains
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(X, Y, txrates, c="r", marker="o")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel(tx_label)
    plt.title(tx_label + " vs. Predicted " + tx_label + ": " + age_year)
    _plot_fitted_tax_surface(
        ax,
        X.max(),
        Y.max(),
        params_to_plot,
        tax_func_type,
        rate_type,
        cmap1,
    )
    filename = tx_label + "_age_" + str(s) + "_Year_" + str(t) + "_vsPred.png"
    fullpath = os.path.join(output_dir, filename)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()

    # Make comparison plot with truncated income domains
    df_trnc_gph = df[
        (df["total_labinc"] > 5)
        & (df["total_labinc"] < 800000)
        & (df["total_capinc"] > 5)
        & (df["total_capinc"] < 800000)
    ]
    X_gph = df_trnc_gph["total_labinc"]
    Y_gph = df_trnc_gph["total_capinc"]
    txrates_gph = df_trnc_gph[rate_col]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(X_gph, Y_gph, txrates_gph, c="r", marker="o")
    ax.set_xlabel("Total Labor Income")
    ax.set_ylabel("Total Capital Income")
    ax.set_zlabel(tx_label)
    plt.title(
        "Truncated " + tx_label + ", Lab. Inc., and Cap. Inc., " + age_year
    )
    _plot_fitted_tax_surface(
        ax,
        X_gph.max(),
        Y_gph.max(),
        params_to_plot,
        tax_func_type,
        rate_type,
        cmap1,
    )
    filename = (
        tx_label + "trunc_age_" + str(s) + "_Year_" + str(t) + "_vsPred.png"
    )
    fullpath = os.path.join(output_dir, filename)
    fig.savefig(fullpath, bbox_inches="tight", dpi=300)
    plt.close()


def _plot_fitted_tax_surface(
    ax, x_max, y_max, params_to_plot, tax_func_type, rate_type, cmap
):
    """Draw the fitted tax-rate surface on ``ax`` over a log-spaced
    income grid running from 5 up to ``x_max`` and ``y_max``."""
    gridpts = 50
    X_vec = np.exp(np.linspace(np.log(5), np.log(x_max), gridpts))
    Y_vec = np.exp(np.linspace(np.log(5), np.log(y_max), gridpts))
    X_grid, Y_grid = np.meshgrid(X_vec, Y_vec)
    txrate_grid = txfunc.get_tax_rates(
        params_to_plot,
        X_grid,
        Y_grid,
        None,
        tax_func_type,
        rate_type,
        for_estimation=False,
    )
    ax.plot_surface(X_grid, Y_grid, txrate_grid, cmap=cmap, linewidth=0)




[docs]
def txfunc_sse_plot(age_vec, sse_mat, start_year, varstr, output_dir, round):
    """
    Plot the sum of squared errors of the tax functions against age,
    one line per year of the budget window, and save the figure.

    Args:
        age_vec (numpy array): vector of ages, length S
        sse_mat (Numpy array): SSE for each estimated tax function,
            size is BW x S
        start_year (int): first year of budget window
        varstr (str): name of tax function being evaluated
        output_dir (str): directory to save graph to
        round (int): which round of sweeping for outliers (0, 1, or 2)

    Returns:
        None

    """
    fig, ax = plt.subplots()
    # One line per row of sse_mat, labelled with its calendar year
    for year_idx in range(sse_mat.shape[0]):
        ax.plot(
            age_vec, sse_mat[year_idx, :], label=str(start_year + year_idx)
        )
    ax.legend(loc="upper left")
    round_text = str(round)
    ax.set_title(
        "Sum of Squared Errors by age and Tax Year minus outliers (Round "
        + round_text
        + "): "
        + varstr
    )
    ax.set_xlabel(r"age $s$")
    ax.set_ylabel(r"SSE")
    output_path = os.path.join(
        output_dir, "SSE_" + varstr + "_Round" + round_text
    )
    plt.savefig(output_path, bbox_inches="tight", dpi=300)
    plt.close()




[docs]
def plot_income_data(
    ages, abil_midp, abil_pcts, emat, t=None, path=None, filesuffix=""
):
    """
    This function graphs ability matrix in 3D, 2D, log, and nolog

    When ``path`` is given, figures are saved in that directory (it is
    created if needed):

        * J = 1: 2D plots of ability by age in levels
          ("ability_2D_lev") and in logs ("ability_2D_log")
        * J > 1: 3D surfaces in levels ("ability_3D_lev") and in logs
          ("ability_3D_log") and, if J <= 10, a 2D plot of log ability
          by age with one line per group ("ability_2D_log")

    When ``path`` is not given, only the 2D log line plot is drawn (if
    J <= 10) and its axes are returned instead of being saved.

    Args:
        ages (Numpy array) ages represented in sample, length S
        abil_midp (Numpy array): midpoints of income percentile bins in
            each ability group
        abil_pcts (Numpy array): percent of population in each lifetime
            income group, length J
        emat (Numpy array): effective labor units by age and lifetime
            income group, size TxSxJ
        t (int): model period for year, if None, then the last period
            (index -1) is plotted, i.e. the SS values
        path (str): directory to save figures to, if None (or empty)
            nothing is saved and the axes of the 2D plot are returned
        filesuffix (str): suffix to be added to plot files

    Returns:
        ax (Matplotlib axes object): axes of the 2D log ability plot
            if path is not given and J <= 10, otherwise None

    """
    if t is None:
        t = -1
    J = abil_midp.shape[0]

    if not path:
        # Restricted because of line and marker types
        if J <= 10:
            return _plot_log_ability_lines(ages, abil_pcts, emat, t, J)
        return None

    # Make sure that directory is created
    utils.mkdirs(path)

    if J == 1:
        # Plots of 2D, J=1, first in levels and then in logs
        for stem, in_logs in (
            ("ability_2D_lev", False),
            ("ability_2D_log", True),
        ):
            values = emat[t, :, :]
            if in_logs:
                values = np.log(values)
            plt.figure()
            plt.plot(ages, values)
            plt.savefig(os.path.join(path, stem + filesuffix), dpi=300)
            plt.close()
        return None

    abil_mesh, age_mesh = np.meshgrid(abil_midp, ages)
    cmap1 = matplotlib.colormaps["summer"]

    # Plot of 3D, J>1 in levels
    _save_ability_surface(
        age_mesh,
        abil_mesh,
        emat[t, :, :],
        r"ability $e_{j,s}$",
        cmap1,
        os.path.join(path, "ability_3D_lev" + filesuffix),
    )
    # Plot of 3D, J>1 in logs
    _save_ability_surface(
        age_mesh,
        abil_mesh,
        np.log(emat[t, :, :]),
        r"log ability $log(e_{j,s})$",
        cmap1,
        os.path.join(path, "ability_3D_log" + filesuffix),
    )

    if J <= 10:  # Restricted because of line and marker types
        # Plot of 2D lines from 3D version in logs
        _plot_log_ability_lines(ages, abil_pcts, emat, t, J)
        plt.savefig(
            os.path.join(path, "ability_2D_log" + filesuffix), dpi=300
        )
        plt.close()
    return None


def _save_ability_surface(
    age_mesh, abil_mesh, values, zlabel, cmap, fullpath
):
    """
    Draw a 3D surface of ``values`` over the age/ability mesh on a new
    figure, save it to ``fullpath`` and close the figure.
    """
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
    ax.plot_surface(
        age_mesh,
        abil_mesh,
        values,
        rstride=8,
        cstride=1,
        cmap=cmap,
    )
    ax.set_xlabel(r"age-$s$")
    ax.set_ylabel(r"ability type -$j$")
    ax.set_zlabel(zlabel)
    plt.savefig(fullpath, dpi=300)
    plt.close()


def _plot_log_ability_lines(ages, abil_pcts, emat, t, J):
    """
    Plot log ability by age for period ``t`` with one black line per
    ability group and return the axes.  Supports at most 10 groups:
    the first four are told apart by line style, the rest by marker.
    """
    linestyles = ("-", "--", "-.", ":")
    markers = ("x", "v", "o", "d", ">", "|")
    ax = plt.subplot(111)
    pct_lb = 0
    for j in range(J):
        # Legend label is the percentile range covered by group j
        pct_ub = pct_lb + 100 * abil_pcts[j]
        this_label = f"{int(np.rint(pct_lb))} - {int(np.rint(pct_ub))}%"
        pct_lb = pct_ub
        if j < len(linestyles):
            style = {"linestyle": linestyles[j]}
        else:
            style = {"marker": markers[j - len(linestyles)]}
        ax.plot(
            ages,
            np.log(emat[t, :, j]),
            label=this_label,
            color="black",
            **style,
        )
    # Vertical reference line at age 80 (hard coded)
    ax.axvline(x=80, color="black", linestyle="--")
    # Shrink the axes to 80% width so the legend fits to the right
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    ax.set_xlabel(r"age-$s$")
    ax.set_ylabel(r"log ability $log(e_{j,s})$")
    return ax




[docs]
def plot_2D_taxfunc(
    year,
    start_year,
    tax_param_list,
    age=None,
    E=21,  # Age at which agents become economically active in the model
    tax_func_type=["DEP"],
    rate_type="etr",
    over_labinc=True,
    other_inc_val=1000,
    max_inc_amt=1000000,
    data_list=None,
    labels=["1st Functions"],
    title=None,
    path=None,
):
    """
    This function plots OG-Core tax functions in two dimensions.
    The tax rates are plotted over capital or labor income, as
    entered by the user.

    Args:
        year (int): year of policy tax functions represent
        start_year (int): first year tax functions estimated for in
            tax_param_list elements
        tax_param_list (list): list of dictionaries containing tax
            function parameters under the key
            "tfunc_<rate_type>_params_S", indexed by period and age
        age (int): age for tax functions to plot, use None if tax
            function parameters were not age specific
        E (int): age at which agents become economically active in the
            model, the age index used is age - E
        tax_func_type (list): list of strings in ["DEP", "DEP_totalinc",
            "GS", "linear", "mono", "mono2D"] and specifies functional
            form of tax functions in tax_param_list; if shorter than
            tax_param_list, the first entry is used for all of them
        rate_type (str): string that is in ["etr", "mtrx", "mtry"] and
            determines the type of tax rate that is plotted
        over_labinc (bool): indicates that x-axis of the plot is over
            labor income, if False then plot is over capital income
        other_inc_val (scalar): dollar value at which to hold constant
            the amount of income that is not represented on the x-axis
        max_inc_amt (scalar): largest income amount to represent on the
            x-axis of the plot
        data_list (list): list of DataFrames with data to scatter plot
            with tax functions, needs to be of format output from
            ogcore.get_micro_data.get_data
        labels (list): list of labels for tax function parameters, must
            be the same length as tax_param_list
        title (str): title for the plot
        path (str): path to which to save plot, if None then figure
            returned

    Returns:
        fig (Matplotlib plot object): plot of tax functions, returned
            only if path is None; otherwise the figure is saved to
            path and closed, and None is returned

    Raises:
        AssertionError: if any of the input checks fail

    """
    # Check that inputs are valid
    assert isinstance(start_year, int)
    assert isinstance(year, int)
    assert year >= start_year
    # if list of tax function types less than list of params, assume
    # all the same functional form (this rebinds the local name, the
    # list passed in, or the default, is never mutated)
    if len(tax_func_type) < len(tax_param_list):
        tax_func_type = [tax_func_type[0]] * len(tax_param_list)
    for func_type in tax_func_type:
        assert func_type in [
            "DEP",
            "DEP_totalinc",
            "GS",
            "linear",
            "mono",
            "mono2D",
        ]
    assert rate_type in ["etr", "mtrx", "mtry"]
    assert len(tax_param_list) == len(labels)

    # Set age and year to look at
    if age is not None:
        assert isinstance(age, int)
        assert age >= E
        # Note: assumed age is given in E + model periods (but age
        # below is also assumed to be calendar years)
        s = age - E
    else:
        s = 0  # if not age-specific, all ages have the same values
    t = year - start_year

    # create rate_key to correspond to keys in tax func dicts
    rate_key = "tfunc_" + rate_type + "_params_S"

    # Set income range to plot over (min income value hard coded to 5)
    inc_sup = np.exp(np.linspace(np.log(5), np.log(max_inc_amt), 100))
    # Set income value for other income
    inc_fix = other_inc_val

    if over_labinc:
        key1 = "total_labinc"
        X = inc_sup
        Y = inc_fix
    else:
        key1 = "total_capinc"
        X = inc_fix
        Y = inc_sup

    # get tax rates for each point in the income support and plot
    fig, ax = plt.subplots()
    for i, param_dict in enumerate(tax_param_list):
        rates = txfunc.get_tax_rates(
            param_dict[rate_key][t][s],
            X,
            Y,
            None,
            tax_func_type[i],
            rate_type,
            for_estimation=False,
        )
        plt.plot(inc_sup, rates, label=labels[i])

    # plot raw data (if passed)
    if data_list is not None:
        # name of the data column holding the rate being plotted
        rate_col = {
            "etr": "etr",
            "mtrx": "mtr_labinc",
            "mtry": "mtr_capinc",
        }[rate_type]
        # set number of bins to 100 or bins of $1000 dollars
        n_bins = min(100, np.floor_divide(max_inc_amt, 1000))

        # need to compute weighted averages by group...
        def weighted_mean(x, cols, w="weight"):
            try:
                return pd.Series(
                    np.average(x[cols], weights=x[w], axis=0), cols
                )
            except ZeroDivisionError:
                # np.average raises this when the weights sum to zero
                return 0

        # censor data to range of the plot
        for data in data_list:
            data_to_plot = data[str(year)].copy()
            if age is not None:
                data_to_plot.drop(
                    data_to_plot[data_to_plot["age"] != age].index,
                    inplace=True,
                )
            # other censoring
            data_to_plot.drop(
                data_to_plot[data_to_plot[key1] > max_inc_amt].index,
                inplace=True,
            )
            # other censoring used in txfunc.py
            data_to_plot = txfunc.tax_data_sample(data_to_plot)
            data_to_plot["inc_bin"] = pd.cut(data_to_plot[key1], n_bins)
            groups = data_to_plot.groupby("inc_bin", observed=True).apply(
                weighted_mean, [rate_col, key1]
            )
            plt.scatter(groups[key1], groups[rate_col], alpha=0.1)
    # add legend, labels, etc to plot
    plt.legend(loc="center right")
    if title:
        plt.title(title)
    if over_labinc:
        plt.xlabel(r"Labor income")
    else:
        plt.xlabel(r"Capital income")
    plt.ylabel(VAR_LABELS[rate_type])
    if path is None:
        return fig
    plt.savefig(path, dpi=300)
    # Close the saved figure so repeated calls do not accumulate open
    # figures, consistent with the other plotting functions
    plt.close(fig)
    return None