import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
import statsmodels.api as sm
from scipy.stats import bernoulli
from statsmodels.miscmodels.ordinal_model import OrderedModel
plt.rcParams["font.family"] = "Latin Modern Roman"
%config InlineBackend.figure_format = 'retina'  # high resolution figures
az.style.use("arviz-darkgrid")
rng = np.random.default_rng(42)
exog = sm.add_constant(df[["salary", "work_from_home", "work_sat"]])
mod = sm.OLS(df["explicit_rating"], exog)
results = mod.fit()
results.summary()

results.predict([1, 200, 1, 0.6])

fig, axs = plt.subplots(1, 2, figsize=(20, 6))
axs = axs.flatten()
ax = axs[1]
salaries = np.linspace(10, 125, 20)
predictions = [results.predict([1, i, 1, 0.6])[0] for i in salaries]
ax.plot(salaries, predictions, label="Implied Linear function of Salaries on Outcome")
ax.set_title("Out of bound Prediction based on Salary")
ax.axhline(10, linestyle="--", color="black")
ax.set_xlabel("Hypothetical Salary")
ax.set_ylabel("Manager Rating Scale")
ax.axhline(0, linestyle="--", color="black")
axs[0].hist(results.resid, ec="white")
axs[0].set_title("Simple OLS Residuals on Training Data")
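Since OrderedModel is imported above, a frequentist ordinal fit makes a natural contrast with the unbounded OLS predictions. The following is a minimal sketch, assuming the same `df` columns; the probit link and BFGS optimizer are illustrative choices, not taken from this notebook.

# Hedged sketch: an ordered-probit baseline with statsmodels' OrderedModel.
# Assumes `df` has the same columns as in the OLS fit above.
ord_mod = OrderedModel(
    df["explicit_rating"],  # ordinal outcome
    df[["salary", "work_from_home", "work_sat"]],  # no constant: the cutpoints act as intercepts
    distr="probit",  # illustrative link choice; "logit" also works
)
ord_res = ord_mod.fit(method="bfgs")
ord_res.summary()

Because the ordinal model predicts probabilities over the fixed set of rating categories, it cannot produce the out-of-bound predictions shown in the right-hand panel above.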
Sampler diagnostics (condensed from the run logs): the first model reported 86 divergences after tuning, with warnings that the rhat statistic exceeded 1.01 for some parameters and that the effective sample size per chain fell below 100 (see https://arxiv.org/abs/1903.08008); the second model reported 83 divergences and the same rhat warning; the final model reported only 1 divergence. Posterior predictive sampling of y followed each fit.
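The divergence warnings point at the two standard remedies named in the sampler message itself: increase `target_accept` or reparameterize. A minimal sketch of the first option, where `model` is a placeholder for whichever pm.Model produced the warnings above:

# Hedged sketch: re-run NUTS with a higher acceptance target, which forces
# smaller step sizes and typically reduces divergences at the cost of speed.
# `model` is a hypothetical name standing in for the model context above.
with model:
    idata = pm.sample(target_accept=0.95, random_seed=42)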
fig, ax = plt.subplots(figsize=(20, 6))
for i in range(K):
    ax.hist(implied_probs[0, i, :], label=f"Cutpoint: {i}", ec="white", bins=20, alpha=0.4)
ax.set_xlabel("Probability")
ax.set_title("Probability by Interval of Manager Rating \n by Individual 0", fontsize=20)
ax.legend();
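The same array can be summarized numerically. A small sketch, assuming `implied_probs` is a NumPy-like array of shape (individuals, K categories, draws) as the indexing above implies, and that the categories are coded 0..K-1:

# Hedged sketch: posterior-mean probability of each rating category for
# individual 0, and the implied probability-weighted expected rating.
probs0 = np.asarray(implied_probs)[0]  # shape (K, draws), per the indexing above
mean_probs = probs0.mean(axis=1)  # average each category's probability over draws
expected_rating = (np.arange(K) * mean_probs).sum()  # expected value on the 0..K-1 coding
print(mean_probs, expected_rating)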