{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Discrete Choice Models Overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data\n",
    "\n",
    "Load data from Spector and Mazzeo (1980). Examples follow Greene's Econometric Analysis Ch. 21 (5th Edition)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "spector_data = sm.datasets.spector.load()\n",
    "spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inspect the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    GPA  TUCE  PSI  const\n",
      "0  2.66  20.0  0.0    1.0\n",
      "1  2.89  22.0  0.0    1.0\n",
      "2  3.28  24.0  0.0    1.0\n",
      "3  2.92  12.0  0.0    1.0\n",
      "4  4.00  21.0  0.0    1.0\n",
      "0    0.0\n",
      "1    0.0\n",
      "2    0.0\n",
      "3    0.0\n",
      "4    1.0\n",
      "Name: GRADE, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(spector_data.exog.head())\n",
    "print(spector_data.endog.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Probability Model (OLS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters:  GPA     0.463852\n",
      "TUCE    0.010495\n",
      "PSI     0.378555\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "lpm_mod = sm.OLS(spector_data.endog, spector_data.exog)\n",
    "lpm_res = lpm_mod.fit()\n",
    "print(\"Parameters: \", lpm_res.params[:-1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Logit Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
 
 
 
 
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters:  GPA       2.826113\n",
      "TUCE      0.095158\n",
      "PSI       2.378688\n",
      "const   -13.021347\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "logit_mod = sm.Logit(spector_data.endog, spector_data.exog)\n",
    "logit_res = logit_mod.fit(disp=0)\n",
    "print(\"Parameters: \", logit_res.params)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Marginal Effects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        Logit Marginal Effects       \n",
      "=====================================\n",
      "Dep. Variable:                  GRADE\n",
      "Method:                          dydx\n",
      "At:                           overall\n",
      "==============================================================================\n",
      "                dy/dx    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "GPA            0.3626      0.109      3.313      0.001       0.148       0.577\n",
      "TUCE           0.0122      0.018      0.686      0.493      -0.023       0.047\n",
      "PSI            0.3052      0.092      3.304      0.001       0.124       0.486\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "margeff = logit_res.get_margeff()\n",
    "print(margeff.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As in all the discrete data models presented below, we can print a nice summary of results:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                           Logit Regression Results                           \n",
      "==============================================================================\n",
      "Dep. Variable:                  GRADE   No. Observations:                   32\n",
      "Model:                          Logit   Df Residuals:                       28\n",
      "Method:                           MLE   Df Model:                            3\n",
      "Date:                Thu, 18 Dec 2025   Pseudo R-squ.:                  0.3740\n",
      "Time:                        07:37:30   Log-Likelihood:                -12.890\n",
      "converged:                       True   LL-Null:                       -20.592\n",
      "Covariance Type:            nonrobust   LLR p-value:                  0.001502\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "GPA            2.8261      1.263      2.238      0.025       0.351       5.301\n",
      "TUCE           0.0952      0.142      0.672      0.501      -0.182       0.373\n",
      "PSI            2.3787      1.065      2.234      0.025       0.292       4.465\n",
      "const        -13.0213      4.931     -2.641      0.008     -22.687      -3.356\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "print(logit_res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Probit Model "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.400588\n",
      "         Iterations 6\n",
      "Parameters:  GPA      1.625810\n",
      "TUCE     0.051729\n",
      "PSI      1.426332\n",
      "const   -7.452320\n",
      "dtype: float64\n",
      "Marginal effects: \n",
      "       Probit Marginal Effects       \n",
      "=====================================\n",
      "Dep. Variable:                  GRADE\n",
      "Method:                          dydx\n",
      "At:                           overall\n",
      "==============================================================================\n",
      "                dy/dx    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "GPA            0.3608      0.113      3.182      0.001       0.139       0.583\n",
      "TUCE           0.0115      0.018      0.624      0.533      -0.025       0.048\n",
      "PSI            0.3165      0.090      3.508      0.000       0.140       0.493\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "probit_mod = sm.Probit(spector_data.endog, spector_data.exog)\n",
    "probit_res = probit_mod.fit()\n",
    "probit_margeff = probit_res.get_margeff()\n",
    "print(\"Parameters: \", probit_res.params)\n",
    "print(\"Marginal effects: \")\n",
    "print(probit_margeff.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multinomial Logit"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load data from the American National Election Studies:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "anes_data = sm.datasets.anes96.load()\n",
    "anes_exog = anes_data.exog\n",
    "anes_exog = sm.add_constant(anes_exog)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inspect the data:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   logpopul  selfLR   age  educ  income\n",
      "0 -2.302585     7.0  36.0   3.0     1.0\n",
      "1  5.247550     3.0  20.0   4.0     1.0\n",
      "2  3.437208     2.0  24.0   6.0     1.0\n",
      "3  4.420045     3.0  28.0   6.0     1.0\n",
      "4  6.461624     5.0  68.0   6.0     1.0\n",
      "0    6.0\n",
      "1    1.0\n",
      "2    1.0\n",
      "3    1.0\n",
      "4    0.0\n",
      "Name: PID, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(anes_data.exog.head())\n",
    "print(anes_data.endog.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit MNL model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 1.548647\n",
      "         Iterations 7\n",
      "                 0         1         2         3         4          5\n",
      "const    -0.373402 -2.250913 -3.665584 -7.613843 -7.060478 -12.105751\n",
      "logpopul -0.011536 -0.088751 -0.105967 -0.091557 -0.093285  -0.140881\n",
      "selfLR    0.297714  0.391669  0.573451  1.278772  1.346962   2.070080\n",
      "age      -0.024945 -0.022898 -0.014851 -0.008681 -0.017904  -0.009433\n",
      "educ      0.082491  0.181043 -0.007152  0.199828  0.216939   0.321926\n",
      "income    0.005197  0.047874  0.057575  0.084498  0.080958   0.108894\n"
     ]
    }
   ],
   "source": [
    "mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)\n",
    "mlogit_res = mlogit_mod.fit()\n",
    "print(mlogit_res.params)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Poisson\n",
    "\n",
    "Load the Rand data. Note that this example is similar to Cameron and Trivedi's `Microeconometrics` Table 20.5, but it is slightly different because of minor changes in the data. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "rand_data = sm.datasets.randhie.load()\n",
    "rand_exog = rand_data.exog\n",
    "rand_exog = sm.add_constant(rand_exog, prepend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fit Poisson model: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 3.091609\n",
      "         Iterations 6\n",
      "                          Poisson Regression Results                          \n",
      "==============================================================================\n",
      "Dep. Variable:                  mdvis   No. Observations:                20190\n",
      "Model:                        Poisson   Df Residuals:                    20180\n",
      "Method:                           MLE   Df Model:                            9\n",
      "Date:                Thu, 18 Dec 2025   Pseudo R-squ.:                 0.06343\n",
      "Time:                        07:37:30   Log-Likelihood:                -62420.\n",
      "converged:                       True   LL-Null:                       -66647.\n",
      "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "lncoins       -0.0525      0.003    -18.216      0.000      -0.058      -0.047\n",
      "idp           -0.2471      0.011    -23.272      0.000      -0.268      -0.226\n",
      "lpi            0.0353      0.002     19.302      0.000       0.032       0.039\n",
      "fmde          -0.0346      0.002    -21.439      0.000      -0.038      -0.031\n",
      "physlm         0.2717      0.012     22.200      0.000       0.248       0.296\n",
      "disea          0.0339      0.001     60.098      0.000       0.033       0.035\n",
      "hlthg         -0.0126      0.009     -1.366      0.172      -0.031       0.005\n",
      "hlthf          0.0541      0.015      3.531      0.000       0.024       0.084\n",
      "hlthp          0.2061      0.026      7.843      0.000       0.155       0.258\n",
      "const          0.7004      0.011     62.741      0.000       0.678       0.722\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "poisson_mod = sm.Poisson(rand_data.endog, rand_exog)\n",
    "poisson_res = poisson_mod.fit(method=\"newton\")\n",
    "print(poisson_res.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Negative Binomial\n",
    "\n",
    "The negative binomial model gives slightly different results. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                     NegativeBinomial Regression Results                      \n",
      "==============================================================================\n",
      "Dep. Variable:                  mdvis   No. Observations:                20190\n",
      "Model:               NegativeBinomial   Df Residuals:                    20180\n",
      "Method:                           MLE   Df Model:                            9\n",
      "Date:                Thu, 18 Dec 2025   Pseudo R-squ.:                 0.01845\n",
      "Time:                        07:37:30   Log-Likelihood:                -43384.\n",
      "converged:                      False   LL-Null:                       -44199.\n",
      "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
      "==============================================================================\n",
      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "lncoins       -0.0579      0.006     -9.515      0.000      -0.070      -0.046\n",
      "idp           -0.2678      0.023    -11.802      0.000      -0.312      -0.223\n",
      "lpi            0.0412      0.004      9.938      0.000       0.033       0.049\n",
      "fmde          -0.0381      0.003    -11.216      0.000      -0.045      -0.031\n",
      "physlm         0.2691      0.030      8.985      0.000       0.210       0.328\n",
      "disea          0.0382      0.001     26.080      0.000       0.035       0.041\n",
      "hlthg         -0.0441      0.020     -2.201      0.028      -0.083      -0.005\n",
      "hlthf          0.0173      0.036      0.478      0.632      -0.054       0.088\n",
      "hlthp          0.1782      0.074      2.399      0.016       0.033       0.324\n",
      "const          0.6635      0.025     26.786      0.000       0.615       0.712\n",
      "alpha          1.2930      0.019     69.477      0.000       1.256       1.329\n",
      "==============================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
      "  warnings.warn(\"Maximum Likelihood optimization failed to \"\n"
     ]
    }
   ],
   "source": [
    "mod_nbin = sm.NegativeBinomial(rand_data.endog, rand_exog)\n",
    "res_nbin = mod_nbin.fit(disp=False)\n",
    "print(res_nbin.summary())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Alternative solvers\n",
    "\n",
    "The default method for fitting discrete data MLE models is Newton-Raphson. You can use other solvers by using the ``method`` argument: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "execution": {
 
 
 
 
    },
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 1.548647\n",
      "         Iterations: 111\n",
      "         Function evaluations: 117\n",
      "         Gradient evaluations: 117\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                          MNLogit Regression Results                          \n",
      "==============================================================================\n",
      "Dep. Variable:                    PID   No. Observations:                  944\n",
      "Model:                        MNLogit   Df Residuals:                      908\n",
      "Method:                           MLE   Df Model:                           30\n",
      "Date:                Thu, 18 Dec 2025   Pseudo R-squ.:                  0.1648\n",
      "Time:                        07:37:30   Log-Likelihood:                -1461.9\n",
      "converged:                       True   LL-Null:                       -1750.3\n",
      "Covariance Type:            nonrobust   LLR p-value:                1.822e-102\n",
      "==============================================================================\n",
      "     PID=1       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const         -0.3734      0.630     -0.593      0.553      -1.608       0.861\n",
      "logpopul      -0.0115      0.034     -0.337      0.736      -0.079       0.056\n",
      "selfLR         0.2977      0.094      3.180      0.001       0.114       0.481\n",
      "age           -0.0249      0.007     -3.823      0.000      -0.038      -0.012\n",
      "educ           0.0825      0.074      1.121      0.262      -0.062       0.227\n",
      "income         0.0052      0.018      0.295      0.768      -0.029       0.040\n",
      "------------------------------------------------------------------------------\n",
      "     PID=2       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const         -2.2509      0.763     -2.949      0.003      -3.747      -0.755\n",
      "logpopul      -0.0888      0.039     -2.266      0.023      -0.166      -0.012\n",
      "selfLR         0.3917      0.108      3.619      0.000       0.180       0.604\n",
      "age           -0.0229      0.008     -2.893      0.004      -0.038      -0.007\n",
      "educ           0.1810      0.085      2.123      0.034       0.014       0.348\n",
      "income         0.0479      0.022      2.149      0.032       0.004       0.092\n",
      "------------------------------------------------------------------------------\n",
      "     PID=3       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const         -3.6656      1.157     -3.169      0.002      -5.932      -1.399\n",
      "logpopul      -0.1060      0.057     -1.858      0.063      -0.218       0.006\n",
      "selfLR         0.5734      0.159      3.617      0.000       0.263       0.884\n",
      "age           -0.0149      0.011     -1.311      0.190      -0.037       0.007\n",
      "educ          -0.0072      0.126     -0.057      0.955      -0.255       0.240\n",
      "income         0.0576      0.034      1.713      0.087      -0.008       0.123\n",
      "------------------------------------------------------------------------------\n",
      "     PID=4       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const         -7.6139      0.958     -7.951      0.000      -9.491      -5.737\n",
      "logpopul      -0.0916      0.044     -2.091      0.037      -0.177      -0.006\n",
      "selfLR         1.2788      0.129      9.921      0.000       1.026       1.531\n",
      "age           -0.0087      0.008     -1.031      0.302      -0.025       0.008\n",
      "educ           0.1998      0.094      2.123      0.034       0.015       0.384\n",
      "income         0.0845      0.026      3.226      0.001       0.033       0.136\n",
      "------------------------------------------------------------------------------\n",
      "     PID=5       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const         -7.0605      0.844     -8.362      0.000      -8.715      -5.406\n",
      "logpopul      -0.0933      0.039     -2.371      0.018      -0.170      -0.016\n",
      "selfLR         1.3470      0.117     11.494      0.000       1.117       1.577\n",
      "age           -0.0179      0.008     -2.352      0.019      -0.033      -0.003\n",
      "educ           0.2169      0.085      2.552      0.011       0.050       0.384\n",
      "income         0.0810      0.023      3.524      0.000       0.036       0.126\n",
      "------------------------------------------------------------------------------\n",
      "     PID=6       coef    std err          z      P>|z|      [0.025      0.975]\n",
      "------------------------------------------------------------------------------\n",
      "const        -12.1058      1.060    -11.421      0.000     -14.183     -10.028\n",
      "logpopul      -0.1409      0.042     -3.343      0.001      -0.223      -0.058\n",
      "selfLR         2.0701      0.143     14.435      0.000       1.789       2.351\n",
      "age           -0.0094      0.008     -1.160      0.246      -0.025       0.007\n",
      "educ           0.3219      0.091      3.534      0.000       0.143       0.500\n",
      "income         0.1089      0.025      4.304      0.000       0.059       0.158\n",
      "==============================================================================\n"
     ]
    }
   ],
   "source": [
    "mlogit_res = mlogit_mod.fit(method=\"bfgs\", maxiter=250)\n",
    "print(mlogit_res.summary())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
