montebello-642
/

logistic-regression

Model card Files Files and versions Community

montebello-642 committed on Jan 13, 2024

Commit

6b4216c

verified ·

1 Parent(s): 29b2d73

Upload Logistic Regression.ipynb

Browse files

Files changed (1) hide show

Logistic Regression.ipynb +264 -0

Logistic Regression.ipynb ADDED Viewed

	@@ -0,0 +1,264 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['duration_mo', 'mos_ethnicity', 'complainant_ethnicity', 'is_force',\n",
+      "       'is_abuse_of_authority', 'is_discourtesy', 'is_offensive_language',\n",
+      "       'outcome_description'],\n",
+      "      dtype='object')\n",
+      "   duration_mo  mos_ethnicity  complainant_ethnicity  is_force  \\\n",
+      "0           10              0                      2         0   \n",
+      "1            9              1                      2         0   \n",
+      "2            9              1                      2         1   \n",
+      "3           14              1                      2         0   \n",
+      "4            6              0                      7         0   \n",
+      "\n",
+      "   is_abuse_of_authority  is_discourtesy  is_offensive_language  \\\n",
+      "0                      1               0                      0   \n",
+      "1                      0               1                      0   \n",
+      "2                      0               0                      0   \n",
+      "3                      1               0                      0   \n",
+      "4                      0               0                      1   \n",
+      "\n",
+      "   outcome_description  \n",
+      "0                    0  \n",
+      "1                    0  \n",
+      "2                    0  \n",
+      "3                    0  \n",
+      "4                    1  \n",
+      "        duration_mo  mos_ethnicity  complainant_ethnicity      is_force  \\\n",
+      "count  33358.000000   33358.000000           33358.000000  33358.000000   \n",
+      "mean       9.733767       0.946819               2.468283      0.022573   \n",
+      "std        5.017703       0.754311               2.256281      0.148541   \n",
+      "min        0.000000       0.000000               0.000000      0.000000   \n",
+      "25%        6.000000       0.000000               1.000000      0.000000   \n",
+      "50%       10.000000       1.000000               2.000000      0.000000   \n",
+      "75%       13.000000       1.000000               2.000000      0.000000   \n",
+      "max      110.000000       4.000000               7.000000      1.000000   \n",
+      "\n",
+      "       is_abuse_of_authority  is_discourtesy  is_offensive_language  \\\n",
+      "count           33358.000000    33358.000000           33358.000000   \n",
+      "mean                0.608310        0.140206               0.228911   \n",
+      "std                 0.488135        0.347206               0.420138   \n",
+      "min                 0.000000        0.000000               0.000000   \n",
+      "25%                 0.000000        0.000000               0.000000   \n",
+      "50%                 1.000000        0.000000               0.000000   \n",
+      "75%                 1.000000        0.000000               0.000000   \n",
+      "max                 1.000000        1.000000               1.000000   \n",
+      "\n",
+      "       outcome_description  \n",
+      "count         33358.000000  \n",
+      "mean              0.438066  \n",
+      "std               0.496157  \n",
+      "min               0.000000  \n",
+      "25%               0.000000  \n",
+      "50%               0.000000  \n",
+      "75%               1.000000  \n",
+      "max               1.000000  \n",
+      "duration_mo              0\n",
+      "mos_ethnicity            0\n",
+      "complainant_ethnicity    0\n",
+      "is_force                 0\n",
+      "is_abuse_of_authority    0\n",
+      "is_discourtesy           0\n",
+      "is_offensive_language    0\n",
+      "outcome_description      0\n",
+      "dtype: int64\n",
+      "Accuracy: 0.65\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.65      0.82      0.72      3778\n",
+      "           1       0.64      0.42      0.51      2894\n",
+      "\n",
+      "    accuracy                           0.65      6672\n",
+      "   macro avg       0.64      0.62      0.62      6672\n",
+      "weighted avg       0.64      0.65      0.63      6672\n",
+      "\n",
+      "Running on local URL:  http://127.0.0.1:7860\n",
+      "Running on public URL: https://d8846d114093b0894a.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "<IPython.core.display.HTML object>",
+      "text/html": "<div><iframe src=\"https://d8846d114093b0894a.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": ""
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split, cross_val_score\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "\n",
+    "# Load only the columns used below: the seven features plus the target column.\n",
+    "selected_columns = [\n",
+    "    'duration_mo',\n",
+    "    'mos_ethnicity',\n",
+    "    'complainant_ethnicity',\n",
+    "    'is_force',\n",
+    "    'is_abuse_of_authority',\n",
+    "    'is_discourtesy',\n",
+    "    'is_offensive_language',\n",
+    "    'outcome_description',\n",
+    "]\n",
+    "df = pd.read_csv('my_dataset_logistic.csv', usecols=selected_columns)\n",
+    "\n",
+    "# Quick sanity checks: column names, first rows, summary statistics and the\n",
+    "# number of null values per column.\n",
+    "for overview in (df.columns, df.head(), df.describe(), df.isnull().sum()):\n",
+    "    print(overview)\n",
+    "\n",
+    "# Separate the target column from the features; any missing feature value is\n",
+    "# replaced with 0.\n",
+    "y = df['outcome_description']\n",
+    "X = df.drop(columns='outcome_description').fillna(0)\n",
+    "\n",
+    "# Hold out 20% of the rows as a test set (fixed seed so the split is repeatable).\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "\n",
+    "# Standardize the features: the scaler is fitted on the training split only and\n",
+    "# then reused, unchanged, on the test split.\n",
+    "scaler = StandardScaler()\n",
+    "X_train_scaled = scaler.fit_transform(X_train)\n",
+    "X_test_scaled = scaler.transform(X_test)\n",
+    "\n",
+    "# Fit the logistic regression classifier on the scaled training data.\n",
+    "model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)\n",
+    "\n",
+    "# Predict the held-out rows and report the overall accuracy.\n",
+    "y_pred = model.predict(X_test_scaled)\n",
+    "accuracy = accuracy_score(y_test, y_pred)\n",
+    "print(f'Accuracy: {accuracy:.2f}')\n",
+    "\n",
+    "# Classification report: per-class precision, recall and F1 on the test split.\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "\n",
+    "# Confusion Matrix\n",
+    "# Use one explicit, sorted label list for both confusion_matrix and the heatmap\n",
+    "# ticks so they always line up; unique() on its own returns the labels in order\n",
+    "# of first appearance, which need not match the row/column order of the matrix.\n",
+    "class_labels = sorted(df['outcome_description'].unique())\n",
+    "conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)\n",
+    "plt.figure(figsize=(8, 6))\n",
+    "sns.heatmap(conf_matrix, annot=True, fmt=\"d\", cmap=\"Blues\", cbar=False, xticklabels=class_labels, yticklabels=class_labels)\n",
+    "plt.title(\"Confusion Matrix\")\n",
+    "plt.xlabel(\"Predicted\")\n",
+    "plt.ylabel(\"Actual\")\n",
+    "plt.show()\n",
+    "\n",
+    "# Correlation Matrix: pairwise correlation of every column, target included,\n",
+    "# drawn as an annotated heatmap.\n",
+    "correlation_matrix = df.corr()\n",
+    "heatmap_options = dict(annot=True, cmap='coolwarm', fmt=\".2f\", linewidths=.5)\n",
+    "plt.figure(figsize=(10, 8))\n",
+    "sns.heatmap(correlation_matrix, **heatmap_options)\n",
+    "plt.title('Correlation Matrix')\n",
+    "plt.show()\n",
+    "\n",
+    "# Bar chart of each feature's correlation with the target, highest first; the\n",
+    "# target's own entry is dropped before plotting.\n",
+    "target_correlations = correlation_matrix['outcome_description'].sort_values(ascending=False)\n",
+    "feature_correlations = target_correlations.drop('outcome_description')\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "feature_correlations.plot(kind='bar', color='blue')\n",
+    "plt.title('Correlations with Target Variable')\n",
+    "plt.xlabel('Features')\n",
+    "plt.ylabel('Correlation')\n",
+    "plt.show()\n",
+    "\n",
+    "# Standard deviation of every column.\n",
+    "std_dev = df.std()\n",
+    "print('\\nStandard deviation')\n",
+    "print(std_dev)\n",
+    "\n",
+    "#gradio implementation\n",
+    "# Ethnicity labels offered in the UI; each label's position in its list is the\n",
+    "# integer code fed to the model.\n",
+    "# NOTE(review): presumably this order matches the encoding used to build\n",
+    "# my_dataset_logistic.csv -- confirm against the preprocessing step.\n",
+    "mos_ethnicity_options = [\"Hispanic\", \"White\", \"Black\", \"Asian\", \"American Indian\", \"Other Race\", \"Refused\", \"Unknown\"]\n",
+    "complainant_ethnicity_options = [\"Hispanic\", \"White\", \"Black\", \"Asian\", \"American Indian\", \"Other Race\", \"Refused\", \"Unknown\"]\n",
+    "\n",
+    "#defining the function to make predictions using the model\n",
+    "def predict_outcome_duration(mos_ethnicity, complainant_ethnicity, is_force, is_abuse_of_authority, is_discourtesy, is_offensive_language, duration_mo):\n",
+    "    \"\"\"Predict the complaint outcome for one set of form inputs.\n",
+    "\n",
+    "    Args:\n",
+    "        mos_ethnicity: Label from ``mos_ethnicity_options``.\n",
+    "        complainant_ethnicity: Label from ``complainant_ethnicity_options``.\n",
+    "        is_force: Truthy when the allegation involves force.\n",
+    "        is_abuse_of_authority: Truthy for an abuse-of-authority allegation.\n",
+    "        is_discourtesy: Truthy for a discourtesy allegation.\n",
+    "        is_offensive_language: Truthy for an offensive-language allegation.\n",
+    "        duration_mo: Duration in months.\n",
+    "\n",
+    "    Returns:\n",
+    "        'Arrest' when the model predicts class 1 and 'No Arrest' otherwise; a\n",
+    "        prompt when an ethnicity has not been selected; or an 'Error: ...'\n",
+    "        message if the prediction itself fails.\n",
+    "    \"\"\"\n",
+    "    # A dropdown may deliver None before the user picks a value; ask for the\n",
+    "    # selection instead of surfacing the raw ValueError from list.index().\n",
+    "    if mos_ethnicity is None or complainant_ethnicity is None:\n",
+    "        return \"Please select both ethnicities.\"\n",
+    "\n",
+    "    try:\n",
+    "        features = {\n",
+    "            # dropdown labels -> integer codes (position in the option list)\n",
+    "            'mos_ethnicity': mos_ethnicity_options.index(mos_ethnicity),\n",
+    "            'complainant_ethnicity': complainant_ethnicity_options.index(complainant_ethnicity),\n",
+    "            # checkbox values -> 0/1\n",
+    "            'is_force': int(is_force),\n",
+    "            'is_abuse_of_authority': int(is_abuse_of_authority),\n",
+    "            'is_discourtesy': int(is_discourtesy),\n",
+    "            'is_offensive_language': int(is_offensive_language),\n",
+    "            'duration_mo': duration_mo,\n",
+    "        }\n",
+    "\n",
+    "        # One-row frame laid out like the training features, so the scaler gets\n",
+    "        # the column names and column order it was fitted on.\n",
+    "        input_data = pd.DataFrame([features], columns=X.columns)\n",
+    "        input_scaled = scaler.transform(input_data)\n",
+    "        prediction = model.predict(input_scaled)[0]\n",
+    "\n",
+    "        #outputting the result\n",
+    "        return \"Arrest\" if prediction == 1 else \"No Arrest\"\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        # Last-resort boundary for the UI: report the failure as text.\n",
+    "        return f\"Error: {str(e)}\"\n",
+    "\n",
+    "# Gradio UI: dropdowns for the two ethnicities, checkboxes for the allegation\n",
+    "# types and a slider for the duration in months.\n",
+    "mos_ethnicity_dropdown = gr.Dropdown(choices=mos_ethnicity_options,label=\"Defendant Ethnicity\")\n",
+    "complainant_ethnicity_dropdown = gr.Dropdown(choices=complainant_ethnicity_options, label=\"Complainant Ethnicity\")\n",
+    "is_force_checkbox = gr.Checkbox()\n",
+    "is_abuse_of_authority_checkbox = gr.Checkbox()\n",
+    "is_discourtesy_checkbox = gr.Checkbox()\n",
+    "is_offensive_language_checkbox = gr.Checkbox()\n",
+    "duration_mo_slider = gr.Slider(minimum=0, maximum=20, label=\"Duration in months\")\n",
+    "\n",
+    "# The input components are handed to fn positionally, so this list must follow\n",
+    "# the parameter order of predict_outcome_duration: mos_ethnicity first, then\n",
+    "# complainant_ethnicity. Swapping them encodes each ethnicity into the other's\n",
+    "# feature.\n",
+    "iface = gr.Interface(\n",
+    "    fn=predict_outcome_duration,\n",
+    "    inputs=[mos_ethnicity_dropdown, complainant_ethnicity_dropdown, is_force_checkbox, is_abuse_of_authority_checkbox, is_discourtesy_checkbox, is_offensive_language_checkbox, duration_mo_slider],\n",
+    "    outputs=\"text\",\n",
+    "    live=True,\n",
+    "    title=\"Complaint Outcome Prediction\"\n",
+    ")\n",
+    "\n",
+    "# Launch the Gradio Interface\n",
+    "iface.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}