mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-13 18:29:54 +01:00
1263 lines
52 KiB
Plaintext
1263 lines
52 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "35edb7d6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "7c963881",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Role Satisfaction</th>\n",
|
|
" <th>Skill Utilization</th>\n",
|
|
" <th>Career Growth Opportunity</th>\n",
|
|
" <th>Supervisor Support</th>\n",
|
|
" <th>Work-Life Balance</th>\n",
|
|
" <th>Recognition & Appreciation</th>\n",
|
|
" <th>Company Culture</th>\n",
|
|
" <th>Training & Development</th>\n",
|
|
" <th>Communication Effectiveness</th>\n",
|
|
" <th>Diversity & Inclusion</th>\n",
|
|
" <th>Work Environment</th>\n",
|
|
" <th>Compensation</th>\n",
|
|
" <th>Staff_Id</th>\n",
|
|
" <th>Month_Of_Service</th>\n",
|
|
" <th>Years_Of_Service</th>\n",
|
|
" <th>Residence</th>\n",
|
|
" <th>Residence_Code</th>\n",
|
|
" <th>Net_Salary</th>\n",
|
|
" <th>Resigned</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>SA63171</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>Depok</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5582218</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>SP10211</td>\n",
|
|
" <td>43</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>Jakarta</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>9213443</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>SA79627</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>Bekasi</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5836455</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>SA02310</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Depok</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>6035466</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>SA98565</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Jakarta</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>5568101</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Role Satisfaction Skill Utilization Career Growth Opportunity \\\n",
|
|
"0 3 4 5 \n",
|
|
"1 2 3 1 \n",
|
|
"2 3 3 2 \n",
|
|
"3 3 3 4 \n",
|
|
"4 3 2 4 \n",
|
|
"\n",
|
|
" Supervisor Support Work-Life Balance Recognition & Appreciation \\\n",
|
|
"0 2 2 3 \n",
|
|
"1 2 4 3 \n",
|
|
"2 2 2 5 \n",
|
|
"3 4 3 1 \n",
|
|
"4 3 3 2 \n",
|
|
"\n",
|
|
" Company Culture Training & Development Communication Effectiveness \\\n",
|
|
"0 3 3 2 \n",
|
|
"1 4 3 2 \n",
|
|
"2 4 4 3 \n",
|
|
"3 4 4 4 \n",
|
|
"4 3 4 2 \n",
|
|
"\n",
|
|
" Diversity & Inclusion Work Environment Compensation Staff_Id \\\n",
|
|
"0 3 4 4 SA63171 \n",
|
|
"1 2 2 4 SP10211 \n",
|
|
"2 2 4 5 SA79627 \n",
|
|
"3 5 3 5 SA02310 \n",
|
|
"4 3 2 3 SA98565 \n",
|
|
"\n",
|
|
" Month_Of_Service Years_Of_Service Residence Residence_Code Net_Salary \\\n",
|
|
"0 1 0 Depok 4 5582218 \n",
|
|
"1 43 3 Jakarta 1 9213443 \n",
|
|
"2 10 0 Bekasi 3 5836455 \n",
|
|
"3 17 1 Depok 4 6035466 \n",
|
|
"4 17 1 Jakarta 1 5568101 \n",
|
|
"\n",
|
|
" Resigned \n",
|
|
"0 0 \n",
|
|
"1 0 \n",
|
|
"2 0 \n",
|
|
"3 0 \n",
|
|
"4 0 "
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Grab Data\n",
|
|
"df = pd.read_excel(\"https://raw.githubusercontent.com/youronlydimwit/Data_ScienceUse_Cases/main/Classification/Data/HRD_Survey_50.xlsx\")\n",
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "fcbe70d6",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Role Satisfaction</th>\n",
|
|
" <th>Skill Utilization</th>\n",
|
|
" <th>Career Growth Opportunity</th>\n",
|
|
" <th>Supervisor Support</th>\n",
|
|
" <th>Work-Life Balance</th>\n",
|
|
" <th>Recognition & Appreciation</th>\n",
|
|
" <th>Company Culture</th>\n",
|
|
" <th>Training & Development</th>\n",
|
|
" <th>Communication Effectiveness</th>\n",
|
|
" <th>Diversity & Inclusion</th>\n",
|
|
" <th>Work Environment</th>\n",
|
|
" <th>Compensation</th>\n",
|
|
" <th>Month_Of_Service</th>\n",
|
|
" <th>Years_Of_Service</th>\n",
|
|
" <th>Residence_Code</th>\n",
|
|
" <th>Net_Salary</th>\n",
|
|
" <th>Resigned</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5582218</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>43</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>9213443</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>10</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5836455</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>6035466</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>17</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>5568101</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Role Satisfaction Skill Utilization Career Growth Opportunity \\\n",
|
|
"0 3 4 5 \n",
|
|
"1 2 3 1 \n",
|
|
"2 3 3 2 \n",
|
|
"3 3 3 4 \n",
|
|
"4 3 2 4 \n",
|
|
"\n",
|
|
" Supervisor Support Work-Life Balance Recognition & Appreciation \\\n",
|
|
"0 2 2 3 \n",
|
|
"1 2 4 3 \n",
|
|
"2 2 2 5 \n",
|
|
"3 4 3 1 \n",
|
|
"4 3 3 2 \n",
|
|
"\n",
|
|
" Company Culture Training & Development Communication Effectiveness \\\n",
|
|
"0 3 3 2 \n",
|
|
"1 4 3 2 \n",
|
|
"2 4 4 3 \n",
|
|
"3 4 4 4 \n",
|
|
"4 3 4 2 \n",
|
|
"\n",
|
|
" Diversity & Inclusion Work Environment Compensation Month_Of_Service \\\n",
|
|
"0 3 4 4 1 \n",
|
|
"1 2 2 4 43 \n",
|
|
"2 2 4 5 10 \n",
|
|
"3 5 3 5 17 \n",
|
|
"4 3 2 3 17 \n",
|
|
"\n",
|
|
" Years_Of_Service Residence_Code Net_Salary Resigned \n",
|
|
"0 0 4 5582218 0 \n",
|
|
"1 3 1 9213443 0 \n",
|
|
"2 0 3 5836455 0 \n",
|
|
"3 1 4 6035466 0 \n",
|
|
"4 1 1 5568101 0 "
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Making a copy of df, but with only numerical information\n",
|
|
"# Removing unnecessary columns\n",
|
|
"pred_df = df.drop(columns=['Staff_Id','Residence'])\n",
|
|
"pred_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "10a4fe36",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Role Satisfaction int64\n",
|
|
"Skill Utilization int64\n",
|
|
"Career Growth Opportunity int64\n",
|
|
"Supervisor Support int64\n",
|
|
"Work-Life Balance int64\n",
|
|
"Recognition & Appreciation int64\n",
|
|
"Company Culture int64\n",
|
|
"Training & Development int64\n",
|
|
"Communication Effectiveness int64\n",
|
|
"Diversity & Inclusion int64\n",
|
|
"Work Environment int64\n",
|
|
"Compensation int64\n",
|
|
"Month_Of_Service int64\n",
|
|
"Residence_Code int64\n",
|
|
"Net_Salary int64\n",
|
|
"Resigned int64\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pred_df = pred_df.drop(columns=[\"Years_Of_Service\"])\n",
|
|
"pred_df.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "a4bf1dae",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "2dd56994",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.tree import DecisionTreeClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "b7c40c6f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Split the data into features (X) and labels (y)\n",
|
|
"X = pred_df.drop(columns=['Resigned'])\n",
|
|
"y = pred_df['Resigned']\n",
|
|
"\n",
|
|
"# Split the data into training and testing sets\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|
"\n",
|
|
"# Define a dictionary to store results\n",
|
|
"results = {'Model': [], 'F1_score': [], 'Accuracy': [], 'Precision': [], 'Recall': []}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "319665bc",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Baseline: models trained on the original (imbalanced) training data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "e162bda0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n",
|
|
"C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Train Random Forest model\n",
|
|
"rf_model = RandomForestClassifier(random_state=42)\n",
|
|
"rf_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Train Decision Tree model\n",
|
|
"dt_model = DecisionTreeClassifier(random_state=42)\n",
|
|
"dt_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Train Logistic Regression model\n",
|
|
"lr_model = LogisticRegression(random_state=42)\n",
|
|
"lr_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Evaluate models\n",
|
|
"models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
|
|
"metrics = {\"Accuracy\": accuracy_score, \"Precision\": precision_score, \"Recall\": recall_score, \"F1 Score\": f1_score}\n",
|
|
"results = {}\n",
|
|
"\n",
|
|
"for name, model in models.items():\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" result = {}\n",
|
|
" for metric_name, metric_func in metrics.items():\n",
|
|
" result[metric_name] = metric_func(y_test, y_pred)\n",
|
|
" results[name] = result\n",
|
|
"\n",
|
|
"# Convert results to DataFrame for easier plotting\n",
|
|
"results_df = pd.DataFrame(results)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "94ebdccc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Random Forest</th>\n",
|
|
" <th>Decision Tree</th>\n",
|
|
" <th>Logistic Regression</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Accuracy</th>\n",
|
|
" <td>0.87</td>\n",
|
|
" <td>0.820000</td>\n",
|
|
" <td>0.87</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Precision</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.272727</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Recall</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.230769</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>F1 Score</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.250000</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Random Forest Decision Tree Logistic Regression\n",
|
|
"Accuracy 0.87 0.820000 0.87\n",
|
|
"Precision 0.00 0.272727 0.00\n",
|
|
"Recall 0.00 0.230769 0.00\n",
|
|
"F1 Score 0.00 0.250000 0.00"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"results_df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "e95e9b4b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from imblearn.over_sampling import SMOTE"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "96ab491e",
|
|
"metadata": {},
|
|
"source": [
|
|
"# SMOTE"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "4bed2a76",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Apply SMOTE to oversample the minority class in the training data\n",
|
|
"smote = SMOTE(random_state=42)\n",
|
|
"X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "c24a2a88",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Random Forest</th>\n",
|
|
" <th>Decision Tree</th>\n",
|
|
" <th>Logistic Regression</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Accuracy</th>\n",
|
|
" <td>0.83</td>\n",
|
|
" <td>0.720000</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Precision</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.105263</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Recall</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.153846</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>F1 Score</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.125000</td>\n",
|
|
" <td>0.230088</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Random Forest Decision Tree Logistic Regression\n",
|
|
"Accuracy 0.83 0.720000 0.130000\n",
|
|
"Precision 0.00 0.105263 0.130000\n",
|
|
"Recall 0.00 0.153846 1.000000\n",
|
|
"F1 Score 0.00 0.125000 0.230088"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Train Random Forest model\n",
|
|
"rf_model = RandomForestClassifier(random_state=42)\n",
|
|
"rf_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Decision Tree model\n",
|
|
"dt_model = DecisionTreeClassifier(random_state=42)\n",
|
|
"dt_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Logistic Regression model\n",
|
|
"lr_model = LogisticRegression(random_state=42)\n",
|
|
"lr_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Evaluate models\n",
|
|
"models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
|
|
"metrics = {\"Accuracy\": accuracy_score, \"Precision\": precision_score, \"Recall\": recall_score, \"F1 Score\": f1_score}\n",
|
|
"results = {}\n",
|
|
"\n",
|
|
"for name, model in models.items():\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" result = {}\n",
|
|
" for metric_name, metric_func in metrics.items():\n",
|
|
" result[metric_name] = metric_func(y_test, y_pred)\n",
|
|
" results[name] = result\n",
|
|
"\n",
|
|
"# Convert results to DataFrame for easier plotting\n",
|
|
"results_df_resampled = pd.DataFrame(results)\n",
|
|
"\n",
|
|
"results_df_resampled"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "45d27935",
|
|
"metadata": {},
|
|
"source": [
|
|
"# OVERSAMPLER"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "33d278e9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from imblearn.over_sampling import RandomOverSampler"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "c4b2b38c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Apply Random Oversampling to balance the training data\n",
|
|
"oversampler = RandomOverSampler(random_state=42)\n",
|
|
"X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "6e730981",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Random Forest</th>\n",
|
|
" <th>Decision Tree</th>\n",
|
|
" <th>Logistic Regression</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Accuracy</th>\n",
|
|
" <td>0.87</td>\n",
|
|
" <td>0.78</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Precision</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Recall</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>F1 Score</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.230088</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Random Forest Decision Tree Logistic Regression\n",
|
|
"Accuracy 0.87 0.78 0.130000\n",
|
|
"Precision 0.00 0.00 0.130000\n",
|
|
"Recall 0.00 0.00 1.000000\n",
|
|
"F1 Score 0.00 0.00 0.230088"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Train Random Forest model\n",
|
|
"rf_model = RandomForestClassifier(random_state=42)\n",
|
|
"rf_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Decision Tree model\n",
|
|
"dt_model = DecisionTreeClassifier(random_state=42)\n",
|
|
"dt_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Logistic Regression model\n",
|
|
"lr_model = LogisticRegression(random_state=42)\n",
|
|
"lr_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Evaluate models\n",
|
|
"models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
|
|
"metrics = {\"Accuracy\": accuracy_score, \"Precision\": precision_score, \"Recall\": recall_score, \"F1 Score\": f1_score}\n",
|
|
"results = {}\n",
|
|
"\n",
|
|
"for name, model in models.items():\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" result = {}\n",
|
|
" for metric_name, metric_func in metrics.items():\n",
|
|
" result[metric_name] = metric_func(y_test, y_pred)\n",
|
|
" results[name] = result\n",
|
|
"\n",
|
|
"# Convert results to DataFrame for easier plotting\n",
|
|
"results_df_resampled = pd.DataFrame(results)\n",
|
|
"\n",
|
|
"results_df_resampled"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "bbe4b256",
|
|
"metadata": {},
|
|
"source": [
|
|
"# CLASS WEIGHTS (original training data, no resampling)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "a1a034ad",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n",
|
|
"C:\\Users\\Asus\\anaconda3\\lib\\site-packages\\sklearn\\metrics\\_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Random Forest</th>\n",
|
|
" <th>Decision Tree</th>\n",
|
|
" <th>Logistic Regression</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Accuracy</th>\n",
|
|
" <td>0.87</td>\n",
|
|
" <td>0.800000</td>\n",
|
|
" <td>0.87</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Precision</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.111111</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Recall</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.076923</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>F1 Score</th>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>0.090909</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Random Forest Decision Tree Logistic Regression\n",
|
|
"Accuracy 0.87 0.800000 0.87\n",
|
|
"Precision 0.00 0.111111 0.00\n",
|
|
"Recall 0.00 0.076923 0.00\n",
|
|
"F1 Score 0.00 0.090909 0.00"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Define class weights\n",
|
|
"class_weights = {0: 1, 1: 9}\n",
|
|
"\n",
|
|
"# Train Random Forest model\n",
|
|
"rf_model = RandomForestClassifier(random_state=42, class_weight=class_weights)\n",
|
|
"rf_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Train Decision Tree model\n",
|
|
"dt_model = DecisionTreeClassifier(random_state=42, class_weight=class_weights)\n",
|
|
"dt_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Train Logistic Regression model\n",
|
|
"lr_model = LogisticRegression(random_state=42, class_weight=class_weights)\n",
|
|
"lr_model.fit(X_train, y_train)\n",
|
|
"\n",
|
|
"# Evaluate models\n",
|
|
"models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
|
|
"metrics = {\"Accuracy\": accuracy_score, \"Precision\": precision_score, \"Recall\": recall_score, \"F1 Score\": f1_score}\n",
|
|
"results = {}\n",
|
|
"\n",
|
|
"for name, model in models.items():\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" result = {}\n",
|
|
" for metric_name, metric_func in metrics.items():\n",
|
|
" result[metric_name] = metric_func(y_test, y_pred)\n",
|
|
" results[name] = result\n",
|
|
"\n",
|
|
"# Convert results to DataFrame for easier plotting\n",
|
|
"results_df_resampled = pd.DataFrame(results)\n",
|
|
"\n",
|
|
"results_df_resampled"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "00cb6be9",
|
|
"metadata": {},
|
|
"source": [
|
|
"# UNDERSAMPLER"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "06a4d1c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from imblearn.under_sampling import RandomUnderSampler"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"id": "dcb0db52",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Apply Random Undersampling to balance the training data\n",
|
|
"undersampler = RandomUnderSampler(random_state=42)\n",
|
|
"X_train_resampled, y_train_resampled = undersampler.fit_resample(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "a939e8fe",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Random Forest</th>\n",
|
|
" <th>Decision Tree</th>\n",
|
|
" <th>Logistic Regression</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Accuracy</th>\n",
|
|
" <td>0.560000</td>\n",
|
|
" <td>0.490000</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Precision</th>\n",
|
|
" <td>0.155556</td>\n",
|
|
" <td>0.183333</td>\n",
|
|
" <td>0.130000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Recall</th>\n",
|
|
" <td>0.538462</td>\n",
|
|
" <td>0.846154</td>\n",
|
|
" <td>1.000000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>F1 Score</th>\n",
|
|
" <td>0.241379</td>\n",
|
|
" <td>0.301370</td>\n",
|
|
" <td>0.230088</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Random Forest Decision Tree Logistic Regression\n",
|
|
"Accuracy 0.560000 0.490000 0.130000\n",
|
|
"Precision 0.155556 0.183333 0.130000\n",
|
|
"Recall 0.538462 0.846154 1.000000\n",
|
|
"F1 Score 0.241379 0.301370 0.230088"
|
|
]
|
|
},
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Train Random Forest model\n",
|
|
"rf_model = RandomForestClassifier(random_state=42)\n",
|
|
"rf_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Decision Tree model\n",
|
|
"dt_model = DecisionTreeClassifier(random_state=42)\n",
|
|
"dt_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Train Logistic Regression model\n",
|
|
"lr_model = LogisticRegression(random_state=42)\n",
|
|
"lr_model.fit(X_train_resampled, y_train_resampled)\n",
|
|
"\n",
|
|
"# Evaluate models\n",
|
|
"models = {\"Random Forest\": rf_model, \"Decision Tree\": dt_model, \"Logistic Regression\": lr_model}\n",
|
|
"metrics = {\"Accuracy\": accuracy_score, \"Precision\": precision_score, \"Recall\": recall_score, \"F1 Score\": f1_score}\n",
|
|
"results = {}\n",
|
|
"\n",
|
|
"for name, model in models.items():\n",
|
|
" y_pred = model.predict(X_test)\n",
|
|
" result = {}\n",
|
|
" for metric_name, metric_func in metrics.items():\n",
|
|
" result[metric_name] = metric_func(y_test, y_pred)\n",
|
|
" results[name] = result\n",
|
|
"\n",
|
|
"# Convert results to DataFrame for easier plotting\n",
|
|
"results_df_resampled = pd.DataFrame(results)\n",
|
|
"\n",
|
|
"results_df_resampled"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"id": "3269fce1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 720x432 with 2 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Calculate class frequencies for y_train_resampled and y_train\n",
|
|
"unique_train_resampled, counts_train_resampled = np.unique(y_train_resampled, return_counts=True)\n",
|
|
"unique_train, counts_train = np.unique(y_train, return_counts=True)\n",
|
|
"\n",
|
|
"# Plot bar plots\n",
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
"\n",
|
|
"# Plot for y_train_resampled\n",
|
|
"plt.subplot(1, 2, 1)\n",
|
|
"plt.bar(unique_train_resampled, counts_train_resampled, color='red')\n",
|
|
"plt.title('Distribution of y_train_resampled')\n",
|
|
"plt.xlabel('Class')\n",
|
|
"plt.ylabel('Frequency')\n",
|
|
"plt.xticks(unique_train_resampled)\n",
|
|
"plt.ylim(0, max(max(counts_train_resampled), max(counts_train)) + 10)\n",
|
|
"\n",
|
|
"# Plot for y_train\n",
|
|
"plt.subplot(1, 2, 2)\n",
|
|
"plt.bar(unique_train, counts_train, color='blue')\n",
|
|
"plt.title('Distribution of y_train')\n",
|
|
"plt.xlabel('Class')\n",
|
|
"plt.ylabel('Frequency')\n",
|
|
"plt.xticks(unique_train)\n",
|
|
"plt.ylim(0, max(max(counts_train_resampled), max(counts_train)) + 10)\n",
|
|
"\n",
|
|
"plt.tight_layout()\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f16cadbc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|