mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-16 23:29:45 +01:00
815 lines
24 KiB
Plaintext
815 lines
24 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ee34a7c4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Import Libraries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "1a23a10f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import os\n",
|
|
"import sklearn\n",
|
|
"from sklearn.model_selection import train_test_split"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3333920d",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Dataset\n",
|
|
"You can find our dataset [here](https://www.kaggle.com/datasets/elakiricoder/gender-classification-dataset)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "5aea2295",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>long_hair</th>\n",
|
|
" <th>forehead_width_cm</th>\n",
|
|
" <th>forehead_height_cm</th>\n",
|
|
" <th>nose_wide</th>\n",
|
|
" <th>nose_long</th>\n",
|
|
" <th>lips_thin</th>\n",
|
|
" <th>distance_nose_to_lip_long</th>\n",
|
|
" <th>gender</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>11.8</td>\n",
|
|
" <td>6.1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Male</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>5.4</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>Female</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>11.8</td>\n",
|
|
" <td>6.3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Male</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>14.4</td>\n",
|
|
" <td>6.1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Male</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>13.5</td>\n",
|
|
" <td>5.9</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>Female</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
|
"0 1 11.8 6.1 1 0 \n",
|
|
"1 0 14.0 5.4 0 0 \n",
|
|
"2 0 11.8 6.3 1 1 \n",
|
|
"3 0 14.4 6.1 0 1 \n",
|
|
"4 1 13.5 5.9 0 0 \n",
|
|
"\n",
|
|
" lips_thin distance_nose_to_lip_long gender \n",
|
|
"0 1 1 Male \n",
|
|
"1 1 0 Female \n",
|
|
"2 1 1 Male \n",
|
|
"3 1 1 Male \n",
|
|
"4 0 0 Female "
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load dataset\n",
|
|
"df = pd.read_csv(r'D:\\archive\\gender_classification_v7.csv', encoding='utf-8')\n",
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "58b8ed5e",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Data Pre-processing\n",
|
|
"For this example I skipped the descriptive statistics and went straight to some minor adjustments."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "d93ff56d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"long_hair int64\n",
|
|
"forehead_width_cm float64\n",
|
|
"forehead_height_cm float64\n",
|
|
"nose_wide int64\n",
|
|
"nose_long int64\n",
|
|
"lips_thin int64\n",
|
|
"distance_nose_to_lip_long int64\n",
|
|
"gender object\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Check Data types of dataframe columns\n",
|
|
"df.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "19ae1cf5",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>long_hair</th>\n",
|
|
" <th>forehead_width_cm</th>\n",
|
|
" <th>forehead_height_cm</th>\n",
|
|
" <th>nose_wide</th>\n",
|
|
" <th>nose_long</th>\n",
|
|
" <th>lips_thin</th>\n",
|
|
" <th>distance_nose_to_lip_long</th>\n",
|
|
" <th>gender</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>11.8</td>\n",
|
|
" <td>6.1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>5.4</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>11.8</td>\n",
|
|
" <td>6.3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>0</td>\n",
|
|
" <td>14.4</td>\n",
|
|
" <td>6.1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>13.5</td>\n",
|
|
" <td>5.9</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
|
"0 1 11.8 6.1 1 0 \n",
|
|
"1 0 14.0 5.4 0 0 \n",
|
|
"2 0 11.8 6.3 1 1 \n",
|
|
"3 0 14.4 6.1 0 1 \n",
|
|
"4 1 13.5 5.9 0 0 \n",
|
|
"\n",
|
|
" lips_thin distance_nose_to_lip_long gender \n",
|
|
"0 1 1 0 \n",
|
|
"1 1 0 1 \n",
|
|
"2 1 1 0 \n",
|
|
"3 1 1 0 \n",
|
|
"4 0 0 1 "
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Convert Gender labels into integer values, for classification\n",
|
|
"df['gender']=df['gender'].replace('Male',0)\n",
|
|
"df['gender']=df['gender'].replace('Female',1)\n",
|
|
"df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "b573f11e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"long_hair int64\n",
|
|
"forehead_width_cm float64\n",
|
|
"forehead_height_cm float64\n",
|
|
"nose_wide int64\n",
|
|
"nose_long int64\n",
|
|
"lips_thin int64\n",
|
|
"distance_nose_to_lip_long int64\n",
|
|
"gender int64\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Now all is numeric data\n",
|
|
"df.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "35388ca3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Split dataset into X (Features) and y (Labels)\n",
|
|
"\n",
|
|
"# X is ALL columns except the last column (usually the label to be predicted)\n",
|
|
"X = df.iloc[:,:-1]\n",
|
|
"# y is the LABEL column (to be predicted)\n",
|
|
"y = df.iloc[:,-1]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "14c3347e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Use sklearn's train_test_split function imported before\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "fe832e3f",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Using 4 Classifiers\n",
|
|
"It is suggested to take a closer look at the parameters described in the documentation below, to tune the classifiers more effectively.\n",
|
|
"- [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)\n",
|
|
"- [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html)\n",
|
|
"- [SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html)\n",
|
|
"- [K-Nearest Neighbors (KNN)](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "f83a2e5c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"from sklearn.tree import DecisionTreeClassifier\n",
|
|
"from sklearn.svm import SVC\n",
|
|
"from sklearn.neighbors import KNeighborsClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "dc4c2062",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Logistic Regression\n",
|
|
"# Train the model (the target is binary, so the deprecated multi_class argument is not needed)\n",
|
|
"LogR = LogisticRegression(random_state=0).fit(X_train, y_train)\n",
|
|
"# Predict the test set\n",
|
|
"LogR_pred = LogR.predict(X_test)\n",
|
|
"\n",
|
|
"# Decision Tree (seeded so that ties between equally good splits are broken reproducibly)\n",
|
|
"dtree = DecisionTreeClassifier(max_depth = 2, random_state=0).fit(X_train, y_train)\n",
|
|
"dtree_pred = dtree.predict(X_test)\n",
|
|
"\n",
|
|
"# SVM\n",
|
|
"svm = SVC(kernel='linear',C=1).fit(X_train, y_train)\n",
|
|
"svm_pred = svm.predict(X_test)\n",
|
|
"\n",
|
|
"# K-Nearest Neighbors (KNN) -- a supervised classifier, not the K-Means clustering algorithm\n",
|
|
"knn = KNeighborsClassifier(n_neighbors=5).fit(X_train,y_train)\n",
|
|
"knn_pred = knn.predict(X_test)\n",
|
|
"\n",
|
|
"# See Accuracy of each classifier on the held-out test set only.\n",
|
|
"# Scoring on the full X, y would include the rows the models were trained on and inflate the numbers.\n",
|
|
"print(\"Logistic Regression Accuracy: \"+ str(LogR.score(X_test,y_test)))\n",
|
|
"print(\"Decision Tree Accuracy: \"+ str(dtree.score(X_test,y_test)))\n",
|
|
"print(\"SVM Accuracy: \"+ str(svm.score(X_test,y_test)))\n",
|
|
"print(\"KNN Accuracy: \"+ str(knn.score(X_test,y_test)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "00f72b96",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Try on a new dataset\n",
|
|
"Use one (or more) of the models above as a predictor on a new dataset. Assuming it has the same feature columns but different values, we get..."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "9c24db9a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>long_hair</th>\n",
|
|
" <th>forehead_width_cm</th>\n",
|
|
" <th>forehead_height_cm</th>\n",
|
|
" <th>nose_wide</th>\n",
|
|
" <th>nose_long</th>\n",
|
|
" <th>lips_thin</th>\n",
|
|
" <th>distance_nose_to_lip_long</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.5</td>\n",
|
|
" <td>6.7</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>5.9</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>12.9</td>\n",
|
|
" <td>6.4</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
|
"0 1 14.5 6.7 0 1 \n",
|
|
"1 1 14.0 5.9 0 0 \n",
|
|
"2 1 12.9 6.4 1 0 \n",
|
|
"\n",
|
|
" lips_thin distance_nose_to_lip_long \n",
|
|
"0 1 1 \n",
|
|
"1 0 0 \n",
|
|
"2 0 1 "
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# For this example we use 3 rows of data to be predicted\n",
|
|
"dval = pd.read_csv(r'D:\\archive\\valgend.csv', encoding='utf-8')\n",
|
|
"dval.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "ad501b6a",
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# predict with knn (change to which model you choose)\n",
|
|
"knn_pred_new = knn.predict(dval)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "8896ab72",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0, 1, 0], dtype=int64)"
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# See the result\n",
|
|
"knn_pred_new"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "7fa9db00",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Add new column in new dataframe for placing the results, pass the \"result\" from before\n",
|
|
"dval[\"pred_gender\"]=knn_pred_new"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "6155a519",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>long_hair</th>\n",
|
|
" <th>forehead_width_cm</th>\n",
|
|
" <th>forehead_height_cm</th>\n",
|
|
" <th>nose_wide</th>\n",
|
|
" <th>nose_long</th>\n",
|
|
" <th>lips_thin</th>\n",
|
|
" <th>distance_nose_to_lip_long</th>\n",
|
|
" <th>pred_gender</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.5</td>\n",
|
|
" <td>6.7</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>5.9</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>12.9</td>\n",
|
|
" <td>6.4</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
|
"0 1 14.5 6.7 0 1 \n",
|
|
"1 1 14.0 5.9 0 0 \n",
|
|
"2 1 12.9 6.4 1 0 \n",
|
|
"\n",
|
|
" lips_thin distance_nose_to_lip_long pred_gender \n",
|
|
"0 1 1 0 \n",
|
|
"1 0 0 1 \n",
|
|
"2 0 1 0 "
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# See data with appended prediction (last column)\n",
|
|
"dval.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "c2587a57",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>long_hair</th>\n",
|
|
" <th>forehead_width_cm</th>\n",
|
|
" <th>forehead_height_cm</th>\n",
|
|
" <th>nose_wide</th>\n",
|
|
" <th>nose_long</th>\n",
|
|
" <th>lips_thin</th>\n",
|
|
" <th>distance_nose_to_lip_long</th>\n",
|
|
" <th>pred_gender</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.5</td>\n",
|
|
" <td>6.7</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Male</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>5.9</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>Female</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>12.9</td>\n",
|
|
" <td>6.4</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Male</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" long_hair forehead_width_cm forehead_height_cm nose_wide nose_long \\\n",
|
|
"0 1 14.5 6.7 0 1 \n",
|
|
"1 1 14.0 5.9 0 0 \n",
|
|
"2 1 12.9 6.4 1 0 \n",
|
|
"\n",
|
|
" lips_thin distance_nose_to_lip_long pred_gender \n",
|
|
"0 1 1 Male \n",
|
|
"1 0 0 Female \n",
|
|
"2 0 1 Male "
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Converting back to labels\n",
|
|
"dval['pred_gender']=dval['pred_gender'].replace(0,'Male')\n",
|
|
"dval['pred_gender']=dval['pred_gender'].replace(1,'Female')\n",
|
|
"dval.head()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|