mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-14 02:40:02 +01:00
1400 lines
165 KiB
Plaintext
1400 lines
165 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 1,
|
||
|
|
"id": "3a904562",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import numpy as np\n",
|
||
|
|
"import pandas as pd"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 7,
|
||
|
|
"id": "b20a5ceb",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 3 3 2 3 \n",
|
||
|
|
"1 3 2 4 5 3 2 2 \n",
|
||
|
|
"2 4 4 3 2 5 4 3 \n",
|
||
|
|
"3 3 4 4 2 1 2 5 \n",
|
||
|
|
"4 4 4 2 2 4 3 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 1 4 4 3 4 \n",
|
||
|
|
"1 3 4 3 1 4 \n",
|
||
|
|
"2 4 4 3 3 3 \n",
|
||
|
|
"3 3 3 2 4 5 \n",
|
||
|
|
"4 4 3 4 1 2 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 7,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"# Generate synthetic survey-style answers: 500 rows x 12 columns of integers on a 1-5 scale.\n",
"# Seeding NumPy's global generator makes this cell, and later cells that draw from np.random,\n",
"# give the same result on every Restart & Run All.\n",
"np.random.seed(42)\n",
"\n",
"# Draw from a normal distribution (mean=3, standard deviation=1)\n",
"data = np.random.normal(loc=3, scale=1, size=(500, 12))\n",
"\n",
"# Round to the nearest integer, pull the tails back into the 1-5 range, and store as integers\n",
"data = np.clip(np.round(data), 1, 5).astype(int)\n",
"\n",
"# One column per variable: Column_1 ... Column_12\n",
"df = pd.DataFrame(data, columns=[f'Column_{i}' for i in range(1, 13)])\n",
"\n",
"# Preview the first rows\n",
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 8,
|
||
|
|
"id": "2b558e59",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB5JElEQVR4nOz9fZhkdX3n/z9fglHxJmpmBAQmowYxyE9RO3yTJRoSYkRXRDfRhRhjDDqywVVWswrEFTTBdd14k/0aTUYloFEURfHmi4loosZVgwOOMIhEVNSRgRlABZWAg+/fH3V6LIbumdPdVX1OVT8f19VXV33qnKp39+DL0+/zOZ+TqkKSJEmSJKnP7tJ1AZIkSZIkSbtjA0OSJEmSJPWeDQxJkiRJktR7NjAkSZIkSVLv2cCQJEmSJEm9ZwNDkiRJkiT1ng0MSZJaSHJ5kiO6rqMrSQ5P8rUkP0zy1K7rmUuStUkqyZ7L9HmV5JeW47MkSZINDEmSSHJ1kt/eaeyPknx29nlVPbyqPrWb91nWP6CX2auAN1XVvarq/KW+WZKzktzWNERmv7689DIXXMdhSS5I8v0kNya5KMlzlrsOSZK0ezYwJEmaEB03Rn4RuHwxO+6i7tc2DZHZr0cuvrxF1fVrwD8BnwZ+CfgF4L8AT1zOOiRJUjs2MCRJamF4lkZz1n5DkpuSXJfk9c1mn2m+f7+ZUfBrSe6S5OVJvpVka5J3JPn5off9w+a1G5L8j50+5/Qk70/y90luAv6o+ezPNzMGtiR5U5KfG3q/SvInzeUeNyf58yQPafa5Kcm5s9snWZXko0OzD/4lyZ2ODZJ8HXgw8JHm57pbkgcm+XCz31VJnje0/Z3qXsTv+31Jrk3ygySfSfLwodfukeR1ze/tB0k+m+QeQ7s/M8m3k1yf5M928TH/Gzi7qv5XVV1fAxdX1TOGPut5zc93Y/PzPnCeej+V5LlDz+8wg2eB/y5HJNmc5CXNfzNbnBUiSZINDEmSFuOvgL+qqvsADwHObcYf13y/bzOj4PMM/nj/I+A3GTQB7gW8CSDJwcCbgWcC+wI/D+y302cdA7wfuC/wLuB24L8Bq4BfA44E/mSnfY4CHgP8KvBSYH3zGQcAhwDHNdu9BNgMrAb2Bk4FaucftqoeAnwbOLr5uW4Fzmn2fSDwe8Crkxy5i7oX6mPAgcADgEt2eo+/bH6+/wDcv/kZfzr0+q8DBzH43bwiyS/v/OZJ9mLw+3v/fAUk+S3gfwLPYPDv8y3gPYv4WWa1/XcB2Ief/fdwPPDXSe63hM+WJGni2cCQJGng/GYmwveTfJ9BY2E+PwF+KcmqqvphVX1hF9s+E3h9VX2jqn4InAIc21xW8XvAR6rqs1V1G/AK7txA+HxVnV9VP62qW5oZAl+oqu1VdTXwt8Bv7LTP/6qqm6rqcmAT8PHm83/AoDHwqKGfY1/gF6vqJ1X1L1V1pwbGzpIcwKBJ8LKq+veq2gi8DXjWfHXP81Z/Ovw7T3L27AtVdWZV3dw0S04HHpnk55sZIn8MvKiqvltVt1fV55rtZr2y+V19GfgyMNelKfdjcBy0ZRc/6jOBM6vqkub9TwF+LcnaXeyzK23/XWDwb/Oq5t/lAuCHDJoykiStWDYwJEkaeGpV3Xf2izvPahh2PPBQ4KtJvpjkybvY9oEMztzP+hawJ4MZDw8EvjP7QlX9GLhhp/2/M/wkyUObyz6ubS7PeDWD2RjDrht6fMscz+/VPP7fwFXAx5N8I8nJu/g5dv6Zbqyqm3f6uYZnj3yH3fvL4d95VT0bIMkeSV6T5OvNz3h1s/2q5uvuwNd38b7XDj3+MT/7eYd9j8GsjX138T53+LdrGlA3cOdZMm21/XcBuKGqtg89n+/nkCRpxbCBIUnSAlXV16rqOAaXN/wv4P1J7skcl18A1zBYAHPWGmA7gz9etwD7z77QrOPwCzt/3E7P3wJ8FTiwuYTlVCCL/DlurqqXVN
WDgaOBF+90Gch8rgHun+TeQ2NrgO/uou6F+H0Gl6D8NoPLKNY24wGuB/6dwaU7i9Y0iz4P/O4uNrvDv13zb/wL3PHnnPUjYK+h5/sspT5JknRnNjAkSVqgJH+QZHVV/RT4fjN8O7CNwVn9Bw9tfg7w35I8KMm9GMyYeG9zdv39wNFJ/kOzgOMr2X0z4t7ATcAPkzyMwV0zFvtzPDnJLyVJ8563N1+7VFXfAT4H/M8kd0/yCAazUhaz1sVc7g3cymC2w14Mfmezn/1T4Ezg9c1ContksFjq3RbxOS9lsDDqf0/yCwBJHplkdp2LdwPPSXJo8/6vBv61uXRnZxuB/5RkryS/xOD3IUmSRsgGhiRJC3cUcHmSHzJY0PPYZi2IHwNnAP+3WdPhVxn8sf1OBnco+SaD2QP/FaBZC+G/MlgYcgtwM7CVwR/v8/lTBjMUbgbeCrx3CT/HgcAnGKyv8HngzVX1qZb7HsdgZsQ1wAeB06rqwgV+/kubu5rMfl3fjL+DwaUb3wW+Auy8xsifApcBXwRuZDALZsHHNFX1OeC3mq9vJLmRwcKaFzSvfxL4H8B5DP59HgIcO8/bvQG4jcHMmrMZXTNHkiQ10mKtLkmStAyaGRrfZ3B5yDc7LkeSJKlXnIEhSVKHkhzdXHZwTwa3B72Mny1aKUmSpIYNDEmSunUMg8swrmFwScexbW5lKkmStNJ4CYkkSZIkSeo9Z2BIkiRJkqTes4EhSZIkSZJ6zwaGJEmSJEnqPRsYkiRJkiSp92xgSJIkSZKk3rOBIUmSJEmSes8GhiRJkiRJ6j0bGFqwJEck2dx1HZL6z7yQ1IZZIakNs0I2MFa4JL+fZEOSHybZkuRjSX6967oWKskLmp/j1iRndV2PNI2mIS+S3C3J25N8K8nNSb6U5Ild1yVNk2nICoAkf9/Uf1OSf0vy3K5rkqbJtGTFrCQHJvn3JH/fdS3TzAbGCpbkxcAbgVcDewNrgDcDx3RY1mJdA/wFcGbXhUjTaIryYk/gO8BvAD8P/A/g3CRruyxKmhZTlBUA/xNYW1X3AZ4C/EWSx3RckzQVpiwrZv018MWui5h2NjBWqCQ/D7wKOLGqPlBVP6qqn1TVR6rqvzdnKd+Y5Jrm641J7jbPe1WSXxp6flaSv2geH5Fkc5KXJtnadFefmuRJzdmMG5OcOrTv6UnOTfKO5uzo5UlmdvfzND/D+cANS/3dSLqjacqLpvbTq+rqqvppVX0U+CbgHyXSEk1TVgBU1eVVdevs0+brIUv4FUli+rKi2fdY4PvAJ5fwq1ELNjBWrl8D7g58cJ7X/wz4VeBQ4JHAYcDLF/lZ+zSftR/wCuCtwB8w+IPhscArkjx4aPunAO8B7gt8GHjTIj9X0mhMbV4k2Rt4KHD5IuuV9DNTlxVJ3pzkx8BXgS3ABYusV9LPTFVWJLkPg4bMSxZZoxbABsbK9QvA9VW1fZ7Xnwm8qqq2VtU24JXAsxb5WT8BzqiqnzAIhFXAX1XVzVV1OYM/HB4xtP1nq+qCqrodeCeD4JLUnanMiyR3Bd4FnF1VX11kvZJ+Zuqyoqr+BLg3gz90PgDcuus9JLUwbVnx58Dbq+o7i6xRC2ADY+W6AViVZM95Xn8g8K2h599qxhb1WU0IANzSfL9u6PVbgHsNPb926PGPgbvvok5J4zd1eZHkLgwOTG4DXrDIWiXd0dRlBUBV3V5VnwX2B/7LIuuV9DNTkxVJDgV+G3jDIuvTAtnAWLk+D/w78NR5Xr8G+MWh52uasbn8GNhr6Pk+Sy1OUq9MVV4kCfB2BouG/W5zVkbS0k1VVsxhT1wDQxqFacqKI4C1wLeTXAv8KfC7SS5Z5jpWDBsYK1RV/YDBdWB/3Sxms1eSuyZ5YpLXAucAL0+yOsmqZtv5bgm0Efj9JHskOYrB6v7LKsmeSe4O7AHskcRZG9KITFteAG8Bfhk4uqpu2d3GktqZpqxI8oAkxya5V1PDE4DjgH9azjqkaTRNWQ
GsZ9DYPLT5+hvg/wOesMx1rBj+gbeCVdXrk1zHYFGcdwE3AxcDZwCXAPcBLm02fx+D25TO5UXA2cCJwPnN13J7OXD
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Build a 3x4 grid of panels, one per DataFrame column\n",
"fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(15, 10))\n",
"fig.suptitle('Histograms for Each Column')\n",
"\n",
"# Unit-wide bins starting at each integer 1..5 (the last bin also includes its right edge, 6)\n",
"bin_edges = [1, 2, 3, 4, 5, 6]\n",
"\n",
"# Walk the panels in row-major order and draw the column at the same position\n",
"for i, ax in enumerate(axes.ravel()):\n",
"    column = df.columns[i]\n",
"    ax.hist(df[column], bins=bin_edges, alpha=0.5, edgecolor='black')\n",
"    ax.set(title=f'{column}', xlabel='Value', ylabel='Frequency')\n",
"\n",
"# Tidy the spacing between panels, then render the figure\n",
"plt.tight_layout()\n",
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 9,
|
||
|
|
"id": "dfa7fe98",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 3 2 1 3 \n",
|
||
|
|
"1 3 2 4 5 2 1 2 \n",
|
||
|
|
"2 4 4 3 2 4 3 3 \n",
|
||
|
|
"3 3 4 4 2 0 1 5 \n",
|
||
|
|
"4 4 4 2 2 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 1 4 4 2 4 \n",
|
||
|
|
"1 3 4 3 0 4 \n",
|
||
|
|
"2 4 4 3 2 3 \n",
|
||
|
|
"3 3 3 2 3 5 \n",
|
||
|
|
"4 4 3 4 0 2 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 9,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"# Pick 3 distinct columns whose values will be pushed toward the low end of the 1-5 scale\n",
"skew_left = np.random.choice(df.columns, 3, replace=False)\n",
"\n",
"# Lower every value in each chosen column by a small integer step.\n",
"# NOTE: this modifies df in place, so re-running the cell shifts the columns again.\n",
"for column in skew_left:\n",
"    skewness_factor = np.random.uniform(0.1, 0.5)  # Random strength in [0.1, 0.5)\n",
"    # factor * 4 lies in [0.4, 2.0); truncating alone could give 0 (no shift), so add 1 -> step of 1 or 2\n",
"    shift = int(skewness_factor * 4) + 1\n",
"    # Clip so values stay on the 1-5 scale instead of dropping to 0 or below\n",
"    df[column] = (df[column] - shift).clip(1, 5)\n",
"\n",
"# Displaying the modified DataFrame\n",
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 10,
|
||
|
|
"id": "bb2aabc8",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB450lEQVR4nOz9fZhkdX3n/z9fglHxJmpmBAQmowYxyE9RO3yTJRoSYkRXRDfRhRhjDDqywVVWswrEFTTBdd14k/0aTQYloFEURfHmi4loosZVgwOOMIhEVNSRgRlABZSAM75/f9TpsRi6Z073VPU5Vf18XFdfXfWpc6re3aMvTr/P53xOqgpJkiRJkqQ+u1vXBUiSJEmSJO2KDQxJkiRJktR7NjAkSZIkSVLv2cCQJEmSJEm9ZwNDkiRJkiT1ng0MSZIkSZLUezYwJElqIckVSY7ouo6uJDk8ydeT3Jrk6V3XM5ckq5NUkj2X6PMqyS8txWdJkiQbGJIkkeSaJL+9w9gfJfnc7POqemRVfXoX77Okf0AvsdcAb6mq+1TVBbv7ZknOTnJH0xCZ/frK7pe54DoOS3Jhkh8kuSnJxUmet9R1SJKkXbOBIUnShOi4MfKLwBWL2XEndb++aYjMfj168eUtqq5fA/4J+AzwS8AvAP8FePJS1iFJktqxgSFJUgvDszSas/brktyc5Pokb2w2+2zz/QfNjIJfS3K3JK9M8u0km5O8M8nPD73vHzav3Zjkf+zwOacn+UCSv09yM/BHzWd/oZkxsCnJW5L83ND7VZI/aS73uCXJnyd5WLPPzUnOm90+yYokHxuaffAvSe5ybJDkG8BDgY82P9c9kjw4yUea/a5O8oKh7e9S9yJ+3+9Pcl2SHyb5bJJHDr12ryRvaH5vP0zyuST3Gtr92Um+k+SGJH+2k4/538A5VfW/quqGGrikqp419FkvaH6+m5qf98Hz1PvpJM8fen6nGTwL/Hc5IsnGJC9r/jezyVkhkiTZwJAkaTH+Cvirqrof8DDgvGb8Cc33+zczCr7A4I/3PwJ+k0ET4D7AWwCSHAy8FXg2sC/w88B+O3zWMcAHgPsD7wa2Af8NWAH8GnAk8Cc77HMU8DjgV4GXA2ubzzgAOAQ4rtnuZcBGYCWwN3AqUDv+sFX1MOA7wNHNz3U7cG6z74OB3wNem+TIndS9UB8HDgQeBFy6w3v8ZfPz/Qfggc3P+NOh138dOIjB7+ZVSX55xzdPsheD398H5isgyW8B/xN4FoN/n28D713EzzKr7b8LwD787H8PxwN/neQBu/HZkiRNPBsYkiQNXNDMRPhBkh8waCzM5yfALyVZUVW3VtUXd7Lts4E3VtU3q+pW4BTg2Oayit8DPlpVn6uqO4BXcdcGwheq6oKq+mlV3dbMEPhiVW2tqmuAvwV+Y4d9/ldV3VxVVwAbgE80n/9DBo2Bxwz9HPsCv1hVP6mqf6mquzQwdpTkAAZNgldU1b9X1Xrg7cBz5qt7nrf60+HfeZJzZl+oqrOq6pamWXI68OgkP9/MEPlj4CVV9b2q2lZVn2+2m/Xq5nf1FeArwFyXpjyAwXHQpp38qM8GzqqqS5v3PwX4tSSrd7LPzrT9d4HBv81rmn+XC4FbGTRlJElatmxgSJI08PSquv/sF3ed1TDseODhwNeSfCnJU3ey7YMZnLmf9W1gTwYzHh4MfHf2har6MXDjDvt/d/hJkoc3l31c11ye8VoGszGGXT/0+LY5nt+nefy/gauBTyT5ZpKTd/Jz7Pgz3VRVt+zwcw3PHvkuu/aXw7/zqnouQJI9krwuyTean/GaZvsVzdc9gW/s5H2vG3r8Y3728w77PoNZG/vu5H3u9G/XNKBu5K6zZNpq++8CcGNVbR16Pt/PIUnSsmEDQ5KkBaqqr1fVcQwub/hfwAeS3Js5Lr8ArmWwAOasVcBWBn+8bgL2n32hWcfhF3b8uB2evw34GnBgcwnLqUAW+XPcUlUvq6
qHAkcDL93hMpD5XAs8MMl9h8ZWAd/bSd0L8fsMLkH5bQaXUaxuxgPcAPw7g0t3Fq1pFn0B+N2dbHanf7vm3/gXuPPPOetHwF5Dz/fZnfokSdJd2cCQJGmBkvxBkpVV9VPgB83wNmALg7P6Dx3a/FzgvyV5SJL7MJgx8b7m7PoHgKOT/IdmAcdXs+tmxH2Bm4FbkzyCwV0zFvtzPDXJLyVJ857bmq+dqqrvAp8H/meSeyZ5FINZKYtZ62Iu9wVuZzDbYS8Gv7PZz/4pcBbwxmYh0T0yWCz1Hov4nJczWBj1vyf5BYAkj04yu87Fe4DnJTm0ef/XAv/aXLqzo/XAf0qyV5JfYvD7kCRJI2QDQ5KkhTsKuCLJrQwW9Dy2WQvix8AZwP9t1nT4VQZ/bL+LwR1KvsVg9sB/BWjWQvivDBaG3ATcAmxm8Mf7fP6UwQyFW4Azgfftxs9xIPBJBusrfAF4a1V9uuW+xzGYGXEt8CHgtKq6aIGf//LmriazXzc04+9kcOnG94CvAjuuMfKnwOXAl4CbGMyCWfAxTVV9Hvit5uubSW5isLDmhc3rnwL+B3A+g3+fhwHHzvN2bwLuYDCz5hxG18yRJEmNtFirS5IkLYFmhsYPGFwe8q2Oy5EkSeoVZ2BIktShJEc3lx3cm8HtQS/nZ4tWSpIkqWEDQ5Kkbh3D4DKMaxlc0nFsm1uZSpIkLTdeQiJJkiRJknrPGRiSJEmSJKn3bGBIkiRJkqTes4EhSZIkSZJ6zwaGJEmSJEnqPRsYkiRJkiSp92xgSJIkSZKk3rOBIUmSJEmSes8GhhYsyRFJNnZdh6T+My8ktWFWSGrDrJANjGUuye8nWZfk1iSbknw8ya93XddCJXlR83PcnuTsruuRptE05EWSeyR5R5JvJ7klyZeTPLnruqRpMg1ZAZDk75v6b07yb0me33VN0jSZlqyYleTAJP+e5O+7rmWa2cBYxpK8FHgz8Fpgb2AV8FbgmA7LWqxrgb8Azuq6EGkaTVFe7Al8F/gN4OeB/wGcl2R1l0VJ02KKsgLgfwKrq+p+wNOAv0jyuI5rkqbClGXFrL8GvtR1EdPOBsYyleTngdcAJ1bVB6vqR1X1k6r6aFX99+Ys5ZuTXNt8vTnJPeZ5r0ryS0PPz07yF83jI5JsTPLyJJub7urTkzylOZtxU5JTh/Y9Pcl5Sd7ZnB29IsnMrn6e5me4ALhxd383ku5smvKiqf30qrqmqn5aVR8DvgX4R4m0m6YpKwCq6oqqun32afP1sN34FUli+rKi2fdY4AfAp3bjV6MWbGAsX78G3BP40Dyv/xnwq8ChwKOBw4BXLvKz9mk+az/gVcCZwB8w+IPh8cCrkjx0aPunAe8F7g98BHjLIj9X0mhMbV4k2Rt4OHDFIuuV9DNTlxVJ3prkx8DXgE3AhYusV9LPTFVWJLkfg4bMyxZZoxbABsby9QvADVW1dZ7Xnw28pqo2V9UW4NXAcxb5WT8BzqiqnzAIhBXAX1XVLVV1BYM/HB41tP3nqurCqtoGvItBcEnqzlTmRZK7A+8Gzqmqry2yXkk/M3VZUVV/AtyXwR86HwRu3/keklqYtqz4c+AdVfXdRdaoBbCBsXzdCKxIsuc8rz8Y+PbQ8283Y4v6rCYEAG5rvl8/9PptwH2Gnl839PjHwD13Uqek8Zu6vEhyNwYHJncAL1pkrZLubOqyAqCqtlXV54D9gf+yyHol/czUZEWSQ4HfBt60yPq0QDYwlq8vAP8OPH2e168FfnHo+apmbC4/BvYaer7P7hYnqVemKi+SBHgHg0XDfrc5KyNp901VVsxhT1wDQxqFacqKI4DVwHeSXAf8KfC7SS5d4jqWDRsYy1RV/ZDBdWB/3Sxms1eSuyd5cpLXA+cCr0yyMsmKZtv5bgm0Hvj9JHskOYrB6v5LKsmeSe4J7AHskcRZG9KITFteAG8Dfhk4uqpu29XGktqZpqxI8qAkxya5T1PDk4DjgH9ayjqkaTRNWQ
GsZdDYPLT5+hvg/wOetMR1LBv+gbeMVdUbk1zPYFGcdwO3AJcAZwCXAvcDLms2fz+D25TO5SXAOcCJwAXN11J7JXD
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"# Setting up the subplots: one histogram per column on a 3x4 grid\n",
"fig, axes = plt.subplots(3, 4, figsize=(15, 10))\n",
"fig.suptitle('Histograms for Each Column')\n",
"\n",
"# Integer-centred bin edges covering every value present in the plotted columns.\n",
"# Fixed edges [1, ..., 6] would silently drop values below 1 and count 6 in the same bar as 5.\n",
"plotted = df[df.columns[:axes.size]]\n",
"bin_edges = np.arange(plotted.min().min(), plotted.max().max() + 2) - 0.5\n",
"\n",
"# Visualizing/histogram for each column\n",
"for i, ax in enumerate(axes.flat):\n",
"    column = df.columns[i]\n",
"    ax.hist(df[column], bins=bin_edges, alpha=0.5, edgecolor='black')\n",
"    ax.set_title(f'{column}')\n",
"    ax.set_xlabel('Value')\n",
"    ax.set_ylabel('Frequency')\n",
"\n",
"# Adjust layout so titles and axis labels do not overlap, then render\n",
"plt.tight_layout()\n",
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 11,
|
||
|
|
"id": "cebcf6cb",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 4 2 1 3 \n",
|
||
|
|
"1 3 2 4 6 2 1 2 \n",
|
||
|
|
"2 4 4 3 3 4 3 3 \n",
|
||
|
|
"3 3 4 4 3 0 1 5 \n",
|
||
|
|
"4 4 4 2 3 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 1 4 4 2 4 \n",
|
||
|
|
"1 3 4 3 0 4 \n",
|
||
|
|
"2 4 4 3 2 3 \n",
|
||
|
|
"3 3 3 2 3 5 \n",
|
||
|
|
"4 4 3 4 0 2 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 11,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"# Pick 2 distinct columns to push toward the high end, excluding the ones already shifted left\n",
"skew_right = np.random.choice([col for col in df.columns if col not in skew_left], 2, replace=False)\n",
"\n",
"# Raise every value in each chosen column by a small integer step.\n",
"# NOTE: this modifies df in place, so re-running the cell shifts the columns again.\n",
"for column in skew_right:\n",
"    skewness_factor = np.random.uniform(0.1, 0.5)  # Random strength in [0.1, 0.5)\n",
"    # factor * 4 lies in [0.4, 2.0); truncating alone could give 0 (no shift), so add 1 -> step of 1 or 2\n",
"    shift = int(skewness_factor * 4) + 1\n",
"    # Clip so values stay on the 1-5 scale instead of climbing to 6 or above\n",
"    df[column] = (df[column] + shift).clip(1, 5)\n",
"\n",
"# Displaying the modified DataFrame\n",
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 12,
|
||
|
|
"id": "69a10ec6",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB4y0lEQVR4nOz9fZhkdX3n/z9fglHxJmJmBAQmowYxyE9RO3yTJRoSYkRXRDfRhRhjDDqywVVWswrEFTTBdd14k/0aTQYloFEURfHmi4loosZVgwOOMIhEVNSRgRlABZWAM75/f9TpsRi6Z073VPU5Vf18XFdfXfWpc6re3aMvTr/P53xOqgpJkiRJkqQ+u1vXBUiSJEmSJO2KDQxJkiRJktR7NjAkSZIkSVLv2cCQJEmSJEm9ZwNDkiRJkiT1ng0MSZIkSZLUezYwJElqIcmVSY7suo6uJDkiydeS/DDJ07quZy5JViepJHsu0edVkl9ais+SJEk2MCRJIsm1SX57h7E/SvLZ2edV9Yiq+tQu3mdJ/4BeYq8G3lxV96mqC3f3zZKck+SOpiEy+/Xl3S9zwXUcnuSiJN9PcnOSS5I8d6nrkCRJu2YDQ5KkCdFxY+QXgSsXs+NO6n5d0xCZ/XrU4stbVF2/BvwT8Gngl4BfAP4L8KSlrEOSJLVjA0OSpBaGZ2k0Z+3XJbklyQ1J3tBs9pnm+/ebGQW/luRuSV6R5FtJNid5R5KfH3rfP2xeuynJ/9jhc85I8v4kf5/kFuCPms/+fDNjYFOSNyf5uaH3qyR/0lzucWuSP0/y0GafW5KcP7t9khVJPjo0++Bfktzl2CDJ14GHAB9pfq57JHlQkg83+12T5PlD29+l7kX8vt+X5PokP0jymSSPGHrtXkle3/zefpDks0nuNbT7s5J8O8mNSf5sJx/zv4Fzq+p/VdWNNXBpVT1z6LOe3/x8Nzc/74PmqfdTSZ439PxOM3gW+O9yZJKNSV7a/G9mk7NCJEmygSFJ0mL8FfBXVXU/4KHA+c3445vv929mFHyewR/vfwT8JoMmwH2ANwMkOQR4C/AsYD/g54H9d/isY4H3A/cH3gVsA/4bsAL4NeAo4E922Odo4LHArwIvA9Y2n3EgcChwfLPdS4GNwEpgH+A0oHb8YavqocC3gWOan+t24Lxm3wcBvwe8JslRO6l7oT4GHAQ8ELhsh/f4y+bn+w/AA5qf8adDr/86cDCD380rk/zyjm+eZC8Gv7/3z1dAkt8C/ifwTAb/Pt8C3rOIn2VW238XgH352f8eTgD+Osneu/HZkiRNPBsYkiQNXNjMRPh+ku8zaCzM5yfALyVZUVU/rKov7GTbZwFvqKpvVNUPgVOB45rLKn4P+EhVfbaq7gBeyV0bCJ+vqgur6qdVdVszQ+ALVbW1qq4F/hb4jR32+V9VdUtVXQlsAD7efP4PGDQGHj30c+wH/GJV/aSq/qWq7tLA2FGSAxk0CV5eVf9eVeuBtwHPnq/ued7qT4d/50nOnX2hqs6uqlubZskZwKOS/HwzQ+SPgRdX1XeraltVfa7Zbtarmt/Vl4EvA3NdmrI3g+OgTTv5UZ8FnF1VlzXvfyrwa0lW72SfnWn77wKDf5tXN/8uFwE/ZNCUkSRp2bKBIUnSwNOq6v6zX9x1VsOwE4CHAV9N8sUkT9nJtg9icOZ+1reAPRnMeHgQ8J3ZF6rqx8BNO+z/neEnSR7WXPZxfXN5xmsYzMYYdsPQ49vmeH6f5vH/Bq4BPp7kG0lO2cnPsePPdHNV3brDzzU8e+Q77NpfDv/Oq+o5AEn2SPLaJF9vfsZrm+1XNF/3BL6+k/e9fujxj/nZzzvsewxmbey3k/e5079d04C6ibvOkmmr7b8LwE1VtXXo+Xw/hyRJy4YNDEmSFqiqvlZVxzO4vOF/Ae9Pcm/muPwCuI7BApizVgFbGfzxugk4YPaFZh2HX9jx43Z4/lbgq8BBzSUspwFZ5M9xa1W9tK
oeAhwDvGSHy0Dmcx3wgCT3HRpbBXx3J3UvxO8zuATltxlcRrG6GQ9wI/DvDC7dWbSmWfR54Hd3stmd/u2af+Nf4M4/56wfAXsNPd93d+qTJEl3ZQNDkqQFSvIHSVZW1U+B7zfD24AtDM7qP2Ro8/OA/5bkwUnuw2DGxHubs+vvB45J8h+aBRxfxa6bEfcFbgF+mOThDO6asdif4ylJfilJmvfc1nztVFV9B/gc8D+T3DPJIxnMSlnMWhdzuS9wO4PZDnsx+J3NfvZPgbOBNzQLie6RwWKp91jE57yMwcKo/z3JLwAkeVSS2XUu3g08N8lhzfu/BvjX5tKdHa0H/lOSvZL8EoPfhyRJGiEbGJIkLdzRwJVJfshgQc/jmrUgfgycCfzfZk2HX2Xwx/Y7Gdyh5JsMZg/8V4BmLYT/ymBhyE3ArcBmBn+8z+dPGcxQuBU4C3jvbvwcBwGfYLC+wueBt1TVp1ruezyDmRHXAR8ETq+qixf4+S9r7moy+3VjM/4OBpdufBf4CrDjGiN/ClwBfBG4mcEsmAUf01TV54Dfar6+keRmBgtrXtS8/kngfwAXMPj3eShw3Dxv90bgDgYza85ldM0cSZLUSIu1uiRJ0hJoZmh8n8HlId/suBxJkqRecQaGJEkdSnJMc9nBvRncHvQKfrZopSRJkho2MCRJ6taxDC7DuI7BJR3HtbmVqSRJ0nLjJSSSJEmSJKn3nIEhSZIkSZJ6zwaGJEmSJEnqPRsYkiRJkiSp92xgSJIkSZKk3rOBIUmSJEmSes8GhiRJkiRJ6j0bGJIkSZIkqfdsYGjBkhyZZGPXdUjqP/NCUhtmhaQ2zArZwFjmkvx+knVJfphkU5KPJfn1rutaqCQvbH6O25Oc03U90jSahrxIco8kb0/yrSS3JvlSkid1XZc0TaYhKwCS/H1T/y1J/i3J87quSZom05IVs5IclOTfk/x917VMMxsYy1iSlwBvAl4D7AOsAt4CHNthWYt1HfAXwNldFyJNoynKiz2B7wC/Afw88D+A85Os7rIoaVpMUVYA/E9gdVXdD3gq8BdJHttxTdJUmLKsmPXXwBe7LmLa2cBYppL8PPBq4KSq+kBV/aiqflJVH6mq/96cpXxTkuuarzclucc871VJfmno+TlJ/qJ5fGSSjUlelmRz0119WpInN2czbk5y2tC+ZyQ5P8k7mrOjVyaZ2dXP0/wMFwI37e7vRtKdTVNeNLWfUVXXVtVPq+qjwDcB/yiRdtM0ZQVAVV1ZVbfPPm2+HrobvyJJTF9WNPseB3wf+ORu/GrUgg2M5evXgHsCH5zn9T8DfhU4DHgUcDjwikV+1r7NZ+0PvBI4C/gDBn8wPA54ZZKHDG3/VOA9wP2BDwNvXuTnShqNqc2LJPsADwOuXGS9kn5m6rIiyVuS/Bj4KrAJuGiR9Ur6manKiiT3Y9CQeekia9QC2MBYvn4BuLGqts7z+rOAV1fV5qraArwKePYiP+snwJlV9RMGgbAC+KuqurWqrmTwh8Mjh7b/bFVdVFXbgHcyCC5J3ZnKvEhyd+BdwLlV9dVF1ivpZ6YuK6rqT4D7MvhD5wPA7TvfQ1IL05YVfw68vaq+s8gatQA2MJavm4AVSfac5/UHAd8aev6tZmxRn9WEAMBtzfcbhl6/DbjP0PPrhx7/GLjnTuqUNH5TlxdJ7sbgwOQO4IWLrFXSnU1dVgBU1baq+ixwAPBfFlmvpJ+ZmqxIchjw28AbF1mfFsgGxvL1eeDfgafN8/p1wC8OPV/VjM3lx8BeQ8/33d3iJPXKVOVFkgBvZ7Bo2O82Z2Uk7b6pyoo57IlrYEijME1ZcSSwGvh2kuuBPwV+N8llS1zHsmEDY5mqqh8wuA7sr5vFbPZKcvckT0ryOuA84BVJViZZ0Ww73y2B1gO/n2SPJEczWN1/SSXZM8k9gT2APZI4a0MakWnLC+CtwC8Dx1TVbbvaWFI705QVSR6Y5Lgk92lqeCJwPPBPS1mHNI2mKS
uAtQwam4c1X38D/H/AE5e4jmXDP/CWsap6Q5IbGCyK8y7gVuBS4EzgMuB+wOXN5u9jcJvSubwYOBc4Cbiw+VpqrwB
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
"# Setting up the subplots: one histogram per column on a 3x4 grid\n",
"fig, axes = plt.subplots(3, 4, figsize=(15, 10))\n",
"fig.suptitle('Histograms for Each Column')\n",
"\n",
"# Integer-centred bin edges covering every value present in the plotted columns.\n",
"# Fixed edges [1, ..., 6] would silently drop values below 1 and count 6 in the same bar as 5.\n",
"plotted = df[df.columns[:axes.size]]\n",
"bin_edges = np.arange(plotted.min().min(), plotted.max().max() + 2) - 0.5\n",
"\n",
"# Visualizing/histogram for each column\n",
"for i, ax in enumerate(axes.flat):\n",
"    column = df.columns[i]\n",
"    ax.hist(df[column], bins=bin_edges, alpha=0.5, edgecolor='black')\n",
"    ax.set_title(f'{column}')\n",
"    ax.set_xlabel('Value')\n",
"    ax.set_ylabel('Frequency')\n",
"\n",
"# Adjust layout so titles and axis labels do not overlap, then render\n",
"plt.tight_layout()\n",
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 13,
|
||
|
|
"id": "50833ea0",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA75310</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP54242</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA54434</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>MA69977</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>SA59502</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 4 2 1 3 \n",
|
||
|
|
"1 3 2 4 6 2 1 2 \n",
|
||
|
|
"2 4 4 3 3 4 3 3 \n",
|
||
|
|
"3 3 4 4 3 0 1 5 \n",
|
||
|
|
"4 4 4 2 3 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \n",
|
||
|
|
"0 1 4 4 2 4 SA75310 \n",
|
||
|
|
"1 3 4 3 0 4 SP54242 \n",
|
||
|
|
"2 4 4 3 2 3 SA54434 \n",
|
||
|
|
"3 3 3 2 3 5 MA69977 \n",
|
||
|
|
"4 4 3 4 0 2 SA59502 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 13,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"import random\n",
|
||
|
|
"\n",
|
||
|
|
"# Function to generate staff ID\n",
|
||
|
|
"def generate_staff_id():\n",
|
||
|
|
" level_codes = ['DR'] * 3 + ['MA'] * 50 + ['SP'] * 75 + ['SA'] * 372 # Level codes distribution\n",
|
||
|
|
" level_code = random.choice(level_codes) # Randomly choose a level code\n",
|
||
|
|
" random_numbers = ''.join(str(random.randint(0, 9)) for _ in range(5)) # Generate 5 random numbers\n",
|
||
|
|
" return f\"{level_code}{random_numbers}\"\n",
|
||
|
|
"\n",
|
||
|
|
"# Add \"Staff_Id\" column to DataFrame\n",
|
||
|
|
"df['Staff_Id'] = [generate_staff_id() for _ in range(500)]\n",
|
||
|
|
"\n",
|
||
|
|
"# Display the DataFrame with the new \"Staff_Id\" columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 14,
|
||
|
|
"id": "268636d1",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA75310</td>\n",
|
||
|
|
" <td>50</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP54242</td>\n",
|
||
|
|
" <td>34</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA54434</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>MA69977</td>\n",
|
||
|
|
" <td>7</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>SA59502</td>\n",
|
||
|
|
" <td>52</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 4 2 1 3 \n",
|
||
|
|
"1 3 2 4 6 2 1 2 \n",
|
||
|
|
"2 4 4 3 3 4 3 3 \n",
|
||
|
|
"3 3 4 4 3 0 1 5 \n",
|
||
|
|
"4 4 4 2 3 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 1 4 4 2 4 SA75310 \n",
|
||
|
|
"1 3 4 3 0 4 SP54242 \n",
|
||
|
|
"2 4 4 3 2 3 SA54434 \n",
|
||
|
|
"3 3 3 2 3 5 MA69977 \n",
|
||
|
|
"4 4 3 4 0 2 SA59502 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service \n",
|
||
|
|
"0 50 4 \n",
|
||
|
|
"1 34 2 \n",
|
||
|
|
"2 17 1 \n",
|
||
|
|
"3 7 0 \n",
|
||
|
|
"4 52 4 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 14,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Generating random values for Month_Of_Service\n",
|
||
|
|
"df['Month_Of_Service'] = [random.randint(0, 66) for _ in range(500)] # 66 months = 5 years 6 months\n",
|
||
|
|
"\n",
|
||
|
|
"# Generating Years_Of_Service based on Month_Of_Service\n",
|
||
|
|
"df['Years_Of_Service'] = df['Month_Of_Service'] // 12 # Integer division to get years\n",
|
||
|
|
"\n",
|
||
|
|
"# Adjusting Years_Of_Service for people with less than a year of service\n",
|
||
|
|
"df.loc[df['Years_Of_Service'] == 5, 'Years_Of_Service'] = 4\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the DataFrame with the new columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 15,
|
||
|
|
"id": "73aeb01d",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" <th>Residence</th>\n",
|
||
|
|
" <th>Residence_Code</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA75310</td>\n",
|
||
|
|
" <td>50</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP54242</td>\n",
|
||
|
|
" <td>34</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA54434</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>MA69977</td>\n",
|
||
|
|
" <td>7</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Tangerang</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>SA59502</td>\n",
|
||
|
|
" <td>52</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 4 2 1 3 \n",
|
||
|
|
"1 3 2 4 6 2 1 2 \n",
|
||
|
|
"2 4 4 3 3 4 3 3 \n",
|
||
|
|
"3 3 4 4 3 0 1 5 \n",
|
||
|
|
"4 4 4 2 3 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 1 4 4 2 4 SA75310 \n",
|
||
|
|
"1 3 4 3 0 4 SP54242 \n",
|
||
|
|
"2 4 4 3 2 3 SA54434 \n",
|
||
|
|
"3 3 3 2 3 5 MA69977 \n",
|
||
|
|
"4 4 3 4 0 2 SA59502 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service Residence Residence_Code \n",
|
||
|
|
"0 50 4 Jakarta 1 \n",
|
||
|
|
"1 34 2 Jakarta 1 \n",
|
||
|
|
"2 17 1 Jakarta 1 \n",
|
||
|
|
"3 7 0 Tangerang 2 \n",
|
||
|
|
"4 52 4 Depok 4 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 15,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Define the possible residence locations\n",
|
||
|
|
"residence_locations = ['Jakarta', 'Tangerang', 'Bekasi', 'Depok', 'Bogor']\n",
|
||
|
|
"\n",
|
||
|
|
"# Generating random values for Residence\n",
|
||
|
|
"df['Residence'] = [random.choice(residence_locations) for _ in range(500)]\n",
|
||
|
|
"\n",
|
||
|
|
"# Creating Residence_Code based on Residence\n",
|
||
|
|
"residence_mapping = {location: i+1 for i, location in enumerate(residence_locations)}\n",
|
||
|
|
"df['Residence_Code'] = df['Residence'].map(residence_mapping)\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the DataFrame with the new columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 27,
|
||
|
|
"id": "39e7083a",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" <th>Residence</th>\n",
|
||
|
|
" <th>Residence_Code</th>\n",
|
||
|
|
" <th>Net_Salary</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA75310</td>\n",
|
||
|
|
" <td>50</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>6504819</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP54242</td>\n",
|
||
|
|
" <td>34</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>9050238</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA54434</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5485486</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>MA69977</td>\n",
|
||
|
|
" <td>7</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Tangerang</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>19505881</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>SA59502</td>\n",
|
||
|
|
" <td>52</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5633594</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 5 3 4 4 2 1 3 \n",
|
||
|
|
"1 3 2 4 6 2 1 2 \n",
|
||
|
|
"2 4 4 3 3 4 3 3 \n",
|
||
|
|
"3 3 4 4 3 0 1 5 \n",
|
||
|
|
"4 4 4 2 3 3 2 2 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 1 4 4 2 4 SA75310 \n",
|
||
|
|
"1 3 4 3 0 4 SP54242 \n",
|
||
|
|
"2 4 4 3 2 3 SA54434 \n",
|
||
|
|
"3 3 3 2 3 5 MA69977 \n",
|
||
|
|
"4 4 3 4 0 2 SA59502 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service Residence Residence_Code Net_Salary \n",
|
||
|
|
"0 50 4 Jakarta 1 6504819 \n",
|
||
|
|
"1 34 2 Jakarta 1 9050238 \n",
|
||
|
|
"2 17 1 Jakarta 1 5485486 \n",
|
||
|
|
"3 7 0 Tangerang 2 19505881 \n",
|
||
|
|
"4 52 4 Depok 4 5633594 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 27,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Define salary ranges for each staff level\n",
|
||
|
|
"salary_ranges = {'SA': (5070000, 7004030), # Salary range for Staff (SA)\n",
|
||
|
|
" 'SP': (8100075, 10240060), # Salary range for Supervisor (SP)\n",
|
||
|
|
" 'MA': (15562000, 21053011), # Salary range for Manager (MA)\n",
|
||
|
|
" 'DR': (53010000, 55020000)} # Salary range for Director (DR)\n",
|
||
|
|
"\n",
|
||
|
|
"# Function to generate net salary based on staff level\n",
|
||
|
|
"def generate_net_salary(level_code):\n",
|
||
|
|
" lower_bound, upper_bound = salary_ranges[level_code]\n",
|
||
|
|
" return random.randint(lower_bound, upper_bound)\n",
|
||
|
|
"\n",
|
||
|
|
"# Add \"Net_Salary\" column to DataFrame\n",
|
||
|
|
"df['Net_Salary'] = [generate_net_salary(staff_id[:2]) for staff_id in df['Staff_Id']]\n",
|
||
|
|
"\n",
|
||
|
|
"# Display the DataFrame with the new \"Net_Salary\" column\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 29,
|
||
|
|
"id": "8861e640",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Staff_Id\n",
|
||
|
|
"DR 54434974.0\n",
|
||
|
|
"MA 18732872.0\n",
|
||
|
|
"SA 5966061.0\n",
|
||
|
|
"SP 9149982.0\n",
|
||
|
|
"Name: Net_Salary, dtype: float64\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Grouping by staff level and calculating median net salary\n",
|
||
|
|
"median_salary_by_level = df.groupby(df['Staff_Id'].str[:2])['Net_Salary'].median()\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the median net salary for each staff level\n",
|
||
|
|
"print(median_salary_by_level)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "a04382c5",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": []
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3 (ipykernel)",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.9.12"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|