mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-30 17:50:03 +01:00
1420 lines
168 KiB
Plaintext
1420 lines
168 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 3,
|
||
|
|
"id": "3a904562",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import numpy as np\n",
|
||
|
|
"import pandas as pd\n",
|
||
|
|
"import matplotlib.pyplot as plt"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 2,
|
||
|
|
"id": "b20a5ceb",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 3 2 3 4 3 \n",
|
||
|
|
"1 3 2 2 2 3 \n",
|
||
|
|
"2 4 3 2 4 4 \n",
|
||
|
|
"3 4 4 5 3 4 \n",
|
||
|
|
"4 4 2 3 2 2 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 2,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Generating random data with a normal distribution\n",
|
||
|
|
"data = np.random.normal(loc=3, scale=1, size=(500, 12)) # mean=3, standard deviation=1\n",
|
||
|
|
"\n",
|
||
|
|
"# Rounding the values and ensuring they are between 1 and 5\n",
|
||
|
|
"data = np.round(data)\n",
|
||
|
|
"data[data < 1] = 1\n",
|
||
|
|
"data[data > 5] = 5\n",
|
||
|
|
"\n",
|
||
|
|
"# Converting to integers\n",
|
||
|
|
"data = data.astype(int)\n",
|
||
|
|
"\n",
|
||
|
|
"# Creating a DataFrame\n",
|
||
|
|
"df = pd.DataFrame(data, columns=[f'Column_{i}' for i in range(1, 13)])\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the DataFrame\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 4,
|
||
|
|
"id": "2b558e59",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB7+0lEQVR4nOzdfZhkdX3n/fdHMCKKETIjjsBkwCAGuXTUljtZoiExruiKaB4MxBii6OgGo6xmFYgraILrmohmb6PJKAQ0iqAoPtyaiCZKTCQ4wMiDQABBGRmYAVRACDj4vf+o00MxdM+crunqOlX9fl1XX13nV+dUfbtHPp7+1u/8TqoKSZIkSZKkLnvIqAuQJEmSJEnaFhsYkiRJkiSp82xgSJIkSZKkzrOBIUmSJEmSOs8GhiRJkiRJ6jwbGJIkSZIkqfNsYEiS1EKSy5McPOo6RiXJQUmuTnJnkheNup6ZJFmRpJLsuEDvV0l+YSHeS5Ik2cCQJIkk1yf5jS3G/jDJ16e3q+pJVfXVbbzOgv4BvcDeDryvqh5ZVeds74slOS3JvU1DZPrrW9tf5pzrODDJF5L8MMltSS5I8vKFrkOSJG2bDQxJksbEiBsjPw9cPsiBW6n7XU1DZPrrKYOXN1Bdvwz8E/A14BeAnwP+O/C8haxDkiS1YwNDkqQW+mdpNJ/ar0lye5Kbk5zc7HZe8/2HzYyCX07ykCRvSfLdJBuSfDjJz/a97h80z92a5H9t8T4nJvlkkr9Pcjvwh817f6OZMbA+yfuS/Ezf61WSP2ou97gjyZ8leXxzzO1JzpreP8mSJJ/vm33wL0kedG6Q5FpgH+Bzzc/1sCSPS/LZ5rhrkryqb/8H1T3A7/sTSW5K8qMk5yV5Ut9zD0/y7ub39qMkX0/y8L7DX5rke0luSfKnW3mbvwBOr6r/U1W3VM+FVfWSvvd6VfPz3db8vI+bpd6vJnll3/YDZvDM8d/l4CTrkryx+d/MemeFSJJkA0OSpEH8FfBXVfUo4PHAWc34s5rvj25mFHyD3h/vfwj8Gr0mwCOB9wEk2R94P/BSYBnws8AeW7zXYcAngUcDHwXuA/4HsAT4ZeDZwB9tccwhwNOBXwLeBKxu3mMv4ADgiGa/NwLrgKXA7sDxQG35w1bV44HvAYc2P9c9wBnNsY8Dfht4R5Jnb6XuufoisC/wGOCiLV7jL5uf778AuzU/40/7nv8VYD96v5u3JvnFLV88yc70fn+fnK2AJL8O/G/gJfT+fb4LfHyAn2Va238XgMdy//8ejgL+Osmu2/HekiSNPRsYkiT1nNPMRPhhkh/SayzM5ifALyRZUlV3VtX5W9n3pcDJVfWdqroTOA44vLms4reBz1XV16vqXuCtPLiB8I2qOqeqflpVdzczBM6vqk1VdT3wt8CvbnHM/6mq26vqcuAy4EvN+/+IXmPgqX0/xzLg56vqJ1X1L1X1oAbGlpLsRa9J8Oaq+s+qWgt8CHjZbHXP8lJ/0v87T3L69BNVdWpV3dE0S04EnpLkZ5sZIq8AXl9V36+q+6rq35r9pr2t+V19C/gWMNOlKbvSOw9av5Uf9aXAqVV1UfP6xwG/nGTFVo7Zmrb/LtD7t3l78+/yBeBOek0ZSZIWLRsYkiT1vKiqHj39xYNnNfQ7CngCcGWSbyZ5wVb2fRy9T+6nfRfYkd6Mh8cBN0w/UVV3AbducfwN/RtJntBc9nFTc3nGO+jNxuh3c9/ju2fYfmTz+C+Aa4AvJflOkmO38nNs+TPdVlV3bPFz9c8euYFt+8v+33lVHQmQZIck70xybfMzXt/sv6T52gm4diuve1Pf47u4/+ft9wN6szaWbeV1HvBv1zSgbuXBs2TaavvvAnBrVW3q257t55AkadGwgSFJ0hxV1dVVdQS9yxv+D/DJJI9ghssvgBvpLYA5bTmwid4fr+uBPaefaNZx+Lkt326L7Q8AVwL7NpewHA9kwJ/jjqp6Y1XtAxwKvG
GLy0BmcyOwW5Jd+saWA9/fSt1z8Xv0LkH5DXqXUaxoxgPcAvwnvUt3BtY0i74B/NZWdnvAv13zb/xzPPDnnPZjYOe+7cduT32SJOnBbGBIkjRHSX4/ydKq+inww2b4PmAjvU/19+nb/QzgfyTZO8kj6c2YOLP5dP2TwKFJ/kuzgOPb2HYzYhfgduDOJE+kd9eMQX+OFyT5hSRpXvO+5murquoG4N+A/51kpyRPpjcrZZC1LmayC3APvdkOO9P7nU2/90+BU4GTm4VEd0hvsdSHDfA+b6K3MOr/TPJzAEmekmR6nYuPAS9PsrJ5/XcA/95curOltcBvJtk5yS/Q+31IkqR5ZANDkqS5OwS4PMmd9Bb0PLxZC+Iu4CTgX5s1HX6J3h/bH6F3h5Lr6M0e+GOAZi2EP6a3MOR64A5gA70/3mfzJ/RmKNwBfBA4czt+jn2BL9NbX+EbwPur6qstjz2C3syIG4FPAydU1blzfP83NXc1mf66pRn/ML1LN74PfBvYco2RPwEuBb4J3EZvFsycz2mq6t+AX2++vpPkNnoLa36hef4rwP8Czqb37/N44PBZXu49wL30Ztaczvw1cyRJUiMt1uqSJEkLoJmh8UN6l4dcN+JyJEmSOsUZGJIkjVCSQ5vLDh5B7/agl3L/opWSJElq2MCQJGm0DqN3GcaN9C7pOLzNrUwlSZIWGy8hkSRJkiRJnecMDEmSJEmS1Hk2MCRJkiRJUufZwJAkSZIkSZ1nA0OSJEmSJHWeDQxJkiRJktR5NjAkSZIkSVLn2cCQJEmSJEmdZwNDc5bk4CTrRl2HpO4zLyS1YVZIasOskA2MRS7J7yVZk+TOJOuTfDHJr4y6rrlK8trm57gnyWmjrkeaRJOQF0keluSUJN9NckeSi5M8b9R1SZNkErICIMnfN/XfnuQ/krxy1DVJk2RSsmJakn2T/GeSvx91LZPMBsYiluQNwHuBdwC7A8uB9wOHjbCsQd0I/Dlw6qgLkSbRBOXFjsANwK8CPwv8L+CsJCtGWZQ0KSYoKwD+N7Ciqh4FvBD48yRPH3FN0kSYsKyY9tfAN0ddxKSzgbFIJflZ4O3A0VX1qar6cVX9pKo+V1X/s/mU8r1Jbmy+3pvkYbO8ViX5hb7t05L8efP44CTrkrwpyYamu/qiJM9vPs24LcnxfceemOSsJB9uPh29PMnUtn6e5mc4B7h1e383kh5okvKiqf3Eqrq+qn5aVZ8HrgP8o0TaTpOUFQBVdXlV3TO92Xw9fjt+RZKYvKxojj0c+CHwle341agFGxiL1y8DOwGfnuX5PwV+CVgJPAU4EHjLgO/12Oa99gDeCnwQ+H16fzA8E3hrkn369n8h8HHg0cBngfcN+L6S5sfE5kWS3YEnAJcPWK+k+01cViR5f5K7gCuB9cAXBqxX0v0mKiuSPIpeQ+aNA9aoObCBsXj9HHBLVW2a5fmXAm+vqg1VtRF4G/CyAd/rJ8BJVfUTeoGwBPirqrqjqi6n94fDk/v2/3pVfaGq7gM+Qi+4JI3OROZFkocCHwVOr6orB6xX0v0mLiuq6o+AXej9ofMp4J6tHyGphUnLij8DTqmqGwasUXNgA2PxuhVYkmTHWZ5/HPDdvu3vNmMDvVcTAgB3N99v7nv+buCRfds39T2+C9hpK3VKGr6Jy4skD6F3YnIv8NoBa5X0QBOXFQBVdV9VfR3YE/jvA9Yr6X4TkxVJVgK/AbxnwPo0RzYwFq9vAP8JvGiW528Efr5ve3kzNpO7gJ37th+7vcVJ6pSJyoskAU6ht2jYbzWfykjafhOVFTPYEdfAkObDJGXFwcAK4HtJbgL+BPitJBctcB2Lhg2MRaqqfkTvOrC/bhaz2TnJQ5M8L8m7gDOAtyRZmmRJs+9stwRaC/xekh2SHEJvdf8FlWTHJDsBOwA7JHHWhjRPJi0vgA8AvwgcWlV3b2tnSe1MUlYkeUySw5M8sqnhucARwD8tZB3SJJqkrABW02tsrmy+/g
b4/4DnLnAdi4Z/4C1iVXVykpvpLYrzUeAO4ELgJOAi4FHAJc3un6B3m9KZvB44HTgaOKf5WmhvAU7o2/59etfLnTi
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Setting up the subplots\n",
|
||
|
|
"fig, axes = plt.subplots(3, 4, figsize=(15, 10))\n",
|
||
|
|
"fig.suptitle('Histograms for Each Column')\n",
|
||
|
|
"\n",
|
||
|
|
"# Plotting a histogram for each column\n",
|
||
|
|
"for i, ax in enumerate(axes.flat):\n",
|
||
|
|
" column = df.columns[i]\n",
|
||
|
|
" ax.hist(df[column], bins=[1, 2, 3, 4, 5, 6], alpha=0.5, edgecolor='black')\n",
|
||
|
|
" ax.set_title(f'{column}')\n",
|
||
|
|
" ax.set_xlabel('Value')\n",
|
||
|
|
" ax.set_ylabel('Frequency')\n",
|
||
|
|
"\n",
|
||
|
|
"# Adjust layout\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 5,
|
||
|
|
"id": "dfa7fe98",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 3 2 3 4 3 \n",
|
||
|
|
"1 3 2 2 2 3 \n",
|
||
|
|
"2 4 3 2 4 4 \n",
|
||
|
|
"3 4 4 5 3 4 \n",
|
||
|
|
"4 4 2 3 2 2 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 5,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Selecting 3 random columns (without replacement) to shift toward lower values\n",
|
||
|
|
"skew_left = np.random.choice(df.columns, 3, replace=False)\n",
|
||
|
|
"\n",
|
||
|
|
"# Shifting the selected columns toward lower values (a constant offset, so the shape and skewness of the distribution are unchanged)\n",
|
||
|
|
"for column in skew_left:\n",
|
||
|
|
" skewness_factor = np.random.uniform(0.1, 0.5) # Random skewness factor between 0.1 and 0.5\n",
|
||
|
|
" df[column] -= int(skewness_factor * 4) # int() truncates, so the shift is 0 or 1; values are not re-clipped and can drop to 0\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the modified DataFrame\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 6,
|
||
|
|
"id": "bb2aabc8",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB7+0lEQVR4nOzdfZhkdX3n/fdHMCKKETIjjsBkwCAGuXTUljtZoiExruiKaB4MxBii6OgGo6xmFYgraILrmohmb6PJKAQ0iqAoPtyaiCZKTCQ4wMiDQABBGRmYAVRACDj4vf+o00MxdM+crunqOlX9fl1XX13nV+dUfbtHPp7+1u/8TqoKSZIkSZKkLnvIqAuQJEmSJEnaFhsYkiRJkiSp82xgSJIkSZKkzrOBIUmSJEmSOs8GhiRJkiRJ6jwbGJIkSZIkqfNsYEiS1EKSy5McPOo6RiXJQUmuTnJnkheNup6ZJFmRpJLsuEDvV0l+YSHeS5Ik2cCQJIkk1yf5jS3G/jDJ16e3q+pJVfXVbbzOgv4BvcDeDryvqh5ZVeds74slOS3JvU1DZPrrW9tf5pzrODDJF5L8MMltSS5I8vKFrkOSJG2bDQxJksbEiBsjPw9cPsiBW6n7XU1DZPrrKYOXN1Bdvwz8E/A14BeAnwP+O/C8haxDkiS1YwNDkqQW+mdpNJ/ar0lye5Kbk5zc7HZe8/2HzYyCX07ykCRvSfLdJBuSfDjJz/a97h80z92a5H9t8T4nJvlkkr9Pcjvwh817f6OZMbA+yfuS/Ezf61WSP2ou97gjyZ8leXxzzO1JzpreP8mSJJ/vm33wL0kedG6Q5FpgH+Bzzc/1sCSPS/LZ5rhrkryqb/8H1T3A7/sTSW5K8qMk5yV5Ut9zD0/y7ub39qMkX0/y8L7DX5rke0luSfKnW3mbvwBOr6r/U1W3VM+FVfWSvvd6VfPz3db8vI+bpd6vJnll3/YDZvDM8d/l4CTrkryx+d/MemeFSJJkA0OSpEH8FfBXVfUo4PHAWc34s5rvj25mFHyD3h/vfwj8Gr0mwCOB9wEk2R94P/BSYBnws8AeW7zXYcAngUcDHwXuA/4HsAT4ZeDZwB9tccwhwNOBXwLeBKxu3mMv4ADgiGa/NwLrgKXA7sDxQG35w1bV44HvAYc2P9c9wBnNsY8Dfht4R5Jnb6XuufoisC/wGOCiLV7jL5uf778AuzU/40/7nv8VYD96v5u3JvnFLV88yc70fn+fnK2AJL8O/G/gJfT+fb4LfHyAn2Va238XgMdy//8ejgL+Osmu2/HekiSNPRsYkiT1nNPMRPhhkh/SayzM5ifALyRZUlV3VtX5W9n3pcDJVfWdqroTOA44vLms4reBz1XV16vqXuCtPLiB8I2qOqeqflpVdzczBM6vqk1VdT3wt8CvbnHM/6mq26vqcuAy4EvN+/+IXmPgqX0/xzLg56vqJ1X1L1X1oAbGlpLsRa9J8Oaq+s+qWgt8CHjZbHXP8lJ/0v87T3L69BNVdWpV3dE0S04EnpLkZ5sZIq8AXl9V36+q+6rq35r9pr2t+V19C/gWMNOlKbvSOw9av5Uf9aXAqVV1UfP6xwG/nGTFVo7Zmrb/LtD7t3l78+/yBeBOek0ZSZIWLRsYkiT1vKiqHj39xYNnNfQ7CngCcGWSbyZ5wVb2fRy9T+6nfRfYkd6Mh8cBN0w/UVV3AbducfwN/RtJntBc9nFTc3nGO+jNxuh3c9/ju2fYfmTz+C+Aa4AvJflOkmO38nNs+TPdVlV3bPFz9c8euYFt+8v+33lVHQmQZIck70xybfMzXt/sv6T52gm4diuve1Pf47u4/+ft9wN6szaWbeV1HvBv1zSgbuXBs2TaavvvAnBrVW3q257t55AkadGwgSFJ0hxV1dVVdQS9yxv+D/DJJI9ghssvgBvpLYA5bTmwid4fr+uBPaefaNZx+Lkt326L7Q8AVwL7NpewHA9kwJ/jjqp6Y1XtAxwKvG
GLy0BmcyOwW5Jd+saWA9/fSt1z8Xv0LkH5DXqXUaxoxgPcAvwnvUt3BtY0i74B/NZWdnvAv13zb/xzPPDnnPZjYOe+7cduT32SJOnBbGBIkjRHSX4/ydKq+inww2b4PmAjvU/19+nb/QzgfyTZO8kj6c2YOLP5dP2TwKFJ/kuzgOPb2HYzYhfgduDOJE+kd9eMQX+OFyT5hSRpXvO+5murquoG4N+A/51kpyRPpjcrZZC1LmayC3APvdkOO9P7nU2/90+BU4GTm4VEd0hvsdSHDfA+b6K3MOr/TPJzAEmekmR6nYuPAS9PsrJ5/XcA/95curOltcBvJtk5yS/Q+31IkqR5ZANDkqS5OwS4PMmd9Bb0PLxZC+Iu4CTgX5s1HX6J3h/bH6F3h5Lr6M0e+GOAZi2EP6a3MOR64A5gA70/3mfzJ/RmKNwBfBA4czt+jn2BL9NbX+EbwPur6qstjz2C3syIG4FPAydU1blzfP83NXc1mf66pRn/ML1LN74PfBvYco2RPwEuBb4J3EZvFsycz2mq6t+AX2++vpPkNnoLa36hef4rwP8Czqb37/N44PBZXu49wL30Ztaczvw1cyRJUiMt1uqSJEkLoJmh8UN6l4dcN+JyJEmSOsUZGJIkjVCSQ5vLDh5B7/agl3L/opWSJElq2MCQJGm0DqN3GcaN9C7pOLzNrUwlSZIWGy8hkSRJkiRJnecMDEmSJEmS1Hk2MCRJkiRJUufZwJAkSZIkSZ1nA0OSJEmSJHWeDQxJkiRJktR5NjAkSZIkSVLn2cCQJEmSJEmdZwNDc5bk4CTrRl2HpO4zLyS1YVZIasOskA2MRS7J7yVZk+TOJOuTfDHJr4y6rrlK8trm57gnyWmjrkeaRJOQF0keluSUJN9NckeSi5M8b9R1SZNkErICIMnfN/XfnuQ/krxy1DVJk2RSsmJakn2T/GeSvx91LZPMBsYiluQNwHuBdwC7A8uB9wOHjbCsQd0I/Dlw6qgLkSbRBOXFjsANwK8CPwv8L+CsJCtGWZQ0KSYoKwD+N7Ciqh4FvBD48yRPH3FN0kSYsKyY9tfAN0ddxKSzgbFIJflZ4O3A0VX1qar6cVX9pKo+V1X/s/mU8r1Jbmy+3pvkYbO8ViX5hb7t05L8efP44CTrkrwpyYamu/qiJM9vPs24LcnxfceemOSsJB9uPh29PMnUtn6e5mc4B7h1e383kh5okvKiqf3Eqrq+qn5aVZ8HrgP8o0TaTpOUFQBVdXlV3TO92Xw9fjt+RZKYvKxojj0c+CHwle341agFGxiL1y8DOwGfnuX5PwV+CVgJPAU4EHjLgO/12Oa99gDeCnwQ+H16fzA8E3hrkn369n8h8HHg0cBngfcN+L6S5sfE5kWS3YEnAJcPWK+k+01cViR5f5K7gCuB9cAXBqxX0v0mKiuSPIpeQ+aNA9aoObCBsXj9HHBLVW2a5fmXAm+vqg1VtRF4G/CyAd/rJ8BJVfUTeoGwBPirqrqjqi6n94fDk/v2/3pVfaGq7gM+Qi+4JI3OROZFkocCHwVOr6orB6xX0v0mLiuq6o+AXej9ofMp4J6tHyGphUnLij8DTqmqGwasUXNgA2PxuhVYkmTHWZ5/HPDdvu3vNmMDvVcTAgB3N99v7nv+buCRfds39T2+C9hpK3VKGr6Jy4skD6F3YnIv8NoBa5X0QBOXFQBVdV9VfR3YE/jvA9Yr6X4TkxVJVgK/AbxnwPo0RzYwFq9vAP8JvGiW528Efr5ve3kzNpO7gJ37th+7vcVJ6pSJyoskAU6ht2jYbzWfykjafhOVFTPYEdfAkObDJGXFwcAK4HtJbgL+BPitJBctcB2Lhg2MRaqqfkTvOrC/bhaz2TnJQ5M8L8m7gDOAtyRZmmRJs+9stwRaC/xekh2SHEJvdf8FlWTHJDsBOwA7JHHWhjRPJi0vgA8AvwgcWlV3b2tnSe1MUlYkeUySw5M8sqnhucARwD8tZB3SJJqkrABW02tsrmy+/g
b4/4DnLnAdi4Z/4C1iVXVykpvpLYrzUeAO4ELgJOAi4FHAJc3un6B3m9KZvB44HTgaOKf5WmhvAU7o2/59etfLnTi
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Setting up the subplots\n",
|
||
|
|
"fig, axes = plt.subplots(3, 4, figsize=(15, 10))\n",
|
||
|
|
"fig.suptitle('Histograms for Each Column')\n",
|
||
|
|
"\n",
|
||
|
|
"# Plotting a histogram for each column\n",
|
||
|
|
"for i, ax in enumerate(axes.flat):\n",
|
||
|
|
" column = df.columns[i]\n",
|
||
|
|
" ax.hist(df[column], bins=[1, 2, 3, 4, 5, 6], alpha=0.5, edgecolor='black')\n",
|
||
|
|
" ax.set_title(f'{column}')\n",
|
||
|
|
" ax.set_xlabel('Value')\n",
|
||
|
|
" ax.set_ylabel('Frequency')\n",
|
||
|
|
"\n",
|
||
|
|
"# Adjust layout\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 7,
|
||
|
|
"id": "cebcf6cb",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 \n",
|
||
|
|
"0 3 2 3 4 4 \n",
|
||
|
|
"1 3 2 2 2 4 \n",
|
||
|
|
"2 4 3 2 4 5 \n",
|
||
|
|
"3 4 4 5 3 5 \n",
|
||
|
|
"4 4 2 3 2 3 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 7,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Selecting random columns for right skewness, excluding the ones already skewed left\n",
|
||
|
|
"skew_right = np.random.choice([col for col in df.columns if col not in skew_left], 2, replace=False)\n",
|
||
|
|
"\n",
|
||
|
|
"# Shifting the selected columns toward higher values (a constant offset, so the shape and skewness of the distribution are unchanged)\n",
|
||
|
|
"for column in skew_right:\n",
|
||
|
|
" skewness_factor = np.random.uniform(0.1, 0.5) # Random skewness factor between 0.1 and 0.5\n",
|
||
|
|
" df[column] += int(skewness_factor * 4) # int() truncates, so the shift is 0 or 1; values are not re-clipped and can reach 6\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the modified DataFrame\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 8,
|
||
|
|
"id": "69a10ec6",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDAAAALFCAYAAAA1GxOGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAB7r0lEQVR4nOzdfZhkdX3n/fdHMCKKETIjjMBkwCAGuXTUljtZoiExruiKaB4MJDEkoqO7uMpqVh7iCprguiai2dtoMgoBjSIoig+3SUQTJSYSHGDkQSCAoIwMzAAqIAQc/N5/1OmhGLpnTtd0dZ2qfr+uq6+u86tzqr7dIx9Pf+t3fidVhSRJkiRJUpc9YtQFSJIkSZIkbYsNDEmSJEmS1Hk2MCRJkiRJUufZwJAkSZIkSZ1nA0OSJEmSJHWeDQxJkiRJktR5NjAkSWohyZVJDhl1HaOS5OAk1ya5O8lLR13PTJKsSFJJdlyg96skP7cQ7yVJkmxgSJJEkhuT/NoWY3+Q5GvT21X11Kr6yjZeZ0H/gF5gbwfeV1WPrarztvfFkpyR5P6mITL99c3tL3POdRyU5AtJfpDkjiQXJfnDha5DkiRtmw0MSZLGxIgbIz8LXDnIgVup+11NQ2T66+mDlzdQXb8I/CPwVeDngJ8B/ivwwoWsQ5IktWMDQ5KkFvpnaTSf2q9JcmeSW5Oc2ux2QfP9B82Mgl9M8ogkb0nynSQbknw4yU/3ve7vN8/dnuR/bfE+Jyf5ZJK/TXIn8AfNe3+9mTGwPsn7kvxU3+tVkv/WXO5xV5I/SfKk5pg7k5wzvX+SJUk+3zf74J+TPOzcIMn1wL7A55qf61FJnpjks81x1yV5dd/+D6t7gN/3J5LckuSHSS5I8tS+5x6d5N3N7+2HSb6W5NF9h/9uku8muS3JH2/lbf4MOLOq/k9V3VY9F1fVy/ve69XNz3dH8/M+cZZ6v5LkVX3bD5nBM8d/l0OSrEvypuZ/M+udFSJJkg0MSZIG8RfAX1TV44AnAec0489tvj++mVHwdXp/vP8B8Cv0mgCPBd4HkOQA4P3A7wLLgJ8G9tzivQ4HPgk8Hvgo8ADwP4AlwC8CzwP+2xbHHAo8C/gF4M3A6uY99gYOBI5s9nsTsA5YCuwOnAjUlj9sVT0J+C5wWPNz3Qec1Rz7ROA3gXcked5W6p6rvwP2A54AXLLFa/x58/P9J2C35mf8Sd/zvwTsT+9389YkP7/liyfZmd7v75OzFZDkV4H/Dbyc3r/Pd4CPD/CzTGv77wKwBw/+7+Fo4C+T7Lod7y1J0tizgSFJUs95zUyEHyT5Ab3Gwmx+DPxckiVVdXdVXbiVfX8XOLWqvl1VdwMnAEc0l1X8JvC5qvpaVd0PvJWHNxC+XlXnVdVPqureZobAhVW1qapuBP4a+OUtjvk/VXVnVV0JXAF8sXn/H9JrDDyj7+dYBvxsVf24qv65qh7WwNhSkr3pNQmOq6r/qKq1wIeAV8xW9ywv9Uf9v/MkZ04/UVWnV9VdTbPkZODpSX66mSHySuANVfW9qnqgqv612W/a25rf1TeBbwIzXZqyK73zoPVb+VF/Fzi9qi5pXv8E4BeTrNjKMVvT9t8Fev82b2/+Xb4A3E2vKSNJ0qJlA0OSpJ6XVtXjp794+KyGfkcDTwauTvKNJC/eyr5PpPfJ/bTvADvSm/HwROCm6Seq6h7g9i2Ov6l/I8mTm8s+bmkuz3gHvdkY/W7te3zvDNuPbR7/GXAd8MUk305y/FZ+ji1/pjuq6q4tfq7+2SM3sW1/3v87r6qjAJLskOSdSa5vfsYbm/2XNF87Addv5XVv6Xt8Dw/+vP2+T2/WxrKtvM5D/u2aBtTtPHyWTFtt/10Abq+qTX3bs/0ckiQtGjYwJEmao6q6tqqOpHd5w/8BPpnkMcxw+QVwM70FMKctBzbR++N1PbDX9BPNOg4/s+XbbbH9AeBqYL/mEpYTgQz4c9xVVW+qqn2Bw4
A3bnEZyGxuBnZLskvf2HLge1upey5+h94lKL9G7zKKFc14gNuA/6B36c7AmmbR14Hf2MpuD/m3a/6Nf4aH/pzTfgTs3Le9x/bUJ0mSHs4GhiRJc5Tk95IsraqfAD9ohh8ANtL7VH/fvt3PAv5Hkn2SPJbejImzm0/XPwkcluQ/NQs4vo1tNyN2Ae4E7k7yFHp3zRj053hxkp9LkuY1H2i+tqqqbgL+FfjfSXZK8jR6s1IGWetiJrsA99Gb7bAzvd/Z9Hv/BDgdOLVZSHSH9BZLfdQA7/Nmeguj/s8kPwOQ5OlJpte5+Bjwh0lWNq//DuDfmkt3trQW+PUkOyf5OXq/D0mSNI9sYEiSNHeHAlcmuZvegp5HNGtB3AOcAvxLs6bDL9D7Y/sj9O5QcgO92QP/HaBZC+G/01sYcj1wF7CB3h/vs/kjejMU7gI+CJy9HT/HfsCX6K2v8HXg/VX1lZbHHklvZsTNwKeBk6rq/Dm+/5ubu5pMf93WjH+Y3qUb3wO+BWy5xsgfAZcD3wDuoDcLZs7nNFX1r8CvNl/fTnIHvYU1v9A8/2XgfwHn0vv3eRJwxCwv9x7gfnoza85k/po5kiSpkRZrdUmSpAXQzND4Ab3LQ24YcTmSJEmd4gwMSZJGKMlhzWUHj6F3e9DLeXDRSkmSJDVsYEiSNFqH07sM42Z6l3Qc0eZWppIkSYuNl5BIkiRJkqTOcwaGJEmSJEnqPBsYkiRJkiSp82xgSJIkSZKkzrOBIUmSJEmSOs8GhiRJkiRJ6jwbGJIkSZIkqfNsYEiSJEmSpM6zgaE5S3JIknWjrkNS95kXktowKyS1YVbIBsYil+R3kqxJcneS9Un+LskvjbquuUryuubnuC/JGaOuR5pEk5AXSR6V5LQk30lyV5JLk7xw1HVJk2QSsgIgyd829d+Z5N+TvGrUNUmTZFKyYlqS/ZL8R5K/HXUtk8wGxiKW5I3Ae4F3ALsDy4H3A4ePsKxB3Qz8KXD6qAuRJtEE5cWOwE3ALwM/Dfwv4JwkK0ZZlDQpJigrAP43sKKqHge8BPjTJM8acU3SRJiwrJj2l8A3Rl3EpLOBsUgl+Wng7cAxVfWpqvpRVf24qj5XVf+z+ZTyvUlubr7em+RRs7xWJfm5vu0zkvxp8/iQJOuSvDnJhqa7+tIkL2o+zbgjyYl9x56c5JwkH24+Hb0yydS2fp7mZzgPuH17fzeSHmqS8qKp/eSqurGqflJVnwduAPyjRNpOk5QVAFV1ZVXdN73ZfD1pO35Fkpi8rGiOPQL4AfDl7fjVqAUbGIvXLwI7AZ+e5fk/Bn4BWAk8HTgIeMuA77VH8157Am8FPgj8Hr0/GJ4DvDXJvn37vwT4OPB44LPA+wZ8X0nzY2LzIsnuwJOBKwesV9KDJi4rkrw/yT3A1cB64AsD1ivpQROVFUkeR68h86YBa9Qc2MBYvH4GuK2qNs3y/O8Cb6+qDVW1EXgb8IoB3+vHwClV9WN6gbAE+IuququqrqT3h8PT+vb/WlV9oaoeAD5CL7gkjc5E5kWSRwIfBc6sqqsHrFfSgyYuK6rqvwG70PtD51PAfVs/QlILk5YVfwKcVlU3DVij5sAGxuJ1O7AkyY6zPP9E4Dt9299pxgZ6ryYEAO5tvt/a9/y9wGP7tm/pe3wPsNNW6pQ0fBOXF0keQe/E5H7gdQPWKumhJi4rAKrqgar6GrAX8F8HrFfSgyYmK5KsBH4NeM+A9WmObGAsXl8H/gN46SzP3wz8bN/28mZsJvcAO/dt77G9xUnqlInKiyQBTqO3aNhvNJ/KSNp+E5UVM9gR18CQ5sMkZcUhwArgu0luAf4I+I0klyxwHYuGDYxFqqp+SO86sL9sFrPZOckjk7wwybuAs4C3JFmaZEmz72y3BFoL/E6SHZIcSm91/wWVZMckOwE7ADskcdaGNE8mLS+ADwA/DxxWVfdua2dJ7UxSViR5QpIjkjy2qeEFwJHAPy5kHdIkmqSsAFbTa2yubL
7+Cvj/gBcscB2Lhn/gLWJVdWqSW+ktivNR4C7gYuAU4BLgccBlze6foHeb0pm8ATgTOAY4r/laaG8BTurb/j1618u
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1080x720 with 12 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {
|
||
|
|
"needs_background": "light"
|
||
|
|
},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Setting up the subplots\n",
|
||
|
|
"fig, axes = plt.subplots(3, 4, figsize=(15, 10))\n",
|
||
|
|
"fig.suptitle('Histograms for Each Column')\n",
|
||
|
|
"\n",
|
||
|
|
"# Plotting a histogram for each column\n",
|
||
|
|
"for i, ax in enumerate(axes.flat):\n",
|
||
|
|
" column = df.columns[i]\n",
|
||
|
|
" ax.hist(df[column], bins=[1, 2, 3, 4, 5, 6], alpha=0.5, edgecolor='black')\n",
|
||
|
|
" ax.set_title(f'{column}')\n",
|
||
|
|
" ax.set_xlabel('Value')\n",
|
||
|
|
" ax.set_ylabel('Frequency')\n",
|
||
|
|
"\n",
|
||
|
|
"# Adjust layout\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 9,
|
||
|
|
"id": "7add2a67",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import random"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 10,
|
||
|
|
"id": "50833ea0",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA63171</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP10211</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA79627</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA02310</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA98565</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \n",
|
||
|
|
"0 3 2 3 4 4 SA63171 \n",
|
||
|
|
"1 3 2 2 2 4 SP10211 \n",
|
||
|
|
"2 4 3 2 4 5 SA79627 \n",
|
||
|
|
"3 4 4 5 3 5 SA02310 \n",
|
||
|
|
"4 4 2 3 2 3 SA98565 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 10,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Function to generate staff ID\n",
|
||
|
|
"def generate_staff_id():\n",
|
||
|
|
" level_codes = ['DR'] * 3 + ['MA'] * 50 + ['SP'] * 75 + ['SA'] * 372 # Level codes distribution\n",
|
||
|
|
" level_code = random.choice(level_codes) # Randomly choose a level code\n",
|
||
|
|
" random_numbers = ''.join(str(random.randint(0, 9)) for _ in range(5)) # Generate 5 random numbers\n",
|
||
|
|
" return f\"{level_code}{random_numbers}\"\n",
|
||
|
|
"\n",
|
||
|
|
"# Add \"Staff_Id\" column to DataFrame\n",
|
||
|
|
"df['Staff_Id'] = [generate_staff_id() for _ in range(500)]\n",
|
||
|
|
"\n",
|
||
|
|
"# Display the DataFrame with the new \"Staff_Id\" columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 11,
|
||
|
|
"id": "268636d1",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA63171</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP10211</td>\n",
|
||
|
|
" <td>43</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA79627</td>\n",
|
||
|
|
" <td>10</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA02310</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA98565</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 3 2 3 4 4 SA63171 \n",
|
||
|
|
"1 3 2 2 2 4 SP10211 \n",
|
||
|
|
"2 4 3 2 4 5 SA79627 \n",
|
||
|
|
"3 4 4 5 3 5 SA02310 \n",
|
||
|
|
"4 4 2 3 2 3 SA98565 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service \n",
|
||
|
|
"0 1 0 \n",
|
||
|
|
"1 43 3 \n",
|
||
|
|
"2 10 0 \n",
|
||
|
|
"3 17 1 \n",
|
||
|
|
"4 17 1 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 11,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Generating random values for Month_Of_Service\n",
|
||
|
|
"df['Month_Of_Service'] = [random.randint(0, 66) for _ in range(500)] # 66 months = 5 years 6 months\n",
|
||
|
|
"\n",
|
||
|
|
"# Generating Years_Of_Service based on Month_Of_Service\n",
|
||
|
|
"df['Years_Of_Service'] = df['Month_Of_Service'] // 12 # Integer division to get years\n",
|
||
|
|
"\n",
|
||
|
|
"# Adjusting Years_Of_Service for people with less than a year of service\n",
|
||
|
|
"df.loc[df['Years_Of_Service'] == 5, 'Years_Of_Service'] = 4\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the DataFrame with the new columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 12,
|
||
|
|
"id": "73aeb01d",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" <th>Residence</th>\n",
|
||
|
|
" <th>Residence_Code</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA63171</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP10211</td>\n",
|
||
|
|
" <td>43</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA79627</td>\n",
|
||
|
|
" <td>10</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Bekasi</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA02310</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA98565</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 3 2 3 4 4 SA63171 \n",
|
||
|
|
"1 3 2 2 2 4 SP10211 \n",
|
||
|
|
"2 4 3 2 4 5 SA79627 \n",
|
||
|
|
"3 4 4 5 3 5 SA02310 \n",
|
||
|
|
"4 4 2 3 2 3 SA98565 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service Residence Residence_Code \n",
|
||
|
|
"0 1 0 Depok 4 \n",
|
||
|
|
"1 43 3 Jakarta 1 \n",
|
||
|
|
"2 10 0 Bekasi 3 \n",
|
||
|
|
"3 17 1 Depok 4 \n",
|
||
|
|
"4 17 1 Jakarta 1 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 12,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Define the possible residence locations\n",
|
||
|
|
"residence_locations = ['Jakarta', 'Tangerang', 'Bekasi', 'Depok', 'Bogor']\n",
|
||
|
|
"\n",
|
||
|
|
"# Generating random values for Residence\n",
|
||
|
|
"df['Residence'] = [random.choice(residence_locations) for _ in range(500)]\n",
|
||
|
|
"\n",
|
||
|
|
"# Creating Residence_Code based on Residence\n",
|
||
|
|
"residence_mapping = {location: i+1 for i, location in enumerate(residence_locations)}\n",
|
||
|
|
"df['Residence_Code'] = df['Residence'].map(residence_mapping)\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the DataFrame with the new columns\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 13,
|
||
|
|
"id": "39e7083a",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"text/html": [
|
||
|
|
"<div>\n",
|
||
|
|
"<style scoped>\n",
|
||
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
|
" vertical-align: middle;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe tbody tr th {\n",
|
||
|
|
" vertical-align: top;\n",
|
||
|
|
" }\n",
|
||
|
|
"\n",
|
||
|
|
" .dataframe thead th {\n",
|
||
|
|
" text-align: right;\n",
|
||
|
|
" }\n",
|
||
|
|
"</style>\n",
|
||
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
|
" <thead>\n",
|
||
|
|
" <tr style=\"text-align: right;\">\n",
|
||
|
|
" <th></th>\n",
|
||
|
|
" <th>Column_1</th>\n",
|
||
|
|
" <th>Column_2</th>\n",
|
||
|
|
" <th>Column_3</th>\n",
|
||
|
|
" <th>Column_4</th>\n",
|
||
|
|
" <th>Column_5</th>\n",
|
||
|
|
" <th>Column_6</th>\n",
|
||
|
|
" <th>Column_7</th>\n",
|
||
|
|
" <th>Column_8</th>\n",
|
||
|
|
" <th>Column_9</th>\n",
|
||
|
|
" <th>Column_10</th>\n",
|
||
|
|
" <th>Column_11</th>\n",
|
||
|
|
" <th>Column_12</th>\n",
|
||
|
|
" <th>Staff_Id</th>\n",
|
||
|
|
" <th>Month_Of_Service</th>\n",
|
||
|
|
" <th>Years_Of_Service</th>\n",
|
||
|
|
" <th>Residence</th>\n",
|
||
|
|
" <th>Residence_Code</th>\n",
|
||
|
|
" <th>Net_Salary</th>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </thead>\n",
|
||
|
|
" <tbody>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>0</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SA63171</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5582218</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>1</th>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>SP10211</td>\n",
|
||
|
|
" <td>43</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>9213443</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>2</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA79627</td>\n",
|
||
|
|
" <td>10</td>\n",
|
||
|
|
" <td>0</td>\n",
|
||
|
|
" <td>Bekasi</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5836455</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>3</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>5</td>\n",
|
||
|
|
" <td>SA02310</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Depok</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>6035466</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" <tr>\n",
|
||
|
|
" <th>4</th>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>4</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>2</td>\n",
|
||
|
|
" <td>3</td>\n",
|
||
|
|
" <td>SA98565</td>\n",
|
||
|
|
" <td>17</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>Jakarta</td>\n",
|
||
|
|
" <td>1</td>\n",
|
||
|
|
" <td>5568101</td>\n",
|
||
|
|
" </tr>\n",
|
||
|
|
" </tbody>\n",
|
||
|
|
"</table>\n",
|
||
|
|
"</div>"
|
||
|
|
],
|
||
|
|
"text/plain": [
|
||
|
|
" Column_1 Column_2 Column_3 Column_4 Column_5 Column_6 Column_7 \\\n",
|
||
|
|
"0 3 4 5 2 2 3 3 \n",
|
||
|
|
"1 2 3 1 2 4 3 4 \n",
|
||
|
|
"2 3 3 2 2 2 5 4 \n",
|
||
|
|
"3 3 3 4 4 3 1 4 \n",
|
||
|
|
"4 3 2 4 3 3 2 3 \n",
|
||
|
|
"\n",
|
||
|
|
" Column_8 Column_9 Column_10 Column_11 Column_12 Staff_Id \\\n",
|
||
|
|
"0 3 2 3 4 4 SA63171 \n",
|
||
|
|
"1 3 2 2 2 4 SP10211 \n",
|
||
|
|
"2 4 3 2 4 5 SA79627 \n",
|
||
|
|
"3 4 4 5 3 5 SA02310 \n",
|
||
|
|
"4 4 2 3 2 3 SA98565 \n",
|
||
|
|
"\n",
|
||
|
|
" Month_Of_Service Years_Of_Service Residence Residence_Code Net_Salary \n",
|
||
|
|
"0 1 0 Depok 4 5582218 \n",
|
||
|
|
"1 43 3 Jakarta 1 9213443 \n",
|
||
|
|
"2 10 0 Bekasi 3 5836455 \n",
|
||
|
|
"3 17 1 Depok 4 6035466 \n",
|
||
|
|
"4 17 1 Jakarta 1 5568101 "
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"execution_count": 13,
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "execute_result"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Define salary ranges for each staff level\n",
|
||
|
|
"salary_ranges = {'SA': (5070000, 7004030), # Salary range for Staff (SA)\n",
|
||
|
|
" 'SP': (8100075, 10240060), # Salary range for Supervisor (SP)\n",
|
||
|
|
" 'MA': (15562000, 21053011), # Salary range for Manager (MA)\n",
|
||
|
|
" 'DR': (53010000, 55020000)} # Salary range for Director (DR)\n",
|
||
|
|
"\n",
|
||
|
|
"# Function to generate net salary based on staff level\n",
|
||
|
|
"def generate_net_salary(level_code):\n",
|
||
|
|
" lower_bound, upper_bound = salary_ranges[level_code]\n",
|
||
|
|
" return random.randint(lower_bound, upper_bound)\n",
|
||
|
|
"\n",
|
||
|
|
"# Add \"Net_Salary\" column to DataFrame\n",
|
||
|
|
"df['Net_Salary'] = [generate_net_salary(staff_id[:2]) for staff_id in df['Staff_Id']]\n",
|
||
|
|
"\n",
|
||
|
|
"# Display the DataFrame with the new \"Net_Salary\" column\n",
|
||
|
|
"df.head(5)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 14,
|
||
|
|
"id": "8861e640",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Staff_Id\n",
|
||
|
|
"DR 54436305.0\n",
|
||
|
|
"MA 18489651.0\n",
|
||
|
|
"SA 5938218.5\n",
|
||
|
|
"SP 9349631.0\n",
|
||
|
|
"Name: Net_Salary, dtype: float64\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Grouping by staff level and calculating median net salary\n",
|
||
|
|
"median_salary_by_level = df.groupby(df['Staff_Id'].str[:2])['Net_Salary'].median()\n",
|
||
|
|
"\n",
|
||
|
|
"# Displaying the median net salary for each staff level\n",
|
||
|
|
"print(median_salary_by_level)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 15,
|
||
|
|
"id": "a04382c5",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"# Save the DataFrame to Excel\n",
|
||
|
|
"df.to_excel('D:\\\\for python use\\\\HRD_Survey.xlsx', index=False)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "16c8d27f",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
" "
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3 (ipykernel)",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.9.12"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|