mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-11 09:20:12 +01:00
Add files via upload
This commit is contained in:
committed by
GitHub
parent
a933346b5e
commit
a81d6b635d
639
Scripts/Synthetic_Data_Generator/SDG_2.py
Normal file
639
Scripts/Synthetic_Data_Generator/SDG_2.py
Normal file
@@ -0,0 +1,639 @@
|
||||
#!/usr/bin/env python3
|
||||
import tkinter as tk
|
||||
from tkinter import ttk, messagebox, filedialog
|
||||
import uuid
|
||||
import random
|
||||
import csv
|
||||
import math
|
||||
|
||||
# ---------- Helper functions ----------
|
||||
def new_col_id():
    """Return a freshly generated UUID4, rendered as text, to identify a column."""
    fresh = uuid.uuid4()
    return str(fresh)
|
||||
|
||||
def clamp(v, a, b):
    """Limit ``v`` to the interval bounded below by ``a`` and above by ``b``."""
    # Cap from above first, then lift to the lower bound.
    capped = min(b, v)
    return max(a, capped)
|
||||
|
||||
def format_decimals(dec):
    """Build a zero-filled pattern such as "0.00" with ``dec`` fractional digits.

    Any ``dec`` that is not positive yields plain "0".
    """
    if dec > 0:
        return "0." + "0" * dec
    return "0"
|
||||
|
||||
# ---------- Column model ----------
|
||||
class Column:
    """Settings for one output column of the generator.

    A column is either "Random" (values drawn between ``min`` and ``max``) or
    "Fixed" (every value equals ``fixed``).  ``decimals`` is the rounding
    precision and ``linearity`` holds the optional "follow another column"
    configuration (enabled flag, target column id, blend weight).
    """

    def __init__(self, name="col", col_id=None):
        # A caller-supplied id wins; otherwise a new one is minted.
        self.id = col_id if col_id else new_col_id()
        self.name = name
        self.type = "Random"  # the alternative is "Fixed"
        self.min, self.max = 0.0, 10.0
        self.fixed = 0.0
        self.decimals = 0
        self.linearity = dict(enabled=False, target_id=None, weight=0.0)

    def range_min(self):
        """Return the lowest value this column can produce, as a float."""
        lower = self.min if self.type == "Random" else self.fixed
        return float(lower)

    def range_max(self):
        """Return the highest value this column can produce, as a float."""
        upper = self.max if self.type == "Random" else self.fixed
        return float(upper)
|
||||
|
||||
# ---------- Main App ----------
|
||||
class SyntheticDataGUI(tk.Tk):
|
||||
def __init__(self):
    """Set up the main window, seed two sample columns and build the widgets."""
    super().__init__()
    self.title("Synthetic Data Generator v1.0")
    # Taller default height leaves room for the preview table.
    self.geometry("920x800")
    self.minsize(800, 600)

    # Model: ordered list of Column objects, seeded with two samples.
    self.columns = []
    for starter_name in ("A", "B"):
        self.add_column(starter_name)

    self._build_ui()
|
||||
|
||||
# ---------- UI building ----------
|
||||
def _build_ui(self):
    """Create every widget of the main window.

    Layout: a vertical paned window whose upper pane holds the toolbar, a
    fixed header strip and a scrollable list of column rows, and whose lower
    pane holds the preview table plus a status line.

    Sets ``self.rows_var``, ``self.canvas``, ``self.inner_frame``,
    ``self.row_frames``, ``self.selected_col_id``, ``self.preview_tree`` and
    ``self.preview_status``.
    """
    # Paned window so the editor and the preview can be resized against each other.
    main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL)
    main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6)

    # Upper pane: controls and column definitions.
    top_frame = ttk.Frame(main_pane)
    main_pane.add(top_frame, weight=1)

    # Toolbar.  Pack order matters: it fixes the left-to-right button order.
    top = ttk.Frame(top_frame)
    top.pack(side="top", fill="x", padx=8, pady=6)

    add_btn = ttk.Button(top, text="Add column", command=self.ui_add_column)
    add_btn.pack(side="left", padx=(0, 6))

    remove_btn = ttk.Button(top, text="Remove selected", command=self.ui_remove_selected)
    remove_btn.pack(side="left", padx=(0, 6))

    help_btn = ttk.Button(top, text="Help / Guide", command=self.show_help)
    help_btn.pack(side="right")

    # Number of rows to generate on export.
    self.rows_var = tk.IntVar(value=100)
    rows_spin = ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8)
    rows_spin.pack(side="left", padx=(0, 6))

    gen_btn = ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview)
    gen_btn.pack(side="left", padx=(12, 6))

    export_btn = ttk.Button(top, text="Export CSV", command=self.export_csv_dialog)
    export_btn.pack(side="left", padx=(0, 6))

    ttk.Label(top, text=" ").pack(side="left", expand=True)  # spacer

    # Fixed header strip directly under the toolbar (not part of the scroll area).
    hdr = ttk.Frame(top_frame)
    hdr.pack(fill="x", padx=8, pady=(0, 6))
    header_font = ('Arial', 10, 'bold italic')
    header_specs = (
        ("Name", 20, (10, 0)),
        ("Type / Params", 46, (20, 0)),
        ("Rounding", 10, (0, 0)),
        ("Advanced", 16, (10, 0)),
    )
    for grid_col, (caption, width, padx) in enumerate(header_specs):
        ttk.Label(hdr, text=caption, width=width, font=header_font).grid(
            row=0, column=grid_col, sticky="w", padx=padx
        )

    # Scrollable container for the column rows: canvas + vertical scrollbar.
    scroll_container = ttk.Frame(top_frame)
    scroll_container.pack(fill="both", expand=True, padx=8, pady=6)

    self.canvas = tk.Canvas(scroll_container)
    self.canvas.pack(side="left", fill="both", expand=True)
    scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview)
    scrollbar.pack(side="right", fill="y")
    self.canvas.configure(yscrollcommand=scrollbar.set)

    def sync_scrollregion(event=None):
        # Keep the scrollable area equal to the bounding box of the content.
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # Re-sync when the canvas itself is resized.
    self.canvas.bind('<Configure>', sync_scrollregion)

    self.inner_frame = ttk.Frame(self.canvas)
    self.canvas.create_window((0, 0), window=self.inner_frame, anchor='nw')
    # Also re-sync when the content changes size (e.g. an "Advanced" panel is
    # expanded); the canvas size does not change then, so the canvas binding
    # alone would leave the scrollregion stale.
    self.inner_frame.bind('<Configure>', sync_scrollregion)

    # Row bookkeeping: column id -> row frame, plus the current selection.
    self.row_frames = {}
    self.selected_col_id = None

    self.refresh_column_list()

    # Lower pane: preview table.
    bottom_frame = ttk.Frame(main_pane)
    main_pane.add(bottom_frame, weight=1)

    preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):", font=('Arial', 10, 'bold italic'))
    preview_label.pack(anchor='w', padx=8, pady=(8, 4))

    table_container = ttk.Frame(bottom_frame)
    table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8))

    # Treeview in headings-only mode acts as the table, with both scrollbars.
    self.preview_tree = ttk.Treeview(table_container, show='headings', height=10)
    vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview)
    hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview)
    self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

    self.preview_tree.grid(row=0, column=0, sticky='nsew')
    vsb.grid(row=0, column=1, sticky='ns')
    hsb.grid(row=1, column=0, sticky='ew')

    # Let the table cell absorb all extra space.
    table_container.grid_rowconfigure(0, weight=1)
    table_container.grid_columnconfigure(0, weight=1)

    # Status line under the table.
    self.preview_status = ttk.Label(bottom_frame, text="No data generated yet. Click 'Generate & Preview' to see sample data.")
    self.preview_status.pack(anchor='w', padx=8, pady=(0, 8))
|
||||
|
||||
|
||||
# ---------- Column management ----------
|
||||
def add_column(self, name="col"):
    """Append a new Column called ``name`` to the model and return it."""
    self.columns.append(Column(name=name))
    return self.columns[-1]
|
||||
|
||||
def remove_column_by_id(self, cid):
    """Drop the column whose id equals ``cid``; unknown ids change nothing."""
    survivors = []
    for column in self.columns:
        if column.id != cid:
            survivors.append(column)
    # Rebind to a new list rather than mutating the old one in place.
    self.columns = survivors
|
||||
|
||||
def find_column(self, cid):
    """Return the first column whose id is ``cid``, or None when there is none."""
    matches = (column for column in self.columns if column.id == cid)
    return next(matches, None)
|
||||
|
||||
# ---------- UI callbacks ----------
|
||||
def ui_add_column(self):
    """Toolbar callback: add a column named with the first unused "colN"."""
    taken = set()
    for column in self.columns:
        taken.add(column.name)

    # Probe col1, col2, ... until a free name turns up.
    index = 1
    candidate = f"col{index}"
    while candidate in taken:
        index += 1
        candidate = f"col{index}"

    created = self.add_column(candidate)
    self.refresh_column_list()
    # The freshly added column becomes the current selection.
    self.selected_col_id = created.id
|
||||
|
||||
def ui_remove_selected(self):
    """Toolbar callback: delete the selected column, or prompt when none is selected."""
    chosen = self.selected_col_id
    if chosen:
        self.remove_column_by_id(chosen)
        self.selected_col_id = None
        self.refresh_column_list()
    else:
        messagebox.showinfo("Remove column", "Select a column row by clicking its name first.")
|
||||
|
||||
def show_help(self):
    """Open a separate window with usage notes, credits and a disclaimer."""
    window = tk.Toplevel(self)
    window.title("Help / Guide")
    window.geometry("500x400")

    # Usage instructions, assembled piece by piece.
    guide = "".join((
        "Welcome to the Synthetic Data Generator!\n\n",
        "➤ Use 'Add column' to create new data columns.\n",
        "➤ Choose 'Random' or 'Fixed' values for each column.\n",
        "➤ Adjust ranges, rounding, and advanced settings as needed.\n",
        "➤ 'Generate & Preview' shows sample data (first 10 rows).\n",
        "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n",
        "Advanced:\n",
        " - Use the 'Advanced' button per column to enable linearity, basically\n",
        " making one column follow another with a weighted influence.\n\n",
    ))
    tk.Label(
        window,
        text=guide,
        justify="left",
        anchor="nw",
        font=("Arial", 10),
        wraplength=460,
    ).pack(fill="both", expand=True, padx=12, pady=12)

    # Author credits in small italic type.
    credits = "".join((
        "This will be one of my scripts/executables.\n",
        "Feel free to share!\n\n",
        "You can leave feedback on my LinkedIn:\n",
        "Sang Putu Sandhyana Yogi\n\n",
        "Thank you so much!",
    ))
    tk.Label(
        window,
        text=credits,
        font=("Arial", 7, "italic"),
        justify="center",
    ).pack(fill="both", expand=True, padx=5, pady=5)

    # Intended-use disclaimer, highlighted in red.
    disclaimer = "For Research / Training Purposes Only.\nDon't fake your Thesis Data!"
    tk.Label(
        window,
        text=disclaimer,
        font=("Arial", 10, "bold italic"),
        fg="red",
        justify="center",
    ).pack(pady=(0, 12))
|
||||
|
||||
def refresh_column_list(self):
    """Rebuild the column-editor rows from ``self.columns``.

    Destroys every widget currently packed into ``self.inner_frame``, drops
    the stale entries of ``self.row_frames``, creates one row per column via
    ``_create_column_row`` and finally resyncs the canvas scrollregion.
    """
    # The header strip lives outside inner_frame, so every packed child is an
    # old column row.  Sparing the first child (as was done when the header
    # was packed here) would leave a stale row behind on every refresh.
    for child in self.inner_frame.pack_slaves():
        child.destroy()

    # Forget frames that were just destroyed; the rows created below register
    # themselves again, so removed columns no longer linger in the mapping.
    self.row_frames.clear()

    for c in self.columns:
        self._create_column_row(c)

    # Let geometry settle before measuring the content for the scrollbar.
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))
|
||||
|
||||
def _create_column_row(self, col: Column):
    """Build the editor row for ``col`` inside ``self.inner_frame``.

    The row holds a name entry, a type selector with min/max/fixed entries,
    a rounding selector and an "Advanced" button that shows or hides the
    linearity controls (enable flag, target column, weight).  The row frame
    is registered in ``self.row_frames`` under ``col.id``.

    Widget values are copied back into ``col`` by the nested
    ``apply_all_changes`` handler, which is bound to ``<FocusOut>`` on most
    of the row's widgets.
    """
    row = ttk.Frame(self.inner_frame, relief="ridge", padding=6)
    row.pack(fill="x", pady=4)

    # clicking name selects
    name_var = tk.StringVar(value=col.name)
    name_entry = ttk.Entry(row, textvariable=name_var, width=24)
    name_entry.grid(row=0, column=0, sticky="w")
    # NOTE(review): <FocusOut> is bound again on this widget near the end of
    # this method without add="+"; per the Tkinter docs that replaces this
    # binding, so _on_name_change presumably never fires -- confirm.
    name_entry.bind("<FocusOut>", lambda e, cid=col.id, var=name_var: self._on_name_change(cid, var.get()))
    name_entry.bind("<Button-1>", lambda e, cid=col.id: self._on_row_select(cid))

    # Type + params frame
    params = ttk.Frame(row)
    params.grid(row=0, column=1, sticky="w", padx=(8,8))

    type_var = tk.StringVar(value=col.type)
    type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8, state="readonly", textvariable=type_var)
    type_combo.grid(row=0, column=0, padx=(0,6))
    # NOTE(review): <<ComboboxSelected>> is bound a second time further down
    # (on_type_selected), which presumably supersedes this binding -- confirm.
    type_combo.bind("<<ComboboxSelected>>", lambda e, cid=col.id, var=type_var: self._on_type_change(cid, var.get()))
    # Random: min/max entries; Fixed: fixed
    min_var = tk.StringVar(value=str(col.min))
    max_var = tk.StringVar(value=str(col.max))
    fixed_var = tk.StringVar(value=str(col.fixed))

    min_entry = ttk.Entry(params, textvariable=min_var, width=10)
    min_entry.grid(row=0, column=1, padx=(0,4))
    ttk.Label(params, text="to").grid(row=0, column=2)
    max_entry = ttk.Entry(params, textvariable=max_var, width=10)
    max_entry.grid(row=0, column=3, padx=(4,8))

    fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12)
    # the fixed entry is always placed; update_type_widgets below enables or
    # disables it (it is never actually hidden)
    fixed_entry.grid(row=0, column=4, padx=(4,8))

    # rounding
    round_var = tk.IntVar(value=col.decimals)
    round_combo = ttk.Combobox(row, values=[0,1,2,3,4,5,6], width=4, state="readonly", textvariable=round_var)
    round_combo.grid(row=0, column=2)
    round_combo.bind("<<ComboboxSelected>>", lambda e, cid=col.id, var=round_var: self._on_round_change(cid, int(var.get())))

    # advanced button (its command is wired to toggle_adv further down)
    adv_btn = ttk.Button(row, text="Advanced ▾", width=12)
    adv_btn.grid(row=0, column=3, padx=(8,0))

    # advanced area (hidden by default): gridded once so its options are
    # remembered, then removed until the button is pressed
    adv_frame = ttk.Frame(row)
    adv_frame.grid(row=1, column=0, columnspan=4, pady=(8,0), sticky="w")
    adv_frame.grid_remove()

    # contents of advanced: linearity
    lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"])
    lin_check = ttk.Checkbutton(adv_frame, text="Enable Linearity (follow another column)", variable=lin_enabled_var)
    lin_check.grid(row=0, column=0, sticky="w")
    # target selector and weight
    ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12,2))
    # (name, id) pairs of every other column; a column cannot target itself
    target_values = [ (c.name, c.id) for c in self.columns if c.id != col.id ]
    # if target list empty, provide placeholder
    if not target_values:
        target_combo = ttk.Combobox(adv_frame, values=["(no other columns)"], state="disabled", width=18)
    else:
        target_combo = ttk.Combobox(adv_frame, values=[tv[0] for tv in target_values], state="readonly", width=18)
        # set current if exists
        if col.linearity["target_id"]:
            found = next((i for i,tv in enumerate(target_values) if tv[1]==col.linearity["target_id"]), None)
            if found is not None:
                target_combo.current(found)
    target_combo.grid(row=0, column=2, padx=(4,8))

    ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8,2))
    # spinbox and slider share one variable, so they stay in sync
    weight_var = tk.DoubleVar(value=col.linearity["weight"])
    weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01, textvariable=weight_var, width=6)
    weight_spin.grid(row=0, column=4)
    weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal", variable=weight_var, length=120)
    weight_scale.grid(row=0, column=5, padx=(8,0))

    # after creating the row, update scrollregion so scrollbar appears
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # wire adv toggle button
    def toggle_adv():
        # Show the advanced panel when hidden, hide it when shown, and flip
        # the arrow on the button accordingly.
        if adv_frame.winfo_ismapped():
            adv_frame.grid_remove()
            adv_btn.config(text="Advanced ▾")
        else:
            adv_frame.grid()
            adv_btn.config(text="Advanced ▴")
    adv_btn.config(command=toggle_adv)

    # update widgets visibility according to type
    def update_type_widgets():
        # Only the entries relevant to the selected type stay editable.
        t = type_var.get()
        if t == "Random":
            min_entry.configure(state="normal")
            max_entry.configure(state="normal")
            fixed_entry.configure(state="disabled")
        else:
            min_entry.configure(state="disabled")
            max_entry.configure(state="disabled")
            fixed_entry.configure(state="normal")

    update_type_widgets()

    # bind entry updates
    def on_minmax_focus_out(e=None):
        # Parse both bounds and swap them when entered in the wrong order;
        # unparsable text is ignored and the model keeps its old values.
        try:
            col.min = float(min_var.get())
            col.max = float(max_var.get())
            if col.min > col.max:
                col.min, col.max = col.max, col.min
                min_var.set(str(col.min))
                max_var.set(str(col.max))
        except Exception:
            pass

    def on_fixed_focus_out(e=None):
        # Unparsable text is ignored and the model keeps its old value.
        try:
            col.fixed = float(fixed_var.get())
        except Exception:
            pass

    # NOTE(review): these three <FocusOut> bindings are presumably replaced by
    # the apply_all_changes loop at the end of this method -- confirm.
    min_entry.bind("<FocusOut>", on_minmax_focus_out)
    max_entry.bind("<FocusOut>", on_minmax_focus_out)
    fixed_entry.bind("<FocusOut>", on_fixed_focus_out)

    # when combobox type changed
    def on_type_selected(e=None):
        col.type = type_var.get()
        update_type_widgets()

    type_combo.bind("<<ComboboxSelected>>", lambda e=None: on_type_selected())

    # name change handler
    # already bound above

    # rounding handler bound above

    # set initial values in widgets (in case)
    min_var.set(str(col.min))
    max_var.set(str(col.max))
    fixed_var.set(str(col.fixed))

    # clicking a row to 'select' it
    def on_click_row(event=None):
        self.selected_col_id = col.id
        # highlight selection visually
        # NOTE(review): no "Selected.TFrame" style is configured anywhere in
        # the visible code, so this may have no visible effect -- confirm.
        for rf_cid, rf in self.row_frames.items():
            if rf_cid == col.id:
                rf.config(style="Selected.TFrame")
            else:
                rf.config(style="TFrame")

    row.bind("<Button-1>", lambda e: on_click_row())
    # also store refs for later updates
    self.row_frames[col.id] = row

    # store final update callbacks when user changes advanced widgets
    def apply_all_changes():
        # Copy every widget value of this row back into the column model.
        # A blank name or an unparsable number keeps the previous value.
        col.name = name_var.get().strip() or col.name
        col.type = type_var.get()
        try:
            col.min = float(min_var.get())
        except Exception:
            pass
        try:
            col.max = float(max_var.get())
        except Exception:
            pass
        try:
            col.fixed = float(fixed_var.get())
        except Exception:
            pass
        col.decimals = int(round_var.get())
        col.linearity["enabled"] = bool(lin_enabled_var.get())
        col.linearity["weight"] = float(weight_var.get())
        # determine target id by name in current dropdown
        if isinstance(target_combo, ttk.Combobox) and target_combo['state'] != 'disabled':
            sel_name = target_combo.get()
            # find id by name
            for other in self.columns:
                if other.id != col.id and other.name == sel_name:
                    col.linearity["target_id"] = other.id
                    break
            else:
                # if no selection or not matched
                col.linearity["target_id"] = None

    # call apply_all_changes when adv toggled or when leaving row
    # (weight_scale is not in this list, so it gets no <FocusOut> binding)
    for w in [name_entry, min_entry, max_entry, fixed_entry, round_combo, lin_check, weight_spin, target_combo]:
        w.bind("<FocusOut>", lambda e, f=apply_all_changes: f())

    # when columns list changes (someone added/renamed) we need to refresh target lists.
    # We'll rely on refresh_column_list to rebuild everything - keep simple.
|
||||
|
||||
# ---------- simple callbacks to update model ----------
|
||||
def _on_name_change(self, cid, new_name):
    """Rename column ``cid`` (blank input keeps the old name) and rebuild the rows."""
    column = self.find_column(cid)
    if not column:
        return
    cleaned = new_name.strip()
    if cleaned:
        column.name = cleaned
    # Rebuild so the other rows' target drop-downs show the new name.
    self.refresh_column_list()
|
||||
|
||||
def _on_type_change(self, cid, new_type):
    """Store ``new_type`` on column ``cid`` and rebuild the rows."""
    column = self.find_column(cid)
    if not column:
        return
    column.type = new_type
    self.refresh_column_list()
|
||||
|
||||
def _on_round_change(self, cid, dec):
    """Store ``dec`` as the rounding precision of column ``cid``, if it exists."""
    column = self.find_column(cid)
    if not column:
        return
    column.decimals = dec
|
||||
|
||||
def _on_row_select(self, cid):
    """Remember ``cid`` as the selected column.

    Only the selection id is recorded here; any visual highlighting is left
    to the row's own click handler.
    """
    self.selected_col_id = cid
|
||||
|
||||
# ---------- Generation logic ----------
|
||||
def generate_rows(self, nrows):
    """Generate ``nrows`` rows of synthetic data from ``self.columns``.

    Steps:
      1. Make column names unique (duplicates get a ``_<n>`` suffix; the
         renamed value is written back to the column).
      2. Draw a base value per cell: uniform between ``min`` and ``max`` for
         "Random" columns, ``fixed`` for all others.
      3. For columns with linearity enabled, blend the base value with the
         target column's *base* value mapped into this column's range:
         ``(1 - w) * base + w * mapped`` with ``w`` clamped to [0, 1].
      4. Round each value to the column's ``decimals`` (0 yields an int).

    Args:
        nrows: number of rows to produce; values below 1 give an empty list.

    Returns:
        A list of dicts mapping column name to value, one dict per row.

    Raises:
        RuntimeError: if no columns are defined.
    """
    if not self.columns:
        raise RuntimeError("No columns defined")

    # Enforce unique names.  A suffix is skipped when it would collide with a
    # name that is already in use or that another column originally carries;
    # otherwise e.g. "a", "a", "a_2" would end up with two "a_2" columns and
    # one of them would silently vanish from the row dicts.
    original_names = {c.name for c in self.columns}
    if len(original_names) != len(self.columns):
        used = set()
        for c in self.columns:
            if c.name in used:
                suffix = 2
                candidate = f"{c.name}_{suffix}"
                while candidate in used or candidate in original_names:
                    suffix += 1
                    candidate = f"{c.name}_{suffix}"
                c.name = candidate
            used.add(c.name)

    # Base values, drawn column by column.
    base_values = {}
    for c in self.columns:
        if c.type == "Random":
            low = float(c.min)
            high = float(c.max)
            if low == high:
                drawn = [low] * nrows
            else:
                drawn = [random.random() * (high - low) + low for _ in range(nrows)]
        else:
            drawn = [float(c.fixed)] * nrows
        base_values[c.id] = drawn

    # Linearity works on a copy so that every blend reads untouched base values.
    final_values = {c.id: list(base_values[c.id]) for c in self.columns}

    for c in self.columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = self.find_column(lin["target_id"])
        if not target:
            # Target column was removed; leave this column unblended.
            continue
        w = clamp(float(lin["weight"]), 0.0, 1.0)
        smin = c.range_min()
        s_range = c.range_max() - smin
        tmin = target.range_min()
        t_range = target.range_max() - tmin
        source_base = base_values[c.id]
        target_base = base_values[target.id]
        blended = final_values[c.id]
        for i in range(nrows):
            if t_range == 0:
                # Constant target: map it to the midpoint of this column's range.
                mapped = smin + (s_range * 0.5) if s_range != 0 else smin
            else:
                frac = (target_base[i] - tmin) / t_range
                mapped = smin + frac * s_range
            blended[i] = (1.0 - w) * source_base[i] + w * mapped

    # Assemble the rows, applying each column's rounding.
    rows = []
    for i in range(nrows):
        row = {}
        for c in self.columns:
            dec = int(c.decimals)
            val = final_values[c.id][i]
            row[c.name] = int(round(val)) if dec == 0 else round(val, dec)
        rows.append(row)
    return rows
|
||||
|
||||
def generate_and_preview(self):
    """Generate up to 10 sample rows and show them in the preview table.

    The requested row count is read from ``self.rows_var``.  Any failure,
    including a row count that is not a positive integer, is reported in a
    message box and reflected in the status line instead of escaping the
    Tk callback.
    """
    try:
        # Reading the variable is inside the try because it raises when the
        # spinbox holds text that is not an integer.
        nrows = self.rows_var.get()
        if nrows < 1:
            # Same rule (and wording) as the CSV export.
            raise ValueError("Please specify a number of rows > 0.")
        n_preview = min(10, nrows)
        data = self.generate_rows(n_preview)
        self.update_preview_table(data)
        self.preview_status.config(text=f"Preview showing first {n_preview} rows. Total rows to generate: {nrows}")
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user.
        messagebox.showerror("Error generating", str(e))
        self.preview_status.config(text="Error generating preview data")
|
||||
|
||||
def update_preview_table(self, data):
    """Replace the contents of the preview tree with ``data``.

    ``data`` is a sequence of dicts keyed by column name; the table columns
    are taken from ``self.columns``.  With no columns defined the table is
    only cleared.
    """
    tree = self.preview_tree

    # Remove the rows currently shown.
    stale_items = tree.get_children()
    for item_id in stale_items:
        tree.delete(item_id)

    # Blank out the columns of the previous preview.
    for old_column in tree["columns"]:
        tree.heading(old_column, text="")
        tree.column(old_column, width=0)

    if not self.columns:
        return

    # Install the new column set and its headers.
    headers = [column.name for column in self.columns]
    tree["columns"] = headers
    for header in headers:
        tree.heading(header, text=header)
        tree.column(header, width=100, minwidth=80, anchor='center')

    # Insert the rows, alternating the stripe tag.
    stripe_tags = ('evenrow', 'oddrow')
    for position, record in enumerate(data):
        cells = [record[header] for header in headers]
        tree.insert("", "end", values=cells, tags=(stripe_tags[position % 2],))

    # Stripe colours for readability.
    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')
|
||||
|
||||
# ---------- Export ----------
|
||||
def export_csv_dialog(self):
    """Ask for a destination file and write the requested rows as CSV.

    The row count comes from ``self.rows_var``; a count that is missing,
    non-numeric or not positive produces an information dialog and nothing
    is written.  Errors during generation or writing are shown in an error
    dialog.
    """
    try:
        nrows = self.rows_var.get()
    except (tk.TclError, ValueError):
        # The spinbox holds text that cannot be read as an integer; treat it
        # like an invalid count instead of letting the callback crash.
        nrows = 0
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(title="Save CSV", defaultextension=".csv",
                                         filetypes=[("CSV files","*.csv"),("All files","*.*")])
    if not fname:
        # Dialog was cancelled.
        return

    try:
        data = self.generate_rows(nrows)
        with open(fname, "w", newline="", encoding="utf-8") as f:
            # Field names are read after generate_rows, which may have
            # renamed duplicate columns.
            writer = csv.DictWriter(f, fieldnames=[c.name for c in self.columns])
            writer.writeheader()
            writer.writerows(data)
        messagebox.showinfo("Exported", f"Wrote {nrows} rows to:\n{fname}")
    except Exception as e:
        # Top-level UI boundary: report instead of crashing the callback.
        messagebox.showerror("Error", str(e))
|
||||
|
||||
|
||||
# ---------- Run the app ----------
|
||||
if __name__ == "__main__":
    # Start the GUI event loop only when executed as a script.
    SyntheticDataGUI().mainloop()
|
||||
691
Scripts/Synthetic_Data_Generator/SDG_3.py
Normal file
691
Scripts/Synthetic_Data_Generator/SDG_3.py
Normal file
@@ -0,0 +1,691 @@
|
||||
#!/usr/bin/env python3
|
||||
import tkinter as tk
|
||||
from tkinter import ttk, messagebox, filedialog
|
||||
import uuid
|
||||
import random
|
||||
import csv
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# ---------- Helper functions ----------
|
||||
def new_col_id():
    """Return a freshly generated UUID4, rendered as text, to identify a column."""
    fresh = uuid.uuid4()
    return str(fresh)
|
||||
|
||||
def clamp(v, a, b):
    """Limit ``v`` to the interval bounded below by ``a`` and above by ``b``."""
    # Cap from above first, then lift to the lower bound.
    capped = min(b, v)
    return max(a, capped)
|
||||
|
||||
def format_decimals(dec):
    """Build a zero-filled pattern such as "0.00" with ``dec`` fractional digits.

    Any ``dec`` that is not positive yields plain "0".
    """
    if dec > 0:
        return "0." + "0" * dec
    return "0"
|
||||
|
||||
# ---------- Column model ----------
|
||||
class Column:
    """Settings for one output column of the generator.

    A column is either "Random" (values drawn between ``min`` and ``max``) or
    "Fixed" (every value equals ``fixed``).  ``decimals`` is the rounding
    precision and ``linearity`` holds the optional "follow another column"
    configuration (enabled flag, target column id, blend weight).
    """

    def __init__(self, name="col", col_id=None):
        # A caller-supplied id wins; otherwise a new one is minted.
        self.id = col_id if col_id else new_col_id()
        self.name = name
        self.type = "Random"  # the alternative is "Fixed"
        self.min, self.max = 0.0, 10.0
        self.fixed = 0.0
        self.decimals = 0
        self.linearity = dict(enabled=False, target_id=None, weight=0.0)

    def range_min(self):
        """Return the lowest value this column can produce, as a float."""
        lower = self.min if self.type == "Random" else self.fixed
        return float(lower)

    def range_max(self):
        """Return the highest value this column can produce, as a float."""
        upper = self.max if self.type == "Random" else self.fixed
        return float(upper)
|
||||
|
||||
# ---------- Main App ----------
|
||||
class SyntheticDataGUI(tk.Tk):
|
||||
def __init__(self):
    """Set up the main window, seed two sample columns and build the widgets."""
    super().__init__()
    self.title("Synthetic Data Generator v1.0")
    self.geometry("920x800")
    self.minsize(800, 600)

    # Model: ordered list of Column objects plus the most recent dataset.
    self.columns = []
    self.generated_data = None  # nothing generated yet
    for starter_name in ("A", "B"):
        self.add_column(starter_name)

    self._build_ui()
|
||||
|
||||
# ---------- UI building ----------
|
||||
def _build_ui(self):
    """Create every widget of the main window.

    Layout: a vertical paned window whose upper pane holds the toolbar, a
    fixed header strip and a scrollable list of column rows, and whose lower
    pane holds the preview table plus a status line.

    Sets ``self.rows_var``, ``self.canvas``, ``self.inner_frame``,
    ``self.row_frames``, ``self.selected_col_id``, ``self.preview_tree`` and
    ``self.preview_status``.
    """
    # Paned window so the editor and the preview can be resized against each other.
    main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL)
    main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6)

    # Upper pane: controls and column definitions.
    top_frame = ttk.Frame(main_pane)
    main_pane.add(top_frame, weight=1)

    # Toolbar.  Pack order matters: it fixes the left-to-right button order.
    top = ttk.Frame(top_frame)
    top.pack(side="top", fill="x", padx=8, pady=6)

    add_btn = ttk.Button(top, text="Add column", command=self.ui_add_column)
    add_btn.pack(side="left", padx=(0, 6))

    remove_btn = ttk.Button(top, text="Remove selected", command=self.ui_remove_selected)
    remove_btn.pack(side="left", padx=(0, 6))

    help_btn = ttk.Button(top, text="Help / Guide", command=self.show_help)
    help_btn.pack(side="right")

    # Number of rows to generate on export.
    self.rows_var = tk.IntVar(value=100)
    rows_spin = ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8)
    rows_spin.pack(side="left", padx=(0, 6))

    gen_btn = ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview)
    gen_btn.pack(side="left", padx=(12, 6))

    export_btn = ttk.Button(top, text="Export CSV", command=self.export_csv_dialog)
    export_btn.pack(side="left", padx=(0, 6))

    vis_btn = ttk.Button(top, text="Visualize Data", command=self.visualize_data)
    vis_btn.pack(side="left", padx=(12, 6))

    ttk.Label(top, text=" ").pack(side="left", expand=True)  # spacer

    # Fixed header strip directly under the toolbar (not part of the scroll area).
    hdr = ttk.Frame(top_frame)
    hdr.pack(fill="x", padx=8, pady=(0, 6))
    header_font = ('Arial', 10, 'bold italic')
    header_specs = (
        ("Name", 20, (10, 0)),
        ("Type / Params", 46, (20, 0)),
        ("Rounding", 10, (0, 0)),
        ("Advanced", 16, (10, 0)),
    )
    for grid_col, (caption, width, padx) in enumerate(header_specs):
        ttk.Label(hdr, text=caption, width=width, font=header_font).grid(
            row=0, column=grid_col, sticky="w", padx=padx
        )

    # Scrollable container for the column rows: canvas + vertical scrollbar.
    scroll_container = ttk.Frame(top_frame)
    scroll_container.pack(fill="both", expand=True, padx=8, pady=6)

    self.canvas = tk.Canvas(scroll_container)
    self.canvas.pack(side="left", fill="both", expand=True)
    scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview)
    scrollbar.pack(side="right", fill="y")
    self.canvas.configure(yscrollcommand=scrollbar.set)

    def sync_scrollregion(event=None):
        # Keep the scrollable area equal to the bounding box of the content.
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # Re-sync when the canvas itself is resized.
    self.canvas.bind('<Configure>', sync_scrollregion)

    self.inner_frame = ttk.Frame(self.canvas)
    self.canvas.create_window((0, 0), window=self.inner_frame, anchor='nw')
    # Also re-sync when the content changes size (e.g. an "Advanced" panel is
    # expanded); the canvas size does not change then, so the canvas binding
    # alone would leave the scrollregion stale.
    self.inner_frame.bind('<Configure>', sync_scrollregion)

    # Row bookkeeping: column id -> row frame, plus the current selection.
    self.row_frames = {}
    self.selected_col_id = None

    self.refresh_column_list()

    # Lower pane: preview table.
    bottom_frame = ttk.Frame(main_pane)
    main_pane.add(bottom_frame, weight=1)

    preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):", font=('Arial', 10, 'bold italic'))
    preview_label.pack(anchor='w', padx=8, pady=(8, 4))

    table_container = ttk.Frame(bottom_frame)
    table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8))

    # Treeview in headings-only mode acts as the table, with both scrollbars.
    self.preview_tree = ttk.Treeview(table_container, show='headings', height=10)
    vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview)
    hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview)
    self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

    self.preview_tree.grid(row=0, column=0, sticky='nsew')
    vsb.grid(row=0, column=1, sticky='ns')
    hsb.grid(row=1, column=0, sticky='ew')

    # Let the table cell absorb all extra space.
    table_container.grid_rowconfigure(0, weight=1)
    table_container.grid_columnconfigure(0, weight=1)

    # Status line under the table.
    self.preview_status = ttk.Label(bottom_frame, text="No data generated yet. Click 'Generate & Preview' to see sample data.")
    self.preview_status.pack(anchor='w', padx=8, pady=(0, 8))
|
||||
|
||||
|
||||
# ---------- Column management ----------
|
||||
def add_column(self, name="col"):
    """Append a new Column called ``name`` to the model and return it."""
    self.columns.append(Column(name=name))
    return self.columns[-1]
|
||||
|
||||
def remove_column_by_id(self, cid):
    """Drop the column whose id equals ``cid``; unknown ids change nothing."""
    survivors = []
    for column in self.columns:
        if column.id != cid:
            survivors.append(column)
    # Rebind to a new list rather than mutating the old one in place.
    self.columns = survivors
|
||||
|
||||
def find_column(self, cid):
    """Return the first column whose id is ``cid``, or None when there is none."""
    matches = (column for column in self.columns if column.id == cid)
    return next(matches, None)
|
||||
|
||||
# ---------- UI callbacks ----------
|
||||
def ui_add_column(self):
    """Toolbar callback: add a column named with the first unused "colN"."""
    taken = set()
    for column in self.columns:
        taken.add(column.name)

    # Probe col1, col2, ... until a free name turns up.
    index = 1
    candidate = f"col{index}"
    while candidate in taken:
        index += 1
        candidate = f"col{index}"

    created = self.add_column(candidate)
    self.refresh_column_list()
    # The freshly added column becomes the current selection.
    self.selected_col_id = created.id
|
||||
|
||||
def ui_remove_selected(self):
    """Toolbar callback: delete the selected column, or prompt when none is selected."""
    chosen = self.selected_col_id
    if chosen:
        self.remove_column_by_id(chosen)
        self.selected_col_id = None
        self.refresh_column_list()
    else:
        messagebox.showinfo("Remove column", "Select a column row by clicking its name first.")
|
||||
|
||||
def show_help(self):
    """Open the Help / Guide window.

    The window stacks three labels: usage instructions, author credits,
    and a red usage disclaimer.
    """
    window = tk.Toplevel(self)
    window.title("Help / Guide")
    window.geometry("500x400")

    # Usage instructions, one entry per displayed line.
    guide = "".join([
        "Welcome to the Synthetic Data Generator!\n\n",
        "➤ Use 'Add column' to create new data columns.\n",
        "➤ Choose 'Random' or 'Fixed' values for each column.\n",
        "➤ Adjust ranges, rounding, and advanced settings as needed.\n",
        "➤ 'Generate & Preview' shows sample data (first 10 rows).\n",
        "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n",
        "Advanced:\n",
        " - Use the 'Advanced' button per column to enable linearity, basically\n",
        " making one column follow another with a weighted influence.\n\n",
    ])
    guide_label = tk.Label(
        window,
        text=guide,
        justify="left",
        anchor="nw",
        font=("Arial", 10),
        wraplength=460,
    )
    guide_label.pack(fill="both", expand=True, padx=12, pady=12)

    # Author credits.
    credits_text = "".join([
        "This will be one of my scripts/executables.\n",
        "Feel free to share!\n\n",
        "You can leave feedback on my LinkedIn:\n",
        "Sang Putu Sandhyana Yogi\n\n",
        "Thank you so much!",
    ])
    credits_label = tk.Label(
        window,
        text=credits_text,
        font=("Arial", 7, "italic"),
        justify="center",
    )
    credits_label.pack(fill="both", expand=True, padx=5, pady=5)

    # Important notice
    notice_label = tk.Label(
        window,
        text="For Research / Training Purposes Only.\nDon't fake your Thesis Data!",
        font=("Arial", 10, "bold italic"),
        fg="red",
        justify="center",
    )
    notice_label.pack(pady=(0, 12))
|
||||
|
||||
def refresh_column_list(self):
    """Destroy and rebuild every column row, then resync the scroll region.

    The ``self.row_frames`` registry is reset before rebuilding. Each
    ``_create_column_row`` call re-registers its own frame, so without the
    reset the registry would keep frames of removed columns, and the row
    click handler would later call ``.config()`` on a destroyed widget.
    """
    # clear all previous column rows
    for child in self.inner_frame.pack_slaves():
        child.destroy()

    # Forget the frames that were just destroyed.
    self.row_frames = {}

    # rebuild rows
    for column in self.columns:
        self._create_column_row(column)

    # ensure scrollregion updates
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))
|
||||
|
||||
def _create_column_row(self, col: Column):
    """Build the editor row for *col* inside ``self.inner_frame``.

    The row holds the name entry, the Random/Fixed selector with its
    min/max/fixed entries, the rounding selector, and a collapsible
    "Advanced" area with the linearity settings (enable flag, target
    column, weight). The row frame is registered in ``self.row_frames``
    under ``col.id``.

    Edits are written back to *col* when an editing widget loses focus,
    and immediately when a selector, the linearity checkbox, or a weight
    control changes. Pressing Return in the name entry additionally
    rebuilds the whole list so other rows' target dropdowns show the new
    name.

    Note: ``widget.bind(sequence, func)`` replaces any earlier binding for
    the same sequence on that widget, so each sequence is bound once here.
    """
    row = ttk.Frame(self.inner_frame, relief="ridge", padding=6)
    row.pack(fill="x", pady=4)

    # ---- column name ----
    name_var = tk.StringVar(value=col.name)
    name_entry = ttk.Entry(row, textvariable=name_var, width=24)
    name_entry.grid(row=0, column=0, sticky="w")

    # ---- value type and its parameters ----
    params = ttk.Frame(row)
    params.grid(row=0, column=1, sticky="w", padx=(8, 8))

    type_var = tk.StringVar(value=col.type)
    type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8, state="readonly", textvariable=type_var)
    type_combo.grid(row=0, column=0, padx=(0, 6))

    # Random: min/max entries; Fixed: fixed
    min_var = tk.StringVar(value=str(col.min))
    max_var = tk.StringVar(value=str(col.max))
    fixed_var = tk.StringVar(value=str(col.fixed))

    min_entry = ttk.Entry(params, textvariable=min_var, width=10)
    min_entry.grid(row=0, column=1, padx=(0, 4))
    ttk.Label(params, text="to").grid(row=0, column=2)
    max_entry = ttk.Entry(params, textvariable=max_var, width=10)
    max_entry.grid(row=0, column=3, padx=(4, 8))

    # All three entries stay visible; the ones that do not apply to the
    # current type are disabled (see update_type_widgets).
    fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12)
    fixed_entry.grid(row=0, column=4, padx=(4, 8))

    # ---- rounding ----
    round_var = tk.IntVar(value=col.decimals)
    round_combo = ttk.Combobox(row, values=[0, 1, 2, 3, 4, 5, 6], width=4, state="readonly", textvariable=round_var)
    round_combo.grid(row=0, column=2)

    # ---- advanced area (hidden by default) ----
    adv_btn = ttk.Button(row, text="Advanced ▾", width=12)
    adv_btn.grid(row=0, column=3, padx=(8, 0))

    adv_frame = ttk.Frame(row)
    adv_frame.grid(row=1, column=0, columnspan=4, pady=(8, 0), sticky="w")
    adv_frame.grid_remove()

    lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"])
    lin_check = ttk.Checkbutton(adv_frame, text="Enable Linearity (follow another column)", variable=lin_enabled_var)
    lin_check.grid(row=0, column=0, sticky="w")

    # Target selector: (name, id) for every column except this one. The
    # dropdown shows names; the id is looked up by dropdown index.
    ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12, 2))
    target_values = [(c.name, c.id) for c in self.columns if c.id != col.id]
    if not target_values:
        # if target list empty, provide placeholder
        target_combo = ttk.Combobox(adv_frame, values=["(no other columns)"], state="disabled", width=18)
    else:
        target_combo = ttk.Combobox(adv_frame, values=[tv[0] for tv in target_values], state="readonly", width=18)
        # set current if exists
        if col.linearity["target_id"]:
            found = next((i for i, tv in enumerate(target_values) if tv[1] == col.linearity["target_id"]), None)
            if found is not None:
                target_combo.current(found)
    target_combo.grid(row=0, column=2, padx=(4, 8))

    ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8, 2))
    weight_var = tk.DoubleVar(value=col.linearity["weight"])
    weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01, textvariable=weight_var, width=6)
    weight_spin.grid(row=0, column=4)
    weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal", variable=weight_var, length=120)
    weight_scale.grid(row=0, column=5, padx=(8, 0))

    # ---- behaviour ----
    def toggle_adv():
        """Show or hide the advanced area and flip the button arrow."""
        if adv_frame.winfo_ismapped():
            adv_frame.grid_remove()
            adv_btn.config(text="Advanced ▾")
        else:
            adv_frame.grid()
            adv_btn.config(text="Advanced ▴")

    adv_btn.config(command=toggle_adv)

    def update_type_widgets():
        """Enable only the entries that apply to the selected type."""
        is_random = type_var.get() == "Random"
        range_state = "normal" if is_random else "disabled"
        min_entry.configure(state=range_state)
        max_entry.configure(state=range_state)
        fixed_entry.configure(state="disabled" if is_random else "normal")

    update_type_widgets()

    def read_number(var, fallback):
        """Return ``float(var.get())``, or *fallback* if it is not a number."""
        try:
            return float(var.get())
        except (ValueError, tk.TclError):
            return fallback

    def apply_all_changes(_event=None):
        """Copy every widget value of this row into *col*.

        Unparseable numeric input leaves the corresponding model value
        unchanged, so a half-typed number never aborts the update.
        """
        col.name = name_var.get().strip() or col.name
        col.type = type_var.get()
        col.min = read_number(min_var, col.min)
        col.max = read_number(max_var, col.max)
        col.fixed = read_number(fixed_var, col.fixed)
        try:
            col.decimals = int(round_var.get())
        except (ValueError, tk.TclError):
            pass  # keep the previous rounding
        col.linearity["enabled"] = bool(lin_enabled_var.get())
        col.linearity["weight"] = read_number(weight_var, col.linearity["weight"])

        # Resolve the target id from the dropdown index, not its text, so
        # two columns sharing a name cannot be confused.
        chosen = target_combo.current() if target_values else -1
        col.linearity["target_id"] = target_values[chosen][1] if chosen >= 0 else None

    def on_type_selected(_event=None):
        """Store the new type and update which entries are editable."""
        col.type = type_var.get()
        update_type_widgets()

    def select_row(_event=None):
        """Mark this column as selected and highlight its row."""
        self.selected_col_id = col.id
        for rf_cid, rf in list(self.row_frames.items()):
            if not rf.winfo_exists():
                # Frame of a removed/rebuilt row: drop it, do not touch it.
                del self.row_frames[rf_cid]
                continue
            rf.config(style="Selected.TFrame" if rf_cid == col.id else "TFrame")

    # also store refs for later updates
    self.row_frames[col.id] = row

    # Selection: clicking the frame or the name entry selects the row.
    row.bind("<Button-1>", select_row)
    name_entry.bind("<Button-1>", select_row)

    # Return in the name entry commits the name and rebuilds the list so
    # other rows' target dropdowns pick up the new name.
    name_entry.bind("<Return>", lambda e, cid=col.id, var=name_var: self._on_name_change(cid, var.get()))

    type_combo.bind("<<ComboboxSelected>>", on_type_selected)
    round_combo.bind("<<ComboboxSelected>>", lambda e, cid=col.id, var=round_var: self._on_round_change(cid, int(var.get())))

    # Commit everything whenever an editing widget loses focus.
    for w in [name_entry, min_entry, max_entry, fixed_entry, round_combo, lin_check, weight_spin, target_combo]:
        w.bind("<FocusOut>", apply_all_changes)

    # The advanced controls may never raise <FocusOut> (for example a mouse
    # click on the checkbox or a drag on the scale), so also commit on
    # their own change notifications.
    lin_check.configure(command=apply_all_changes)
    target_combo.bind("<<ComboboxSelected>>", apply_all_changes)
    weight_spin.configure(command=apply_all_changes)
    weight_scale.configure(command=lambda _value: apply_all_changes())

    # after creating the row, update scrollregion so scrollbar appears
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # When the column list changes (add/rename) the target lists must be
    # refreshed; refresh_column_list rebuilds every row to achieve that.
|
||||
|
||||
# ---------- simple callbacks to update model ----------
|
||||
def _on_name_change(self, cid, new_name):
|
||||
c = self.find_column(cid)
|
||||
if c:
|
||||
c.name = new_name.strip() or c.name
|
||||
# refresh to update target name lists
|
||||
self.refresh_column_list()
|
||||
|
||||
def _on_type_change(self, cid, new_type):
|
||||
c = self.find_column(cid)
|
||||
if c:
|
||||
c.type = new_type
|
||||
self.refresh_column_list()
|
||||
|
||||
def _on_round_change(self, cid, dec):
|
||||
c = self.find_column(cid)
|
||||
if c:
|
||||
c.decimals = dec
|
||||
|
||||
def _on_row_select(self, cid):
|
||||
self.selected_col_id = cid
|
||||
# visual selection handled in row click
|
||||
|
||||
# ---------- Generation logic ----------
|
||||
def generate_rows(self, nrows):
    """Generate *nrows* rows of synthetic data for the current columns.

    Steps:
      1. Make column names unique. Duplicates are renamed in place to
         ``<name>_<k>`` using the first ``k >= 2`` that no column uses.
      2. Draw a base value per column and row: uniform between ``min``
         and ``max`` for "Random" columns, the constant ``fixed`` value
         otherwise.
      3. For columns with linearity enabled, blend the base value with the
         target column's base value rescaled into the column's own range:
         ``(1 - w) * base + w * mapped``, with ``w`` clamped to [0, 1].
         A constant target maps to the midpoint of the column's range.
      4. Round each value to the column's ``decimals`` (0 yields ``int``).

    Args:
        nrows: Number of rows to produce; values <= 0 give an empty list.

    Returns:
        A list of dicts mapping column name to value, one dict per row.

    Raises:
        RuntimeError: If no columns are defined.
    """
    if not self.columns:
        raise RuntimeError("No columns defined")

    # ---- 1. unique names ----
    # Reserve every existing name up front so a generated suffix can never
    # collide with a name another column already carries (e.g. "a", "a",
    # "a_2" must not end up with two "a_2" columns, which would silently
    # drop one column from every row dict).
    taken = {c.name for c in self.columns}
    seen = set()
    next_suffix = {}
    for c in self.columns:
        if c.name not in seen:
            seen.add(c.name)
            continue
        base_name = c.name
        suffix = next_suffix.get(base_name, 2)
        while f"{base_name}_{suffix}" in taken:
            suffix += 1
        next_suffix[base_name] = suffix + 1
        c.name = f"{base_name}_{suffix}"
        taken.add(c.name)
        seen.add(c.name)

    # ---- 2. base values ----
    base_values = {}
    for c in self.columns:
        if c.type == "Random":
            low = float(c.min)
            high = float(c.max)
            if low == high:
                series = [low] * nrows
            else:
                span = high - low
                series = [random.random() * span + low for _ in range(nrows)]
        else:
            series = [float(c.fixed)] * nrows
        base_values[c.id] = series

    # ---- 3. linearity ----
    # Adjustments always read the *base* values of the target, so the
    # result does not depend on the order in which columns are processed.
    final_values = {cid: list(series) for cid, series in base_values.items()}
    for c in self.columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = self.find_column(lin["target_id"])
        if not target:
            continue  # target was removed; leave the column untouched
        w = max(0.0, min(1.0, float(lin["weight"])))
        smin = c.range_min()
        s_range = c.range_max() - smin
        tmin = target.range_min()
        t_range = target.range_max() - tmin
        own = base_values[c.id]
        other = base_values[target.id]
        blended = final_values[c.id]
        for i in range(nrows):
            if t_range == 0:
                # Constant target: use the midpoint of the source range.
                mapped = smin + s_range * 0.5
            else:
                mapped = smin + (other[i] - tmin) / t_range * s_range
            blended[i] = (1.0 - w) * own[i] + w * mapped

    # ---- 4. rounding and row assembly ----
    layout = [(c.name, final_values[c.id], int(c.decimals)) for c in self.columns]
    rows = []
    for i in range(nrows):
        row = {}
        for name, series, dec in layout:
            val = series[i]
            row[name] = int(round(val)) if dec == 0 else round(val, dec)
        rows.append(row)
    return rows
|
||||
|
||||
def generate_and_preview(self):
    """Generate the full dataset and show its first rows in the preview.

    The dataset is generated once and stored in ``self.generated_data``;
    the preview table shows up to the first 10 rows of that same data, so
    the preview matches what is stored. Any failure, including an
    unreadable row count, is reported in a dialog and in the status label.
    """
    preview_limit = 10
    try:
        # Read inside the try: the Tk variable raises if the field does
        # not hold a number. Negative counts are treated as zero.
        nrows = max(0, int(self.rows_var.get()))
        self.generated_data = self.generate_rows(nrows)  # store full data
        preview = self.generated_data[:preview_limit]
        self.update_preview_table(preview)
        self.preview_status.config(
            text=f"Preview showing first {len(preview)} rows. Total rows to generate: {nrows}"
        )
    except Exception as e:
        # UI boundary: surface whatever went wrong rather than crash.
        messagebox.showerror("Error generating", str(e))
        self.preview_status.config(text="Error generating preview data")
|
||||
|
||||
def update_preview_table(self, data):
    """Replace the preview table's contents with *data*.

    *data* is a sequence of row dicts keyed by column name. The table
    columns are taken from ``self.columns`` in order. When no columns are
    defined, the table is only cleared.
    """
    tree = self.preview_tree

    # Clear existing data
    tree.delete(*tree.get_children())

    # Clear existing columns
    for stale in tree["columns"]:
        tree.heading(stale, text="")
        tree.column(stale, width=0)

    # Set up new columns
    if not self.columns:
        return

    headers = [column.name for column in self.columns]
    tree["columns"] = headers

    # Configure column headers
    for header in headers:
        tree.heading(header, text=header)
        tree.column(header, width=100, minwidth=80, anchor='center')

    # Add data rows, alternating the tag so rows can be striped
    stripes = ('evenrow', 'oddrow')
    for index, record in enumerate(data):
        cells = [record[header] for header in headers]
        tree.insert("", "end", values=cells, tags=(stripes[index % 2],))

    # Configure row colors for better readability
    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')
|
||||
|
||||
# ---------- Visualization ----------
|
||||
def visualize_data(self):
    """Plot one chart per column of the last generated dataset.

    Numeric columns get a 20-bin histogram, other columns a bar chart of
    value counts. Charts are laid out on a grid with at most five per row.
    An info dialog is shown when no data has been generated yet.

    pandas and matplotlib are imported here, not at file level, so the
    rest of the application keeps working without them; if either is
    missing an error dialog is shown.
    """
    # getattr: the attribute only exists once data has been generated.
    data = getattr(self, "generated_data", None)
    if not data:
        messagebox.showinfo("Visualize Data", "Generate data first")
        return

    try:
        import pandas as pd
        import matplotlib.pyplot as plt
    except ImportError as exc:
        messagebox.showerror(
            "Visualize Data",
            f"Visualization requires pandas and matplotlib:\n{exc}",
        )
        return

    df = pd.DataFrame(data)
    nrows_total = len(df)
    ncols = len(df.columns)
    if ncols == 0:
        messagebox.showinfo("Visualize Data", "No columns available")
        return

    # layout: up to 5 plots per row
    max_cols = 5
    nrows = (ncols + max_cols - 1) // max_cols
    # squeeze=False always yields a 2-D array of axes, whatever the grid
    # shape, so a single flatten() gives a uniform flat sequence.
    fig, axes = plt.subplots(
        nrows,
        min(ncols, max_cols),
        figsize=(4 * max_cols, 3 * nrows),
        squeeze=False,
    )
    axes = axes.flatten()

    for ax, col in zip(axes, df.columns):
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col].plot(kind="hist", bins=20, ax=ax, title=col)
        else:
            df[col].value_counts().plot(kind="bar", ax=ax, title=col)

    # hide unused axes
    for ax in axes[ncols:]:
        ax.set_visible(False)

    # add top label showing dataset size
    fig.suptitle(f"Visualizations based on {nrows_total} rows", fontsize=12, fontweight="bold")

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave space for title
    plt.show()
|
||||
|
||||
|
||||
# ---------- Export ----------
|
||||
def export_csv_dialog(self):
    """Ask for a file name and write a freshly generated dataset as CSV.

    The row count is read from ``self.rows_var``. A missing, non-numeric,
    or non-positive count shows the "Rows required" dialog and aborts.
    Cancelling the file dialog aborts silently. Generation or write errors
    are reported in an error dialog.

    Note: the data is generated anew for the export; it is not the dataset
    shown by the last preview.
    """
    try:
        nrows = int(self.rows_var.get())
    except (tk.TclError, ValueError):
        # The Tk variable raises when the field does not hold a number.
        nrows = 0
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(
        title="Save CSV",
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
    )
    if not fname:
        return

    try:
        data = self.generate_rows(nrows)
        # Field names are read after generation because generate_rows may
        # rename duplicate columns.
        with open(fname, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=[c.name for c in self.columns])
            writer.writeheader()
            writer.writerows(data)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        messagebox.showerror("Error", str(e))
        return

    messagebox.showinfo("Exported", f"Wrote {len(data)} rows to:\n{fname}")
|
||||
|
||||
|
||||
# ---------- Run the app ----------
|
||||
if __name__ == "__main__":
    # Build the main window and hand control to Tk's event loop.
    SyntheticDataGUI().mainloop()
|
||||
Reference in New Issue
Block a user