📊 OOP IN AI TRAINING: SYNTHETIC DATA GENERATION
OOP isn't just for deployment—it also changes how we generate training data. By defining object models of real-world domains, we can simulate an effectively unlimited number of realistic scenarios and use them to train or fine-tune AI models. This is particularly valuable for domains where real data is scarce, expensive, or privacy-sensitive.
# Simulating a customer service environment with OOP for synthetic data
import random
from dataclasses import dataclass, field
from typing import List, Dict
import json
@dataclass
class Product:
    """A catalog item in the simulated store.

    Note: ``id`` shadows the builtin, but it is part of the public
    field order and cannot be renamed without breaking callers.
    """

    id: int            # catalog identifier
    category: str      # e.g. "electronics", "books", "home"
    price: float       # unit price in the store's currency
    features: List[str]  # feature tags such as "wifi" or "4K"
@dataclass
class Customer:
    """A simulated shopper that can emit plausible support queries."""

    id: int
    tier: str  # one of "basic", "premium", "enterprise"
    purchase_history: List[int] = field(default_factory=list)
    preferences: Dict[str, float] = field(default_factory=dict)

    def generate_query(self) -> str:
        """Return one randomly filled query template.

        All five templates are rendered first (consuming the RNG in a
        fixed order), then one is picked — so the random stream matches
        byte-for-byte across implementations.
        """
        feature_q = (
            f"Does {random.choice(['this', 'the'])} product "
            f"support {random.choice(['wifi', 'bluetooth', '4K'])}?"
        )
        price_q = (
            f"What's the price of {random.choice(['item', 'product'])} "
            f"{random.randint(100, 999)}?"
        )
        return_q = (
            f"Can I return {random.choice(['this', 'the'])} item "
            f"after {random.randint(7, 30)} days?"
        )
        tier_q = f"How does {random.choice(['premium', 'enterprise'])} tier compare to basic?"
        info_q = f"Tell me about {random.choice(['warranty', 'shipping', 'discounts'])}"
        return random.choice([feature_q, price_q, return_q, tier_q, info_q])
@dataclass
class SupportAgent:
    """A simulated support rep that answers customer queries."""

    id: int
    expertise: List[str]  # categories this agent handles well
    response_templates: Dict[str, List[str]]  # canned replies keyed by topic

    def generate_response(self, query: str, customer_tier: str) -> str:
        """Pick a reply for *query*, with tier-aware handling.

        Premium pricing questions and return questions get fixed
        answers; anything else draws from the "general" templates
        (or a stock fallback when none are configured).
        """
        lowered = query.lower()
        if "price" in lowered and customer_tier == "premium":
            return "As a premium customer, you get an additional 10% discount!"
        if "return" in lowered:
            return "Our return policy allows returns within 30 days of purchase."
        fallback = ["Thank you for your inquiry."]
        return random.choice(self.response_templates.get("general", fallback))
# --- Build the simulated marketplace ---------------------------------------
# 100 products: random category, price in [10, 500), and two feature tags.
# (RNG calls happen in the same order as before: choice, uniform, sample.)
products = []
for product_idx in range(100):
    products.append(
        Product(
            product_idx,
            random.choice(["electronics", "books", "home"]),
            random.uniform(10, 500),
            random.sample(["wifi", "4K", "bluetooth"], 2),
        )
    )

# 1,000 customers assigned a random service tier.
customers = []
for customer_idx in range(1000):
    customers.append(
        Customer(customer_idx, random.choice(["basic", "premium", "enterprise"]))
    )

# Two agents with distinct expertise areas and their own canned replies.
agents = [
    SupportAgent(
        1,
        ["electronics", "returns"],
        {"general": ["We'll help you with that.", "Let me check for you.", "Here's what we can do."]},
    ),
    SupportAgent(
        2,
        ["books", "shipping"],
        {"general": ["I can assist with that.", "One moment please.", "Certainly."]},
    ),
]
# Sample 10,000 (customer, agent) pairs and record the resulting exchange.
# Each example keeps the tier and agent expertise as conditioning metadata.
training_corpus = []
for _step in range(10000):
    shopper = random.choice(customers)
    rep = random.choice(agents)
    question = shopper.generate_query()
    answer = rep.generate_response(question, shopper.tier)
    training_corpus.append(
        dict(
            customer_tier=shopper.tier,
            query=question,
            response=answer,
            agent_expertise=rep.expertise,
        )
    )
# Persist the corpus as JSON Lines (one example per line), the usual
# input format for fine-tuning pipelines. The encoding is pinned to
# UTF-8 so output is identical across platforms — the default text
# encoding is platform-dependent (e.g. cp1252 on Windows).
with open("synthetic_training.jsonl", "w", encoding="utf-8") as f:
    for item in training_corpus:
        f.write(json.dumps(item) + "\n")