Examples
Practical examples demonstrating CTK’s Python API and common workflows.
Example 1: Basic Database Operations
from ctk import ConversationDB, registry

# Load conversations from a file
conversations = registry.import_file("chatgpt_export.json", format="openai")

# Save to database (the context manager closes the DB on exit)
with ConversationDB("my_chats.db") as db:
    for conv in conversations:
        db.save_conversation(conv)

    # Get statistics
    stats = db.get_statistics()
    print(f"Total conversations: {stats['total_conversations']}")
    print(f"Total messages: {stats['total_messages']}")
Example 2: Searching Conversations
from ctk import ConversationDB

with ConversationDB("chats.db") as db:
    # Full-text search
    results = db.search_conversations("python async")
    for conv in results:
        print(f"{conv.id}: {conv.title}")

    # Filter by metadata
    starred = db.search_conversations(starred=True)
    recent = db.search_conversations(
        date_from="2024-01-01",
        date_to="2024-12-31",
    )
Example 3: Working with Conversation Trees
from ctk import ConversationDB
from ctk.core.models import Message, MessageContent, MessageRole

with ConversationDB("chats.db") as db:
    # Load a conversation
    conv = db.load_conversation("conv_id_123")

    # Get all paths (for branching conversations)
    paths = conv.get_all_paths()
    print(f"Found {len(paths)} paths in this conversation")

    # Get the longest path
    longest = conv.get_longest_path()
    for msg in longest:
        print(f"{msg.role}: {msg.content.text[:50]}...")

    # Add a new message as a child of an existing one
    new_msg = Message(
        role=MessageRole.USER,
        content=MessageContent(text="Follow-up question here"),
    )
    conv.add_message(new_msg, parent_id="previous_msg_id")

    # Save changes
    db.save_conversation(conv)
Example 4: Batch Import from Multiple Sources
import glob

from ctk import ConversationDB, registry

with ConversationDB("all_chats.db") as db:
    # Import all exports from a directory
    for file in glob.glob("exports/*.json"):
        # Guess the format from the filename; None lets the registry
        # auto-detect from the file contents.
        format_hint = None
        if "chatgpt" in file.lower():
            format_hint = "openai"
        elif "claude" in file.lower():
            format_hint = "anthropic"

        convs = registry.import_file(file, format=format_hint)
        for conv in convs:
            # Add source file as tag
            conv.metadata.tags.append(f"source:{file}")
            db.save_conversation(conv)

    # Summary
    stats = db.get_statistics()
    print(f"Imported {stats['total_conversations']} conversations")
    print(f"From sources: {stats.get('sources', {})}")
Example 5: Custom Sanitization
import re

from ctk.core.sanitizer import Sanitizer, SanitizationRule

# Create sanitizer with custom rules
sanitizer = Sanitizer(enabled=True)

# Add company-specific patterns; each rule replaces every match of
# `pattern` with the `replacement` placeholder.
sanitizer.add_rule(SanitizationRule(
    name="internal_urls",
    pattern=re.compile(r'https://internal\.company\.com/[^\s]+'),
    replacement="[INTERNAL_URL]",
))
sanitizer.add_rule(SanitizationRule(
    name="employee_ids",
    pattern=re.compile(r'EMP\d{6}'),
    replacement="[EMPLOYEE_ID]",
))
sanitizer.add_rule(SanitizationRule(
    name="project_codes",
    pattern=re.compile(r'PRJ-[A-Z]{3}-\d{4}'),
    replacement="[PROJECT_CODE]",
))

# Apply to text
clean_text = sanitizer.sanitize("Check EMP123456 on https://internal.company.com/docs")
print(clean_text)  # "Check [EMPLOYEE_ID] on [INTERNAL_URL]"
Example 6: Export for Fine-Tuning
from ctk import ConversationDB, registry

with ConversationDB("chats.db") as db:
    # Get all conversations
    conversations = db.get_all_conversations()

    # Filter for quality (starred, from GPT-4)
    quality_convs = [
        c for c in conversations
        if c.metadata.starred and "GPT-4" in str(c.metadata.model)
    ]

    # Export as JSONL for fine-tuning
    registry.export_conversations(
        quality_convs,
        "training_data.jsonl",
        format="jsonl",
        path_selection="longest"  # Use longest path in branching convs
    )
    print(f"Exported {len(quality_convs)} conversations")
Example 7: Creating a Custom Importer Plugin
# File: ctk/integrations/importers/my_format.py
import uuid  # needed for the fallback conversation ID below

from ctk.core.plugin import ImporterPlugin
from ctk.core.models import ConversationTree, Message, MessageContent, MessageRole


class MyFormatImporter(ImporterPlugin):
    """Importer plugin for 'My Custom Format' conversation exports."""

    name = "my_format"
    description = "Import from My Custom Format"
    version = "1.0.0"

    def validate(self, data):
        """Check if data matches this format."""
        if isinstance(data, dict):
            return "my_format_marker" in data
        return False

    def import_data(self, data, **kwargs):
        """Convert data to ConversationTree objects."""
        conversations = []
        for item in data.get("conversations", []):
            tree = ConversationTree(
                id=item.get("id", str(uuid.uuid4())),
                title=item.get("title", "Imported Conversation"),
            )
            # Messages form a linear chain: each message becomes a
            # child of the previous one.
            parent_id = None
            for msg_data in item.get("messages", []):
                msg = Message(
                    role=MessageRole(msg_data["role"]),
                    content=MessageContent(text=msg_data["text"]),
                )
                tree.add_message(msg, parent_id=parent_id)
                parent_id = msg.id
            conversations.append(tree)
        return conversations

# Plugin is auto-discovered when placed in integrations folder!
Example 8: Database Merge with Deduplication
from ctk import ConversationDB


def merge_databases(db_paths, output_path):
    """Merge multiple databases, handling duplicates.

    Databases are processed in order; when the same conversation ID
    appears in more than one source, the first occurrence wins and
    later ones are skipped.
    """
    seen_ids = set()
    merged_count = 0
    duplicate_count = 0

    with ConversationDB(output_path) as output_db:
        for db_path in db_paths:
            with ConversationDB(db_path) as source_db:
                for conv in source_db.get_all_conversations():
                    if conv.id in seen_ids:
                        duplicate_count += 1
                        continue
                    seen_ids.add(conv.id)
                    # Add source tag
                    conv.metadata.tags.append(f"merged_from:{db_path}")
                    output_db.save_conversation(conv)
                    merged_count += 1

    print(f"Merged {merged_count} conversations")
    print(f"Skipped {duplicate_count} duplicates")


# Usage
merge_databases(
    ["personal.db", "work.db", "archive.db"],
    "merged.db"
)
Example 9: Analytics and Statistics
from collections import Counter

from ctk import ConversationDB

with ConversationDB("chats.db") as db:
    conversations = db.get_all_conversations()

    # Count by source
    sources = Counter(c.metadata.source for c in conversations)
    print("Conversations by source:")
    for source, count in sources.most_common():
        print(f" {source}: {count}")

    # Count by model (top 5 only)
    models = Counter(c.metadata.model for c in conversations)
    print("\nConversations by model:")
    for model, count in models.most_common(5):
        print(f" {model}: {count}")

    # Average messages per conversation (guard against empty DB)
    total_msgs = sum(len(c.get_all_messages()) for c in conversations)
    avg_msgs = total_msgs / len(conversations) if conversations else 0
    print(f"\nAverage messages per conversation: {avg_msgs:.1f}")

    # Find longest conversations (by longest root-to-leaf path)
    longest = sorted(
        conversations,
        key=lambda c: len(c.get_longest_path()),
        reverse=True
    )[:5]
    print("\nLongest conversations:")
    for c in longest:
        print(f" {c.title}: {len(c.get_longest_path())} messages")
Example 10: Export to Hugo Static Site
from pathlib import Path

from ctk import ConversationDB, registry


def export_curated_blog(db_path, output_dir, limit=20):
    """Export starred conversations as Hugo blog posts.

    Each conversation becomes a Hugo page bundle named
    ``YYYY-MM-DD-<slug>/index.md`` under *output_dir*.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with ConversationDB(db_path) as db:
        # Get starred conversations, newest first
        starred = db.search_conversations(starred=True, limit=limit)

        for conv in starred:
            # Create page bundle directory; slug is derived from the
            # title, truncated to keep paths short.
            slug = conv.title.lower().replace(" ", "-")[:50]
            date_str = conv.created_at.strftime("%Y-%m-%d")
            bundle_dir = output_path / f"{date_str}-{slug}"
            bundle_dir.mkdir(exist_ok=True)

            # Export as Hugo markdown
            registry.export_conversations(
                [conv],
                str(bundle_dir / "index.md"),
                format="hugo",
                draft=False
            )

    print(f"Exported {len(starred)} posts to {output_dir}")


# Usage
export_curated_blog("chats.db", "content/ai-conversations/", limit=50)
More Examples
For additional examples, including:
- MCP tool integration
- Interactive TUI customization
- Bulk operations with progress bars
see the GitHub repository.