Back to CTK - Conversation Toolkit

Examples

Practical code examples and use cases demonstrating CTK's Python API and common workflows.

Example 1: Basic Database Operations

from ctk import ConversationDB, registry

# Import conversations from an OpenAI-format export file.
imported = registry.import_file("chatgpt_export.json", format="openai")

# Persist every conversation to a database, then report totals.
with ConversationDB("my_chats.db") as db:
    for tree in imported:
        db.save_conversation(tree)

    summary = db.get_statistics()
    print(f"Total conversations: {summary['total_conversations']}")
    print(f"Total messages: {summary['total_messages']}")

Example 2: Searching Conversations

from ctk import ConversationDB

with ConversationDB("chats.db") as db:
    # Full-text search over message content.
    for hit in db.search_conversations("python async"):
        print(f"{hit.id}: {hit.title}")

    # Metadata-based filtering: starred flag and a date window.
    starred = db.search_conversations(starred=True)
    recent = db.search_conversations(date_from="2024-01-01", date_to="2024-12-31")

Example 3: Working with Conversation Trees

from ctk import ConversationDB
from ctk.core.models import Message, MessageContent, MessageRole

with ConversationDB("chats.db") as db:
    # Load a single conversation by id.
    tree = db.load_conversation("conv_id_123")

    # Branching conversations may contain several root-to-leaf paths.
    all_paths = tree.get_all_paths()
    print(f"Found {len(all_paths)} paths in this conversation")

    # Walk the longest path, previewing the first 50 chars of each message.
    for message in tree.get_longest_path():
        print(f"{message.role}: {message.content.text[:50]}...")

    # Attach a follow-up message under a known parent, then persist.
    follow_up = Message(
        role=MessageRole.USER,
        content=MessageContent(text="Follow-up question here")
    )
    tree.add_message(follow_up, parent_id="previous_msg_id")
    db.save_conversation(tree)

Example 4: Batch Import from Multiple Sources

import glob
from ctk import ConversationDB, registry

with ConversationDB("all_chats.db") as db:
    # Import every export in the directory, guessing formats by filename.
    for path in glob.glob("exports/*.json"):
        lowered = path.lower()
        if "chatgpt" in lowered:
            hint = "openai"
        elif "claude" in lowered:
            hint = "anthropic"
        else:
            hint = None  # let the registry auto-detect the format

        for tree in registry.import_file(path, format=hint):
            # Record which file each conversation came from.
            tree.metadata.tags.append(f"source:{path}")
            db.save_conversation(tree)

    # Summary of what was imported.
    summary = db.get_statistics()
    print(f"Imported {summary['total_conversations']} conversations")
    print(f"From sources: {summary.get('sources', {})}")

Example 5: Custom Sanitization

import re
from ctk.core.sanitizer import Sanitizer, SanitizationRule

# Sanitizer with organisation-specific redaction rules.
sanitizer = Sanitizer(enabled=True)

# (name, regex, replacement) triples registered as sanitization rules.
_CUSTOM_RULES = [
    ("internal_urls", r'https://internal\.company\.com/[^\s]+', "[INTERNAL_URL]"),
    ("employee_ids", r'EMP\d{6}', "[EMPLOYEE_ID]"),
    ("project_codes", r'PRJ-[A-Z]{3}-\d{4}', "[PROJECT_CODE]"),
]
for rule_name, rule_pattern, rule_replacement in _CUSTOM_RULES:
    sanitizer.add_rule(SanitizationRule(
        name=rule_name,
        pattern=re.compile(rule_pattern),
        replacement=rule_replacement,
    ))

# Redact a sample string.
clean_text = sanitizer.sanitize("Check EMP123456 on https://internal.company.com/docs")
print(clean_text)  # "Check [EMPLOYEE_ID] on [INTERNAL_URL]"

Example 6: Export for Fine-Tuning

from ctk import ConversationDB, registry

with ConversationDB("chats.db") as db:
    # Keep only starred GPT-4 conversations as fine-tuning material.
    quality_convs = []
    for conv in db.get_all_conversations():
        if conv.metadata.starred and "GPT-4" in str(conv.metadata.model):
            quality_convs.append(conv)

    # Write JSONL training data, flattening branching conversations
    # by taking the longest path through each tree.
    registry.export_conversations(
        quality_convs,
        "training_data.jsonl",
        format="jsonl",
        path_selection="longest"  # Use longest path in branching convs
    )

    print(f"Exported {len(quality_convs)} conversations")

Example 7: Creating a Custom Importer Plugin

# File: ctk/integrations/importers/my_format.py
import uuid  # was missing: import_data falls back to uuid.uuid4() for ids

from ctk.core.plugin import ImporterPlugin
from ctk.core.models import ConversationTree, Message, MessageContent, MessageRole

class MyFormatImporter(ImporterPlugin):
    """Importer plugin for "My Custom Format" exports.

    Auto-discovered when this module is placed in the integrations folder.
    """

    name = "my_format"
    description = "Import from My Custom Format"
    version = "1.0.0"

    def validate(self, data):
        """Return True if *data* looks like this format (a dict with the marker key)."""
        if isinstance(data, dict):
            return "my_format_marker" in data
        return False

    def import_data(self, data, **kwargs):
        """Convert raw *data* into a list of ConversationTree objects.

        Each entry of data["conversations"] becomes one tree; its messages
        are chained linearly (each message parented to the previous one).
        Items without an "id" get a freshly generated UUID.
        """
        conversations = []

        for item in data.get("conversations", []):
            tree = ConversationTree(
                id=item.get("id", str(uuid.uuid4())),  # random fallback id
                title=item.get("title", "Imported Conversation")
            )

            parent_id = None  # first message becomes the root
            for msg_data in item.get("messages", []):
                msg = Message(
                    role=MessageRole(msg_data["role"]),
                    content=MessageContent(text=msg_data["text"])
                )
                tree.add_message(msg, parent_id=parent_id)
                parent_id = msg.id  # chain the next message under this one

            conversations.append(tree)

        return conversations

# Plugin is auto-discovered when placed in integrations folder!

Example 8: Database Merge with Deduplication

from ctk import ConversationDB

def merge_databases(db_paths, output_path):
    """Merge multiple databases, handling duplicates."""
    seen_ids = set()
    counts = {"merged": 0, "skipped": 0}

    with ConversationDB(output_path) as output_db:
        for db_path in db_paths:
            with ConversationDB(db_path) as source_db:
                for conv in source_db.get_all_conversations():
                    # First occurrence of an id wins; later copies are dropped.
                    if conv.id in seen_ids:
                        counts["skipped"] += 1
                        continue

                    seen_ids.add(conv.id)
                    # Tag each conversation with the database it came from.
                    conv.metadata.tags.append(f"merged_from:{db_path}")
                    output_db.save_conversation(conv)
                    counts["merged"] += 1

    print(f"Merged {counts['merged']} conversations")
    print(f"Skipped {counts['skipped']} duplicates")

# Usage
merge_databases(
    ["personal.db", "work.db", "archive.db"],
    "merged.db"
)

Example 9: Analytics and Statistics

from collections import Counter
from ctk import ConversationDB

with ConversationDB("chats.db") as db:
    conversations = db.get_all_conversations()

    # Conversation counts grouped by originating source.
    sources = Counter(c.metadata.source for c in conversations)
    print("Conversations by source:")
    for source, count in sources.most_common():
        print(f"  {source}: {count}")

    # Top five models by conversation count.
    models = Counter(c.metadata.model for c in conversations)
    print("\nConversations by model:")
    for model, count in models.most_common(5):
        print(f"  {model}: {count}")

    # Average message count per conversation (guard against an empty db).
    total_msgs = sum(len(c.get_all_messages()) for c in conversations)
    avg_msgs = total_msgs / len(conversations) if conversations else 0
    print(f"\nAverage messages per conversation: {avg_msgs:.1f}")

    # Five longest conversations. Walk each tree's longest path once and
    # carry the length alongside, instead of recomputing it in both the
    # sort key and the print loop.
    ranked = sorted(
        ((len(c.get_longest_path()), c) for c in conversations),
        key=lambda pair: pair[0],
        reverse=True,
    )[:5]
    print("\nLongest conversations:")
    for length, c in ranked:
        print(f"  {c.title}: {length} messages")

Example 10: Export to Hugo Static Site

import re
from ctk import ConversationDB, registry
from pathlib import Path

def export_curated_blog(db_path, output_dir, limit=20):
    """Export starred conversations as Hugo blog posts.

    Args:
        db_path: Path to the CTK conversation database.
        output_dir: Directory that receives one Hugo page bundle per post.
        limit: Maximum number of starred conversations to export.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with ConversationDB(db_path) as db:
        # Get starred conversations, newest first
        starred = db.search_conversations(starred=True, limit=limit)

        for conv in starred:
            # Build a filesystem-safe slug: keep alphanumerics, collapse
            # everything else into single hyphens. Plain replace(" ", "-")
            # left characters like "/" or "?" in the path, which breaks
            # (or misdirects) the bundle directory.
            slug = re.sub(r"[^a-z0-9]+", "-", conv.title.lower()).strip("-")[:50]
            slug = slug or "untitled"  # empty titles still get a bundle name
            date_str = conv.created_at.strftime("%Y-%m-%d")
            bundle_dir = output_path / f"{date_str}-{slug}"
            bundle_dir.mkdir(exist_ok=True)

            # Export as Hugo markdown (index.md makes this a page bundle).
            registry.export_conversations(
                [conv],
                str(bundle_dir / "index.md"),
                format="hugo",
                draft=False
            )

        print(f"Exported {len(starred)} posts to {output_dir}")

# Usage
export_curated_blog("chats.db", "content/ai-conversations/", limit=50)

More Examples

For additional examples including:

  • MCP tool integration
  • Interactive TUI customization
  • Bulk operations with progress bars

See the GitHub repository.