Database Schema
Complete documentation of Mosaic Builder's database schema, including core tables, DSL storage, and observability.
Overview
Mosaic Builder uses PostgreSQL with an event-sourced architecture for DSL storage, providing complete audit trails and time-travel capabilities.
erDiagram
User ||--o{ Chat : creates
Chat ||--o{ Message : contains
Message ||--o{ Vote : receives
Chat ||--o{ DSLEvent : generates
Chat ||--o{ DSLSnapshot : has
Message ||--o{ ModelCall : triggers
ModelCall ||--o{ ToolCall : executes
User ||--o{ Document : owns
Chat ||--o{ PhaseTransition : experiences
Core Tables
User Table
Stores user accounts and authentication data.CREATE TABLE "User" (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
email TEXT UNIQUE NOT NULL,
password TEXT, -- Hashed with bcrypt
name TEXT,
role TEXT DEFAULT 'user', -- user, admin, pro
image TEXT, -- Avatar URL
createdAt TIMESTAMP DEFAULT NOW(),
updatedAt TIMESTAMP DEFAULT NOW()
);
CREATE INDEX idx_user_email ON "User"(email);
CREATE INDEX idx_user_created ON "User"(createdAt);
Chat Table
Represents conversation sessions.CREATE TABLE "Chat" (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
userId TEXT NOT NULL REFERENCES "User"(id) ON DELETE CASCADE,
title TEXT, -- Auto-generated from first message
visibility TEXT DEFAULT 'private', -- private, public, shared
sharePath TEXT UNIQUE, -- For shared chats
createdAt TIMESTAMP DEFAULT NOW(),
updatedAt TIMESTAMP DEFAULT NOW()
);
CREATE INDEX idx_chat_user ON "Chat"(userId);
CREATE INDEX idx_chat_visibility ON "Chat"(visibility);
CREATE INDEX idx_chat_share ON "Chat"(sharePath);
Message Table
Stores all messages in conversations.CREATE TABLE "Message" (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
chatId TEXT NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
role TEXT NOT NULL, -- user, assistant, system
content TEXT NOT NULL,
metadata JSONB, -- Additional message data
createdAt TIMESTAMP DEFAULT NOW(),
updatedAt TIMESTAMP DEFAULT NOW()
);
CREATE INDEX idx_message_chat ON "Message"(chatId);
CREATE INDEX idx_message_created ON "Message"(createdAt);
CREATE INDEX idx_message_metadata ON "Message" USING GIN(metadata);
Vote Table
Tracks user feedback on messages.CREATE TABLE "Vote" (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
chatId TEXT NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
messageId TEXT NOT NULL REFERENCES "Message"(id) ON DELETE CASCADE,
userId TEXT NOT NULL REFERENCES "User"(id) ON DELETE CASCADE,
isUpvoted BOOLEAN NOT NULL,
feedback TEXT, -- Optional feedback text
createdAt TIMESTAMP DEFAULT NOW(),
UNIQUE(messageId, userId) -- One vote per user per message
);
CREATE INDEX idx_vote_message ON "Vote"(messageId);
CREATE INDEX idx_vote_user ON "Vote"(userId);
DSL Storage (Event Sourced)
DSL Event Table
Stores all changes to DSL as events for complete history.CREATE TABLE dsl_event (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
messageId UUID REFERENCES "Message"(id),
eventType TEXT NOT NULL, -- entity_added, field_modified, relationship_created, etc
changeDelta JSONB NOT NULL, -- JSON Patch format
confidence DECIMAL(3,2),
reasoning TEXT,
createdAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_dsl_event_chat ON dsl_event(chatId);
CREATE INDEX idx_dsl_event_type ON dsl_event(eventType);
CREATE INDEX idx_dsl_event_created ON dsl_event(createdAt);
#### Event Types
type DSLEventType =
| 'entity_added'
| 'entity_modified'
| 'entity_removed'
| 'field_added'
| 'field_modified'
| 'field_removed'
| 'relationship_added'
| 'relationship_removed'
| 'phase_changed'
| 'metadata_updated'
#### Example Event
{
"id": "evt_123",
"chatId": "chat_456",
"eventType": "entity_added",
"changeDelta": [
{
"op": "add",
"path": "/entities/User",
"value": {
"name": "User",
"fields": {
"id": { "type": "uuid", "required": true },
"email": { "type": "string", "required": true }
}
}
}
],
"confidence": 0.95,
"reasoning": "User mentioned 'users need to login with email'",
"createdAt": "2024-01-15T10:00:00Z"
}
DSL Snapshot Table
Periodic snapshots for performance optimization.CREATE TABLE dsl_snapshot (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
schema JSONB NOT NULL, -- Complete DSL at this point
version INTEGER NOT NULL,
createdAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_dsl_snapshot_chat ON dsl_snapshot(chatId);
CREATE INDEX idx_dsl_snapshot_version ON dsl_snapshot(version);
CREATE UNIQUE INDEX idx_dsl_snapshot_unique ON dsl_snapshot(chatId, version);
Phase Transition Table
Tracks conversation phase changes.CREATE TABLE phase_transition (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
fromPhase TEXT,
toPhase TEXT NOT NULL,
triggerReason TEXT,
confidence DECIMAL(3,2),
createdAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_phase_chat ON phase_transition(chatId);
CREATE INDEX idx_phase_created ON phase_transition(createdAt);
Observability Tables
Model Call Table
Logs every LLM API call for debugging and cost tracking.CREATE TABLE model_call (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
chatId UUID REFERENCES "Chat"(id),
messageId UUID REFERENCES "Message"(id),
provider TEXT NOT NULL, -- openrouter, openai, anthropic
model TEXT NOT NULL, -- claude-3.5-sonnet, gpt-4-turbo
requestPayload JSONB NOT NULL,
responsePayload JSONB,
streamChunks JSONB[], -- For streaming responses
latencyMs INTEGER,
promptTokens INTEGER,
completionTokens INTEGER,
costUsd DECIMAL(10,6),
error TEXT, -- Error message if failed
createdAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_model_call_chat ON model_call(chatId);
CREATE INDEX idx_model_call_created ON model_call(createdAt);
CREATE INDEX idx_model_call_model ON model_call(model);
Tool Call Table
Records tool/function invocations.CREATE TABLE tool_call (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
modelCallId UUID REFERENCES model_call(id),
toolName TEXT NOT NULL, -- extract_dsl, detect_phase, generate_code
inputArgs JSONB NOT NULL,
outputResult JSONB,
executionTimeMs INTEGER,
error TEXT,
createdAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_tool_call_model ON tool_call(modelCallId);
CREATE INDEX idx_tool_call_name ON tool_call(toolName);
Caching Tables
Exact Cache Table
Simple exact-match cache for responses.CREATE TABLE cache_exact (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
messageHash TEXT NOT NULL, -- SHA256 of message
contextHash TEXT NOT NULL, -- SHA256 of context
response JSONB NOT NULL,
hitCount INTEGER DEFAULT 0,
createdAt TIMESTAMPTZ DEFAULT NOW(),
expiresAt TIMESTAMPTZ DEFAULT NOW() + INTERVAL '7 days',
UNIQUE(messageHash, contextHash)
);
CREATE INDEX idx_cache_hashes ON cache_exact(messageHash, contextHash);
CREATE INDEX idx_cache_expires ON cache_exact(expiresAt);
Analytics Tables
Conversation Metrics Table
Aggregated metrics per conversation.CREATE TABLE conversation_metrics (
chatId UUID PRIMARY KEY REFERENCES "Chat"(id),
totalMessages INTEGER DEFAULT 0,
totalLlmCalls INTEGER DEFAULT 0,
cacheHits INTEGER DEFAULT 0,
totalTokensUsed INTEGER DEFAULT 0,
totalCostUsd DECIMAL(10,4),
dslCompleteness DECIMAL(3,2),
lastPhase TEXT,
updatedAt TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_metrics_updated ON conversation_metrics(updatedAt);
Daily Usage Table
Track usage patterns.CREATE TABLE daily_usage (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
userId UUID REFERENCES "User"(id),
date DATE NOT NULL,
messageCount INTEGER DEFAULT 0,
tokenCount INTEGER DEFAULT 0,
costUsd DECIMAL(10,4),
generationCount INTEGER DEFAULT 0,
UNIQUE(userId, date)
);
CREATE INDEX idx_usage_user_date ON daily_usage(userId, date);
Document Storage
Document Table
Stores generated artifacts and documents.CREATE TABLE "Document" (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
content TEXT,
kind TEXT NOT NULL, -- text, code, image, spreadsheet
status TEXT DEFAULT 'idle', -- idle, streaming, completed, failed
userId TEXT NOT NULL REFERENCES "User"(id),
metadata JSONB, -- Language, framework, etc
createdAt TIMESTAMP DEFAULT NOW(),
updatedAt TIMESTAMP DEFAULT NOW()
);
CREATE INDEX idx_document_user ON "Document"(userId);
CREATE INDEX idx_document_kind ON "Document"(kind);
CREATE INDEX idx_document_created ON "Document"(createdAt);
DocumentVersion Table
Version history for documents.CREATE TABLE "DocumentVersion" (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
documentId TEXT NOT NULL REFERENCES "Document"(id) ON DELETE CASCADE,
version INTEGER NOT NULL,
content TEXT NOT NULL,
changeDescription TEXT,
userId TEXT REFERENCES "User"(id),
createdAt TIMESTAMP DEFAULT NOW(),
UNIQUE(documentId, version)
);
CREATE INDEX idx_version_document ON "DocumentVersion"(documentId);
Migrations
Initial Migration
-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Create all tables in order
CREATE TABLE "User" (...);
CREATE TABLE "Chat" (...);
CREATE TABLE "Message" (...);
-- ... etc
Adding Indexes
-- Add missing indexes for performance
CREATE INDEX CONCURRENTLY idx_message_content_search
ON "Message" USING GIN(to_tsvector('english', content));
-- Partial indexes for common queries
CREATE INDEX idx_message_recent
ON "Message"(createdAt)
WHERE createdAt > NOW() - INTERVAL '7 days';
Data Migration Example
-- Migrate from old schema to event-sourced
INSERT INTO dsl_event (chatId, eventType, changeDelta, createdAt)
SELECT
id,
'snapshot_migration',
jsonb_build_object('dsl', old_dsl_column),
created_at
FROM old_chat_table;
Query Examples
Reconstruct DSL from Events
-- Get latest snapshot
WITH latest_snapshot AS (
SELECT schema, createdAt
FROM dsl_snapshot
WHERE chatId = $1
ORDER BY version DESC
LIMIT 1
)
-- Get events since snapshot
SELECT * FROM dsl_event
WHERE chatId = $1
AND createdAt > COALESCE(
(SELECT createdAt FROM latest_snapshot),
'1970-01-01'::timestamptz
)
ORDER BY createdAt ASC;
User Activity Dashboard
SELECT
DATE(m.createdAt) as date,
COUNT(DISTINCT c.id) as conversations,
COUNT(m.id) as messages,
SUM(mc.costUsd) as total_cost
FROM "User" u
LEFT JOIN "Chat" c ON c.userId = u.id
LEFT JOIN "Message" m ON m.chatId = c.id
LEFT JOIN model_call mc ON mc.messageId = m.id
WHERE u.id = $1
AND m.createdAt > NOW() - INTERVAL '30 days'
GROUP BY DATE(m.createdAt)
ORDER BY date DESC;
Popular Entities Analysis
SELECT
eventType,
changeDelta->>'entityName' as entity,
COUNT(*) as usage_count,
AVG(confidence::numeric) as avg_confidence
FROM dsl_event
WHERE eventType = 'entity_added'
GROUP BY eventType, entity
ORDER BY usage_count DESC
LIMIT 20;
Performance Optimization
Index Strategy
Partitioning (for scale)
-- Partition messages by month
CREATE TABLE "Message" (
-- columns
) PARTITION BY RANGE (createdAt);
CREATE TABLE message_2024_01
PARTITION OF "Message"
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
Vacuum Strategy
-- Regular maintenance
VACUUM ANALYZE "Message";
VACUUM ANALYZE dsl_event;
-- Aggressive vacuum for heavy tables
VACUUM FULL "Message";
Backup & Recovery
Backup Commands
# Full backup
pg_dump -h localhost -U postgres -d mosaic_builder > backup.sql
# Specific tables
pg_dump -t '"User"' -t '"Chat"' -t '"Message"' > core_backup.sql
# Data only (no schema)
pg_dump --data-only -d mosaic_builder > data.sql
Point-in-Time Recovery
-- Enable WAL archiving in postgresql.conf
wal_level = replica
archive_mode = on
archive_command = 'cp %p /backup/archive/%f'
Security Considerations
Row-Level Security
-- Enable RLS
ALTER TABLE "Chat" ENABLE ROW LEVEL SECURITY;
-- Policy: Users can only see their own chats
CREATE POLICY chat_isolation ON "Chat"
FOR ALL
USING (userId = current_user_id());
Encryption
-- Encrypt sensitive columns
CREATE EXTENSION pgcrypto;
UPDATE "User"
SET email = pgp_sym_encrypt(email, 'encryption_key');
Audit Logging
-- Audit table for sensitive operations
CREATE TABLE audit_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tableName TEXT NOT NULL,
operation TEXT NOT NULL, -- INSERT, UPDATE, DELETE
userId UUID,
oldData JSONB,
newData JSONB,
createdAt TIMESTAMPTZ DEFAULT NOW()
);