Database Schema

Complete documentation of Mosaic Builder's database schema, including core tables, DSL storage, and observability.

Overview

Mosaic Builder uses PostgreSQL with an event-sourced architecture for DSL storage, providing complete audit trails and time-travel capabilities.

erDiagram
    User ||--o{ Chat : creates
    Chat ||--o{ Message : contains
    Message ||--o{ Vote : receives
    Chat ||--o{ DSLEvent : generates
    Chat ||--o{ DSLSnapshot : has
    Message ||--o{ ModelCall : triggers
    ModelCall ||--o{ ToolCall : executes
    User ||--o{ Document : owns
    Chat ||--o{ PhaseTransition : experiences

Core Tables

User Table

Stores user accounts and authentication data.

CREATE TABLE "User" (
  id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
  email TEXT UNIQUE NOT NULL,
  password TEXT, -- Hashed with bcrypt
  name TEXT,
  role TEXT DEFAULT 'user', -- user, admin, pro
  image TEXT, -- Avatar URL
  createdAt TIMESTAMP DEFAULT NOW(),
  updatedAt TIMESTAMP DEFAULT NOW()
);

CREATE INDEX idx_user_email ON "User"(email);
CREATE INDEX idx_user_created ON "User"(createdAt);

Chat Table

Represents conversation sessions.

CREATE TABLE "Chat" (
  id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
  userId TEXT NOT NULL REFERENCES "User"(id) ON DELETE CASCADE,
  title TEXT, -- Auto-generated from first message
  visibility TEXT DEFAULT 'private', -- private, public, shared
  sharePath TEXT UNIQUE, -- For shared chats
  createdAt TIMESTAMP DEFAULT NOW(),
  updatedAt TIMESTAMP DEFAULT NOW()
);

CREATE INDEX idx_chat_user ON "Chat"(userId);
CREATE INDEX idx_chat_visibility ON "Chat"(visibility);
CREATE INDEX idx_chat_share ON "Chat"(sharePath);

Message Table

Stores all messages in conversations.

CREATE TABLE "Message" (
  id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId TEXT NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
  role TEXT NOT NULL, -- user, assistant, system
  content TEXT NOT NULL,
  metadata JSONB, -- Additional message data
  createdAt TIMESTAMP DEFAULT NOW(),
  updatedAt TIMESTAMP DEFAULT NOW()
);

CREATE INDEX idx_message_chat ON "Message"(chatId);
CREATE INDEX idx_message_created ON "Message"(createdAt);
CREATE INDEX idx_message_metadata ON "Message" USING GIN(metadata);

Vote Table

Tracks user feedback on messages.

CREATE TABLE "Vote" (
  id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId TEXT NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
  messageId TEXT NOT NULL REFERENCES "Message"(id) ON DELETE CASCADE,
  userId TEXT NOT NULL REFERENCES "User"(id) ON DELETE CASCADE,
  isUpvoted BOOLEAN NOT NULL,
  feedback TEXT, -- Optional feedback text
  createdAt TIMESTAMP DEFAULT NOW(),
  
  UNIQUE(messageId, userId) -- One vote per user per message
);

CREATE INDEX idx_vote_message ON "Vote"(messageId);
CREATE INDEX idx_vote_user ON "Vote"(userId);

DSL Storage (Event Sourced)

DSL Event Table

Stores all changes to DSL as events for complete history.

CREATE TABLE dsl_event (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
  messageId UUID REFERENCES "Message"(id),
  eventType TEXT NOT NULL, -- entity_added, field_modified, relationship_created, etc
  changeDelta JSONB NOT NULL, -- JSON Patch format
  confidence DECIMAL(3,2),
  reasoning TEXT,
  createdAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_dsl_event_chat ON dsl_event(chatId);
CREATE INDEX idx_dsl_event_type ON dsl_event(eventType);
CREATE INDEX idx_dsl_event_created ON dsl_event(createdAt);

#### Event Types

type DSLEventType = 
  | 'entity_added'
  | 'entity_modified'
  | 'entity_removed'
  | 'field_added'
  | 'field_modified'
  | 'field_removed'
  | 'relationship_added'
  | 'relationship_removed'
  | 'phase_changed'
  | 'metadata_updated'

#### Example Event

{
  "id": "evt_123",
  "chatId": "chat_456",
  "eventType": "entity_added",
  "changeDelta": [
    {
      "op": "add",
      "path": "/entities/User",
      "value": {
        "name": "User",
        "fields": {
          "id": { "type": "uuid", "required": true },
          "email": { "type": "string", "required": true }
        }
      }
    }
  ],
  "confidence": 0.95,
  "reasoning": "User mentioned 'users need to login with email'",
  "createdAt": "2024-01-15T10:00:00Z"
}

DSL Snapshot Table

Periodic snapshots for performance optimization.

CREATE TABLE dsl_snapshot (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
  schema JSONB NOT NULL, -- Complete DSL at this point
  version INTEGER NOT NULL,
  createdAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_dsl_snapshot_chat ON dsl_snapshot(chatId);
CREATE INDEX idx_dsl_snapshot_version ON dsl_snapshot(version);
CREATE UNIQUE INDEX idx_dsl_snapshot_unique ON dsl_snapshot(chatId, version);

Phase Transition Table

Tracks conversation phase changes.

CREATE TABLE phase_transition (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId UUID NOT NULL REFERENCES "Chat"(id) ON DELETE CASCADE,
  fromPhase TEXT,
  toPhase TEXT NOT NULL,
  triggerReason TEXT,
  confidence DECIMAL(3,2),
  createdAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_phase_chat ON phase_transition(chatId);
CREATE INDEX idx_phase_created ON phase_transition(createdAt);

Observability Tables

Model Call Table

Logs every LLM API call for debugging and cost tracking.

CREATE TABLE model_call (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  chatId UUID REFERENCES "Chat"(id),
  messageId UUID REFERENCES "Message"(id),
  provider TEXT NOT NULL, -- openrouter, openai, anthropic
  model TEXT NOT NULL, -- claude-3.5-sonnet, gpt-4-turbo
  requestPayload JSONB NOT NULL,
  responsePayload JSONB,
  streamChunks JSONB[], -- For streaming responses
  latencyMs INTEGER,
  promptTokens INTEGER,
  completionTokens INTEGER,
  costUsd DECIMAL(10,6),
  error TEXT, -- Error message if failed
  createdAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_model_call_chat ON model_call(chatId);
CREATE INDEX idx_model_call_created ON model_call(createdAt);
CREATE INDEX idx_model_call_model ON model_call(model);

Tool Call Table

Records tool/function invocations.

CREATE TABLE tool_call (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  modelCallId UUID REFERENCES model_call(id),
  toolName TEXT NOT NULL, -- extract_dsl, detect_phase, generate_code
  inputArgs JSONB NOT NULL,
  outputResult JSONB,
  executionTimeMs INTEGER,
  error TEXT,
  createdAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_tool_call_model ON tool_call(modelCallId);
CREATE INDEX idx_tool_call_name ON tool_call(toolName);

Caching Tables

Exact Cache Table

Simple exact-match cache for responses.

CREATE TABLE cache_exact (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  messageHash TEXT NOT NULL, -- SHA256 of message
  contextHash TEXT NOT NULL, -- SHA256 of context
  response JSONB NOT NULL,
  hitCount INTEGER DEFAULT 0,
  createdAt TIMESTAMPTZ DEFAULT NOW(),
  expiresAt TIMESTAMPTZ DEFAULT NOW() + INTERVAL '7 days',
  
  UNIQUE(messageHash, contextHash)
);

CREATE INDEX idx_cache_hashes ON cache_exact(messageHash, contextHash);
CREATE INDEX idx_cache_expires ON cache_exact(expiresAt);

Analytics Tables

Conversation Metrics Table

Aggregated metrics per conversation.

CREATE TABLE conversation_metrics (
  chatId UUID PRIMARY KEY REFERENCES "Chat"(id),
  totalMessages INTEGER DEFAULT 0,
  totalLlmCalls INTEGER DEFAULT 0,
  cacheHits INTEGER DEFAULT 0,
  totalTokensUsed INTEGER DEFAULT 0,
  totalCostUsd DECIMAL(10,4),
  dslCompleteness DECIMAL(3,2),
  lastPhase TEXT,
  updatedAt TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX idx_metrics_updated ON conversation_metrics(updatedAt);

Daily Usage Table

Track usage patterns.

CREATE TABLE daily_usage (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  userId UUID REFERENCES "User"(id),
  date DATE NOT NULL,
  messageCount INTEGER DEFAULT 0,
  tokenCount INTEGER DEFAULT 0,
  costUsd DECIMAL(10,4),
  generationCount INTEGER DEFAULT 0,
  
  UNIQUE(userId, date)
);

CREATE INDEX idx_usage_user_date ON daily_usage(userId, date);

Document Storage

Document Table

Stores generated artifacts and documents.

CREATE TABLE "Document" (
  id TEXT PRIMARY KEY,
  title TEXT NOT NULL,
  content TEXT,
  kind TEXT NOT NULL, -- text, code, image, spreadsheet
  status TEXT DEFAULT 'idle', -- idle, streaming, completed, failed
  userId TEXT NOT NULL REFERENCES "User"(id),
  metadata JSONB, -- Language, framework, etc
  createdAt TIMESTAMP DEFAULT NOW(),
  updatedAt TIMESTAMP DEFAULT NOW()
);

CREATE INDEX idx_document_user ON "Document"(userId);
CREATE INDEX idx_document_kind ON "Document"(kind);
CREATE INDEX idx_document_created ON "Document"(createdAt);

DocumentVersion Table

Version history for documents.

CREATE TABLE "DocumentVersion" (
  id TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
  documentId TEXT NOT NULL REFERENCES "Document"(id) ON DELETE CASCADE,
  version INTEGER NOT NULL,
  content TEXT NOT NULL,
  changeDescription TEXT,
  userId TEXT REFERENCES "User"(id),
  createdAt TIMESTAMP DEFAULT NOW(),
  
  UNIQUE(documentId, version)
);

CREATE INDEX idx_version_document ON "DocumentVersion"(documentId);

Migrations

Initial Migration

-- Enable UUID extension
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Create all tables in order
CREATE TABLE "User" (...);
CREATE TABLE "Chat" (...);
CREATE TABLE "Message" (...);
-- ... etc

Adding Indexes

-- Add missing indexes for performance
CREATE INDEX CONCURRENTLY idx_message_content_search 
  ON "Message" USING GIN(to_tsvector('english', content));

-- Partial indexes for common queries
CREATE INDEX idx_message_recent 
  ON "Message"(createdAt) 
  WHERE createdAt > NOW() - INTERVAL '7 days';

Data Migration Example

-- Migrate from old schema to event-sourced
INSERT INTO dsl_event (chatId, eventType, changeDelta, createdAt)
SELECT 
  id,
  'snapshot_migration',
  jsonb_build_object('dsl', old_dsl_column),
  created_at
FROM old_chat_table;

Query Examples

Reconstruct DSL from Events

-- Get latest snapshot
WITH latest_snapshot AS (
  SELECT schema, createdAt 
  FROM dsl_snapshot 
  WHERE chatId = $1 
  ORDER BY version DESC 
  LIMIT 1
)
-- Get events since snapshot
SELECT * FROM dsl_event 
WHERE chatId = $1 
  AND createdAt > COALESCE(
    (SELECT createdAt FROM latest_snapshot),
    '1970-01-01'::timestamptz
  )
ORDER BY createdAt ASC;

User Activity Dashboard

SELECT 
  DATE(m.createdAt) as date,
  COUNT(DISTINCT c.id) as conversations,
  COUNT(m.id) as messages,
  SUM(mc.costUsd) as total_cost
FROM "User" u
LEFT JOIN "Chat" c ON c.userId = u.id
LEFT JOIN "Message" m ON m.chatId = c.id
LEFT JOIN model_call mc ON mc.messageId = m.id
WHERE u.id = $1
  AND m.createdAt > NOW() - INTERVAL '30 days'
GROUP BY DATE(m.createdAt)
ORDER BY date DESC;

Popular Entities Analysis

SELECT 
  eventType,
  changeDelta->>'entityName' as entity,
  COUNT(*) as usage_count,
  AVG(confidence::numeric) as avg_confidence
FROM dsl_event
WHERE eventType = 'entity_added'
GROUP BY eventType, entity
ORDER BY usage_count DESC
LIMIT 20;

Performance Optimization

Index Strategy

  • Primary lookups: chatId, userId, messageId
  • Time-based queries: createdAt indexes
  • JSON queries: GIN indexes for JSONB
  • Full-text search: tsvector for content
  • Partitioning (for scale)

    -- Partition messages by month
    CREATE TABLE "Message" (
      -- columns
    ) PARTITION BY RANGE (createdAt);
    
    CREATE TABLE message_2024_01 
      PARTITION OF "Message" 
      FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

    Vacuum Strategy

    -- Regular maintenance
    VACUUM ANALYZE "Message";
    VACUUM ANALYZE dsl_event;
    
    -- Aggressive vacuum for heavy tables
    VACUUM FULL "Message";

    Backup & Recovery

    Backup Commands

    # Full backup
    pg_dump -h localhost -U postgres -d mosaic_builder > backup.sql
    
    # Specific tables
    pg_dump -t '"User"' -t '"Chat"' -t '"Message"' > core_backup.sql
    
    # Data only (no schema)
    pg_dump --data-only -d mosaic_builder > data.sql

    Point-in-Time Recovery

    -- Enable WAL archiving in postgresql.conf
    wal_level = replica
    archive_mode = on
    archive_command = 'cp %p /backup/archive/%f'

    Security Considerations

    Row-Level Security

    -- Enable RLS
    ALTER TABLE "Chat" ENABLE ROW LEVEL SECURITY;
    
    -- Policy: Users can only see their own chats
    CREATE POLICY chat_isolation ON "Chat"
      FOR ALL
      USING (userId = current_user_id());

    Encryption

    -- Encrypt sensitive columns
    CREATE EXTENSION pgcrypto;
    
    UPDATE "User" 
    SET email = pgp_sym_encrypt(email, 'encryption_key');

    Audit Logging

    -- Audit table for sensitive operations
    CREATE TABLE audit_log (
      id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
      tableName TEXT NOT NULL,
      operation TEXT NOT NULL, -- INSERT, UPDATE, DELETE
      userId UUID,
      oldData JSONB,
      newData JSONB,
      createdAt TIMESTAMPTZ DEFAULT NOW()
    );

    Related Documentation

  • System Architecture
  • Database Setup
  • Caching Strategy
  • API Reference