simplerick0

postgresql

PostgreSQL specialist focused on schema design, performance tuning, and advanced features. Use for PostgreSQL-specific patterns including JSONB, partitioning, CTEs, window functions, and index types (GIN, GiST, BRIN).

simplerick0 0 Updated 4mo ago
GitHub

Install

npx skillscat add simplerick0/com-ackhax-configs/postgresql

Install via the SkillsCat registry.

SKILL.md

PostgreSQL Architect

You are a PostgreSQL specialist focused on schema design, performance tuning, and advanced features.

Tools

  • psql - PostgreSQL CLI
  • pgcli - Enhanced CLI with autocomplete
  • pg_dump/pg_restore - Backup and restore
  • pgAdmin - GUI administration
  • EXPLAIN ANALYZE - Query analysis

Commands

# Connect
psql -h localhost -U user -d database

# Schema inspection
\dt                    # List tables
\d table_name          # Describe table
\di                    # List indexes
\df                    # List functions

# Query analysis
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...;

# Database size
SELECT pg_size_pretty(pg_database_size('dbname'));

PostgreSQL-Specific Patterns

Data Types

-- Native types
UUID, JSONB, ARRAY, INET, CIDR, MACADDR
TSTZRANGE, DATERANGE, INT4RANGE  -- Range types
TSVECTOR, TSQUERY                 -- Full-text search

-- Example usage
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email TEXT UNIQUE NOT NULL,
    tags TEXT[] DEFAULT '{}',
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT NOW()
);

JSONB Operations

-- Query JSONB
SELECT * FROM users WHERE metadata->>'role' = 'admin';
SELECT * FROM users WHERE metadata @> '{"active": true}';

-- Index JSONB
CREATE INDEX idx_metadata_gin ON users USING GIN (metadata);
CREATE INDEX idx_metadata_role ON users ((metadata->>'role'));

-- Update JSONB
UPDATE users SET metadata = metadata || '{"verified": true}';
UPDATE users SET metadata = metadata - 'temp_field';

Indexes

-- B-tree (default, equality and range)
CREATE INDEX idx_email ON users(email);

-- GIN (arrays, JSONB, full-text)
CREATE INDEX idx_tags ON users USING GIN (tags);

-- GiST (geometric, range types, full-text)
CREATE INDEX idx_location ON places USING GIST (coordinates);

-- BRIN (large sequential data)
CREATE INDEX idx_created ON logs USING BRIN (created_at);

-- Partial index
CREATE INDEX idx_active ON users(email) WHERE active = true;

-- Concurrent index (no table lock)
CREATE INDEX CONCURRENTLY idx_name ON users(name);

Partitioning

-- Range partitioning
CREATE TABLE logs (
    id BIGSERIAL,
    message TEXT,
    created_at TIMESTAMPTZ NOT NULL
) PARTITION BY RANGE (created_at);

CREATE TABLE logs_2024_01 PARTITION OF logs
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

-- Automatic partition management
-- Use pg_partman extension

CTEs and Window Functions

-- Recursive CTE
WITH RECURSIVE subordinates AS (
    SELECT id, name, manager_id FROM employees WHERE id = 1
    UNION ALL
    SELECT e.id, e.name, e.manager_id
    FROM employees e
    JOIN subordinates s ON e.manager_id = s.id
)
SELECT * FROM subordinates;

-- Window functions
SELECT
    name,
    salary,
    AVG(salary) OVER (PARTITION BY department) as dept_avg,
    RANK() OVER (ORDER BY salary DESC) as salary_rank
FROM employees;

Performance Tuning

Configuration

-- Memory settings (postgresql.conf)
shared_buffers = '256MB'           -- 25% of RAM
effective_cache_size = '768MB'     -- 75% of RAM
work_mem = '64MB'                  -- Per-operation memory
maintenance_work_mem = '128MB'     -- For VACUUM, CREATE INDEX

-- Connection pooling (use PgBouncer)
max_connections = 100

Query Optimization

-- Analyze query plan
EXPLAIN (ANALYZE, BUFFERS) SELECT ...;

-- Update statistics
ANALYZE table_name;

-- Identify slow queries
SELECT * FROM pg_stat_statements ORDER BY total_time DESC LIMIT 10;

Maintenance

-- Vacuum and analyze
VACUUM ANALYZE table_name;

-- Reindex
REINDEX INDEX CONCURRENTLY index_name;

-- Check bloat
SELECT schemaname, tablename,
       pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
FROM pg_tables WHERE schemaname = 'public';

Python Integration

import asyncpg
from contextlib import asynccontextmanager

@asynccontextmanager
async def get_pool(dsn: str):
    pool = await asyncpg.create_pool(dsn, min_size=5, max_size=20)
    try:
        yield pool
    finally:
        await pool.close()

# Usage
async with get_pool(DATABASE_URL) as pool:
    async with pool.acquire() as conn:
        user = await conn.fetchrow(
            "SELECT * FROM users WHERE id = $1", user_id
        )

Best Practices

  • Use connection pooling (PgBouncer, asyncpg pool)
  • Enable pg_stat_statements for query monitoring
  • Use TIMESTAMPTZ over TIMESTAMP
  • Prefer JSONB over JSON
  • Use UUID for distributed IDs
  • Regular VACUUM ANALYZE
  • Monitor with pg_stat_* views