Data Manipulation Language (DML)

INSERT, UPDATE, and DELETE operations for modifying data in AVA Database.

INSERT - Add New Rows

Insert new rows into tables using various methods.

Insert Single Row

-- Insert with all columns
INSERT INTO prices (TICKER, DATE, PRICE)
VALUES ('AAPL', '2025-01-15', 185.50);

-- Insert multiple rows
INSERT INTO prices (TICKER, DATE, PRICE)
VALUES
    ('AAPL', '2025-01-16', 186.20),
    ('MSFT', '2025-01-16', 420.75),
    ('GOOGL', '2025-01-16', 142.30);

Insert from SELECT

-- Insert query results into another table
INSERT INTO prices_high
SELECT * FROM prices
WHERE PRICE > 100;

-- Insert with transformation
INSERT INTO daily_returns (TICKER, DATE, return_pct)
SELECT
    TICKER,
    DATE,
    PRICE / LAG(PRICE, 1) - 1 as return_pct
FROM prices
GROUP BY TICKER;

Create Table and Insert

-- Create table from query results
CREATE TABLE price_summary AS (
    SELECT
        TICKER,
        COUNT(*) as num_records,
        AVG(PRICE) as avg_price,
        MIN(PRICE) as min_price,
        MAX(PRICE) as max_price
    FROM prices
    GROUP BY TICKER
);

UPDATE - Modify Existing Data

AVA Database Unique Syntax

AVA uses column_name: expression syntax instead of SET column_name = expression. This enables powerful vectorized operations and window functions directly in UPDATE statements.

Basic UPDATE Syntax

-- AVA syntax (column: expression)
UPDATE PRICE: PRICE * 1.1
FROM prices
WHERE TICKER = 'AAPL';

-- Update multiple columns
UPDATE
    PRICE: PRICE * adjustment_factor,
    VOLUME: VOLUME * 2
FROM prices
WHERE DATE = '2025-01-15';

UPDATE with Window Functions

-- Add previous day's price (LAG)
UPDATE prev_price: LAG(PRICE, 1)
FROM prices
GROUP BY TICKER;

-- Calculate daily return
UPDATE daily_return: PRICE / LAG(PRICE, 1) - 1
FROM prices
GROUP BY TICKER;

-- Add moving average
UPDATE
    price_ema20: EMA(PRICE, 0.095),
    price_ema50: EMA(PRICE, 0.039)
FROM prices
GROUP BY TICKER;

UPDATE with Multiple Window Functions

-- Complex time-series features
UPDATE
    prev_price: LAG(PRICE, 1),
    next_price: LEAD(PRICE, 1),
    daily_return: PRICE / LAG(PRICE, 1) - 1,
    forward_return: LEAD(PRICE, 1) / PRICE - 1,
    price_ema: EMA(PRICE, 0.1),
    cumulative_return: CUMPROD(1 + daily_return) - 1,
    price_rank: WRANK(PRICE, 10)
FROM prices
GROUP BY TICKER;

UPDATE with Table Lookup

-- Lookup from another table using [key] syntax
UPDATE company_name: companies[TICKER].company_name
FROM prices;

-- Multiple key lookup
UPDATE factor_value: factors[TICKER, DATE].value
FROM portfolio;

-- Conditional lookup with COALESCE
UPDATE
    sector: companies[TICKER].sector,
    industry: COALESCE(companies[TICKER].industry, 'Unknown')
FROM prices;

UPDATE with Aggregation

-- Normalize by group mean
UPDATE normalized_price: PRICE / AVG(PRICE)
FROM prices
GROUP BY TICKER;

-- Z-score calculation
UPDATE
    mean_price: AVG(PRICE),
    std_price: STD(PRICE),
    z_score: (PRICE - AVG(PRICE)) / STD(PRICE)
FROM prices
GROUP BY TICKER;

UPDATE with Subquery

-- Update based on subquery result
UPDATE category: CASE
    WHEN PRICE > (SELECT AVG(PRICE) FROM prices) THEN 'High'
    WHEN PRICE < (SELECT AVG(PRICE) FROM prices) THEN 'Low'
    ELSE 'Medium'
END
FROM prices;

DELETE - Remove Rows

Remove rows from tables based on conditions.

Basic DELETE

-- Delete specific rows
DELETE FROM prices
WHERE TICKER = 'AAPL' AND DATE < '2024-01-01';

-- Delete with multiple conditions
DELETE FROM prices
WHERE (PRICE IS NULL OR PRICE <= 0)
   OR (DATE IS NULL);

DELETE with Subquery

-- Delete based on subquery
DELETE FROM prices
WHERE PRICE < (SELECT AVG(PRICE) * 0.5 FROM prices);

-- Delete using NOT IN
DELETE FROM prices
WHERE TICKER NOT IN (
    SELECT TICKER FROM active_tickers
);

DELETE with Table Lookup

-- Delete using table lookup
DELETE FROM prices
WHERE exclusions[TICKER].is_excluded IS NOT NULL;

-- Delete based on lookup value
DELETE FROM prices
WHERE companies[TICKER].status = 'delisted';

DELETE All Rows

-- Remove all rows (keeps table structure)
DELETE FROM prices;

-- More efficient: drop and recreate
DROP TABLE prices;
-- Then recreate or reload

Practical Examples

Example 1: Data Quality Workflow

-- Load raw data
.load '/data/bt/20251208W/POLARIS1/data/prices.csv' as prices_raw

-- Remove invalid records
DELETE FROM prices_raw
WHERE PRICE IS NULL
   OR PRICE <= 0
   OR DATE IS NULL;

-- Add quality flags
UPDATE
    prev_price: LAG(PRICE, 1),
    price_change: ABS(PRICE / LAG(PRICE, 1) - 1),
    is_outlier: CASE
        WHEN ABS(PRICE / LAG(PRICE, 1) - 1) > 0.5 THEN 1
        ELSE 0
    END
FROM prices_raw
GROUP BY TICKER;

-- Remove extreme outliers
DELETE FROM prices_raw
WHERE is_outlier = 1;

-- Save cleaned data
.save prices_raw AS '/data/processed/prices_clean.adb' parameter(format=binary)

Example 2: Feature Engineering

-- Load price data
.load '/data/bt/20251208W/POLARIS1/data/prices.csv' as prices

-- Add technical indicators
UPDATE
    -- Returns
    daily_return: PRICE / LAG(PRICE, 1) - 1,
    weekly_return: PRICE / LAG(PRICE, 5) - 1,
    monthly_return: PRICE / LAG(PRICE, 21) - 1,

    -- Moving averages
    ema_20: EMA(PRICE, 0.095),
    ema_50: EMA(PRICE, 0.039),

    -- Momentum
    momentum_20: PRICE / LAG(PRICE, 20) - 1,

    -- Ranking
    price_rank: WRANK(PRICE, 10),
    return_rank: WRANK(daily_return, 10)
FROM prices
GROUP BY TICKER;

-- Add cross-sectional features
UPDATE
    avg_price_today: AVG(PRICE),
    rel_price: PRICE / AVG(PRICE)
FROM prices
GROUP BY DATE;

Example 3: Corporate Actions Adjustment

-- Load prices and splits
.load '/data/prices.csv' as prices
.load '/data/splits.csv' as splits

-- Calculate cumulative split adjustment
UPDATE split_adj: CUMPROD(split_ratio)
FROM splits
GROUP BY TICKER;

-- Apply split adjustment to prices using ASOF JOIN
UPDATE split_adjustment: COALESCE(s.split_adj, 1)
FROM prices p
ASOF JOIN splits s
    ON p.TICKER = s.TICKER
   AND p.DATE = s.effective_date;

-- Adjust prices
UPDATE adjusted_price: PRICE * split_adjustment
FROM prices;

Example 4: Python API Integration

import avapy

# Load data
avapy.asql(".load '/data/prices.csv' as prices")

# Add computed columns
avapy.asql("""
    UPDATE
        prev_price: LAG(PRICE, 1),
        daily_return: PRICE / LAG(PRICE, 1) - 1
    FROM prices
    GROUP BY TICKER
""")

# Filter and save
avapy.asql("DELETE FROM prices WHERE daily_return IS NULL")
avapy.asql(".save prices AS '/data/prices_with_returns.adb' parameter(format=binary)")

# Query results
result = avapy.asql("""
    SELECT TICKER, AVG(daily_return) as avg_return
    FROM prices
    GROUP BY TICKER
""")
print(result)