Data Manipulation Language (DML)
INSERT, UPDATE, and DELETE operations for modifying data in AVA Database.
INSERT - Add New Rows
Insert new rows into tables using various methods.
Insert Single Row
-- Insert with all columns
INSERT INTO prices (TICKER, DATE, PRICE)
VALUES ('AAPL', '2025-01-15', 185.50);
-- Insert multiple rows
INSERT INTO prices (TICKER, DATE, PRICE)
VALUES
('AAPL', '2025-01-16', 186.20),
('MSFT', '2025-01-16', 420.75),
('GOOGL', '2025-01-16', 142.30);
Insert from SELECT
-- Insert query results into another table
INSERT INTO prices_high
SELECT * FROM prices
WHERE PRICE > 100;
-- Insert with transformation
INSERT INTO daily_returns (TICKER, DATE, return_pct)
SELECT
TICKER,
DATE,
PRICE / LAG(PRICE, 1) - 1 as return_pct
FROM prices
GROUP BY TICKER;
Create Table and Insert
-- Create table from query results
CREATE TABLE price_summary AS (
SELECT
TICKER,
COUNT(*) as num_records,
AVG(PRICE) as avg_price,
MIN(PRICE) as min_price,
MAX(PRICE) as max_price
FROM prices
GROUP BY TICKER
);
UPDATE - Modify Existing Data
AVA Database Unique Syntax
AVA uses column_name: expression syntax instead of SET column_name = expression.
This enables powerful vectorized operations and window functions directly in UPDATE statements.
Basic UPDATE Syntax
-- AVA syntax (column: expression)
UPDATE PRICE: PRICE * 1.1
FROM prices
WHERE TICKER = 'AAPL';
-- Update multiple columns
UPDATE
PRICE: PRICE * adjustment_factor,
VOLUME: VOLUME * 2
FROM prices
WHERE DATE = '2025-01-15';
UPDATE with Window Functions
-- Add previous day's price (LAG)
UPDATE prev_price: LAG(PRICE, 1)
FROM prices
GROUP BY TICKER;
-- Calculate daily return
UPDATE daily_return: PRICE / LAG(PRICE, 1) - 1
FROM prices
GROUP BY TICKER;
-- Add moving average
UPDATE
price_ema20: EMA(PRICE, 0.095),
price_ema50: EMA(PRICE, 0.039)
FROM prices
GROUP BY TICKER;
UPDATE with Multiple Window Functions
-- Complex time-series features
UPDATE
prev_price: LAG(PRICE, 1),
next_price: LEAD(PRICE, 1),
daily_return: PRICE / LAG(PRICE, 1) - 1,
forward_return: LEAD(PRICE, 1) / PRICE - 1,
price_ema: EMA(PRICE, 0.1),
cumulative_return: CUMPROD(1 + daily_return) - 1,
price_rank: WRANK(PRICE, 10)
FROM prices
GROUP BY TICKER;
UPDATE with Table Lookup
-- Lookup from another table using [key] syntax
UPDATE company_name: companies[TICKER].company_name
FROM prices;
-- Multiple key lookup
UPDATE factor_value: factors[TICKER, DATE].value
FROM portfolio;
-- Conditional lookup with COALESCE
UPDATE
sector: companies[TICKER].sector,
industry: COALESCE(companies[TICKER].industry, 'Unknown')
FROM prices;
UPDATE with Aggregation
-- Normalize by group mean
UPDATE normalized_price: PRICE / AVG(PRICE)
FROM prices
GROUP BY TICKER;
-- Z-score calculation
UPDATE
mean_price: AVG(PRICE),
std_price: STD(PRICE),
z_score: (PRICE - AVG(PRICE)) / STD(PRICE)
FROM prices
GROUP BY TICKER;
UPDATE with Subquery
-- Update based on subquery result
UPDATE category: CASE
WHEN PRICE > (SELECT AVG(PRICE) FROM prices) THEN 'High'
WHEN PRICE < (SELECT AVG(PRICE) FROM prices) THEN 'Low'
ELSE 'Medium'
END
FROM prices;
DELETE - Remove Rows
Remove rows from tables based on conditions.
Basic DELETE
-- Delete specific rows
DELETE FROM prices
WHERE TICKER = 'AAPL' AND DATE < '2024-01-01';
-- Delete with multiple conditions
DELETE FROM prices
WHERE (PRICE IS NULL OR PRICE <= 0)
OR (DATE IS NULL);
DELETE with Subquery
-- Delete based on subquery
DELETE FROM prices
WHERE PRICE < (SELECT AVG(PRICE) * 0.5 FROM prices);
-- Delete using NOT IN
DELETE FROM prices
WHERE TICKER NOT IN (
SELECT TICKER FROM active_tickers
);
DELETE with Table Lookup
-- Delete using table lookup
DELETE FROM prices
WHERE exclusions[TICKER].is_excluded IS NOT NULL;
-- Delete based on lookup value
DELETE FROM prices
WHERE companies[TICKER].status = 'delisted';
DELETE All Rows
-- Remove all rows (keeps table structure)
DELETE FROM prices;
-- More efficient: drop and recreate
DROP TABLE prices;
-- Then recreate or reload
Practical Examples
Example 1: Data Quality Workflow
-- Load raw data
.load '/data/bt/20251208W/POLARIS1/data/prices.csv' as prices_raw
-- Remove invalid records
DELETE FROM prices_raw
WHERE PRICE IS NULL
OR PRICE <= 0
OR DATE IS NULL;
-- Add quality flags
UPDATE
prev_price: LAG(PRICE, 1),
price_change: ABS(PRICE / LAG(PRICE, 1) - 1),
is_outlier: CASE
WHEN ABS(PRICE / LAG(PRICE, 1) - 1) > 0.5 THEN 1
ELSE 0
END
FROM prices_raw
GROUP BY TICKER;
-- Remove extreme outliers
DELETE FROM prices_raw
WHERE is_outlier = 1;
-- Save cleaned data
.save prices_raw AS '/data/processed/prices_clean.adb' parameter(format=binary)
Example 2: Feature Engineering
-- Load price data
.load '/data/bt/20251208W/POLARIS1/data/prices.csv' as prices
-- Add technical indicators
UPDATE
-- Returns
daily_return: PRICE / LAG(PRICE, 1) - 1,
weekly_return: PRICE / LAG(PRICE, 5) - 1,
monthly_return: PRICE / LAG(PRICE, 21) - 1,
-- Moving averages
ema_20: EMA(PRICE, 0.095),
ema_50: EMA(PRICE, 0.039),
-- Momentum
momentum_20: PRICE / LAG(PRICE, 20) - 1,
-- Ranking
price_rank: WRANK(PRICE, 10),
return_rank: WRANK(daily_return, 10)
FROM prices
GROUP BY TICKER;
-- Add cross-sectional features
UPDATE
avg_price_today: AVG(PRICE),
rel_price: PRICE / AVG(PRICE)
FROM prices
GROUP BY DATE;
Example 3: Corporate Actions Adjustment
-- Load prices and splits
.load '/data/prices.csv' as prices
.load '/data/splits.csv' as splits
-- Calculate cumulative split adjustment
UPDATE split_adj: CUMPROD(split_ratio)
FROM splits
GROUP BY TICKER;
-- Apply split adjustment to prices using ASOF JOIN
UPDATE split_adjustment: COALESCE(s.split_adj, 1)
FROM prices p
ASOF JOIN splits s
ON p.TICKER = s.TICKER
AND p.DATE = s.effective_date;
-- Adjust prices
UPDATE adjusted_price: PRICE * split_adjustment
FROM prices;
Example 4: Python API Integration
import avapy
# Load data
avapy.asql(".load '/data/prices.csv' as prices")
# Add computed columns
avapy.asql("""
UPDATE
prev_price: LAG(PRICE, 1),
daily_return: PRICE / LAG(PRICE, 1) - 1
FROM prices
GROUP BY TICKER
""")
# Filter and save
avapy.asql("DELETE FROM prices WHERE daily_return IS NULL")
avapy.asql(".save prices AS '/data/prices_with_returns.adb' parameter(format=binary)")
# Query results
result = avapy.asql("""
SELECT TICKER, AVG(daily_return) as avg_return
FROM prices
GROUP BY TICKER
""")
print(result)