Back to Overview

SQL Interface

Complete SQL reference for AVA Database with extended analytics commands.

Standard SQL Support

AVA supports standard SQL-92 syntax for data manipulation and queries.

SELECT Statement

-- Basic SELECT
SELECT column1, column2 FROM table_name;

-- With WHERE clause
SELECT * FROM sales WHERE revenue > 1000;

-- Aggregations
SELECT
    region,
    COUNT(*) as count,
    SUM(revenue) as total_revenue,
    AVG(revenue) as avg_revenue,
    MIN(revenue) as min_revenue,
    MAX(revenue) as max_revenue
FROM sales
GROUP BY region
HAVING total_revenue > 10000
ORDER BY total_revenue DESC;

JOIN Operations

-- INNER JOIN
SELECT s.*, c.customer_name
FROM sales s
INNER JOIN customers c ON s.customer_id = c.id;

-- LEFT JOIN
SELECT p.*, o.order_date
FROM products p
LEFT JOIN orders o ON p.product_id = o.product_id;

-- Multiple JOINs
SELECT
    o.order_id,
    c.customer_name,
    p.product_name,
    o.quantity
FROM orders o
JOIN customers c ON o.customer_id = c.id
JOIN products p ON o.product_id = p.id;

Data Loading

.load Command

Load data from CSV files into AVA.

-- Load CSV file
.load "data.csv" as my_table

-- Load with specific delimiter
.load "data.tsv" as my_table delimiter="\t"

-- Load multiple files
.load "sales_2023_*.csv" as sales

CREATE TABLE

CREATE TABLE sales (
    order_id INT,
    customer_id INT,
    product_id INT,
    quantity INT,
    price DOUBLE,
    order_date DATE
);

REGRESS Command

AVA extends SQL with built-in regression analysis capabilities.

Syntax

REGRESS dependent_variable
FROM table_name
USING predictor1, predictor2, ...

Simple Linear Regression

-- Predict price based on square feet
REGRESS price FROM houses USING sqft

-- Output includes:
-- - Coefficients (intercept and slopes)
-- - R-squared value
-- - Standard errors
-- - P-values

Multiple Linear Regression

-- Multiple predictors
REGRESS sales
FROM advertising
USING tv, radio, newspaper

-- With more features
REGRESS price
FROM real_estate
USING sqft, bedrooms, bathrooms, age, distance_to_city

CREATE REGRESSION MODEL

Create named models for predictions.

-- Create model
CREATE REGRESSION MODEL price_model
AS SELECT sqft, bedrooms, age
FROM housing_data
PREDICT price

-- Use model for predictions
SELECT *,
       PREDICT(price_model) as predicted_price,
       price - PREDICT(price_model) as residual
FROM new_listings;

Model Output

Regression output includes coefficients, R-squared, adjusted R-squared, standard errors, t-statistics, and p-values for each predictor.

Aggregate Functions

COUNT()

Count number of rows

SELECT COUNT(*) FROM sales

SUM()

Sum of values

SELECT SUM(revenue) FROM sales

AVG()

Average value

SELECT AVG(price) FROM products

MIN() / MAX()

Minimum/maximum

SELECT MIN(price), MAX(price)

STDDEV()

Standard deviation

SELECT STDDEV(revenue)

VARIANCE()

Statistical variance

SELECT VARIANCE(values)

Date and Time Functions

-- Extract date parts
SELECT
    YEAR(order_date) as year,
    MONTH(order_date) as month,
    DAY(order_date) as day
FROM orders;

-- Date arithmetic
SELECT order_date + INTERVAL 7 DAY as delivery_date
FROM orders;

-- Date formatting
SELECT DATE_FORMAT(order_date, '%Y-%m-%d') as formatted
FROM orders;

Complete Example

-- Load data
.load "sales.csv" as sales
.load "customers.csv" as customers
.load "products.csv" as products

-- Analyze sales by product category
SELECT
    p.category,
    COUNT(DISTINCT s.customer_id) as unique_customers,
    COUNT(*) as total_orders,
    SUM(s.quantity * s.price) as total_revenue,
    AVG(s.quantity * s.price) as avg_order_value
FROM sales s
JOIN products p ON s.product_id = p.id
GROUP BY p.category
ORDER BY total_revenue DESC;

-- Customer segmentation analysis
SELECT
    CASE
        WHEN total_spent > 10000 THEN 'Premium'
        WHEN total_spent > 5000 THEN 'Standard'
        ELSE 'Basic'
    END as segment,
    COUNT(*) as customer_count,
    AVG(total_spent) as avg_spending
FROM (
    SELECT
        customer_id,
        SUM(quantity * price) as total_spent
    FROM sales
    GROUP BY customer_id
) customer_totals
GROUP BY segment;

-- Predict future sales using regression
CREATE REGRESSION MODEL sales_forecast
AS SELECT
    marketing_spend,
    season,
    competitor_activity
FROM historical_sales
PREDICT monthly_revenue;

-- Generate predictions
SELECT
    month,
    PREDICT(sales_forecast) as predicted_revenue
FROM future_periods;