SQL Interface
Complete SQL reference for AVA Database with extended analytics commands.
Standard SQL Support
AVA supports standard SQL-92 syntax for data manipulation and queries.
SELECT Statement
-- Basic SELECT
SELECT column1, column2 FROM table_name;
-- With WHERE clause
SELECT * FROM sales WHERE revenue > 1000;
-- Aggregations
SELECT
region,
COUNT(*) as count,
SUM(revenue) as total_revenue,
AVG(revenue) as avg_revenue,
MIN(revenue) as min_revenue,
MAX(revenue) as max_revenue
FROM sales
GROUP BY region
HAVING total_revenue > 10000
ORDER BY total_revenue DESC;
JOIN Operations
-- INNER JOIN
SELECT s.*, c.customer_name
FROM sales s
INNER JOIN customers c ON s.customer_id = c.id;
-- LEFT JOIN
SELECT p.*, o.order_date
FROM products p
LEFT JOIN orders o ON p.product_id = o.product_id;
-- Multiple JOINs
SELECT
o.order_id,
c.customer_name,
p.product_name,
o.quantity
FROM orders o
JOIN customers c ON o.customer_id = c.id
JOIN products p ON o.product_id = p.id;
Data Loading
.load Command
Load data from CSV files into AVA.
-- Load CSV file
.load "data.csv" as my_table
-- Load with specific delimiter
.load "data.tsv" as my_table delimiter="\t"
-- Load multiple files
.load "sales_2023_*.csv" as sales
CREATE TABLE
CREATE TABLE sales (
order_id INT,
customer_id INT,
product_id INT,
quantity INT,
price DOUBLE,
order_date DATE
);
REGRESS Command
AVA extends SQL with built-in regression analysis capabilities.
Syntax
REGRESS dependent_variable
FROM table_name
USING predictor1, predictor2, ...
Simple Linear Regression
-- Predict price based on square feet
REGRESS price FROM houses USING sqft
-- Output includes:
-- - Coefficients (intercept and slopes)
-- - R-squared value
-- - Standard errors
-- - P-values
Multiple Linear Regression
-- Multiple predictors
REGRESS sales
FROM advertising
USING tv, radio, newspaper
-- With more features
REGRESS price
FROM real_estate
USING sqft, bedrooms, bathrooms, age, distance_to_city
CREATE REGRESSION MODEL
Create named models for predictions.
-- Create model
CREATE REGRESSION MODEL price_model
AS SELECT sqft, bedrooms, age
FROM housing_data
PREDICT price
-- Use model for predictions
SELECT *,
PREDICT(price_model) as predicted_price,
price - PREDICT(price_model) as residual
FROM new_listings;
Model Output
Regression output includes coefficients, R-squared, adjusted R-squared, standard errors, t-statistics, and p-values for each predictor.
Aggregate Functions
COUNT()
Count number of rows
SELECT COUNT(*) FROM sales
SUM()
Sum of values
SELECT SUM(revenue) FROM sales
AVG()
Average value
SELECT AVG(price) FROM products
MIN() / MAX()
Minimum/maximum
SELECT MIN(price), MAX(price)
STDDEV()
Standard deviation
SELECT STDDEV(revenue)
VARIANCE()
Statistical variance
SELECT VARIANCE(values)
Date and Time Functions
-- Extract date parts
SELECT
YEAR(order_date) as year,
MONTH(order_date) as month,
DAY(order_date) as day
FROM orders;
-- Date arithmetic
SELECT order_date + INTERVAL 7 DAY as delivery_date
FROM orders;
-- Date formatting
SELECT DATE_FORMAT(order_date, '%Y-%m-%d') as formatted
FROM orders;
Complete Example
-- Load data
.load "sales.csv" as sales
.load "customers.csv" as customers
.load "products.csv" as products
-- Analyze sales by product category
SELECT
p.category,
COUNT(DISTINCT s.customer_id) as unique_customers,
COUNT(*) as total_orders,
SUM(s.quantity * s.price) as total_revenue,
AVG(s.quantity * s.price) as avg_order_value
FROM sales s
JOIN products p ON s.product_id = p.id
GROUP BY p.category
ORDER BY total_revenue DESC;
-- Customer segmentation analysis
SELECT
CASE
WHEN total_spent > 10000 THEN 'Premium'
WHEN total_spent > 5000 THEN 'Standard'
ELSE 'Basic'
END as segment,
COUNT(*) as customer_count,
AVG(total_spent) as avg_spending
FROM (
SELECT
customer_id,
SUM(quantity * price) as total_spent
FROM sales
GROUP BY customer_id
) customer_totals
GROUP BY segment;
-- Predict future sales using regression
CREATE REGRESSION MODEL sales_forecast
AS SELECT
marketing_spend,
season,
competitor_activity
FROM historical_sales
PREDICT monthly_revenue;
-- Generate predictions
SELECT
month,
PREDICT(sales_forecast) as predicted_revenue
FROM future_periods;