Code Examples

Real-world examples demonstrating AVA Database capabilities.

Sample Dataset

Download our sample stock prices dataset (prices.csv) to follow along. Note that the examples below reference their own CSV filenames (for example, stock_prices.csv in the Time Series Analysis section), so rename the downloaded file or adjust the LoadTable paths to match your data.

Download prices.csv

Sales Data Analysis

Analyze sales data with aggregations, joins, and time-based queries.

import avapy

# Load the source tables referenced by the queries below.
# NOTE(review): "customers" is loaded but not used by any query in this
# example -- presumably kept for readers to extend; confirm or drop.
avapy.LoadTable("sales_2024.csv", "sales")
avapy.LoadTable("products.csv", "products")
avapy.LoadTable("customers.csv", "customers")

# Top 10 products by total revenue: join each sale to its product and
# aggregate order count, units sold, and revenue per product.
top_products = avapy.asql("""
    SELECT 
        p.product_name,
        COUNT(*) as orders,
        SUM(s.quantity) as units_sold,
        SUM(s.quantity * s.price) as revenue
    FROM sales s
    JOIN products p ON s.product_id = p.id
    GROUP BY p.product_name
    ORDER BY revenue DESC
    LIMIT 10
""")

avapy.Show(top_products)

# Monthly revenue trend: revenue and distinct-customer count per
# calendar month.
monthly_trend = avapy.asql("""
    SELECT 
        YEAR(order_date) as year,
        MONTH(order_date) as month,
        SUM(quantity * price) as revenue,
        COUNT(DISTINCT customer_id) as customers
    FROM sales
    GROUP BY year, month
    ORDER BY year, month
""")

avapy.Show(monthly_trend)

# Customer segmentation by lifetime spend: the inner query totals spend
# per customer, the outer query buckets those totals into tiers.
# The derived table carries an explicit alias (customer_totals) --
# standard SQL requires every derived table in FROM to be named.
segments = avapy.asql("""
    SELECT
        CASE
            WHEN total > 10000 THEN 'Platinum'
            WHEN total > 5000 THEN 'Gold'
            WHEN total > 1000 THEN 'Silver'
            ELSE 'Bronze'
        END as segment,
        COUNT(*) as customer_count,
        AVG(total) as avg_spending
    FROM (
        SELECT customer_id, SUM(quantity * price) as total
        FROM sales
        GROUP BY customer_id
    ) AS customer_totals
    GROUP BY segment
""")

avapy.Show(segments)

Predictive Modeling

Build and use regression models for predictions.

import avapy
import numpy as np

# Load training data and the hold-out test set.
avapy.LoadTable("housing_train.csv", "train")
avapy.LoadTable("housing_test.csv", "test")

# Train a regression model on five features, targeting sale_price.
avapy.asql("""
    CREATE REGRESSION MODEL housing_model
    AS SELECT 
        square_feet,
        bedrooms,
        bathrooms,
        year_built,
        location_score
    FROM train
    PREDICT sale_price
""")

# Score the test set and materialize the result as a table named
# "predictions" so GetTable("predictions") below can resolve it.
# (A plain SELECT assigned to a Python variable is never registered
# under that table name.)
avapy.asql("""
    CREATE TABLE predictions AS
    SELECT 
        id,
        actual_price,
        PREDICT(housing_model) as predicted_price,
        ABS(actual_price - PREDICT(housing_model)) as error,
        ABS(actual_price - PREDICT(housing_model)) / actual_price * 100 as pct_error
    FROM test
""")

# Read actuals and predictions from the SAME table so the two arrays are
# guaranteed row-aligned; pulling actuals from "test" and predictions
# from "predictions" would rely on both tables sharing an ordering.
predictions_table = avapy.GetTable("predictions")
actual = avapy.ColToNumpy(predictions_table.column("actual_price"))
predicted = avapy.ColToNumpy(predictions_table.column("predicted_price"))

# Standard regression metrics.
mae = np.mean(np.abs(actual - predicted))          # mean absolute error
rmse = np.sqrt(np.mean((actual - predicted)**2))   # root mean squared error
# Coefficient of determination: 1 - SS_res / SS_tot.
r2 = 1 - (np.sum((actual - predicted)**2) / np.sum((actual - np.mean(actual))**2))

print(f"Model Performance:")
print(f"  MAE: ${mae:,.2f}")
print(f"  RMSE: ${rmse:,.2f}")
print(f"  R²: {r2:.4f}")

Time Series Analysis

import avapy

# Register the stock price history under the table name "stocks".
# Assumes one row per (symbol, date) -- TODO confirm against the CSV.
avapy.LoadTable("stock_prices.csv", "stocks")

# Calculate moving averages per symbol with window functions.
# ROWS BETWEEN n PRECEDING AND CURRENT ROW spans n+1 rows, hence
# 6 PRECEDING for a 7-row window and 29 PRECEDING for a 30-row window
# (row-based, so gaps in trading days are not calendar-adjusted).
# NOTE(review): the result is neither assigned nor displayed here --
# capture it (e.g. ma = avapy.asql(...)) to inspect the output.
avapy.asql("""
    SELECT 
        date,
        symbol,
        close_price,
        AVG(close_price) OVER (
            PARTITION BY symbol 
            ORDER BY date 
            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) as ma_7day,
        AVG(close_price) OVER (
            PARTITION BY symbol 
            ORDER BY date 
            ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
        ) as ma_30day
    FROM stocks
    ORDER BY symbol, date
""")

# Year-over-year growth of the average close price per symbol.
# LAG(...) over the aggregated AVG fetches the previous year's average
# within each symbol; the first year per symbol has no prior row, so its
# yoy_growth_pct is NULL.
avapy.asql("""
    SELECT 
        symbol,
        YEAR(date) as year,
        AVG(close_price) as avg_price,
        (AVG(close_price) - LAG(AVG(close_price)) OVER (
            PARTITION BY symbol ORDER BY YEAR(date)
        )) / LAG(AVG(close_price)) OVER (
            PARTITION BY symbol ORDER BY YEAR(date)
        ) * 100 as yoy_growth_pct
    FROM stocks
    GROUP BY symbol, YEAR(date)
""")

ETL Pipeline

Extract, transform, and load data workflow.

import avapy
import glob

# Extract: load every raw_data_*.csv into its own "raw_<name>" table,
# e.g. raw_data_sales.csv -> table raw_sales.
for file in glob.glob("raw_data_*.csv"):
    table_name = file.replace(".csv", "").replace("raw_data_", "")
    avapy.LoadTable(file, f"raw_{table_name}")
    print(f"Loaded {file}")

# Transform: clean and enrich. Raw CSV columns arrive untyped, which is
# why quantity/price/order_date are CAST explicitly -- so the derived
# revenue, order_size, and filter expressions must use the cast forms
# too, rather than multiplying/comparing the raw (possibly textual)
# columns.
avapy.asql("""
    CREATE TABLE clean_sales AS
    SELECT 
        order_id,
        customer_id,
        product_id,
        CAST(quantity AS INT) as quantity,
        CAST(price AS DOUBLE) as price,
        CAST(order_date AS DATE) as order_date,
        CAST(quantity AS INT) * CAST(price AS DOUBLE) as revenue,
        CASE 
            WHEN CAST(quantity AS INT) * CAST(price AS DOUBLE) > 1000 THEN 'Large'
            WHEN CAST(quantity AS INT) * CAST(price AS DOUBLE) > 100 THEN 'Medium'
            ELSE 'Small'
        END as order_size
    FROM raw_sales
    WHERE CAST(quantity AS INT) > 0 
      AND CAST(price AS DOUBLE) > 0
      AND order_date IS NOT NULL
""")

# Aggregate: one summary row per day from the cleaned table.
avapy.asql("""
    CREATE TABLE daily_summary AS
    SELECT 
        order_date,
        COUNT(*) as orders,
        SUM(revenue) as total_revenue,
        AVG(revenue) as avg_order_value,
        COUNT(DISTINCT customer_id) as unique_customers
    FROM clean_sales
    GROUP BY order_date
""")

# Load: export the summary table back out to CSV
# (.export is an AVA dot-command, not standard SQL).
avapy.asql("""
    .export daily_summary TO 'daily_summary.csv'
""")

print("ETL pipeline complete!")