Code Examples
Real-world examples demonstrating AVA Database capabilities.
Sample Dataset
Download our sample stock prices dataset (prices.csv) to follow along with the examples below.
Sales Data Analysis
Analyze sales data with aggregations, joins, and time-based queries.
import avapy

# Register the raw CSV files as AVA tables. Note: the queries below only
# join sales and products; customers is loaded but not referenced here.
avapy.LoadTable("sales_2024.csv", "sales")
avapy.LoadTable("products.csv", "products")
avapy.LoadTable("customers.csv", "customers")

# Rank the ten best-selling products by total revenue.
revenue_leaders = avapy.asql("""
SELECT
p.product_name,
COUNT(*) as orders,
SUM(s.quantity) as units_sold,
SUM(s.quantity * s.price) as revenue
FROM sales s
JOIN products p ON s.product_id = p.id
GROUP BY p.product_name
ORDER BY revenue DESC
LIMIT 10
""")
avapy.Show(revenue_leaders)

# Revenue and distinct-customer counts per calendar month.
monthly_revenue = avapy.asql("""
SELECT
YEAR(order_date) as year,
MONTH(order_date) as month,
SUM(quantity * price) as revenue,
COUNT(DISTINCT customer_id) as customers
FROM sales
GROUP BY year, month
ORDER BY year, month
""")

# Bucket customers into spending tiers from their lifetime spend
# (inner query totals per customer, outer query labels and aggregates).
customer_segments = avapy.asql("""
SELECT
CASE
WHEN total > 10000 THEN 'Platinum'
WHEN total > 5000 THEN 'Gold'
WHEN total > 1000 THEN 'Silver'
ELSE 'Bronze'
END as segment,
COUNT(*) as customer_count,
AVG(total) as avg_spending
FROM (
SELECT customer_id, SUM(quantity * price) as total
FROM sales
GROUP BY customer_id
)
GROUP BY segment
""")
Predictive Modeling
Build and use regression models for predictions.
import avapy
import numpy as np
# Load training data
avapy.LoadTable("housing_train.csv", "train")
avapy.LoadTable("housing_test.csv", "test")
# Create regression model
avapy.asql("""
CREATE REGRESSION MODEL housing_model
AS SELECT
square_feet,
bedrooms,
bathrooms,
year_built,
location_score
FROM train
PREDICT sale_price
""")
# Make predictions on test set
predictions = avapy.asql("""
SELECT
id,
actual_price,
PREDICT(housing_model) as predicted_price,
ABS(actual_price - PREDICT(housing_model)) as error,
ABS(actual_price - PREDICT(housing_model)) / actual_price * 100 as pct_error
FROM test
""")
# Calculate model accuracy
test_table = avapy.GetTable("test")
actual = avapy.ColToNumpy(test_table.column("actual_price"))
predictions_table = avapy.GetTable("predictions")
predicted = avapy.ColToNumpy(predictions_table.column("predicted_price"))
mae = np.mean(np.abs(actual - predicted))
rmse = np.sqrt(np.mean((actual - predicted)**2))
r2 = 1 - (np.sum((actual - predicted)**2) / np.sum((actual - np.mean(actual))**2))
print(f"Model Performance:")
print(f" MAE: ${mae:,.2f}")
print(f" RMSE: ${rmse:,.2f}")
print(f" R²: {r2:.4f}")
Time Series Analysis
Compute moving averages and year-over-year growth over stock price data using window functions.
import avapy

avapy.LoadTable("stock_prices.csv", "stocks")

# 7-day and 30-day trailing moving averages per symbol, computed with
# window functions (frame of 6/29 preceding rows plus the current row).
moving_averages = avapy.asql("""
SELECT
date,
symbol,
close_price,
AVG(close_price) OVER (
PARTITION BY symbol
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as ma_7day,
AVG(close_price) OVER (
PARTITION BY symbol
ORDER BY date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as ma_30day
FROM stocks
ORDER BY symbol, date
""")

# Percentage change of each year's average price versus the prior year,
# using LAG over the yearly aggregates.
yearly_growth = avapy.asql("""
SELECT
symbol,
YEAR(date) as year,
AVG(close_price) as avg_price,
(AVG(close_price) - LAG(AVG(close_price)) OVER (
PARTITION BY symbol ORDER BY YEAR(date)
)) / LAG(AVG(close_price)) OVER (
PARTITION BY symbol ORDER BY YEAR(date)
) * 100 as yoy_growth_pct
FROM stocks
GROUP BY symbol, YEAR(date)
""")
ETL Pipeline
Extract, transform, and load data workflow.
import avapy
import glob

# Extract: ingest every raw_data_*.csv from the working directory,
# registering each as a table prefixed with "raw_".
for file in glob.glob("raw_data_*.csv"):
    # "raw_data_orders.csv" -> "orders" -> table "raw_orders"
    table_name = file.replace(".csv", "").replace("raw_data_", "")
    avapy.LoadTable(file, f"raw_{table_name}")
    print(f"Loaded {file}")

# Transform: cast columns to proper types, derive revenue and an order-size
# label, and drop rows with non-positive quantity/price or a missing date.
avapy.asql("""
CREATE TABLE clean_sales AS
SELECT
order_id,
customer_id,
product_id,
CAST(quantity AS INT) as quantity,
CAST(price AS DOUBLE) as price,
CAST(order_date AS DATE) as order_date,
quantity * price as revenue,
CASE
WHEN quantity * price > 1000 THEN 'Large'
WHEN quantity * price > 100 THEN 'Medium'
ELSE 'Small'
END as order_size
FROM raw_sales
WHERE quantity > 0
AND price > 0
AND order_date IS NOT NULL
""")

# Aggregate: roll the cleaned rows up into a one-row-per-day summary.
avapy.asql("""
CREATE TABLE daily_summary AS
SELECT
order_date,
COUNT(*) as orders,
SUM(revenue) as total_revenue,
AVG(revenue) as avg_order_value,
COUNT(DISTINCT customer_id) as unique_customers
FROM clean_sales
GROUP BY order_date
""")

# Load: write the summary back out as CSV.
avapy.asql("""
.export daily_summary TO 'daily_summary.csv'
""")
print("ETL pipeline complete!")