Files
yolkbook/backend/routers/stats.py
derekc 60fed6d464 Implement performance improvements across backend and frontend
- models.py: add composite (user_id, date) indexes to flock_history,
  feed_purchases, and other_purchases for faster date-filtered queries
  (egg_collections already had one via its unique constraint)
- main.py: add v2.2 migration to create the three composite indexes on
  existing installs at startup
- stats.py: fix N+1 query in monthly_stats — flock history is now fetched
  once and looked up per month using bisect_right instead of one DB query
  per month row; also remove unnecessary Decimal(str(...)) round-trips
  since SQLAlchemy already returns Numeric columns as Decimal
- eggs.py: add limit parameter (default 500, max 1000) to list_eggs to
  cap unbounded fetches on large datasets
- dashboard.js: pass start= (30 days ago) when fetching eggs so the
  dashboard only loads the data it actually needs for the chart and
  recent collections list

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 00:02:58 -07:00

267 lines
9.4 KiB
Python

import calendar
from bisect import bisect_right
from datetime import date, datetime, timedelta
from decimal import Decimal
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from fastapi import APIRouter, Depends
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from database import get_db
from models import EggCollection, FlockHistory, FeedPurchase, OtherPurchase, User
from schemas import DashboardStats, BudgetStats, MonthlySummary
from auth import get_current_user
# Router for the statistics endpoints; every route below is served under /api/stats.
router = APIRouter(prefix="/api/stats", tags=["stats"])
def _today(user_timezone: str) -> date:
    """Return the current calendar date in the user's timezone.

    Falls back to the server-local date (``date.today()``) whenever the
    timezone cannot be resolved, so a bad stored value never breaks the
    stats endpoints.

    Args:
        user_timezone: IANA timezone key, e.g. ``"America/Denver"``.

    Returns:
        Today's date as observed in that timezone, or the server-local date
        when the key is empty, malformed, or unknown.
    """
    if not user_timezone:
        return date.today()
    try:
        zone = ZoneInfo(user_timezone)
    except (ZoneInfoNotFoundError, ValueError):
        # ZoneInfo raises ValueError (not ZoneInfoNotFoundError) for keys that
        # are absolute paths or are not normalized relative paths.
        return date.today()
    return datetime.now(zone).date()
def _avg_per_hen_30d(db: Session, user_id: int, start_30d: date) -> float | None:
    """Average eggs-per-hen over the user's collections dated on or after ``start_30d``.

    Each collection's egg count is divided by the flock size in effect on that
    collection's date (the newest FlockHistory entry dated on or before it),
    and those ratios are averaged and rounded to 3 decimals.

    Returns:
        The rounded average, or None when no collection in range has a
        non-zero flock size.
    """
    # Correlated scalar subquery: for each collection row, pick the most
    # recent flock entry that is not later than the collection date.
    flock_size_on_day = (
        select(FlockHistory.chicken_count)
        .where(
            FlockHistory.user_id == user_id,
            FlockHistory.date <= EggCollection.date,
        )
        .order_by(FlockHistory.date.desc())
        .limit(1)
        .correlate(EggCollection)
        .scalar_subquery()
    )
    stmt = select(EggCollection.eggs, flock_size_on_day.label('flock_count')).where(
        EggCollection.user_id == user_id,
        EggCollection.date >= start_30d,
    )

    ratios = []
    for record in db.execute(stmt).all():
        # Skip collections with no flock entry (None) or a zero-size flock.
        if record.flock_count:
            ratios.append(record.eggs / record.flock_count)

    if not ratios:
        return None
    return round(sum(ratios) / len(ratios), 3)
def _current_flock(db: Session, user_id: int) -> int | None:
    """Return the chicken count of the user's latest flock-history entry.

    Returns None when the user has no flock history.
    """
    newest_first = (
        select(FlockHistory)
        .where(FlockHistory.user_id == user_id)
        .order_by(FlockHistory.date.desc())
        .limit(1)
    )
    latest = db.scalars(newest_first).first()
    if not latest:
        return None
    return latest.chicken_count
def _total_eggs(db: Session, user_id: int, start: date | None = None, end: date | None = None) -> int:
    """Return the user's total egg count, optionally limited to a date range.

    Args:
        db: Active database session.
        user_id: Owner of the collections to sum.
        start: Inclusive lower date bound; no lower bound when None.
        end: Inclusive upper date bound; no upper bound when None.

    Returns:
        The summed egg count as a plain ``int``; 0 when nothing matches.
    """
    q = select(func.coalesce(func.sum(EggCollection.eggs), 0)).where(EggCollection.user_id == user_id)
    if start is not None:
        q = q.where(EggCollection.date >= start)
    if end is not None:
        q = q.where(EggCollection.date <= end)
    # The driver may return SUM() as a Decimal (MySQL does for integer
    # columns); coerce so the declared ``int`` return type actually holds,
    # matching the int(...) conversion monthly_stats applies to its sums.
    return int(db.scalar(q))
def _total_feed_cost(db: Session, user_id: int, start: date | None = None, end: date | None = None):
    """Sum ``bags * price_per_bag`` over the user's feed purchases.

    ``start`` and ``end`` are optional inclusive date bounds. When no purchase
    matches, the query's COALESCE fallback of 0 is returned.
    """
    filters = [FeedPurchase.user_id == user_id]
    if start:
        filters.append(FeedPurchase.date >= start)
    if end:
        filters.append(FeedPurchase.date <= end)

    spend = func.sum(FeedPurchase.bags * FeedPurchase.price_per_bag)
    return db.scalar(select(func.coalesce(spend, 0)).where(*filters))
def _total_other_cost(db: Session, user_id: int, start: date | None = None, end: date | None = None):
    """Sum ``OtherPurchase.total`` over the user's non-feed purchases.

    ``start`` and ``end`` are optional inclusive date bounds. When no purchase
    matches, the query's COALESCE fallback of 0 is returned.
    """
    filters = [OtherPurchase.user_id == user_id]
    if start:
        filters.append(OtherPurchase.date >= start)
    if end:
        filters.append(OtherPurchase.date <= end)

    spend = func.sum(OtherPurchase.total)
    return db.scalar(select(func.coalesce(spend, 0)).where(*filters))
@router.get("/dashboard", response_model=DashboardStats)
def dashboard_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Build the dashboard summary for the authenticated user.

    Reports the current flock size, egg totals (all-time, since 30 days ago,
    since 7 days ago), the average eggs per logged day and per hen over the
    30-day window, and the number of distinct days with a collection.
    Window boundaries are computed from "today" in the user's timezone.
    """
    user_id = current_user.id
    today = _today(current_user.timezone)
    window_30d_start = today - timedelta(days=30)
    window_7d_start = today - timedelta(days=7)

    # Distinct collection dates for this user; narrowed below for the window.
    distinct_days = select(func.count(func.distinct(EggCollection.date))).where(
        EggCollection.user_id == user_id
    )
    days_tracked = db.scalar(distinct_days)
    active_days_30d = db.scalar(
        distinct_days.where(EggCollection.date >= window_30d_start)
    )

    eggs_30d = _total_eggs(db, user_id, start=window_30d_start)
    # Average over days that actually have data, not over the whole window.
    if active_days_30d:
        daily_avg_30d = round(eggs_30d / active_days_30d, 2)
    else:
        daily_avg_30d = None

    return DashboardStats(
        current_flock=_current_flock(db, user_id),
        total_eggs_alltime=_total_eggs(db, user_id),
        total_eggs_30d=eggs_30d,
        total_eggs_7d=_total_eggs(db, user_id, start=window_7d_start),
        avg_eggs_per_day_30d=daily_avg_30d,
        avg_eggs_per_hen_day_30d=_avg_per_hen_30d(db, user_id, window_30d_start),
        days_tracked=days_tracked,
    )
@router.get("/monthly", response_model=list[MonthlySummary])
def monthly_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return one summary per calendar month that has egg collections, newest first.

    For each such month the summary carries the egg total, number of logged
    days, average eggs per logged day, flock size as of the month's last day,
    average eggs per hen per day, feed and other costs, and cost per egg and
    per dozen. Months with purchases but no egg collections are not listed.

    Returns:
        A list of MonthlySummary objects, or an empty list when the user has
        no egg collections.
    """
    uid = current_user.id
    MONTH_NAMES = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

    # Egg totals grouped by (year, month); this result drives the output rows.
    egg_rows = db.execute(
        select(
            func.year(EggCollection.date).label('year'),
            func.month(EggCollection.date).label('month'),
            func.sum(EggCollection.eggs).label('total_eggs'),
            func.count(EggCollection.date).label('days_logged'),
        )
        .where(EggCollection.user_id == uid)
        .group_by(func.year(EggCollection.date), func.month(EggCollection.date))
        .order_by(func.year(EggCollection.date).desc(), func.month(EggCollection.date).desc())
    ).all()
    if not egg_rows:
        return []

    # Feed spend per (year, month), keyed for O(1) lookup in the loop below.
    feed_rows = db.execute(
        select(
            func.year(FeedPurchase.date).label('year'),
            func.month(FeedPurchase.date).label('month'),
            func.sum(FeedPurchase.bags * FeedPurchase.price_per_bag).label('feed_cost'),
        )
        .where(FeedPurchase.user_id == uid)
        .group_by(func.year(FeedPurchase.date), func.month(FeedPurchase.date))
    ).all()
    feed_map = {(r.year, r.month): r.feed_cost for r in feed_rows}

    # Other spend per (year, month).
    other_rows = db.execute(
        select(
            func.year(OtherPurchase.date).label('year'),
            func.month(OtherPurchase.date).label('month'),
            func.sum(OtherPurchase.total).label('other_cost'),
        )
        .where(OtherPurchase.user_id == uid)
        .group_by(func.year(OtherPurchase.date), func.month(OtherPurchase.date))
    ).all()
    other_map = {(r.year, r.month): r.other_cost for r in other_rows}

    # Fetch all flock history once (ascending) to avoid N+1 per month
    flock_all = db.scalars(
        select(FlockHistory)
        .where(FlockHistory.user_id == uid)
        .order_by(FlockHistory.date)
    ).all()
    flock_dates = [f.date for f in flock_all]
    flock_counts = [f.chicken_count for f in flock_all]

    def flock_at(month_end: date) -> int | None:
        """Flock size from the latest entry dated on or before ``month_end``."""
        idx = bisect_right(flock_dates, month_end) - 1
        return flock_counts[idx] if idx >= 0 else None

    results = []
    for row in egg_rows:
        y, m = int(row.year), int(row.month)
        month_end = date(y, m, calendar.monthrange(y, m)[1])
        flock = flock_at(month_end)

        total_eggs = int(row.total_eggs)
        days_logged = int(row.days_logged)
        avg_per_day = round(total_eggs / days_logged, 2) if days_logged else None
        # Compare against None explicitly: an average of 0 eggs/day is real
        # data and must yield 0.0 per hen, not "no data". A missing or zero
        # flock still yields None (and avoids dividing by zero).
        avg_per_hen = (
            round(avg_per_day / flock, 3)
            if (avg_per_day is not None and flock)
            else None
        )

        raw_feed_cost = feed_map.get((y, m))
        raw_other_cost = other_map.get((y, m))
        feed_cost = round(raw_feed_cost, 2) if raw_feed_cost else None
        other_cost = round(raw_other_cost, 2) if raw_other_cost else None
        total_cost = (raw_feed_cost or Decimal(0)) + (raw_other_cost or Decimal(0))

        # Cost metrics are only reported when there is both spend and eggs.
        cpe = round(total_cost / total_eggs, 4) if (total_cost and total_eggs) else None
        cpd = round(cpe * 12, 4) if cpe else None

        results.append(MonthlySummary(
            year=y,
            month=m,
            month_label=f"{MONTH_NAMES[m - 1]} {y}",
            total_eggs=total_eggs,
            days_logged=days_logged,
            avg_eggs_per_day=avg_per_day,
            flock_at_month_end=flock,
            avg_eggs_per_hen_per_day=avg_per_hen,
            feed_cost=feed_cost,
            other_cost=other_cost,
            cost_per_egg=cpe,
            cost_per_dozen=cpd,
        ))
    return results
@router.get("/budget", response_model=BudgetStats)
def budget_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Build the cost overview for the authenticated user.

    Reports feed and other spend, egg totals, and the combined cost per egg
    and per dozen, each both all-time and since 30 days before "today" in the
    user's timezone. Zero amounts are reported as None.
    """
    user_id = current_user.id
    window_start = _today(current_user.timezone) - timedelta(days=30)

    feed_all = _total_feed_cost(db, user_id)
    feed_30d = _total_feed_cost(db, user_id, start=window_start)
    other_all = _total_other_cost(db, user_id)
    other_30d = _total_other_cost(db, user_id, start=window_start)
    eggs_all = _total_eggs(db, user_id)
    eggs_30d = _total_eggs(db, user_id, start=window_start)

    def per_egg(cost, eggs):
        # Only defined when there is both spend and at least one egg.
        if eggs and cost:
            return round(Decimal(str(cost)) / Decimal(eggs), 4)
        return None

    def per_dozen(egg_cost):
        if not egg_cost:
            return None
        return round(egg_cost * 12, 4)

    def money(amount):
        # Two-decimal amount, or None for a zero/absent total.
        if not amount:
            return None
        return round(Decimal(str(amount)), 2)

    egg_cost_all = per_egg(feed_all + other_all, eggs_all)
    egg_cost_30d = per_egg(feed_30d + other_30d, eggs_30d)

    return BudgetStats(
        total_feed_cost=money(feed_all),
        total_feed_cost_30d=money(feed_30d),
        total_other_cost=money(other_all),
        total_other_cost_30d=money(other_30d),
        total_eggs_alltime=eggs_all,
        total_eggs_30d=eggs_30d,
        cost_per_egg=egg_cost_all,
        cost_per_dozen=per_dozen(egg_cost_all),
        cost_per_egg_30d=egg_cost_30d,
        cost_per_dozen_30d=per_dozen(egg_cost_30d),
    )