LifeRPG_v2.0/modern/backend/worker.py
TLimoges33 2b961611fd
🚀 Major Enhancement: Complete AI-Powered LifeRPG Platform with Git LFS
 New Features:
- AI-powered habit creation with natural language processing
- HuggingFace transformers integration for sentiment analysis (tracked via Git LFS)
- Advanced predictive analytics and behavioral insights
- Voice & image input capabilities for hands-free habit tracking
- Real-time notifications and community features
- Plugin system with extensible architecture

🔧 Technical Improvements:
- Comprehensive FastAPI backend with 30+ endpoints
- React frontend with PWA capabilities
- Advanced authentication with 2FA support
- RBAC authorization system
- Comprehensive security features (CSRF, rate limiting, audit logging)
- Database migrations and health monitoring
- Docker containerization support
- Git LFS configured for large AI model files (2+ GB)

📚 Documentation & DevOps:
- Complete deployment guides for multiple platforms
- Professional README with feature highlights
- GitHub Actions CI/CD workflows
- Comprehensive API documentation
- Security audit roadmap and compliance framework
- Setup scripts for development environment

🧪 Testing & Quality:
- Comprehensive test suite with 20+ test modules
- Setup verification scripts
- Working development environment with both backend and frontend
- Health checks and monitoring systems

🌟 Ready for:
- Portfolio showcasing
- Community contributions
- Production deployment
- Professional presentation
2025-09-28 21:29:19 +00:00

298 lines
12 KiB
Python

import os
import time
from typing import Optional
try:
from rq import Queue
from rq import Retry
from redis import Redis
except Exception:
Queue = None
Retry = None
Redis = None
from metrics import (
record_job_processed, record_integration_sync_by_id,
log_job_event, record_enqueue_skipped, SYNC_JOB_DURATION_SECONDS
)
from notifier import emit_sync_event
from hooks import hooks_for_integration
from adapters import ADAPTERS, AdapterError, TransientError
def get_queue():
    """Return the RQ ``default`` queue, or ``None`` when Redis cannot be used.

    ``None`` is returned when the optional ``rq``/``redis`` imports failed,
    when a connection cannot be built from ``REDIS_URL`` (default
    ``redis://localhost:6379/0``), or when the connectivity probe fails.
    """
    if not (Queue and Redis):
        return None
    redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0')
    try:
        connection = Redis.from_url(redis_url)
        # Probe connectivity up front; a failing ping means "no queue".
        connection.ping()
        return Queue('default', connection=connection)
    except Exception:
        # Redis is unreachable or misconfigured.
        return None
def example_job(payload: dict):
    """Sample job: pause briefly, record the outcome metric, echo *payload*.

    Returns ``{'ok': True, 'echo': payload}``. If anything raises, the job is
    recorded as ``'error'`` and the exception is re-raised.
    """
    response = {'ok': True, 'echo': payload}
    try:
        time.sleep(0.1)
        record_job_processed('success')
    except Exception:
        record_job_processed('error')
        raise
    return response
def _sleep_backoff(attempt: int, base: float = 0.5, cap: float = 10.0):
    """Sleep before the next retry using capped exponential backoff.

    The delay is ``base * 2 ** (attempt - 1)`` seconds, limited to ``cap``,
    plus a small offset of ``0.1 * (attempt % 3)`` seconds. The offset depends
    only on *attempt*; it is deterministic, not random.

    Args:
        attempt: 1-based number of the attempt that just failed.
        base: Delay in seconds for the first retry.
        cap: Upper bound in seconds for the exponential part of the delay.
    """
    try:
        delay = min(cap, base * (2 ** (attempt - 1)))
    except OverflowError:
        # For very large attempts the power of two no longer fits in a float;
        # the exponential term is far beyond the cap anyway, so saturate.
        delay = cap
    time.sleep(delay + (0.1 * (attempt % 3)))
def _report_sync_failure(db, provider: str, integration_id: int, outcome: str,
                         reason: str, attempts: int, elapsed: float) -> None:
    """Record metrics, log, notify and run post-hooks for a failed sync.

    ``outcome`` labels the per-integration counter and the duration histogram;
    ``reason`` is written to the job log and the ``sync_fail`` event.
    Histogram, notifier and hook errors are swallowed so they cannot mask the
    adapter exception the caller re-raises afterwards.
    """
    record_job_processed('error')
    record_integration_sync_by_id(integration_id, outcome)
    try:
        SYNC_JOB_DURATION_SECONDS.labels(provider=provider, result=outcome).observe(elapsed)
    except Exception:
        pass
    log_job_event('fail', provider=provider, integration_id=integration_id, reason=reason, attempts=attempts)
    try:
        emit_sync_event(db, integration_id, 'sync_fail', {'provider': provider, 'reason': reason})
    except Exception:
        pass
    try:
        hooks_for_integration(db, integration_id).run_post(
            db=db, integration_id=integration_id, status='fail', context={'provider': provider})
    except Exception:
        pass


def run_adapter_sync(provider: str, integration_id: int) -> dict:
    """Execute an adapter sync with retries/backoff for transient failures.

    If running under RQ and Retry is available, rely on RQ for retry scheduling
    (we still guard a couple quick local retries for connection hiccups).

    Args:
        provider: Key into ``ADAPTERS`` selecting the adapter to run.
        integration_id: Identifier passed to the adapter, hooks and metrics.

    Returns:
        The adapter's result mapping with an extra ``'attempts'`` entry.

    Raises:
        ValueError: If *provider* has no registered adapter.
        TransientError: If the adapter still fails transiently on attempt 3.
        AdapterError: On the first non-transient adapter failure.
    """
    adapter = ADAPTERS.get(provider)
    if not adapter:
        record_job_processed('error')
        raise ValueError('unknown provider')

    # Provider inflight accounting (best effort; skipped when Redis is unusable).
    inflight_key = f"sync_provider_inflight:{provider}"
    r = None
    try:
        if Redis:
            client = Redis.from_url(os.getenv('REDIS_URL', 'redis://localhost:6379/0'))
            client.incr(inflight_key)
            # Keep the handle as soon as the increment landed so the decrement
            # in ``finally`` still happens even if setting the TTL fails.
            r = client
            client.expire(inflight_key, 300)
    except Exception:
        pass

    db = None
    try:
        # Lazy import to obtain a session. This sits inside the try block so a
        # failure here still releases the inflight slot taken above.
        from .models import SessionLocal
        db = SessionLocal()

        # Quick local retry loop (max 3 attempts) for immediate hiccups.
        attempts = 0
        while True:
            attempts += 1
            started = None  # reset per attempt so a stale timer is never reused
            try:
                log_job_event('start', provider=provider, integration_id=integration_id, attempt=attempts)
                try:
                    hooks_for_integration(db, integration_id).run_pre(
                        db=db, integration_id=integration_id, context={'provider': provider})
                except Exception:
                    pass
                started = time.perf_counter()
                result = adapter.sync(db=db, integration_id=integration_id)
                elapsed = time.perf_counter() - started
                record_job_processed('success')
                record_integration_sync_by_id(integration_id, 'success')
                try:
                    SYNC_JOB_DURATION_SECONDS.labels(provider=provider, result='success').observe(elapsed)
                except Exception:
                    pass
                log_job_event('success', provider=provider, integration_id=integration_id, attempts=attempts)
                try:
                    emit_sync_event(db, integration_id, 'sync_success', {'provider': provider, 'summary': result})
                except Exception:
                    pass
                try:
                    hooks_for_integration(db, integration_id).run_post(
                        db=db, integration_id=integration_id, status='success',
                        context={'provider': provider, 'count': result.get('count')})
                except Exception:
                    pass
                return {**result, 'attempts': attempts}
            except TransientError:
                if attempts >= 3:
                    elapsed = (time.perf_counter() - started) if started is not None else 0.0
                    _report_sync_failure(db, provider, integration_id,
                                         'transient_fail', 'transient', attempts, elapsed)
                    raise
                _sleep_backoff(attempts)
            except AdapterError:
                elapsed = (time.perf_counter() - started) if started is not None else 0.0
                _report_sync_failure(db, provider, integration_id,
                                     'error', 'adapter_error', attempts, elapsed)
                raise
    finally:
        if db is not None:
            try:
                db.close()
            except Exception:
                pass
        # Decrement inflight
        try:
            if r is not None:
                r.decr(inflight_key)
        except Exception:
            pass
def _integration_provider_caps(provider: str) -> list:
    """Collect positive concurrency caps stored in Integration configs.

    Reads ``sync_max_concurrency`` from the JSON config of every Integration
    row for *provider*, plus ``provider_caps[provider]`` from the most recent
    row with ``provider='admin'`` and ``external_id='settings'``. Rows whose
    config cannot be parsed are ignored.
    """
    import json as _json
    from .models import SessionLocal, Integration

    caps = []
    s = SessionLocal()
    try:
        for row in s.query(Integration).filter_by(provider=provider).all():
            if not row.config:
                continue
            try:
                v = _json.loads(row.config).get('sync_max_concurrency')
            except Exception:
                continue
            if isinstance(v, int) and v > 0:
                caps.append(v)
        # include global admin settings provider caps
        admin_row = (
            s.query(Integration)
            .filter_by(provider='admin', external_id='settings')
            .order_by(Integration.id.desc())
            .first()
        )
        if admin_row and admin_row.config:
            try:
                acfg = _json.loads(admin_row.config) or {}
                pc = acfg.get('provider_caps') or {}
                if isinstance(pc, dict) and provider in pc:
                    pv = int(pc.get(provider))
                    if pv > 0:
                        caps.append(pv)
            except Exception:
                pass
    finally:
        s.close()
    return caps


def _provider_concurrency_cap(provider: str) -> int:
    """Resolve the maximum number of concurrent syncs allowed for *provider*.

    Starts from ``settings.DEFAULT_PROVIDER_CAP`` (lowered by
    ``settings.PROVIDER_CAPS[provider]`` when present); if settings cannot be
    used, falls back to ``SYNC_MAX_CONCURRENCY_PER_PROVIDER`` (default 4).
    The result is then lowered to the smallest cap found in the database;
    any database error leaves the base cap unchanged.
    """
    try:
        from .config import settings
        max_conc = settings.DEFAULT_PROVIDER_CAP
        # apply global per-provider override if present
        if provider in settings.PROVIDER_CAPS:
            max_conc = min(max_conc, int(settings.PROVIDER_CAPS[provider]))
    except Exception:
        max_conc = int(os.getenv('SYNC_MAX_CONCURRENCY_PER_PROVIDER', '4'))
    try:
        caps = _integration_provider_caps(provider)
        if caps:
            max_conc = min(max_conc, min(caps))
    except Exception:
        pass
    return max_conc


def enqueue_adapter_sync(provider: str, integration_id: int):
    """Enqueue ``run_adapter_sync`` for an integration, applying backpressure.

    Args:
        provider: Adapter/provider key of the integration.
        integration_id: Identifier of the integration to sync.

    Returns:
        The value returned by ``q.enqueue`` on success. ``None`` when no queue
        is available (nothing is run here; presumably the caller executes the
        sync inline in that case), when the provider's in-flight count has
        reached its cap, or when this integration was enqueued within the last
        30 seconds.

    The backpressure checks are best effort: any error while evaluating them
    is ignored and the job is enqueued anyway.
    """
    q = get_queue()
    if q is None:
        # No queue available; nothing is enqueued.
        return None

    try:
        r = Redis.from_url(os.getenv('REDIS_URL', 'redis://localhost:6379/0'))

        # Provider concurrency check.
        max_conc = _provider_concurrency_cap(provider)
        inflight_key = f"sync_provider_inflight:{provider}"
        try:
            inflight = int(r.get(inflight_key) or 0)
        except Exception:
            inflight = 0
        if inflight >= max_conc:
            # increment queue depth metric key and skip
            depth_key = f"sync_queue_depth:{provider}"
            r.incr(depth_key)
            r.expire(depth_key, 300)
            log_job_event('enqueue_skipped', provider=provider, integration_id=integration_id,
                          reason='provider_cap', inflight=inflight, max=max_conc)
            record_enqueue_skipped('provider_cap')
            return None

        # Duplicate-enqueue guard (30s). SET with NX+EX creates the key and its
        # TTL in one step, so an interruption can never leave a guard without
        # an expiry that would block this integration indefinitely.
        guard_key = f"sync_guard:{integration_id}"
        if not r.set(guard_key, '1', nx=True, ex=30):
            # already enqueued recently
            log_job_event('enqueue_skipped', integration_id=integration_id, reason='guard')
            record_enqueue_skipped('guard')
            return None
    except Exception:
        pass

    # If RQ Retry is available, add a retry policy with exponential backoff
    if Retry is not None:
        return q.enqueue(run_adapter_sync, provider, integration_id,
                         retry=Retry(max=5, interval=[5, 10, 20, 40, 60]))
    return q.enqueue(run_adapter_sync, provider, integration_id)
def _sync_is_due(raw_config, now: float) -> bool:
    """Decide from an integration's JSON config whether a sync should run.

    The interval comes from ``sync_interval_seconds`` (default 900 seconds)
    with up to 10% random jitter and a floor of 60 seconds. The last run is
    read from ``last_sync_at`` or, failing that, ``github_since``. A missing
    or unparsable config, interval or timestamp never raises: the interval
    falls back to the default, and a missing or unparsable timestamp means
    "due".
    """
    import json as _json
    import random
    from datetime import datetime

    conf = {}
    if raw_config:
        try:
            conf = _json.loads(raw_config)
        except Exception:
            conf = {}
    if not isinstance(conf, dict):
        # Valid JSON that is not an object (list, number, ...) carries no settings.
        conf = {}

    # default 15 minutes
    try:
        interval = int(conf.get('sync_interval_seconds', 900))
    except (TypeError, ValueError, OverflowError):
        interval = 900
    # jitter up to 10%
    jitter = int(interval * 0.1)
    interval_with_jitter = interval + (random.randint(-jitter, jitter) if jitter > 0 else 0)

    last_sync_at = conf.get('last_sync_at') or conf.get('github_since')
    if not last_sync_at:
        return True
    try:
        # parse ISO and compare
        # NOTE(review): a timestamp without an offset is interpreted in the
        # local timezone by ``timestamp()`` -- confirm how writers store it.
        ts = datetime.fromisoformat(last_sync_at.replace('Z', '+00:00')).timestamp()
    except Exception:
        return True
    return (now - ts) >= max(60, interval_with_jitter)


def schedule_periodic_syncs():
    """Naive scheduler: enqueue all integrations periodically.

    Intended to be called by an external timer (cron/k8s CronJob) or a long-running worker.
    Every Integration row whose sync is due is passed to
    ``enqueue_adapter_sync``; a malformed config on one row no longer stops
    the remaining rows from being considered.
    """
    from .models import SessionLocal, Integration
    db = SessionLocal()
    try:
        rows = db.query(Integration).all()
        now = time.time()
        for integ in rows:
            if _sync_is_due(integ.config, now):
                enqueue_adapter_sync(integ.provider, integ.id)
    finally:
        try:
            db.close()
        except Exception:
            pass
if __name__ == "__main__":
    # Simple CLI entrypoint: python -m backend.worker schedule
    # With no argument the command defaults to 'schedule'.
    import sys

    cli_args = sys.argv[1:]
    command = cli_args[0] if cli_args else 'schedule'
    if command == 'schedule':
        schedule_periodic_syncs()