# LifeRPG_v2.0/modern/backend/huggingface_ai.py

"""
HuggingFace AI Integration for LifeRPG Phase 3
- Free/low-cost NLP using HuggingFace Transformers
- Local model inference where possible
- Fallback to HuggingFace API for complex tasks
- Predictive analytics using lightweight models
"""

import os
import re
import json
import asyncio
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
import logging

# For local inference (free)
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
    from transformers import AutoModelForCausalLM, AutoTokenizer as AutoTokenizer2
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    logging.warning("Transformers not installed. Install with: pip install transformers torch")

# For HuggingFace API (free tier available)
import requests
from sqlalchemy.orm import Session
from sqlalchemy import func, desc

class HuggingFaceAI:
    """HuggingFace AI service for habit analysis and NLP"""

    def __init__(self):
        """Read the API settings from the environment and load local models."""
        # Base URL of the hosted inference endpoint; model ids are appended to it.
        self.api_url = "https://api-inference.huggingface.co/models"
        # The token is optional: the lookup yields None when the variable is unset.
        self.api_token = os.environ.get("HUGGINGFACE_API_TOKEN")

        # Populates self.local_models (stays empty when nothing can be loaded).
        self._init_local_models()

    def _init_local_models(self):
        """Create the local inference pipelines used by the other methods.

        ``self.local_models`` is always reset to an empty dict first. When the
        ``transformers`` import succeeded, a ``'sentiment'`` pipeline and a
        ``'text_classifier'`` (zero-shot) pipeline are built. Any exception
        raised while loading is logged as a warning instead of propagating, so
        an entry created before the failure remains in the dict.
        """
        self.local_models = {}

        # Guard clause: nothing to load without the transformers package.
        if not TRANSFORMERS_AVAILABLE:
            logging.warning("Transformers not available - using API fallback only")
            return

        try:
            # Sentiment model; configured to return a score for every label.
            sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                return_all_scores=True
            )
            self.local_models['sentiment'] = sentiment_pipeline

            # Zero-shot classifier used to assign a category to a habit.
            zero_shot_pipeline = pipeline(
                "zero-shot-classification",
                model="facebook/bart-large-mnli"
            )
            self.local_models['text_classifier'] = zero_shot_pipeline
        except Exception as e:
            logging.warning(f"Could not load local models: {e}")
        else:
            logging.info("✅ Local HuggingFace models loaded successfully")

    async def parse_habit_from_text(self, text: str) -> Dict[str, Any]:
        """Parse natural language text into structured habit data"""

        # Use regex patterns first (fast, free, works offline)
        habit_data = self._regex_parse_habit(text)

        # Enhance with AI if available
        if TRANSFORMERS_AVAILABLE and 'text_classifier' in self.local_models:
            try:
                # Categorize the habit
                categories = [
                    "health", "fitness", "productivity", "learning",
                    "social", "creativity", "mindfulness", "nutrition"
                ]

                result = self.local_models['text_classifier'](text, categories)
                if result['scores'][0] > 0.5:  # High confidence
                    habit_data['category'] = result['labels'][0]
                    habit_data['confidence'] = result['scores'][0]
            except Exception as e:
                logging.warning(f"Local classification failed: {e}")

        # Fallback to API for complex parsing if needed
        if not habit_data.get('title') and self.api_token:
            habit_data = await self._api_parse_habit(text)

        return habit_data

    def _regex_parse_habit(self, text: str) -> Dict[str, Any]:
        """Fast regex-based parsing for common habit patterns"""
        text_lower = text.lower()

        # Extract title (remove common prefixes)
        title = text
        for prefix in ['remind me to ', 'i want to ', 'help me ', 'i need to ']:
            if text_lower.startswith(prefix):
                title = text[len(prefix):]
                break

        # Extract frequency/cadence
        cadence = 'daily'  # default
        if any(word in text_lower for word in ['weekly', 'week', 'sunday', 'monday']):
            cadence = 'weekly'
        elif any(word in text_lower for word in ['monthly', 'month']):
            cadence = 'monthly'

        # Extract time
        time_patterns = [
            r'(\d{1,2}):(\d{2})\s*(am|pm)',
            r'(\d{1,2})\s*(am|pm)',
            r'at\s+(\d{1,2})\s*(am|pm)',
        ]

        due_time = None
        for pattern in time_patterns:
            match = re.search(pattern, text_lower)
            if match:
                if len(match.groups()) == 3:  # Hour:minute am/pm
                    hour, minute, period = match.groups()
                    due_time = f"{hour}:{minute} {period.upper()}"
                else:  # Hour am/pm
                    hour, period = match.groups()
                    due_time = f"{hour}:00 {period.upper()}"
                break

        # Extract difficulty indicators
        difficulty = 1  # default
        if any(word in text_lower for word in ['hard', 'difficult', 'challenging']):
            difficulty = 3
        elif any(word in text_lower for word in ['moderate', 'medium']):
            difficulty = 2

        return {
            'title': title.strip(),
            'cadence': cadence,
            'due_time': due_time,
            'difficulty': difficulty,
            'source': 'regex_parser'
        }

    async def _api_parse_habit(self, text: str) -> Dict[str, Any]:
        """Use HuggingFace API for complex parsing (fallback).

        The HTTP request is executed in the default executor so the blocking
        ``requests`` call (up to the 10 second timeout) does not stall the
        event loop.

        Args:
            text: Free-form habit description sent to the model.

        Returns:
            ``{"title": text, "source": "api_parser"}`` when the API answered
            200 with a JSON body, otherwise ``{"title": text, "source":
            "fallback"}``. Errors are logged, never raised.
        """
        # Use a small language model for text generation
        payload = {
            "inputs": f"Parse this habit request into JSON: {text}\nJSON:",
            "parameters": {
                "max_new_tokens": 100,
                "temperature": 0.1,
                "return_full_text": False
            }
        }
        headers = {"Authorization": f"Bearer {self.api_token}"} if self.api_token else {}

        def _post():
            # Runs in a worker thread; see run_in_executor below.
            return requests.post(
                f"{self.api_url}/microsoft/DialoGPT-small",
                headers=headers,
                json=payload,
                timeout=10
            )

        try:
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _post)

            if response.status_code == 200:
                # The generated text is not interpreted yet (simplified); the
                # decode is kept so a non-JSON body still yields the fallback.
                response.json()
                return {"title": text, "source": "api_parser"}

            logging.warning(f"API parsing returned HTTP {response.status_code}")

        except Exception as e:
            # Deliberate best effort: any failure degrades to the fallback.
            logging.warning(f"API parsing failed: {e}")

        return {"title": text, "source": "fallback"}

    async def get_habit_suggestions(self, user_habits: List[str], user_data: Dict) -> List[str]:
        """Generate personalized habit suggestions"""

        # Rule-based suggestions (free, fast)
        suggestions = []

        habit_text = " ".join(user_habits).lower()

        # Health suggestions
        if not any(word in habit_text for word in ['water', 'hydrat']):
            suggestions.append("Drink 8 glasses of water daily")

        if not any(word in habit_text for word in ['walk', 'exercise', 'workout']):
            suggestions.append("Take a 15-minute walk after lunch")

        if not any(word in habit_text for word in ['sleep', 'bed']):
            suggestions.append("Go to bed by 10 PM for better sleep")

        # Productivity suggestions
        if not any(word in habit_text for word in ['read', 'book']):
            suggestions.append("Read for 20 minutes before bed")

        if not any(word in habit_text for word in ['gratitude', 'journal']):
            suggestions.append("Write 3 things you're grateful for")

        # Use AI for personalized suggestions if available
        if TRANSFORMERS_AVAILABLE and 'sentiment' in self.local_models:
            try:
                # Analyze sentiment of existing habits
                for habit in user_habits:
                    sentiment = self.local_models['sentiment'](habit)[0]
                    if sentiment['label'] == 'NEGATIVE':
                        # Suggest positive alternatives
                        suggestions.append("Practice 5 minutes of meditation")
                        break
            except Exception as e:
                logging.warning(f"Sentiment analysis failed: {e}")

        return suggestions[:5]  # Limit to top 5

    async def predict_habit_success(self, habit_data: Dict, user_history: List[Dict]) -> Dict[str, Any]:
        """Predict habit success probability using simple ML"""

        # Simple rule-based prediction (can be enhanced with ML)
        base_probability = 0.7  # Default 70%

        # Adjust based on habit characteristics
        difficulty = habit_data.get('difficulty', 1)
        if difficulty >= 3:
            base_probability -= 0.2

        # Adjust based on user history
        if user_history:
            recent_success_rate = sum(1 for h in user_history[-10:] if h.get('completed', False)) / len(user_history[-10:])
            base_probability = (base_probability + recent_success_rate) / 2

        # Adjust based on category (if available)
        category = habit_data.get('category', '')
        if category in ['health', 'fitness']:
            base_probability += 0.1  # Health habits tend to be more successful

        # Clamp between 0 and 1
        probability = max(0.0, min(1.0, base_probability))

        # Generate insights
        insights = []
        if probability < 0.5:
            insights.append("Consider starting with an easier version of this habit")
        if habit_data.get('due_time'):
            insights.append("Having a specific time increases success rate by 40%")
        if difficulty >= 3:
            insights.append("High difficulty habits benefit from gradual progression")

        return {
            'success_probability': round(probability, 2),
            'confidence': 0.8,  # Static for now
            'insights': insights,
            'recommended_adjustments': self._get_habit_adjustments(habit_data, probability)
        }

    def _get_habit_adjustments(self, habit_data: Dict, probability: float) -> List[str]:
        """Suggest adjustments to improve habit success"""
        adjustments = []

        if probability < 0.6:
            adjustments.append("Start with a smaller, easier version")
            adjustments.append("Add a specific time and location")

        if habit_data.get('difficulty', 1) >= 3:
            adjustments.append("Break into smaller daily steps")

        if not habit_data.get('due_time'):
            adjustments.append("Set a specific time for better consistency")

        return adjustments

    async def analyze_habit_patterns(self, db: Session, user_id: int) -> Dict[str, Any]:
        """Summarise a user's habit patterns from stored habits and logs.

        Loads every habit of ``user_id`` and the user's logs from the last 30
        days, runs the private pattern analysers over them, and derives
        insights and recommendations from the combined patterns.

        Returns:
            Dict with the keys ``patterns``, ``insights`` and
            ``recommendations``.
        """
        # Deferred import, kept local to avoid a circular import.
        from .models import Habit, Log

        habits = db.query(Habit).filter(Habit.user_id == user_id).all()

        # Logs are limited to a rolling 30-day window ending now.
        window_start = datetime.now() - timedelta(days=30)
        user_logs = db.query(Log).filter(Log.user_id == user_id)
        recent_logs = user_logs.filter(Log.timestamp >= window_start).all()

        # Basic pattern analysis, one entry per analyser.
        patterns = {}
        patterns['best_time_of_day'] = self._find_best_time_pattern(recent_logs)
        patterns['success_by_difficulty'] = self._analyze_difficulty_success(habits, recent_logs)
        patterns['streak_patterns'] = self._analyze_streak_patterns(habits)
        patterns['category_performance'] = self._analyze_category_performance(habits, recent_logs)

        insights = self._generate_pattern_insights(patterns)
        recommendations = self._generate_recommendations(patterns)

        return {
            'patterns': patterns,
            'insights': insights,
            'recommendations': recommendations
        }

    def _find_best_time_pattern(self, logs: List) -> Dict[str, Any]:
        """Find the time of day user is most successful"""
        time_success = {}

        for log in logs:
            if log.action == 'complete':
                hour = log.timestamp.hour
                if hour not in time_success:
                    time_success[hour] = 0
                time_success[hour] += 1

        if time_success:
            best_hour = max(time_success.keys(), key=lambda k: time_success[k])
            return {
                'best_hour': best_hour,
                'success_count': time_success[best_hour],
                'total_completions': sum(time_success.values())
            }

        return {'best_hour': None, 'success_count': 0}

    def _analyze_difficulty_success(self, habits: List, logs: List) -> Dict[str, float]:
        """Analyze success rate by habit difficulty"""
        difficulty_stats = {}

        for habit in habits:
            difficulty = habit.difficulty or 1
            if difficulty not in difficulty_stats:
                difficulty_stats[difficulty] = {'attempts': 0, 'completions': 0}

            habit_logs = [l for l in logs if l.habit_id == habit.id]
            difficulty_stats[difficulty]['attempts'] += len(habit_logs)
            difficulty_stats[difficulty]['completions'] += len([l for l in habit_logs if l.action == 'complete'])

        # Calculate success rates
        success_rates = {}
        for difficulty, stats in difficulty_stats.items():
            if stats['attempts'] > 0:
                success_rates[f'difficulty_{difficulty}'] = stats['completions'] / stats['attempts']

        return success_rates

    def _analyze_streak_patterns(self, habits: List) -> Dict[str, Any]:
        """Analyze streak patterns"""
        streaks = [h.current_streak or 0 for h in habits]

        return {
            'average_streak': sum(streaks) / len(streaks) if streaks else 0,
            'max_streak': max(streaks) if streaks else 0,
            'habits_with_streaks': len([s for s in streaks if s > 0])
        }

    def _analyze_category_performance(self, habits: List, logs: List) -> Dict[str, float]:
        """Analyze performance by habit category"""
        category_stats = {}

        for habit in habits:
            category = habit.category or 'uncategorized'
            if category not in category_stats:
                category_stats[category] = {'attempts': 0, 'completions': 0}

            habit_logs = [l for l in logs if l.habit_id == habit.id]
            category_stats[category]['attempts'] += len(habit_logs)
            category_stats[category]['completions'] += len([l for l in habit_logs if l.action == 'complete'])

        # Calculate success rates
        success_rates = {}
        for category, stats in category_stats.items():
            if stats['attempts'] > 0:
                success_rates[category] = stats['completions'] / stats['attempts']

        return success_rates

    def _generate_pattern_insights(self, patterns: Dict) -> List[str]:
        """Generate insights from patterns"""
        insights = []

        best_time = patterns.get('best_time_of_day', {})
        if best_time.get('best_hour'):
            hour_12 = best_time['best_hour']
            if hour_12 > 12:
                hour_12 -= 12
                period = "PM"
            else:
                period = "AM"
            insights.append(f"You're most successful completing habits at {hour_12} {period}")

        difficulty_success = patterns.get('success_by_difficulty', {})
        if difficulty_success:
            best_difficulty = max(difficulty_success.keys(), key=lambda k: difficulty_success[k])
            insights.append(f"You have highest success with {best_difficulty} habits")

        streak_patterns = patterns.get('streak_patterns', {})
        if streak_patterns.get('average_streak', 0) > 5:
            insights.append("You're great at maintaining streaks!")

        return insights

    def _generate_recommendations(self, patterns: Dict) -> List[str]:
        """Generate recommendations based on patterns"""
        recommendations = []

        best_time = patterns.get('best_time_of_day', {})
        if best_time.get('best_hour'):
            recommendations.append(f"Schedule new habits around {best_time['best_hour']}:00 for better success")

        difficulty_success = patterns.get('success_by_difficulty', {})
        if difficulty_success.get('difficulty_1', 0) > difficulty_success.get('difficulty_3', 0):
            recommendations.append("Start with easier habits and gradually increase difficulty")

        category_performance = patterns.get('category_performance', {})
        if category_performance:
            best_category = max(category_performance.keys(), key=lambda k: category_performance[k])
            recommendations.append(f"Focus on {best_category} habits - you excel in this area")

        return recommendations

# Global instance
# Created at import time: HuggingFaceAI.__init__ calls _init_local_models(),
# so importing this module attempts to build the local pipelines whenever
# the transformers import at the top of the file succeeded.
huggingface_ai = HuggingFaceAI()