/**
 * Azure Text Analytics Service
 *
 * This service provides sentiment analysis and key phrase extraction
 * using Azure Language Service (formerly Text Analytics).
 *
 * Implements the same functionality as the C# TextAnalyticsService.cs
 * to ensure compatibility with existing data.
 */

const { TextAnalyticsClient, AzureKeyCredential } = require("@azure/ai-text-analytics");
const config = require("@config");
const stopWordsService = require("./stopWords.service");

/** Default maximum number of characters placed in one document sent to Azure. */
const DEFAULT_MAX_CHUNK_SIZE = 5100;

/** Maximum number of documents sent to Azure in a single request. */
const MAX_DOCUMENTS_PER_REQUEST = 10;

/**
 * Matches one sentence (text followed by terminal punctuation) OR the trailing
 * remainder of the text that has no terminal punctuation. Without the second
 * alternative the last, unterminated sentence would be silently dropped.
 */
const SENTENCE_PATTERN = /[^.!?]+[.!?]+|[^.!?]+$/g;

/**
 * Matches everything that is not a letter, combining mark, digit, underscore
 * or whitespace. Unicode-aware so that non-ASCII letters (e.g. German umlauts)
 * survive normalization.
 */
const NON_WORD_PATTERN = /[^\p{L}\p{M}\p{N}_\s]/gu;

/**
 * Builds the neutral fallback scores returned whenever no real analysis is available.
 * A fresh object is returned on every call so callers may mutate it safely.
 *
 * @returns {{positiveScore: number, negativeScore: number, neutralScore: number}}
 */
function createDefaultSentimentScores() {
  return {
    positiveScore: 0.33,
    negativeScore: 0.33,
    neutralScore: 0.34
  };
}

/**
 * Renders a per-document error as readable text instead of "[object Object]".
 *
 * @param {*} error - Error value attached to a result document
 * @returns {string} Human readable description
 */
function describeAzureError(error) {
  if (error && typeof error === 'object') {
    const { code, message } = error;
    if (code && message) {
      return `${code}: ${message}`;
    }
    if (message) {
      return String(message);
    }
  }
  return String(error);
}

/**
 * Splits text into trimmed, non-empty sentences. Trailing text without
 * terminal punctuation is kept as its own sentence.
 *
 * @param {string} text - The text to split
 * @returns {string[]} Sentences in their original order
 */
function splitIntoSentences(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return [];
  }

  const matches = text.match(SENTENCE_PATTERN) || [text];
  return matches
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
}

/**
 * Splits a sentence that is longer than the limit into pieces on word
 * boundaries. A single word longer than the limit is cut into fixed-size
 * slices so that no piece ever exceeds the limit.
 *
 * @param {string} sentence - The oversized sentence
 * @param {number} maxChunkSize - Maximum length of each piece
 * @returns {string[]} Pieces, each at most maxChunkSize characters long
 */
function splitOversizedSentence(sentence, maxChunkSize) {
  const pieces = [];
  let current = '';

  for (const word of sentence.split(/\s+/)) {
    if (!word) {
      continue;
    }

    let remaining = word;

    // A word without any whitespace can itself exceed the limit: hard-split it.
    while (remaining.length > maxChunkSize) {
      if (current) {
        pieces.push(current);
        current = '';
      }
      pieces.push(remaining.slice(0, maxChunkSize));
      remaining = remaining.slice(maxChunkSize);
    }

    if (!remaining) {
      continue;
    }

    const candidate = current ? `${current} ${remaining}` : remaining;
    if (candidate.length > maxChunkSize) {
      // candidate only exceeds the limit when `current` is non-empty
      pieces.push(current);
      current = remaining;
    } else {
      current = candidate;
    }
  }

  if (current) {
    pieces.push(current);
  }

  return pieces;
}

/**
 * Wraps plain texts into the document objects expected by the Azure client.
 *
 * @param {string[]} texts - Document texts
 * @param {string} language - Language code attached to every document
 * @returns {Array<{id: string, language: string, text: string}>}
 */
function toDocuments(texts, language) {
  return texts.map((text, index) => ({
    id: String(index),
    language,
    text
  }));
}

/**
 * Sends documents to Azure in request-sized batches and concatenates the results.
 * Batches run sequentially; a rejected batch rejects the whole call.
 *
 * @param {Array} documents - Documents to send
 * @param {Function} sendBatch - Receives one batch, returns a promise of its results
 * @returns {Promise<Array>} Results of all batches in input order
 */
async function processInBatches(documents, sendBatch) {
  const results = [];

  for (let start = 0; start < documents.length; start += MAX_DOCUMENTS_PER_REQUEST) {
    const batch = documents.slice(start, start + MAX_DOCUMENTS_PER_REQUEST);
    results.push(...(await sendBatch(batch)));
  }

  return results;
}

/**
 * Lower-cases text and strips punctuation so that sentences and transcribed
 * segments can be compared word by word.
 *
 * @param {string} text - Raw text
 * @returns {string} Normalized text
 */
function normalizeForMatching(text) {
  return text
    .normalize('NFC')
    .toLowerCase()
    .replace(NON_WORD_PATTERN, '')
    .trim();
}

/**
 * Splits a string into the set of its non-empty whitespace-separated words.
 *
 * @param {string} value - Text to tokenize
 * @returns {Set<string>} Unique words
 */
function toWordSet(value) {
  return new Set(String(value).split(/\s+/).filter(Boolean));
}

/**
 * Sentiment analysis, key phrase extraction and key word extraction for
 * transcripts. Azure-backed methods degrade gracefully: when the client is not
 * configured or a request fails they log the problem and return a fallback
 * value instead of throwing.
 */
class TextAnalyticsService {
  /**
   * Creates the Azure client from `config.azureLanguage`. When key or endpoint
   * is missing, `this.client` is set to null and all Azure-backed methods
   * return their fallback values.
   */
  constructor() {
    // Initialize Azure Text Analytics client
    if (!config.azureLanguage?.key || !config.azureLanguage?.endpoint) {
      console.warn("⚠️ Azure Language Service credentials not configured");
      this.client = null;
    } else {
      this.client = new TextAnalyticsClient(
        config.azureLanguage.endpoint,
        new AzureKeyCredential(config.azureLanguage.key)
      );
      console.log("✅ Azure Text Analytics Service initialized");
    }
  }

  /**
   * Chunks text into segments suitable for Azure Text Analytics.
   * Sentence boundaries are preserved where possible; a sentence longer than
   * the limit is split on word boundaries, and a single word longer than the
   * limit is cut into fixed-size slices. Sentences inside a chunk are joined
   * with a single space.
   *
   * @param {string} text - The text to chunk
   * @param {number} [maxChunkSize=5100] - Maximum number of characters per chunk
   * @returns {string[]} Array of text chunks (empty for empty or whitespace-only text)
   * @throws {RangeError} If maxChunkSize is not a positive integer
   */
  chunkText(text, maxChunkSize = DEFAULT_MAX_CHUNK_SIZE) {
    if (!Number.isInteger(maxChunkSize) || maxChunkSize < 1) {
      throw new RangeError(`maxChunkSize must be a positive integer, got ${maxChunkSize}`);
    }

    if (typeof text !== 'string' || text.trim().length === 0) {
      return [];
    }

    // If text is small enough, return as single chunk
    if (text.length <= maxChunkSize) {
      return [text];
    }

    const chunks = [];
    let currentChunk = '';

    for (const sentence of splitIntoSentences(text)) {
      if (sentence.length > maxChunkSize) {
        // Flush what we have so the output keeps the original order
        if (currentChunk) {
          chunks.push(currentChunk);
          currentChunk = '';
        }
        chunks.push(...splitOversizedSentence(sentence, maxChunkSize));
        continue;
      }

      const candidate = currentChunk ? `${currentChunk} ${sentence}` : sentence;
      if (candidate.length > maxChunkSize) {
        // Only reachable with a non-empty currentChunk, so no empty chunk is emitted
        chunks.push(currentChunk);
        currentChunk = sentence;
      } else {
        currentChunk = candidate;
      }
    }

    // Add remaining chunk
    if (currentChunk) {
      chunks.push(currentChunk);
    }

    console.log(`📄 Text chunked into ${chunks.length} segments (max ${maxChunkSize} chars each)`);
    return chunks;
  }

  /**
   * Analyzes sentiment of text.
   * The text is chunked, the chunks are sent to Azure in batches, and the
   * confidence scores of all successfully analyzed chunks are averaged
   * (unweighted).
   *
   * @param {string} text - The text to analyze
   * @param {string} language - Language code ('de' or 'en')
   * @returns {Promise<{positiveScore: number, negativeScore: number, neutralScore: number}>}
   *   Averaged scores, or the default scores (0.33 / 0.33 / 0.34) when the
   *   client is unavailable, the text is empty, or the request fails
   */
  async analyzeSentiment(text, language = 'en') {
    // Fallback if client not initialized
    if (!this.client) {
      console.warn("Azure Text Analytics not available, using default scores");
      return createDefaultSentimentScores();
    }

    try {
      const chunks = this.chunkText(text);

      if (chunks.length === 0) {
        return createDefaultSentimentScores();
      }

      const documents = toDocuments(chunks, language);

      console.log(`🔍 Analyzing sentiment for ${documents.length} chunks in '${language}'...`);

      // Documents already carry their language, so the second argument is the
      // options bag (the language-string overload only applies to plain strings).
      const results = await processInBatches(documents, (batch) =>
        this.client.analyzeSentiment(batch, { includeOpinionMining: false })
      );

      let totalPositive = 0;
      let totalNegative = 0;
      let totalNeutral = 0;
      let validResults = 0;

      for (const result of results) {
        if (result.error) {
          console.error(`Sentiment analysis error for chunk ${result.id}: ${describeAzureError(result.error)}`);
          continue;
        }

        totalPositive += result.confidenceScores.positive;
        totalNegative += result.confidenceScores.negative;
        totalNeutral += result.confidenceScores.neutral;
        validResults++;
      }

      // Every chunk failed: fall back to the defaults rather than dividing by zero
      if (validResults === 0) {
        const fallback = createDefaultSentimentScores();
        console.log(`✅ Sentiment scores - Positive: ${fallback.positiveScore.toFixed(3)}, Negative: ${fallback.negativeScore.toFixed(3)}, Neutral: ${fallback.neutralScore.toFixed(3)}`);
        return fallback;
      }

      const avgPositive = totalPositive / validResults;
      const avgNegative = totalNegative / validResults;
      const avgNeutral = totalNeutral / validResults;

      console.log(`✅ Sentiment scores - Positive: ${avgPositive.toFixed(3)}, Negative: ${avgNegative.toFixed(3)}, Neutral: ${avgNeutral.toFixed(3)}`);

      return {
        positiveScore: avgPositive,
        negativeScore: avgNegative,
        neutralScore: avgNeutral
      };
    } catch (error) {
      console.error("❌ Azure sentiment analysis failed:", error);
      // Return default values on error
      return createDefaultSentimentScores();
    }
  }

  /**
   * Extracts key phrases from text.
   * The text is chunked, the chunks are sent to Azure in batches, and the
   * phrases of all chunks are merged without duplicates (case-sensitive).
   *
   * @param {string} text - The text to analyze
   * @param {string} language - Language code ('de' or 'en')
   * @returns {Promise<string[]>} Array of unique key phrases; empty when the
   *   client is unavailable, the text is empty, or the request fails
   */
  async extractKeyPhrases(text, language = 'en') {
    // Fallback if client not initialized
    if (!this.client) {
      console.warn("Azure Text Analytics not available, returning empty key phrases");
      return [];
    }

    try {
      const chunks = this.chunkText(text);

      if (chunks.length === 0) {
        return [];
      }

      const documents = toDocuments(chunks, language);

      console.log(`🔍 Extracting key phrases for ${documents.length} chunks in '${language}'...`);

      // Language travels inside each document; no language argument is needed here
      const results = await processInBatches(documents, (batch) =>
        this.client.extractKeyPhrases(batch)
      );

      // Use Set to avoid duplicates across chunks
      const allKeyPhrases = new Set();

      for (const result of results) {
        if (result.error) {
          console.error(`Key phrase extraction error for chunk ${result.id}: ${describeAzureError(result.error)}`);
          continue;
        }

        for (const phrase of result.keyPhrases || []) {
          allKeyPhrases.add(phrase);
        }
      }

      const keyPhrases = Array.from(allKeyPhrases);
      console.log(`✅ Extracted ${keyPhrases.length} unique key phrases`);

      return keyPhrases;
    } catch (error) {
      console.error("❌ Azure key phrase extraction failed:", error);
      return [];
    }
  }

  /**
   * Analyzes sentences individually for sentiment and maps each sentence to
   * the best matching transcribed segment to obtain timing information.
   * A failing batch is logged and skipped; the remaining batches are still
   * processed.
   *
   * @param {string} text - The full transcript text
   * @param {Array} transcribedSegments - Array of transcribed segments with
   *   `text`, `offset` and `duration` properties
   * @param {string} language - Language code ('de' or 'en')
   * @returns {Promise<Array>} Array of
   *   `{text, sentiment, positiveScore, negativeScore, neutralScore, offset, duration}`;
   *   offset and duration are 0 when no segment matches
   */
  async analyzeSentences(text, transcribedSegments, language = 'en') {
    // Fallback if client not initialized
    if (!this.client) {
      console.warn("Azure Text Analytics not available, returning empty sentence analysis");
      return [];
    }

    try {
      // Empty sentences are filtered out because Azure rejects empty documents
      const sentences = splitIntoSentences(text);

      if (sentences.length === 0) {
        return [];
      }

      console.log(`🔍 Analyzing ${sentences.length} sentences individually...`);

      const documents = toDocuments(sentences, language);
      const analyzedSentences = [];

      for (let start = 0; start < documents.length; start += MAX_DOCUMENTS_PER_REQUEST) {
        const batch = documents.slice(start, start + MAX_DOCUMENTS_PER_REQUEST);

        try {
          const results = await this.client.analyzeSentiment(batch);

          for (let position = 0; position < results.length; position++) {
            const result = results[position];

            if (result.error) {
              console.error(`Sentiment analysis error for sentence ${result.id}: ${describeAzureError(result.error)}`);
              continue;
            }

            // Results are consumed positionally, mirroring the order of the batch
            const sentenceText = batch[position].text;
            const matchingSegment = this.findMatchingSegment(sentenceText, transcribedSegments);

            analyzedSentences.push({
              text: sentenceText,
              sentiment: result.sentiment, // "positive", "negative", or "neutral"
              positiveScore: result.confidenceScores.positive,
              negativeScore: result.confidenceScores.negative,
              neutralScore: result.confidenceScores.neutral,
              offset: (matchingSegment && matchingSegment.offset) || 0,
              duration: (matchingSegment && matchingSegment.duration) || 0
            });
          }
        } catch (batchError) {
          console.error(`Error analyzing sentence batch ${start / MAX_DOCUMENTS_PER_REQUEST + 1}:`, batchError);
        }
      }

      console.log(`✅ Analyzed ${analyzedSentences.length} sentences with sentiment scores`);
      return analyzedSentences;
    } catch (error) {
      console.error("❌ Sentence-level sentiment analysis failed:", error);
      return [];
    }
  }

  /**
   * Helper method to find the transcribed segment that best matches a sentence.
   * Both texts are lower-cased and stripped of punctuation (Unicode-aware, so
   * non-ASCII letters are preserved) before their word overlap is computed.
   * The first segment with the highest score wins; the score must exceed 0.5.
   *
   * @param {string} sentence - The sentence to match
   * @param {Array} segments - Array of transcribed segments
   * @returns {Object|null} Matching segment or null
   */
  findMatchingSegment(sentence, segments) {
    if (typeof sentence !== 'string' || !segments || segments.length === 0) {
      return null;
    }

    const normalizedSentence = normalizeForMatching(sentence);

    let bestMatch = null;
    let bestScore = 0;

    for (const segment of segments) {
      if (!segment || typeof segment.text !== 'string' || segment.text.length === 0) {
        continue;
      }

      const score = this.calculateSimilarity(normalizedSentence, normalizeForMatching(segment.text));

      // Require more than 50% similarity and a strictly better score than before
      if (score > 0.5 && score > bestScore) {
        bestScore = score;
        bestMatch = segment;
      }
    }

    return bestMatch;
  }

  /**
   * Calculate similarity between two strings as the number of shared unique
   * words divided by the larger of the two unique word counts.
   * Empty or whitespace-only input has no words and therefore scores 0.
   *
   * @param {string} str1 - First string
   * @param {string} str2 - Second string
   * @returns {number} Similarity score between 0 and 1
   */
  calculateSimilarity(str1, str2) {
    const words1 = toWordSet(str1);
    const words2 = toWordSet(str2);

    const largest = Math.max(words1.size, words2.size);
    if (largest === 0) {
      return 0;
    }

    let matches = 0;
    for (const word of words1) {
      if (words2.has(word)) {
        matches++;
      }
    }

    return matches / largest;
  }

  /**
   * Retry helper for Azure API calls with exponential backoff.
   * The function is invoked at least once and at most `maxRetries` times in
   * total. The delay doubles after every failed attempt; no delay is added
   * after the final attempt.
   *
   * @param {Function} fn - Function to retry
   * @param {number} maxRetries - Maximum number of attempts
   * @param {number} [baseDelayMs=1000] - Delay before the second attempt, in milliseconds
   * @returns {Promise<any>} Result of the function
   * @throws The error of the last failed attempt
   */
  async retryWithBackoff(fn, maxRetries = 3, baseDelayMs = 1000) {
    // Always try at least once so that a real error (never `undefined`) is thrown
    const attempts = Math.max(1, Math.trunc(Number(maxRetries)) || 1);
    let lastError;

    for (let attempt = 1; attempt <= attempts; attempt++) {
      try {
        return await fn();
      } catch (error) {
        lastError = error;

        if (attempt === attempts) {
          console.warn(`Attempt ${attempt} failed, no retries left`);
          break;
        }

        console.warn(`Attempt ${attempt} failed, retrying...`);

        // Exponential backoff
        const delay = 2 ** (attempt - 1) * baseDelayMs;
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }

    throw lastError;
  }

  /**
   * Extracts key words from text by delegating to the stop words service.
   * This is different from Azure's key phrase extraction - it extracts individual important words.
   * Matches the C# implementation's key_words field.
   *
   * @param {string} text - The text to extract key words from
   * @param {string} language - Language code ('de' or 'en')
   * @returns {string[]} Array of key words; empty if extraction throws
   */
  extractKeyWords(text, language = 'en') {
    try {
      const keyWords = stopWordsService.extractKeyWords(text, language);

      console.log(`🔑 Extracted ${keyWords.length} key words for '${language}'`);
      return keyWords;
    } catch (error) {
      console.error("❌ Key words extraction failed:", error);
      return [];
    }
  }

  /**
   * Gets the top N key words from text by delegating to the stop words service.
   *
   * @param {string} text - The text to analyze
   * @param {string} language - Language code ('de' or 'en')
   * @param {number} topN - Number of top words to return
   * @returns {Array} Result of the stop words service (documented there as
   *   {word, count} objects sorted by frequency); empty if extraction throws
   */
  getTopKeyWords(text, language = 'en', topN = 20) {
    try {
      const topKeyWords = stopWordsService.getTopKeyWords(text, language, topN);

      console.log(`📊 Retrieved top ${topKeyWords.length} key words for '${language}'`);
      return topKeyWords;
    } catch (error) {
      console.error("❌ Top key words extraction failed:", error);
      return [];
    }
  }
}

// Export singleton instance: the service is constructed here, at module load time,
// so the constructor's credential check and its log output run at that point.
module.exports = new TextAnalyticsService();