/**
 * Test Script for Key Words Extraction
 *
 * This script tests the key words extraction functionality
 * with both German and English text samples.
 *
 * Usage: node src/test/testKeyWordsExtraction.js
 */

// Set environment to development.
// NOTE(review): this assignment runs before the service modules below are
// required, presumably so they observe NODE_ENV=development while loading —
// confirm against the services' configuration handling.
process.env.NODE_ENV = 'development';

// Setup module aliases.
// Required for its side effect only, and placed ahead of the service requires;
// presumably the services resolve aliased paths internally — TODO confirm.
require('module-alias/register');

// Services under test: key word / key phrase extraction and the stop/filler word lists.
const textAnalyticsService = require('../services/textAnalytics.service');
const stopWordsService = require('../services/stopWords.service');

// Test data - same as in testTextAnalytics.js
// German sample: presentation feedback used by the German key words test and the database format test.
const germanText = `Das war eine großartige Präsentation. Die Folien waren sehr gut strukturiert und die Inhalte klar verständlich. Allerdings war das Tempo etwas zu schnell und einige wichtige Punkte wurden nur oberflächlich behandelt. Insgesamt aber eine sehr positive Erfahrung mit vielen wertvollen Informationen.`;

// English sample: English-language counterpart used by the English key words test.
const englishText = `This was an excellent presentation. The slides were well-organized and the content was clearly explained. However, the pace was a bit too fast and some important points were only covered superficially. Overall, it was a very positive experience with lots of valuable information.`;

// Additional test with filler words
// German sample containing spoken fillers such as "Äh", "also", "ähm", "sozusagen", "quasi" and "halt".
const germanTextWithFillers = `Äh, also ich muss sagen, ähm, die Präsentation war eigentlich wirklich gut. Also, die Struktur war sozusagen perfekt und, äh, die Inhalte waren quasi sehr verständlich. Halt, das Tempo war irgendwie etwas schnell, aber insgesamt war es eigentlich eine tolle Erfahrung.`;

// English sample containing spoken fillers such as "Um", "like", "uh", "You know" and "Sort of".
const englishTextWithFillers = `Um, like, I have to say, uh, the presentation was actually really good. You know, the structure was basically perfect and, um, the content was literally very understandable. Sort of, the pace was like a bit fast, but overall it was actually a great experience.`;

/**
 * Exercises key word extraction on the German sample text and prints the results.
 *
 * Prints the sample with its whitespace-delimited word count, the list returned
 * by `extractKeyWords`, the ranking returned by `getTopKeyWords` (limit 10), and
 * the first five entries returned by the awaited `extractKeyPhrases` call, which
 * the output labels as Azure key phrases.
 *
 * @returns {Promise<void>} Settles after the key phrase comparison has been printed.
 */
async function testGermanKeyWords() {
  const language = 'de';

  const banner = [
    '\n========================================',
    '🇩🇪 TESTING GERMAN KEY WORDS EXTRACTION',
    '========================================\n',
  ];
  for (const line of banner) {
    console.log(line);
  }

  console.log('📝 Original Text:');
  console.log(germanText);
  const tokens = germanText.split(/\s+/);
  console.log('\nWord count:', tokens.length);

  // Plain key word list
  console.log('\n🔑 Extracting Key Words...');
  const extracted = textAnalyticsService.extractKeyWords(germanText, language);
  const extractedCount = extracted.length;
  console.log(`\n✅ Extracted ${extractedCount} key words:`);
  console.log(extracted.join(', '));

  // Frequency ranking, numbered from 1
  console.log('\n📊 Top 10 Most Frequent Key Words:');
  const ranking = textAnalyticsService.getTopKeyWords(germanText, language, 10);
  ranking.forEach(({ word, count }, position) => {
    const rank = position + 1;
    console.log(`  ${rank}. "${word}" (${count} times)`);
  });

  // Key phrase lookup for side-by-side comparison; only the first five are shown
  console.log('\n🔍 Comparing with Azure Key Phrases...');
  const phrases = await textAnalyticsService.extractKeyPhrases(germanText, language);
  const phraseLabel = `Azure Key Phrases (${phrases.length}):`;
  const phrasePreview = phrases.slice(0, 5).join(', ');
  console.log(phraseLabel, phrasePreview);
}

/**
 * Exercises key word extraction on the English sample text and prints the results.
 *
 * Prints the sample with its whitespace-delimited word count, the list returned
 * by `extractKeyWords`, the ranking returned by `getTopKeyWords` (limit 10), and
 * the first five entries returned by the awaited `extractKeyPhrases` call, which
 * the output labels as Azure key phrases.
 *
 * @returns {Promise<void>} Settles after the key phrase comparison has been printed.
 */
async function testEnglishKeyWords() {
  const language = 'en';

  const banner = [
    '\n========================================',
    '🇬🇧 TESTING ENGLISH KEY WORDS EXTRACTION',
    '========================================\n',
  ];
  for (const line of banner) {
    console.log(line);
  }

  console.log('📝 Original Text:');
  console.log(englishText);
  const tokens = englishText.split(/\s+/);
  console.log('\nWord count:', tokens.length);

  // Plain key word list
  console.log('\n🔑 Extracting Key Words...');
  const extracted = textAnalyticsService.extractKeyWords(englishText, language);
  const extractedCount = extracted.length;
  console.log(`\n✅ Extracted ${extractedCount} key words:`);
  console.log(extracted.join(', '));

  // Frequency ranking, numbered from 1
  console.log('\n📊 Top 10 Most Frequent Key Words:');
  const ranking = textAnalyticsService.getTopKeyWords(englishText, language, 10);
  ranking.forEach(({ word, count }, position) => {
    const rank = position + 1;
    console.log(`  ${rank}. "${word}" (${count} times)`);
  });

  // Key phrase lookup for side-by-side comparison; only the first five are shown
  console.log('\n🔍 Comparing with Azure Key Phrases...');
  const phrases = await textAnalyticsService.extractKeyPhrases(englishText, language);
  const phraseLabel = `Azure Key Phrases (${phrases.length}):`;
  const phrasePreview = phrases.slice(0, 5).join(', ');
  console.log(phraseLabel, phrasePreview);
}

/**
 * Runs the filler word check for one language and prints the outcome.
 *
 * Extracts key words from `text`, fetches the filler word list for `language`,
 * and reports any extracted key word whose lower-cased form appears in that list.
 *
 * @param {string} heading - Line printed before the results for this language.
 * @param {string} text - Sample text to extract key words from.
 * @param {string} language - Language code passed to both services (e.g. 'de', 'en').
 * @returns {string[]} The extracted key words that are still filler words (empty on success).
 */
function reportFillerWordsRemoval(heading, text, language) {
  console.log(heading);
  const keyWords = textAnalyticsService.extractKeyWords(text, language);
  const fillers = stopWordsService.getFillerWords(language);

  console.log('Filler words to remove:', fillers.join(', '));
  console.log(`\nExtracted ${keyWords.length} key words (fillers removed):`);
  console.log(keyWords.join(', '));

  // Build the lookup once instead of scanning the filler list for every key word.
  const fillerLookup = new Set(fillers);
  const remaining = keyWords.filter((word) => fillerLookup.has(word.toLowerCase()));

  if (remaining.length === 0) {
    console.log('✅ Filler words successfully removed: YES');
  } else {
    // A failed check must not carry the success marker; name the offenders so the
    // failure is actionable.
    console.log(`❌ Filler words successfully removed: NO (remaining: ${remaining.join(', ')})`);
  }
  return remaining;
}

/**
 * Checks that extracted key words contain no filler words, for the German and
 * the English filler sample texts in turn, and prints a verdict for each.
 *
 * @returns {Promise<void>} Resolves once both languages have been reported.
 */
async function testFillerWordsRemoval() {
  console.log('\n========================================');
  console.log('🎯 TESTING FILLER WORDS REMOVAL');
  console.log('========================================\n');

  reportFillerWordsRemoval('🇩🇪 German Text with Fillers:', germanTextWithFillers, 'de');
  reportFillerWordsRemoval('\n🇬🇧 English Text with Fillers:', englishTextWithFillers, 'en');
}

/**
 * Prints how many stop words are loaded for German and English, followed by
 * the first 20 entries of each list as a sample.
 *
 * @returns {Promise<void>} Resolves once the statistics have been printed.
 */
async function testStopWordsCount() {
  const banner = [
    '\n========================================',
    '📚 STOP WORDS STATISTICS',
    '========================================\n',
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Both lists are fetched up front, before any statistics are printed.
  const deWords = stopWordsService.getStopWords('de');
  const enWords = stopWordsService.getStopWords('en');

  console.log(`German stop words loaded: ${deWords.length}`);
  console.log(`English stop words loaded: ${enWords.length}`);

  const samples = [
    ['\n📊 Sample German stop words:', deWords],
    ['\n📊 Sample English stop words:', enWords],
  ];
  for (const [heading, words] of samples) {
    console.log(heading);
    console.log(words.slice(0, 20).join(', '));
  }
}

/**
 * Prints the German key words serialized as JSON, the form in which the output
 * says they are stored in the `key_words` TEXT field.
 *
 * Shows the first ten key words, a preview of the JSON string limited to 200
 * characters, and the total JSON length. The ellipsis is appended only when
 * the preview was actually cut, so a short JSON string is shown verbatim.
 *
 * @returns {Promise<void>} Resolves once the format details have been printed.
 */
async function testDatabaseFormat() {
  console.log('\n========================================');
  console.log('💾 DATABASE FORMAT TEST');
  console.log('========================================\n');

  const keyWords = textAnalyticsService.extractKeyWords(germanText, 'de');
  const keyWordsJSON = JSON.stringify(keyWords);

  const previewLimit = 200;
  const isTruncated = keyWordsJSON.length > previewLimit;
  const preview = isTruncated
    ? `${keyWordsJSON.substring(0, previewLimit)}...`
    : keyWordsJSON;

  console.log('Key words array:', keyWords.slice(0, 10));
  console.log('\n📦 JSON format for database:');
  console.log(preview);
  console.log(`\nTotal JSON length: ${keyWordsJSON.length} characters`);
  console.log('✅ Ready to store in key_words TEXT field');
}

/**
 * Runs every test section in sequence, framed by a suite header (with the
 * current ISO timestamp and NODE_ENV) and a completion footer.
 *
 * Each section is awaited before the next one starts, so a rejection in any
 * section stops the run and propagates to the caller.
 *
 * @returns {Promise<void>} Resolves after the completion footer has been printed.
 */
async function runAllTests() {
  const rule = '====================================================';

  console.log(rule);
  console.log('🚀 KEY WORDS EXTRACTION TEST SUITE');
  console.log(rule);
  const startedAt = new Date().toISOString();
  console.log(`📅 Date: ${startedAt}`);
  console.log(`🔧 Environment: ${process.env.NODE_ENV}`);

  const sections = [
    testStopWordsCount,
    testGermanKeyWords,
    testEnglishKeyWords,
    testFillerWordsRemoval,
    testDatabaseFormat,
  ];
  for (const runSection of sections) {
    await runSection();
  }

  console.log(`\n${rule}`);
  console.log('✅ ALL TESTS COMPLETED');
  console.log(`${rule}\n`);
}

// Execute tests: start the suite and, if it rejects, report the error on
// stderr and terminate the process with exit code 1.
const abortOnFatalError = (error) => {
  console.error('Fatal error during testing:', error);
  process.exit(1);
};

runAllTests().catch(abortOnFatalError);