ai-text-detector

A lightweight, fast JavaScript/TypeScript library for detecting AI-generated text using statistical linguistic analysis (perplexity, burstiness, lexical diversity, and stylometric signals). Works in Node.js, React, and browser environments with zero dependencies.
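A minimal usage sketch. The result fields follow the detector source reproduced below; the import path assumes the package entry point (src/index.ts, listed in the source map but not shown here) re-exports AITextDetector, which is an assumption:

// Hypothetical usage - assumes the entry point re-exports AITextDetector.
import { AITextDetector } from "ai-text-detector";

const detector = new AITextDetector();
const result = detector.detectAIText(
  "It is important to note that this comprehensive framework will enhance and optimize outcomes. " +
    "Furthermore, various stakeholders should leverage the power of best practices."
);

console.log(result.isAIGenerated); // boolean verdict against an adaptive threshold
console.log(result.score);         // 0..1 aggregate AI-likelihood score
console.log(result.reasons);       // human-readable explanations

Note that detectAIText throws on empty input and on input shorter than 50 characters (see src/detector.ts below).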

{"version":3,"file":"index.cjs","sources":["../src/utils.ts","../src/metrics/perplexity.ts","../src/metrics/burstiness.ts","../src/metrics/lexical.ts","../src/metrics/syntactic.ts","../src/constants.ts","../src/metrics/style.ts","../src/metrics/human.ts","../src/metrics/contextual.ts","../src/metrics/narrative.ts","../src/detector.ts","../src/index.ts"],"sourcesContent":["/**\r\n * Text processing utilities for AI text detection\r\n */\r\n\r\nexport function tokenizeWords(text: string): string[] {\r\n return text\r\n .toLowerCase()\r\n .replace(/[^\\w\\s'-]/g, \" \")\r\n .split(/\\s+/)\r\n .filter((word) => word.length > 0);\r\n}\r\n\r\nexport function splitIntoSentences(text: string): string[] {\r\n return text\r\n .split(/[.!?]+/)\r\n .map((s) => s.trim())\r\n .filter((s) => s.length > 0);\r\n}\r\n\r\nexport function extractTopicWords(\r\n sentence: string,\r\n commonWords: Set<string>,\r\n transitionWords: string[]\r\n): string[] {\r\n const words = tokenizeWords(sentence);\r\n return words.filter(\r\n (word) =>\r\n word.length > 4 &&\r\n !commonWords.has(word) &&\r\n !transitionWords.includes(word)\r\n );\r\n}\r\n","/**\r\n * Perplexity calculation for AI text detection\r\n * Measures how well a text can be predicted based on statistical language models\r\n */\r\n\r\nexport function calculatePerplexity(words: string[]): number {\r\n if (words.length < 3) return 10; // Default high perplexity for very short texts\r\n\r\n const wordCounts = new Map<string, number>();\r\n const bigramCounts = new Map<string, number>();\r\n const trigramCounts = new Map<string, number>();\r\n\r\n // Count unigrams\r\n words.forEach((word) => {\r\n wordCounts.set(word, (wordCounts.get(word) || 0) + 1);\r\n });\r\n\r\n // Count bigrams\r\n for (let i = 0; i < words.length - 1; i++) {\r\n const bigram = `${words[i]} ${words[i + 1]}`;\r\n bigramCounts.set(bigram, (bigramCounts.get(bigram) || 0) + 1);\r\n }\r\n\r\n // Count trigrams\r\n for (let i = 0; i < words.length - 2; i++) {\r\n const trigram = `${words[i]} ${words[i + 1]} ${words[i + 2]}`;\r\n trigramCounts.set(trigram, (trigramCounts.get(trigram) || 0) + 1);\r\n }\r\n\r\n let totalLogProb = 0;\r\n let totalPredictions = 0;\r\n\r\n // Calculate log probability using interpolated n-gram model\r\n for (let i = 2; i < words.length; i++) {\r\n const currentWord = words[i];\r\n const prevWord = words[i - 1];\r\n const prevPrevWord = words[i - 2];\r\n\r\n const trigram = `${prevPrevWord} ${prevWord} ${currentWord}`;\r\n const bigram = `${prevWord} ${currentWord}`;\r\n const prevBigram = `${prevPrevWord} ${prevWord}`;\r\n\r\n const trigramFreq = trigramCounts.get(trigram) || 0;\r\n const bigramFreq = bigramCounts.get(bigram) || 0;\r\n const prevBigramFreq = bigramCounts.get(prevBigram) || 0;\r\n const wordFreq = wordCounts.get(currentWord) || 0;\r\n\r\n // Interpolated probability with smoothing\r\n let probability = 0;\r\n\r\n // Trigram probability\r\n if (prevBigramFreq > 0) {\r\n probability +=\r\n (0.6 * (trigramFreq + 0.1)) /\r\n (prevBigramFreq + 0.1 * trigramCounts.size);\r\n }\r\n\r\n // Bigram probability\r\n const prevWordFreq = wordCounts.get(prevWord) || 0;\r\n if (prevWordFreq > 0) {\r\n probability +=\r\n (0.3 * (bigramFreq + 0.1)) / (prevWordFreq + 0.1 * bigramCounts.size);\r\n }\r\n\r\n // Unigram probability\r\n probability +=\r\n (0.1 * (wordFreq + 0.1)) / (words.length + 0.1 * wordCounts.size);\r\n\r\n // Ensure minimum probability\r\n probability = Math.max(probability, 0.0001);\r\n\r\n totalLogProb += Math.log2(probability);\r\n 
// --- src/metrics/perplexity.ts ---
/**
 * Perplexity calculation for AI text detection
 * Measures how well a text can be predicted based on statistical language models
 */

export function calculatePerplexity(words: string[]): number {
  if (words.length < 3) return 10; // Default high perplexity for very short texts

  const wordCounts = new Map<string, number>();
  const bigramCounts = new Map<string, number>();
  const trigramCounts = new Map<string, number>();

  // Count unigrams
  words.forEach((word) => {
    wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
  });

  // Count bigrams
  for (let i = 0; i < words.length - 1; i++) {
    const bigram = `${words[i]} ${words[i + 1]}`;
    bigramCounts.set(bigram, (bigramCounts.get(bigram) || 0) + 1);
  }

  // Count trigrams
  for (let i = 0; i < words.length - 2; i++) {
    const trigram = `${words[i]} ${words[i + 1]} ${words[i + 2]}`;
    trigramCounts.set(trigram, (trigramCounts.get(trigram) || 0) + 1);
  }

  let totalLogProb = 0;
  let totalPredictions = 0;

  // Calculate log probability using interpolated n-gram model
  for (let i = 2; i < words.length; i++) {
    const currentWord = words[i];
    const prevWord = words[i - 1];
    const prevPrevWord = words[i - 2];

    const trigram = `${prevPrevWord} ${prevWord} ${currentWord}`;
    const bigram = `${prevWord} ${currentWord}`;
    const prevBigram = `${prevPrevWord} ${prevWord}`;

    const trigramFreq = trigramCounts.get(trigram) || 0;
    const bigramFreq = bigramCounts.get(bigram) || 0;
    const prevBigramFreq = bigramCounts.get(prevBigram) || 0;
    const wordFreq = wordCounts.get(currentWord) || 0;

    // Interpolated probability with smoothing
    let probability = 0;

    // Trigram probability
    if (prevBigramFreq > 0) {
      probability +=
        (0.6 * (trigramFreq + 0.1)) /
        (prevBigramFreq + 0.1 * trigramCounts.size);
    }

    // Bigram probability
    const prevWordFreq = wordCounts.get(prevWord) || 0;
    if (prevWordFreq > 0) {
      probability +=
        (0.3 * (bigramFreq + 0.1)) / (prevWordFreq + 0.1 * bigramCounts.size);
    }

    // Unigram probability
    probability +=
      (0.1 * (wordFreq + 0.1)) / (words.length + 0.1 * wordCounts.size);

    // Ensure minimum probability
    probability = Math.max(probability, 0.0001);

    totalLogProb += Math.log2(probability);
    totalPredictions++;
  }

  // Calculate perplexity
  const averageLogProb = totalLogProb / Math.max(totalPredictions, 1);
  return Math.pow(2, -averageLogProb);
}

// --- src/metrics/burstiness.ts ---
/**
 * Burstiness calculation for AI text detection
 * Measures variation in sentence lengths
 */

export function calculateBurstiness(sentences: string[]): number {
  if (sentences.length < 2) return 0;

  const lengths = sentences.map((s) => s.split(/\s+/).length);
  const mean = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const variance =
    lengths.reduce((sum, len) => sum + Math.pow(len - mean, 2), 0) /
    lengths.length;
  const stdDev = Math.sqrt(variance);

  // Burstiness formula: (σ - μ) / (σ + μ)
  return (stdDev - mean) / (stdDev + mean);
}

export function calculateAverageWordsPerSentence(sentences: string[]): number {
  if (sentences.length === 0) return 0;
  const totalWords = sentences.reduce(
    (sum, sentence) => sum + sentence.split(/\s+/).length,
    0
  );
  return totalWords / sentences.length;
}

export function calculateSentenceVariability(sentences: string[]): number {
  if (sentences.length < 2) return 0;

  const lengths = sentences.map((s) => s.split(/\s+/).length);
  const average = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const variance =
    lengths.reduce((sum, len) => sum + Math.pow(len - average, 2), 0) /
    lengths.length;
  return Math.sqrt(variance);
}

// --- src/metrics/lexical.ts ---
/**
 * Lexical analysis metrics for AI text detection
 */

export function calculateLexicalDiversity(words: string[]): number {
  const uniqueWords = new Set(words);
  return uniqueWords.size / words.length;
}

export function calculateVocabularyRichness(words: string[]): number {
  const uniqueWords = new Set(words);
  const hapaxLegomena = Array.from(uniqueWords).filter(
    (word) => words.filter((w) => w === word).length === 1
  );

  return hapaxLegomena.length / uniqueWords.size;
}

export function analyzeWordFrequencyDistribution(words: string[]): number {
  const wordCounts = new Map<string, number>();
  words.forEach((word) => {
    wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
  });

  const frequencies = Array.from(wordCounts.values()).sort((a, b) => b - a);

  // Zipf's law analysis - natural text follows specific distribution
  let zipfScore = 0;
  for (let i = 1; i < Math.min(frequencies.length, 10); i++) {
    const expected = frequencies[0] / (i + 1);
    const actual = frequencies[i];
    const ratio = Math.min(actual, expected) / Math.max(actual, expected);
    zipfScore += ratio;
  }

  return zipfScore / Math.min(frequencies.length - 1, 9);
}

export function calculateEntropyScore(words: string[]): number {
  const wordCounts = new Map<string, number>();
  words.forEach((word) => {
    wordCounts.set(word, (wordCounts.get(word) || 0) + 1);
  });

  const totalWords = words.length;
  let entropy = 0;

  for (const count of wordCounts.values()) {
    const probability = count / totalWords;
    entropy -= probability * Math.log2(probability);
  }

  // Normalize entropy (higher entropy = more human-like)
  return entropy / Math.log2(Math.min(wordCounts.size, totalWords));
}
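As a sanity check on the burstiness formula above, (σ - μ) / (σ + μ), a worked example (values rounded; the three sentences are stand-ins with word counts 5, 20, and 3):

// assuming: import { calculateBurstiness } from "./metrics/burstiness";
// μ = 28/3 ≈ 9.33; variance ≈ 57.56; σ ≈ 7.59
// burstiness ≈ (7.59 - 9.33) / (7.59 + 9.33) ≈ -0.10
// Uniform sentence lengths drive σ toward 0 and the score toward -1
// (AI-like); highly varied lengths push it toward +1 (human-like).
const mixed = [
  "one two three four five",
  "a sentence that rambles on for twenty whole words just to vary the rhythm of the passage it lives in",
  "short one here",
];
console.log(calculateBurstiness(mixed).toFixed(2)); // "-0.10"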
// --- src/metrics/syntactic.ts ---
/**
 * Syntactic and semantic analysis metrics
 */

import { tokenizeWords } from "../utils";

export function calculateSyntacticComplexity(sentences: string[]): number {
  let totalComplexity = 0;

  sentences.forEach((sentence) => {
    const words = sentence.split(/\s+/);
    let complexity = 0;

    // Count subordinate clauses (simplified)
    complexity += (
      sentence.match(
        /\b(that|which|who|whom|whose|when|where|while|although|because|since|if|unless|until)\b/gi
      ) || []
    ).length;

    // Count conjunctions
    complexity += (sentence.match(/\b(and|but|or|yet|so|for|nor)\b/gi) || [])
      .length;

    // Penalize very long sentences
    if (words.length > 30) complexity += 2;
    if (words.length > 40) complexity += 3;

    totalComplexity += complexity / Math.max(words.length, 1);
  });

  return totalComplexity / sentences.length;
}

export function calculateSemanticCoherence(sentences: string[]): number {
  if (sentences.length < 2) return 1;

  let coherenceScore = 0;

  for (let i = 1; i < sentences.length; i++) {
    const prevWords = new Set(tokenizeWords(sentences[i - 1]));
    const currWords = new Set(tokenizeWords(sentences[i]));

    // Calculate word overlap between consecutive sentences
    const intersection = new Set(
      [...prevWords].filter((x) => currWords.has(x))
    );
    const union = new Set([...prevWords, ...currWords]);

    coherenceScore += intersection.size / union.size;
  }

  return coherenceScore / (sentences.length - 1);
}

export function calculateNGramRepetition(words: string[]): number {
  const trigrams = new Map<string, number>();

  for (let i = 0; i < words.length - 2; i++) {
    const trigram = `${words[i]} ${words[i + 1]} ${words[i + 2]}`;
    trigrams.set(trigram, (trigrams.get(trigram) || 0) + 1);
  }

  const repeatedTrigrams = Array.from(trigrams.values()).filter(
    (count) => count > 1
  );
  return repeatedTrigrams.length / Math.max(trigrams.size, 1);
}

export function calculateBigramUnusualness(words: string[]): number {
  const bigramCounts = new Map<string, number>();
  const totalBigrams = words.length - 1;

  // Count bigrams
  for (let i = 0; i < words.length - 1; i++) {
    const bigram = `${words[i]} ${words[i + 1]}`;
    bigramCounts.set(bigram, (bigramCounts.get(bigram) || 0) + 1);
  }

  // Calculate unusualness based on expected frequency
  let unusualness = 0;
  bigramCounts.forEach((count, bigram) => {
    const [word1, word2] = bigram.split(" ");
    const word1Count = words.filter((w) => w === word1).length;
    const word2Count = words.filter((w) => w === word2).length;

    // Expected frequency based on individual word frequencies
    const expectedFreq = (word1Count * word2Count) / words.length;
    const actualFreq = count;

    if (actualFreq > expectedFreq * 2) {
      unusualness += actualFreq / totalBigrams;
    }
  });

  return Math.min(unusualness, 1);
}
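For illustration, the Jaccard-style overlap that calculateSemanticCoherence computes between consecutive sentences, worked by hand (the numbers follow from the set sizes):

// assuming: import { calculateSemanticCoherence } from "./metrics/syntactic";
// "The cat sat on the mat" vs. "The mat was warm"
// prev set: {the, cat, sat, on, mat} (5 items; duplicates collapse)
// curr set: {the, mat, was, warm}    (4 items)
// intersection {the, mat} = 2, union = 7, so the pair contributes 2/7 ≈ 0.29
console.log(
  calculateSemanticCoherence([
    "The cat sat on the mat",
    "The mat was warm",
  ]).toFixed(2)
); // "0.29"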
// --- src/constants.ts ---
// Common words for frequency analysis
export const commonWords = new Set([
  "the", "be", "to", "of", "and", "a", "in", "that", "have", "i", "it",
  "for", "not", "on", "with", "he", "as", "you", "do", "at", "this", "but",
  "his", "by", "from", "they", "she", "or", "an", "will", "my", "one",
  "all", "would", "there", "their",
]);

// AI-typical phrases and patterns (expanded)
export const aiPatterns = [
  /\b(it is important to note|it should be noted|it is worth mentioning|it is crucial to understand)\b/gi,
  /\b(furthermore|moreover|additionally|consequently|therefore|thus|hence|nonetheless|nevertheless)\b/gi,
  /\b(in conclusion|to summarize|in summary|overall|ultimately|essentially)\b/gi,
  /\b(various|numerous|several|multiple|different|diverse|wide range of)\b/gi,
  /\b(enhance|optimize|facilitate|utilize|implement|establish|maintain|ensure)\b/gi,
  /\b(significant|substantial|considerable|notable|remarkable|extensive)\b/gi,
  /\b(comprehensive|thorough|detailed|in-depth|multifaceted)\b/gi,
  /\b(approach|strategy|methodology|framework|process|procedure)\b/gi,
  // New AI patterns based on recent models
  /\b(it's worth noting|it's important to understand|it's crucial to consider)\b/gi,
  /\b(as we delve into|let's explore|let's examine|it's clear that)\b/gi,
  /\b(in today's|in our modern|in the current|in this digital age)\b/gi,
  /\b(revolutionize|transform|streamline|cutting-edge|state-of-the-art)\b/gi,
  /\b(stakeholders|end-users|best practices|value proposition|synergistic)\b/gi,
  /\b(leverage the power of|harness the potential|unlock the benefits)\b/gi,
];

// Human-like patterns and indicators
export const humanPatterns = [
  /\b(lol|lmao|omg|wtf|btw|tbh|imho|imo)\b/gi,
  /\b(gonna|wanna|gotta|kinda|sorta|dunno)\b/gi,
  /\b(yeah|yep|nah|nope|meh|ugh|hmm)\b/gi,
  /\b(super|really|pretty|kinda|totally|absolutely)\b/gi,
  /\b(awesome|amazing|terrible|awful|weird|crazy)\b/gi,
];

// Emotional expressions
export const emotionalMarkers = [
  /\b(love|hate|excited|frustrated|angry|happy|sad|worried|anxious)\b/gi,
  /\b(feel|felt|feeling|emotions|emotional|mood)\b/gi,
  /(!{2,}|\?{2,}|\.{3,})/g, // Multiple punctuation marks
  /[A-Z]{2,}/g, // CAPS for emphasis
];

// Discourse markers for sophisticated analysis
export const discourseMarkers = [
  "first", "second", "third", "finally", "lastly", "initially",
  "subsequently", "meanwhile", "simultaneously", "on the other hand",
  "in contrast", "however", "nevertheless", "for instance", "for example",
  "such as", "namely", "in fact", "indeed", "actually", "certainly",
  "admittedly", "granted", "of course", "naturally",
];

// Function words for stylometric analysis
export const functionWords = [
  "the", "be", "to", "of", "and", "a", "in", "that", "have", "i", "it",
  "for", "not", "on", "with", "he", "as", "you", "do", "at", "this", "but",
  "his", "by", "from", "they", "she", "or", "an", "will", "my", "one",
  "all", "would", "there", "their", "what", "so", "up", "out", "if",
  "about", "who", "get", "which", "go", "me", "when", "make", "can",
  "like", "time", "no", "just", "him", "know", "take", "people", "into",
  "year", "your", "good", "some", "could", "them", "see", "other", "than",
  "then", "now", "look", "only", "come", "its", "over", "think", "also",
  "back", "after", "use", "two", "how", "our", "work", "first", "well",
  "way", "even", "new", "want", "because", "any", "these", "give", "day",
  "most", "us",
];

// Transition words for density analysis
export const transitionWords = [
  "however", "furthermore", "moreover", "additionally", "consequently",
  "therefore", "thus", "hence", "nevertheless", "nonetheless", "meanwhile",
  "subsequently", "ultimately", "essentially", "specifically",
  "particularly", "notably", "importantly", "significantly",
  "interestingly", "surprisingly", "accordingly", "alternatively",
  "comparatively", "conversely", "similarly", "likewise", "simultaneously",
];

// Sophisticated vocabulary often used by AI
export const sophisticatedWords = [
  "utilize", "facilitate", "demonstrate", "implement", "establish",
  "maintain", "require", "appropriate", "significant", "considerable",
  "substantial", "comprehensive", "extensive", "innovative", "strategic",
  "optimize", "enhance", "leverage", "paradigm", "methodology",
  "framework", "initiative", "synergy",
];
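A sketch of how these pattern lists get used downstream (matching is plain RegExp counting over the raw text):

// assuming: import { aiPatterns } from "./constants";
const sample =
  "It is important to note that we must leverage the power of best practices.";
const aiHits = aiPatterns.reduce(
  (n, p) => n + (sample.match(p) || []).length,
  0
);
console.log(aiHits);
// 3: "It is important to note", "leverage the power of", "best practices"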
\"also\",\r\n \"back\",\r\n \"after\",\r\n \"use\",\r\n \"two\",\r\n \"how\",\r\n \"our\",\r\n \"work\",\r\n \"first\",\r\n \"well\",\r\n \"way\",\r\n \"even\",\r\n \"new\",\r\n \"want\",\r\n \"because\",\r\n \"any\",\r\n \"these\",\r\n \"give\",\r\n \"day\",\r\n \"most\",\r\n \"us\",\r\n];\r\n\r\n// Transition words for density analysis\r\nexport const transitionWords = [\r\n \"however\",\r\n \"furthermore\",\r\n \"moreover\",\r\n \"additionally\",\r\n \"consequently\",\r\n \"therefore\",\r\n \"thus\",\r\n \"hence\",\r\n \"nevertheless\",\r\n \"nonetheless\",\r\n \"meanwhile\",\r\n \"subsequently\",\r\n \"ultimately\",\r\n \"essentially\",\r\n \"specifically\",\r\n \"particularly\",\r\n \"notably\",\r\n \"importantly\",\r\n \"significantly\",\r\n \"interestingly\",\r\n \"surprisingly\",\r\n \"accordingly\",\r\n \"alternatively\",\r\n \"comparatively\",\r\n \"conversely\",\r\n \"similarly\",\r\n \"likewise\",\r\n \"meanwhile\",\r\n \"simultaneously\",\r\n];\r\n\r\n// Sophisticated vocabulary often used by AI\r\nexport const sophisticatedWords = [\r\n \"utilize\",\r\n \"facilitate\",\r\n \"demonstrate\",\r\n \"implement\",\r\n \"establish\",\r\n \"maintain\",\r\n \"require\",\r\n \"appropriate\",\r\n \"significant\",\r\n \"considerable\",\r\n \"substantial\",\r\n \"comprehensive\",\r\n \"extensive\",\r\n \"innovative\",\r\n \"strategic\",\r\n \"optimize\",\r\n \"enhance\",\r\n \"leverage\",\r\n \"paradigm\",\r\n \"methodology\",\r\n \"framework\",\r\n \"initiative\",\r\n \"synergy\",\r\n];\r\n","/**\r\n * Readability and style analysis metrics\r\n */\r\n\r\nimport {\r\n transitionWords,\r\n sophisticatedWords,\r\n functionWords,\r\n commonWords,\r\n} from \"../constants\";\r\nimport { tokenizeWords } from \"../utils\";\r\n\r\nexport function calculateReadabilityScore(\r\n text: string,\r\n sentences: string[],\r\n words: string[]\r\n): number {\r\n const avgWordsPerSentence = words.length / sentences.length;\r\n const complexWords = words.filter((word) => word.length > 6).length;\r\n const complexWordRatio = complexWords / words.length;\r\n\r\n // Simplified Flesch-Kincaid-like formula\r\n return 206.835 - 1.015 * avgWordsPerSentence - 84.6 * complexWordRatio;\r\n}\r\n\r\nexport function analyzePunctuationPatterns(text: string): number {\r\n const punctuation = text.match(/[.!?;:,]/g) || [];\r\n const words = tokenizeWords(text);\r\n\r\n if (words.length === 0) return 0;\r\n\r\n // AI often has consistent punctuation patterns\r\n const punctuationRatio = punctuation.length / words.length;\r\n const commaRatio = (text.match(/,/g) || []).length / words.length;\r\n const semicolonRatio = (text.match(/;/g) || []).length / words.length;\r\n\r\n // AI tends to use moderate punctuation\r\n let score = 0;\r\n if (punctuationRatio > 0.05 && punctuationRatio < 0.15) score += 0.3;\r\n if (commaRatio > 0.02 && commaRatio < 0.08) score += 0.3;\r\n if (semicolonRatio > 0.001 && semicolonRatio < 0.01) score += 0.2;\r\n\r\n return score;\r\n}\r\n\r\nexport function calculateTransitionDensity(words: string[]): number {\r\n const transitionCount = words.filter((word) =>\r\n transitionWords.some((tw) => word.includes(tw))\r\n ).length;\r\n\r\n return (transitionCount / words.length) * 100;\r\n}\r\n\r\nexport function calculateFormalityIndex(words: string[]): number {\r\n const sophisticatedCount = words.filter((word) =>\r\n sophisticatedWords.includes(word)\r\n ).length;\r\n\r\n const commonWordCount = words.filter((word) => commonWords.has(word)).length;\r\n\r\n return (\r\n sophisticatedCount /\r\n 
// --- src/metrics/human.ts ---
/**
 * Human-likeness and emotional analysis metrics
 */

import {
  humanPatterns,
  emotionalMarkers,
  discourseMarkers,
  functionWords,
} from "../constants";
import { tokenizeWords, splitIntoSentences } from "../utils";

export function calculateHumanLikenessIndicators(text: string): number {
  let score = 0;
  let totalIndicators = 0;

  const words = tokenizeWords(text);
  const sentences = splitIntoSentences(text);

  // Check for informal language (enhanced patterns)
  const informalMatches = humanPatterns.reduce((count, pattern) => {
    return count + (text.match(pattern) || []).length;
  }, 0);
  score += Math.min(informalMatches / 3, 1); // Increased sensitivity
  totalIndicators++;

  // Check for contractions (very human-like)
  const contractions = (text.match(/\b\w+['’](?:t|re|ve|ll|d|s|m)\b/gi) || [])
    .length;
  score += Math.min(contractions / 5, 1); // More sensitive
  totalIndicators++;

  // Check for typos and misspellings
  const potentialTypos = text.match(/\b[a-z]*[aeiou]{3,}[a-z]*\b/gi) || [];
  const doubleLetters = text.match(/\b\w*([a-z])\1{2,}\w*\b/gi) || [];
  const inconsistentSpacing = text.match(/\s{2,}/g) || [];
  score += Math.min(
    (potentialTypos.length +
      doubleLetters.length +
      inconsistentSpacing.length) /
      5, // More sensitive
    1
  );
  totalIndicators++;

  // Check for personal pronouns and narrative style
  const personalPronouns = (
    text.match(/\b(I|me|my|mine|myself|we|us|our|ours)\b/gi) || []
  ).length;
  score += Math.min(personalPronouns / Math.max(words.length * 0.05, 1), 1); // More sensitive
  totalIndicators++;

  // Check for emotional punctuation (very human)
  const emotionalPunct = (text.match(/[!]{2,}|[?]{2,}|[.]{3,}/g) || []).length;
  score += Math.min(emotionalPunct / 3, 1); // More sensitive
  totalIndicators++;

  // Check for ALL CAPS words (emphasis) - very human
  const capsWords = (text.match(/\b[A-Z]{2,}\b/g) || []).length;
  score += Math.min(capsWords / 5, 1); // More sensitive
  totalIndicators++;

  // Check for internet slang and abbreviations (very human)
  const internetSlang = (
    text.match(
      /\b(lol|lmao|omg|wtf|btw|tbh|imho|imo|ngl|smh|fml|irl|rn|af|fr|periodt|idk|ikr|brb|ttyl|dm|pm|sus|lit|fam|bae|goat|facts|no cap|bet|vibe|mood)\b/gi
    ) || []
  ).length;
  score += Math.min(internetSlang / 2, 1); // Very sensitive to slang
  totalIndicators++;

  // Check for incomplete sentences or fragments (human-like)
  const fragments = sentences.filter((s) => {
    const words = s.trim().split(/\s+/);
    return (
      words.length < 4 &&
      !words.some((w) =>
        w.match(/^(yes|no|ok|okay|yeah|nah|sure|maybe|absolutely|definitely)$/i)
      )
    );
  }).length;
  score += Math.min(fragments / Math.max(sentences.length * 0.3, 1), 0.8);
  totalIndicators++;

  // Check for conversational markers (very human)
  const conversationalMarkers = (
    text.match(
      /\b(like|you know|I mean|right|so|well|um|uh|actually|basically|literally|honestly|seriously|obviously|apparently|supposedly|kinda|sorta|maybe|probably|definitely|absolutely|totally|completely|exactly|precisely)\b/gi
    ) || []
  ).length;
  score += Math.min(conversationalMarkers / Math.max(words.length * 0.1, 1), 1);
  totalIndicators++;

  // Check for creative/descriptive language (narrative human writing)
  const creativeDescriptions = (
    text.match(
      /\b(nearly twice|hardly any|very large|came in very useful|no finer|big beefy|which made|although he did|spent so much|craning over|spying on)\b/gi
    ) || []
  ).length;
  score += Math.min(creativeDescriptions / 3, 1);
  totalIndicators++;

  // Check for character names and storytelling elements
  const narrativeElements = (
    text.match(
      /\b(Mr\.|Mrs\.|called|named|director|firm|son|opinion|neighbors|mustache|blonde)\b/gi
    ) || []
  ).length;
  score += Math.min(narrativeElements / 5, 0.8);
  totalIndicators++;

  // Check for narrative pronouns (third person storytelling)
  const narrativePronouns = (
    text.match(/\b(he|she|they|him|her|them|his|hers|their|theirs)\b/gi) || []
  ).length;
  score += Math.min(narrativePronouns / Math.max(words.length * 0.08, 1), 0.7);
  totalIndicators++;

  return score / totalIndicators;
}
export function calculateEmotionalToneVariability(text: string): number {
  let emotionalMarkerCount = 0;

  emotionalMarkers.forEach((pattern) => {
    emotionalMarkerCount += (text.match(pattern) || []).length;
  });

  // Additional emotional indicators
  const exclamations = (text.match(/!/g) || []).length;
  const questions = (text.match(/\?/g) || []).length;
  const emotionalWords = (
    text.match(
      /\b(love|hate|excited|frustrated|angry|happy|sad|worried|anxious|amazing|terrible|awesome|awful|horrible|wonderful|fantastic|disgusting|annoying|brilliant|stupid|crazy|insane|wild|mad|furious|thrilled|devastated|shocked|surprised|confused|overwhelmed)\b/gi
    ) || []
  ).length;

  const words = tokenizeWords(text);
  const totalEmotionalSignals =
    emotionalMarkerCount + exclamations + questions + emotionalWords;

  return Math.min(totalEmotionalSignals / Math.max(words.length * 0.1, 1), 1);
}
export function calculateInformalnessScore(text: string): number {
  let informalityScore = 0;
  let totalFeatures = 0;

  const words = tokenizeWords(text);
  const sentences = splitIntoSentences(text);

  // Contractions (very informal)
  const contractions = (text.match(/\b\w+['’](?:t|re|ve|ll|d|s|m)\b/gi) || [])
    .length;
  informalityScore += Math.min(
    contractions / Math.max(words.length * 0.1, 1),
    1
  );
  totalFeatures++;

  // Slang and colloquialisms (very informal)
  const slangCount = humanPatterns.reduce((count, pattern) => {
    return count + (text.match(pattern) || []).length;
  }, 0);
  informalityScore += Math.min(slangCount / 5, 1); // More sensitive
  totalFeatures++;

  // Sentence fragments (informal)
  const fragments = sentences.filter((s) => s.split(/\s+/).length < 4).length;
  informalityScore += Math.min(
    fragments / Math.max(sentences.length * 0.4, 1),
    1
  );
  totalFeatures++;

  // Ellipses and multiple punctuation (informal)
  const multiplePunct = (text.match(/[.!?]{2,}/g) || []).length;
  informalityScore += Math.min(multiplePunct / 5, 1); // More sensitive
  totalFeatures++;

  // Conversational words (informal)
  const conversationalWords = (
    text.match(
      /\b(like|you know|I mean|right|so|well|um|uh|actually|basically|literally|honestly|seriously|obviously|apparently|kinda|sorta|gonna|wanna|gotta)\b/gi
    ) || []
  ).length;
  informalityScore += Math.min(
    conversationalWords / Math.max(words.length * 0.05, 1),
    1
  );
  totalFeatures++;

  // Lowercase sentence beginnings (very informal)
  const lowercaseStarts = sentences.filter((s) => {
    const trimmed = s.trim();
    return (
      trimmed.length > 0 &&
      trimmed[0] === trimmed[0].toLowerCase() &&
      trimmed[0].match(/[a-z]/)
    );
  }).length;
  informalityScore += Math.min(
    lowercaseStarts / Math.max(sentences.length * 0.3, 1),
    1
  );
  totalFeatures++;

  // Run-on sentences with "and" (informal)
  const runOnSentences = sentences.filter((s) => {
    const andCount = (s.match(/\band\b/gi) || []).length;
    const wordCount = s.split(/\s+/).length;
    return andCount > 2 && wordCount > 20;
  }).length;
  informalityScore += Math.min(
    runOnSentences / Math.max(sentences.length * 0.5, 1),
    0.8
  );
  totalFeatures++;

  return informalityScore / totalFeatures;
}

export function calculateDiscourseMarkerPatterns(words: string[]): number {
  const discourseMarkerCount = words.filter((word) =>
    discourseMarkers.some((marker) =>
      marker.toLowerCase().includes(word.toLowerCase())
    )
  ).length;

  // AI tends to overuse discourse markers
  const density = discourseMarkerCount / words.length;
  return Math.min(density * 50, 1);
}

export function calculateFunctionWordAnalysis(words: string[]): number {
  const functionWordCount = words.filter((word) =>
    functionWords.includes(word.toLowerCase())
  ).length;

  const ratio = functionWordCount / words.length;

  // Natural human text typically has 40-60% function words
  // AI often deviates from this pattern
  if (ratio >= 0.4 && ratio <= 0.6) {
    return 0.2; // Lower score for human-like ratio
  } else {
    return Math.min(Math.abs(ratio - 0.5) * 2, 1);
  }
}
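To pin down the branchy scoring in calculateFunctionWordAnalysis above, a sketch with illustrative ratios:

// ratio = functionWordCount / words.length
// ratio 0.45 -> inside the 40-60% human band -> returns 0.2
// ratio 0.25 -> |0.25 - 0.5| * 2 = 0.5       -> mildly AI-flavored
// ratio 0.05 -> |0.05 - 0.5| * 2 = 0.9       -> strongly atypical
// Note the detector feeds this cleanWords (length > 2), which already
// drops many short function words ("of", "to", "in"), so low ratios
// are common in practice.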
// --- src/metrics/contextual.ts ---
/**
 * Contextual and structural analysis metrics
 */

import { extractTopicWords } from "../utils";
import { commonWords, transitionWords } from "../constants";

export function calculateContextualConsistency(sentences: string[]): number {
  if (sentences.length < 3) return 1;

  let consistencyScore = 0;
  const topics = sentences.map((sentence) =>
    extractTopicWords(sentence, commonWords, transitionWords)
  );

  for (let i = 1; i < topics.length - 1; i++) {
    const prevTopics = new Set(topics[i - 1]);
    const currTopics = new Set(topics[i]);
    const nextTopics = new Set(topics[i + 1]);

    const allTopics = new Set([...prevTopics, ...currTopics, ...nextTopics]);
    const commonTopicsCount = [...allTopics].filter(
      (topic) =>
        [prevTopics, currTopics, nextTopics].filter((set) => set.has(topic))
          .length >= 2
    );

    consistencyScore += commonTopicsCount.length / Math.max(allTopics.size, 1);
  }

  return consistencyScore / Math.max(sentences.length - 2, 1);
}

export function calculateSentenceStructureEntropy(sentences: string[]): number {
  const structures = sentences.map((sentence) => {
    const words = sentence.split(/\s+/);
    const length = words.length;

    // Classify sentence structure
    if (length <= 5) return "short";
    if (length <= 15) return "medium";
    if (length <= 25) return "long";
    return "very_long";
  });

  const structureCounts = new Map<string, number>();
  structures.forEach((structure) => {
    structureCounts.set(structure, (structureCounts.get(structure) || 0) + 1);
  });

  let entropy = 0;
  const totalSentences = sentences.length;

  for (const count of structureCounts.values()) {
    const probability = count / totalSentences;
    entropy -= probability * Math.log2(probability);
  }

  return entropy / Math.log2(Math.min(structureCounts.size, totalSentences));
}

export function calculateTopicCoherenceScore(sentences: string[]): number {
  if (sentences.length < 2) return 1;

  const topicWords = sentences.map((sentence) =>
    extractTopicWords(sentence, commonWords, transitionWords)
  );

  let coherenceSum = 0;
  let comparisons = 0;

  for (let i = 0; i < topicWords.length - 1; i++) {
    for (let j = i + 1; j < Math.min(i + 4, topicWords.length); j++) {
      const words1 = new Set(topicWords[i]);
      const words2 = new Set(topicWords[j]);

      const intersection = new Set([...words1].filter((x) => words2.has(x)));
      const union = new Set([...words1, ...words2]);

      const similarity = intersection.size / Math.max(union.size, 1);
      coherenceSum += similarity;
      comparisons++;
    }
  }

  return comparisons > 0 ? coherenceSum / comparisons : 0;
}
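A worked example of the structure-entropy normalization above (values rounded):

// Four sentences classified as [short, medium, medium, long]:
// p(short) = 0.25, p(medium) = 0.5, p(long) = 0.25
// entropy = -(0.25*log2 0.25 + 0.5*log2 0.5 + 0.25*log2 0.25) = 1.5 bits
// normalizer = log2(min(3 structure types, 4 sentences)) = log2 3 ≈ 1.585
// normalized entropy ≈ 1.5 / 1.585 ≈ 0.95, i.e. varied, human-leaning structure.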
// --- src/metrics/narrative.ts ---
/**
 * Narrative and creative writing analysis
 */

import { tokenizeWords } from "../utils";

export function calculateNarrativeScore(text: string): number {
  let narrativeScore = 0;
  let totalIndicators = 0;

  const words = tokenizeWords(text);

  // Check for character names and proper nouns (common in narrative)
  const properNouns = (text.match(/\b[A-Z][a-z]+\b/g) || []).length;
  narrativeScore += Math.min(properNouns / Math.max(words.length * 0.1, 1), 1);
  totalIndicators++;

  // Check for past tense narrative patterns
  const pastTenseVerbs = (
    text.match(
      /\b\w+(ed|was|were|had|did|said|went|came|saw|looked|thought|felt|knew|told|asked|answered|walked|turned|opened|closed)\b/gi
    ) || []
  ).length;
  narrativeScore += Math.min(
    pastTenseVerbs / Math.max(words.length * 0.1, 1),
    1
  );
  totalIndicators++;

  // Check for descriptive language
  const descriptiveWords = (
    text.match(
      /\b(big|small|large|tiny|huge|enormous|beautiful|ugly|old|young|tall|short|fat|thin|thick|wide|narrow|bright|dark|loud|quiet|soft|hard|smooth|rough|hot|cold|warm|cool|dry|wet|clean|dirty|new|old|fresh|stale|sweet|sour|bitter|salty|spicy|mild|strong|weak|heavy|light|fast|slow|quick|careful|gentle|rough|kind|mean|nice|bad|good|excellent|terrible|wonderful|awful|amazing|boring|interesting|exciting|scary|funny|sad|happy|angry|surprised|confused|tired|energetic)\b/gi
    ) || []
  ).length;
  narrativeScore += Math.min(
    descriptiveWords / Math.max(words.length * 0.08, 1),
    1
  );
  totalIndicators++;

  // Check for dialogue indicators
  const dialogueIndicators = (text.match(/["'“”]/g) || []).length;
  narrativeScore += Math.min(dialogueIndicators / 10, 0.8);
  totalIndicators++;

  // Check for third-person narrative pronouns
  const thirdPersonPronouns = (
    text.match(/\b(he|she|they|him|her|them|his|hers|their|theirs)\b/gi) || []
  ).length;
  narrativeScore += Math.min(
    thirdPersonPronouns / Math.max(words.length * 0.05, 1),
    1
  );
  totalIndicators++;

  return narrativeScore / totalIndicators;
}

export function calculateCreativityScore(text: string): number {
  let creativityScore = 0;
  let totalIndicators = 0;

  const words = tokenizeWords(text);

  // Check for metaphors and similes
  const metaphorPatterns = (
    text.match(
      /\b(like|as|seemed|appeared|looked like|sounded like|felt like|was like|were like)\b/gi
    ) || []
  ).length;
  creativityScore += Math.min(
    metaphorPatterns / Math.max(words.length * 0.05, 1),
    1
  );
  totalIndicators++;

  // Check for unique/creative descriptions (unusual adjective-noun combinations)
  const creativeDescriptions = (
    text.match(
      /\b(nearly twice|hardly any|very large|came in very useful|no finer|so much of|which made|although he did)\b/gi
    ) || []
  ).length;
  creativityScore += Math.min(creativeDescriptions / 5, 1);
  totalIndicators++;

  // Check for vivid imagery words
  const imageryWords = (
    text.match(
      /\b(craning|spying|mustache|beefy|blonde|drilling|garden fences|neighbors|opinion|director|firm)\b/gi
    ) || []
  ).length;
  creativityScore += Math.min(
    imageryWords / Math.max(words.length * 0.1, 1),
    1
  );
  totalIndicators++;
  // Check for specific, concrete details rather than abstract concepts
  const concreteNouns = (
    text.match(
      /\b(drill|mustache|neck|fence|garden|neighbor|son|boy|director|firm|company|house|car|door|window|street|road|tree|flower|table|chair|book|phone|computer|cat|dog|bird|food|water|coffee|tea|money|time|day|night|morning|evening|sun|moon|star|cloud|rain|snow|wind|fire|ice|rock|sand|grass|leaf|branch|root|seed)\b/gi
    ) || []
  ).length;
  creativityScore += Math.min(
    concreteNouns / Math.max(words.length * 0.08, 1),
    1
  );
  totalIndicators++;

  // Check for character-focused writing
  const characterFocus = (
    text.match(
      /\b(Mr\.|Mrs\.|Dursley|Dudley|Grunnings|called|named|known as)\b/gi
    ) || []
  ).length;
  creativityScore += Math.min(characterFocus / 8, 1);
  totalIndicators++;

  return creativityScore / totalIndicators;
}

// --- src/detector.ts ---
/**
 * Main AI Text Detector class
 */

import { DetectionResult, AnalysisMetrics } from "./types";
import { tokenizeWords, splitIntoSentences } from "./utils";
import * as metrics from "./metrics";

export class AITextDetector {
  public detectAIText(text: string): DetectionResult {
    if (!text || text.trim().length === 0) {
      throw new Error("Text cannot be empty");
    }

    if (text.trim().length < 50) {
      throw new Error(
        "Text too short for reliable analysis (minimum 50 characters)"
      );
    }

    const analysisMetrics = this.analyzeText(text);
    const score = this.calculateAdvancedAIScore(analysisMetrics);

    // More balanced threshold - conservative but not too much
    let threshold = 0.58; // Slightly lower base threshold

    // Adjust threshold based on text length
    const wordCount = tokenizeWords(text).length;
    if (wordCount < 100) {
      threshold += 0.04; // Less adjustment for shorter texts
    } else if (wordCount > 300) {
      threshold -= 0.02; // Slightly more sensitive for longer texts
    }

    // Detect narrative/literary writing patterns - ONLY for highly narrative text
    const narrativeScore = metrics.calculateNarrativeScore(text);
    if (narrativeScore > 0.5) {
      threshold += 0.15; // Much more conservative for clearly narrative text
    } else if (narrativeScore > 0.35) {
      threshold += 0.08; // Moderate adjustment for somewhat narrative text
    }

    // Adjust based on detected human patterns - be more selective
    if (analysisMetrics.humanLikenessIndicators > 0.6) {
      threshold += 0.2; // Strong adjustment for very human text
    } else if (analysisMetrics.humanLikenessIndicators > 0.4) {
      threshold += 0.12; // Moderate adjustment for clearly human text
    } else if (analysisMetrics.humanLikenessIndicators > 0.2) {
      threshold += 0.05; // Small adjustment for somewhat human text
    }

    if (analysisMetrics.informalnessScore > 0.5) {
      threshold += 0.15; // Strong adjustment for very informal text
    } else if (analysisMetrics.informalnessScore > 0.3) {
      threshold += 0.08; // Moderate adjustment for informal text
    } else if (analysisMetrics.informalnessScore > 0.15) {
      threshold += 0.03; // Small adjustment for somewhat informal text
    }

    if (analysisMetrics.emotionalToneVariability > 0.4) {
      threshold += 0.12; // Adjustment for very emotional text
    } else if (analysisMetrics.emotionalToneVariability > 0.2) {
      threshold += 0.06; // Small adjustment for somewhat emotional text
    }

    // Check for creative writing indicators - be more selective
    const creativityScore = metrics.calculateCreativityScore(text);
    if (creativityScore > 0.5) {
      threshold += 0.2; // Strong boost for very creative writing
    } else if (creativityScore > 0.35) {
      threshold += 0.1; // Moderate boost for creative writing
    }

    const isAIGenerated = score > threshold;
    const confidence = Math.round(score * 100) / 100;
    const reasons = this.generateDetailedReasons(analysisMetrics, score);

    return {
      isAIGenerated,
      confidence,
      reasons,
      score,
      perplexityScore: analysisMetrics.perplexity,
      burstinessScore: analysisMetrics.burstiness,
    };
  }
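  // Worked threshold example (illustrative numbers, not package docs):
  // for a 250-word formal essay, the 0.58 base is unchanged by word count
  // (100..300); narrativeScore 0.2 and creativityScore 0.2 clear no bucket;
  // humanLikenessIndicators 0.25 adds 0.05; informalnessScore 0.10 and
  // emotionalToneVariability 0.10 add nothing. Final threshold: 0.63, so
  // the text is flagged only if its aggregate score exceeds 0.63.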
  private analyzeText(text: string): AnalysisMetrics {
    const sentences = splitIntoSentences(text);
    const words = tokenizeWords(text);
    const cleanWords = words.filter((word) => word.length > 2);

    return {
      perplexity: metrics.calculatePerplexity(words),
      burstiness: metrics.calculateBurstiness(sentences),
      averageWordsPerSentence:
        metrics.calculateAverageWordsPerSentence(sentences),
      sentenceVariability: metrics.calculateSentenceVariability(sentences),
      lexicalDiversity: metrics.calculateLexicalDiversity(cleanWords),
      readabilityScore: metrics.calculateReadabilityScore(
        text,
        sentences,
        words
      ),
      syntacticComplexity: metrics.calculateSyntacticComplexity(sentences),
      semanticCoherence: metrics.calculateSemanticCoherence(sentences),
      nGramRepetition: metrics.calculateNGramRepetition(words),
      punctuationPatterns: metrics.analyzePunctuationPatterns(text),
      wordFrequencyDistribution:
        metrics.analyzeWordFrequencyDistribution(cleanWords),
      transitionDensity: metrics.calculateTransitionDensity(cleanWords),
      formalityIndex: metrics.calculateFormalityIndex(cleanWords),
      vocabularyRichness: metrics.calculateVocabularyRichness(cleanWords),
      contextualConsistency: metrics.calculateContextualConsistency(sentences),
      // Enhanced metrics
      entropyScore: metrics.calculateEntropyScore(words),
      humanLikenessIndicators: metrics.calculateHumanLikenessIndicators(text),
      emotionalToneVariability: metrics.calculateEmotionalToneVariability(text),
      discourseMarkerPatterns:
        metrics.calculateDiscourseMarkerPatterns(cleanWords),
      functionWordAnalysis: metrics.calculateFunctionWordAnalysis(cleanWords),
      informalnessScore: metrics.calculateInformalnessScore(text),
      sentenceStructureEntropy:
        metrics.calculateSentenceStructureEntropy(sentences),
      topicCoherenceScore: metrics.calculateTopicCoherenceScore(sentences),
      bigramUnusualness: metrics.calculateBigramUnusualness(words),
      stylometricSignature: metrics.calculateStylometricSignature(
        text,
        sentences,
        words
      ),
    };
  }
  private calculateAdvancedAIScore(analysisMetrics: AnalysisMetrics): number {
    let score = 0;

    // Focus on the most discriminative metrics with proper weighting

    // 1. Human-likeness indicators (MOST IMPORTANT - inverse scoring)
    const humanScore = 1 - analysisMetrics.humanLikenessIndicators;
    score += humanScore * 0.25; // Reduced from 0.30

    // 2. Informality score (VERY IMPORTANT - inverse scoring)
    const formalityScore = 1 - analysisMetrics.informalnessScore;
    score += formalityScore * 0.2; // Reduced from 0.25

    // 3. Emotional tone variability (IMPORTANT - inverse scoring)
    const emotionalScore =
      1 - Math.min(analysisMetrics.emotionalToneVariability, 1);
    score += emotionalScore * 0.15; // Reduced from 0.20

    // 4. Perplexity analysis (more balanced)
    let perplexityScore = 0;
    if (analysisMetrics.perplexity < 2) {
      perplexityScore = 1; // Very AI-like
    } else if (analysisMetrics.perplexity < 4) {
      perplexityScore = 0.8; // Likely AI
    } else if (analysisMetrics.perplexity < 7) {
      perplexityScore = 0.5; // Uncertain - could be formal human writing
    } else if (analysisMetrics.perplexity < 12) {
      perplexityScore = 0.2; // Likely human
    } else {
      perplexityScore = 0.05; // Very likely human
    }
    score += perplexityScore * 0.18; // Reduced from 0.20

    // 5. Burstiness analysis (increased weight)
    let burstinessScore = 0;
    if (analysisMetrics.burstiness < -0.5) {
      burstinessScore = 0.9; // Very consistent = AI-like
    } else if (analysisMetrics.burstiness < 0) {
      burstinessScore = 0.6; // Somewhat consistent = possibly AI
    } else if (analysisMetrics.burstiness < 0.3) {
      burstinessScore = 0.3; // Some variation = possibly human
    } else {
      burstinessScore = 0.1; // High variation = likely human
    }
    score += burstinessScore * 0.15; // Increased from 0.10

    // 6. Add some additional AI indicators with smaller weights
    // Transition density (AI overuses transitions)
    if (analysisMetrics.transitionDensity > 2) {
      score += Math.min(analysisMetrics.transitionDensity / 10, 0.1) * 0.05;
    }

    // Apply adaptive adjustments based on strong human indicators
    score = this.applyAdaptiveThresholding(score, analysisMetrics);

    // Ensure score is between 0 and 1
    return Math.max(0, Math.min(1, score));
  }

  private applyAdaptiveThresholding(
    baseScore: number,
    analysisMetrics: AnalysisMetrics
  ): number {
    let adjustedScore = baseScore;

    // Strong human indicators should significantly reduce AI probability
    if (analysisMetrics.humanLikenessIndicators > 0.6) {
      adjustedScore *= 0.2; // Very strong reduction for very human text
    } else if (analysisMetrics.humanLikenessIndicators > 0.4) {
      adjustedScore *= 0.4; // Strong reduction for clearly human text
    } else if (analysisMetrics.humanLikenessIndicators > 0.2) {
      adjustedScore *= 0.7; // Moderate reduction for somewhat human text
    }

    // High informality should reduce AI probability
    if (analysisMetrics.informalnessScore > 0.6) {
      adjustedScore *= 0.3; // Strong reduction for very informal text
    } else if (analysisMetrics.informalnessScore > 0.4) {
      adjustedScore *= 0.5; // Moderate reduction for informal text
    } else if (analysisMetrics.informalnessScore > 0.2) {
      adjustedScore *= 0.8; // Light reduction for somewhat informal text
    }

    // High emotional variability should reduce AI probability
    if (analysisMetrics.emotionalToneVariability > 0.5) {
      adjustedScore *= 0.4; // Strong reduction for very emotional text
    } else if (analysisMetrics.emotionalToneVariability > 0.3) {
      adjustedScore *= 0.6; // Moderate reduction for emotional text
    }

    // Multiple strong human indicators compound the effect
    const strongHumanIndicators = [
      analysisMetrics.humanLikenessIndicators > 0.3,
      analysisMetrics.informalnessScore > 0.3,
      analysisMetrics.emotionalToneVariability > 0.2,
      analysisMetrics.entropyScore > 0.8,
    ].filter(Boolean).length;

    if (strongHumanIndicators >= 3) {
      adjustedScore *= 0.1; // Very strong reduction for clearly human text
    } else if (strongHumanIndicators >= 2) {
      adjustedScore *= 0.3; // Strong reduction for likely human text
    }

    return Math.max(0, Math.min(1, adjustedScore));
  }
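  // Worked scoring example (illustrative): humanLikenessIndicators 0.1,
  // informalnessScore 0.1, emotionalToneVariability 0.1, perplexity 3.5,
  // burstiness -0.2, transitionDensity 1, entropyScore 0.85:
  //   (1 - 0.1) * 0.25 = 0.225   human-likeness (inverse)
  // + (1 - 0.1) * 0.20 = 0.180   formality (inverse)
  // + (1 - 0.1) * 0.15 = 0.135   emotional tone (inverse)
  // + 0.8 * 0.18       = 0.144   perplexity bucket (< 4)
  // + 0.6 * 0.15       = 0.090   burstiness bucket (< 0)
  //                    ≈ 0.774
  // None of the adaptive reductions fire at these values, and only one
  // strongHumanIndicators item (entropyScore > 0.8) is true, so the final
  // score stays ≈ 0.77.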
  private generateDetailedReasons(
    analysisMetrics: AnalysisMetrics,
    score: number
  ): string[] {
    const reasons: string[] = [];

    if (analysisMetrics.perplexity < 8) {
      reasons.push(
        `Low perplexity (${analysisMetrics.perplexity.toFixed(2)}) suggests predictable word patterns typical of AI`
      );
    }

    if (analysisMetrics.burstiness < 0.1) {
      reasons.push(
        `Low burstiness (${analysisMetrics.burstiness.toFixed(2)}) indicates consistent sentence structure characteristic of AI`
      );
    }

    if (analysisMetrics.humanLikenessIndicators < 0.3) {
      reasons.push(
        `Low human-likeness indicators (${analysisMetrics.humanLikenessIndicators.toFixed(2)}) suggest absence of typical human writing patterns`
      );
    }

    if (analysisMetrics.entropyScore < 0.7) {
      reasons.push(
        `Low entropy score (${analysisMetrics.entropyScore.toFixed(2)}) indicates predictable word choice patterns typical of AI`
      );
    }

    if (analysisMetrics.informalnessScore < 0.2) {
      reasons.push(
        `Low informality score (${analysisMetrics.informalnessScore.toFixed(2)}) suggests formal, AI-like writing style`
      );
    }

    if (
      analysisMetrics.lexicalDiversity > 0.4 &&
      analysisMetrics.lexicalDiversity < 0.7
    ) {
      reasons.push(
        `Lexical diversity (${analysisMetrics.lexicalDiversity.toFixed(2)}) falls within AI-typical range`
      );
    }

    if (analysisMetrics.transitionDensity > 2) {
      reasons.push(
        `High transition word density (${analysisMetrics.transitionDensity.toFixed(1)}%) characteristic of AI writing`
      );
    }

    if (analysisMetrics.discourseMarkerPatterns > 0.3) {
      reasons.push(
        `Elevated discourse marker usage (${analysisMetrics.discourseMarkerPatterns.toFixed(2)}) typical of AI text structure`
      );
    }

    if (analysisMetrics.formalityIndex > 0.5) {
      reasons.push(
        `Elevated formality index (${analysisMetrics.
// [source truncated here; the remainder of detector.ts and src/index.ts are not present in this listing]