UNPKG

local-leetcode-trainer

Version:

A complete local LeetCode practice environment with multi-language support - use your IDE, collaborate with AI, submit with confidence

740 lines (660 loc) 21.4 kB
/** * LeetCode Web Scraper * Directly scrapes LeetCode website to get problem information */ const https = require('https'); const { URL } = require('url'); class LeetCodeScraper { constructor() { this.baseUrl = 'https://leetcode.com'; this.timeout = 10000; this.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'; } /** * Make HTTP request to get HTML content */ async fetchHTML(url) { return new Promise((resolve, reject) => { const parsedUrl = new URL(url); const options = { hostname: parsedUrl.hostname, port: parsedUrl.port || 443, path: parsedUrl.pathname + parsedUrl.search, method: 'GET', headers: { 'User-Agent': this.userAgent, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate, br', 'Connection': 'keep-alive', 'Upgrade-Insecure-Requests': '1', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Cache-Control': 'max-age=0' }, timeout: this.timeout }; const req = https.request(options, (res) => { let data = ''; res.on('data', (chunk) => { data += chunk; }); res.on('end', () => { if (res.statusCode >= 200 && res.statusCode < 300) { resolve(data); } else { reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`)); } }); }); req.on('error', reject); req.on('timeout', () => { req.destroy(); reject(new Error('Request timeout')); }); req.end(); }); } /** * Scrape problem data from LeetCode website */ async scrapeProblem(problemSlug) { try { const url = `${this.baseUrl}/problems/${problemSlug}/`; console.log(`🌐 Scraping problem from: ${url}`); const html = await this.fetchHTML(url); return this.parseProblemHTML(html, problemSlug); } catch (error) { throw new Error(`Failed to scrape problem ${problemSlug}: ${error.message}`); } } /** * Parse HTML to extract problem information */ parseProblemHTML(html, problemSlug) { try { // Extract problem title and ID const titleMatch = html.match(/<title>([^<]+)<\/title>/); const fullTitle = titleMatch ? titleMatch[1].trim() : ''; // Parse title to get ID and name const titleParts = fullTitle.match(/^(\d+)\.\s*(.+?)\s*-\s*LeetCode$/); const problemId = titleParts ? parseInt(titleParts[1]) : null; const problemTitle = titleParts ? titleParts[2].trim() : fullTitle.replace(' - LeetCode', ''); // Extract difficulty const difficulty = this.extractDifficulty(html); // Extract problem description const description = this.extractDescription(html); // Extract examples const examples = this.extractExamples(html); // Extract constraints const constraints = this.extractConstraints(html); // Extract topics const topics = this.extractTopics(html); // Extract companies (if available) const companies = this.extractCompanies(html); // Generate function signatures const functionSignatures = this.generateFunctionSignatures(problemTitle, html); // Generate comprehensive test cases const testCases = this.generateTestCases(examples, constraints); return { id: problemId, title: problemTitle, name: problemSlug, difficulty: difficulty, description: description, examples: examples, constraints: constraints, topics: topics, companies: companies, functionSignatures: functionSignatures, testCases: testCases, metadata: { fetchedAt: new Date(), source: 'web-scraper', version: '1.0' } }; } catch (error) { throw new Error(`Failed to parse problem HTML: ${error.message}`); } } /** * Extract difficulty from HTML */ extractDifficulty(html) { // Look for difficulty indicators const patterns = [ /class="[^"]*difficulty[^"]*"[^>]*>([^<]+)</i, /"difficulty":\s*"([^"]+)"/i, />(?:Easy|Medium|Hard)</gi ]; for (const pattern of patterns) { const match = html.match(pattern); if (match) { const difficulty = match[1] || match[0]; return difficulty.replace(/[<>]/g, '').toLowerCase(); } } return 'unknown'; } /** * Extract problem description from HTML */ extractDescription(html) { // Try multiple patterns to find the description const patterns = [ /<div[^>]*class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/s, /<div[^>]*data-track-load="description_content"[^>]*>(.*?)<\/div>/s, /"content":"([^"]+)"/, /<p[^>]*>(.*?)<\/p>/s ]; for (const pattern of patterns) { const match = html.match(pattern); if (match) { let description = this.cleanHTML(match[1]); if (description.length > 50) { return description; } } } // Fallback: extract from JSON data const jsonMatch = html.match(/"content":"([^"]+)"/); if (jsonMatch) { return this.cleanHTML(jsonMatch[1].replace(/\\n/g, '\n').replace(/\\"/g, '"')); } return `${problemSlug.replace(/-/g, ' ')} - Problem description extracted from LeetCode.`; } /** * Extract examples from HTML */ extractExamples(html) { const examples = []; // Pattern 1: Look for Example sections const exampleRegex = /<strong[^>]*>Example\s*(\d+):<\/strong>(.*?)(?=<strong[^>]*>Example|<strong[^>]*>Constraints|<\/div>)/gs; let match; while ((match = exampleRegex.exec(html)) !== null) { const exampleText = this.cleanHTML(match[2]); const example = this.parseExample(exampleText); if (example) { examples.push(example); } } // Pattern 2: Look for JSON examples if (examples.length === 0) { const jsonMatch = html.match(/"exampleTestcases":"([^"]+)"/); if (jsonMatch) { const testCases = jsonMatch[1].split('\\n'); for (let i = 0; i < testCases.length; i += 2) { if (testCases[i] && testCases[i + 1]) { examples.push({ input: testCases[i], output: testCases[i + 1], explanation: `Example ${examples.length + 1}` }); } } } } // Generate at least 2 examples if none found if (examples.length === 0) { examples.push( { input: "Input example 1", output: "Output example 1", explanation: "Example explanation 1" }, { input: "Input example 2", output: "Output example 2", explanation: "Example explanation 2" } ); } return examples; } /** * Parse individual example text */ parseExample(exampleText) { const inputMatch = exampleText.match(/Input:\s*(.+?)(?=Output:|$)/s); const outputMatch = exampleText.match(/Output:\s*(.+?)(?=Explanation:|$)/s); const explanationMatch = exampleText.match(/Explanation:\s*(.+?)$/s); if (inputMatch && outputMatch) { return { input: inputMatch[1].trim(), output: outputMatch[1].trim(), explanation: explanationMatch ? explanationMatch[1].trim() : undefined }; } return null; } /** * Extract constraints from HTML */ extractConstraints(html) { const constraints = []; // Pattern 1: Look for Constraints section const constraintsMatch = html.match(/<strong[^>]*>Constraints:<\/strong>(.*?)(?=<\/div>|<strong)/s); if (constraintsMatch) { const constraintsText = this.cleanHTML(constraintsMatch[1]); const lines = constraintsText.split('\n').filter(line => line.trim()); for (const line of lines) { const trimmed = line.trim(); if (trimmed && !trimmed.startsWith('Constraints:')) { constraints.push(trimmed); } } } // Pattern 2: Look for constraint patterns if (constraints.length === 0) { const constraintPatterns = [ /\d+\s*<=\s*\w+\s*<=\s*\d+/g, /\d+\s*<=\s*\w+\.length\s*<=\s*\d+/g, /-?\d+\s*<=\s*\w+\[i\]\s*<=\s*\d+/g ]; for (const pattern of constraintPatterns) { const matches = html.match(pattern); if (matches) { constraints.push(...matches); } } } // Generate basic constraints if none found if (constraints.length === 0) { constraints.push("1 <= n <= 10^4", "Valid input guaranteed"); } return constraints; } /** * Extract topics from HTML */ extractTopics(html) { const topics = []; // Pattern 1: Look for topic tags const topicMatches = html.match(/data-topic="([^"]+)"/g); if (topicMatches) { for (const match of topicMatches) { const topic = match.match(/data-topic="([^"]+)"/)[1]; if (!topics.includes(topic)) { topics.push(topic); } } } // Pattern 2: Look for JSON topics const jsonTopicsMatch = html.match(/"topicTags":\[([^\]]+)\]/); if (jsonTopicsMatch) { const topicData = jsonTopicsMatch[1]; const topicNames = topicData.match(/"name":"([^"]+)"/g); if (topicNames) { for (const topicName of topicNames) { const topic = topicName.match(/"name":"([^"]+)"/)[1]; if (!topics.includes(topic)) { topics.push(topic); } } } } return topics.length > 0 ? topics : ['Algorithm']; } /** * Extract companies from HTML */ extractCompanies(html) { const companies = []; // Look for company information in JSON data const companyMatch = html.match(/"companyTags":\[([^\]]+)\]/); if (companyMatch) { const companyData = companyMatch[1]; const companyNames = companyData.match(/"name":"([^"]+)"/g); if (companyNames) { for (const companyName of companyNames) { const company = companyName.match(/"name":"([^"]+)"/)[1]; companies.push(company); } } } return companies.length > 0 ? companies : ['Tech Companies']; } /** * Generate function signatures for different languages */ generateFunctionSignatures(problemTitle, html) { // Try to extract from code templates in HTML const codeTemplates = this.extractCodeTemplates(html); if (codeTemplates.javascript) { return this.parseCodeTemplates(codeTemplates); } // Fallback: generate based on problem title const functionName = this.generateFunctionName(problemTitle); return { javascript: { name: functionName, params: [{ name: "input", type: "any" }], returnType: "any" }, python: { name: functionName, params: [{ name: "input", type: "Any" }], returnType: "Any" }, java: { name: functionName, params: [{ name: "input", type: "Object" }], returnType: "Object" }, cpp: { name: functionName, params: [{ name: "input", type: "auto" }], returnType: "auto" } }; } /** * Extract code templates from HTML */ extractCodeTemplates(html) { const templates = {}; // Look for JavaScript template const jsMatch = html.match(/var\s+(\w+)\s*=\s*function\s*\(([^)]*)\)/); if (jsMatch) { templates.javascript = { name: jsMatch[1], params: jsMatch[2] }; } return templates; } /** * Parse code templates to extract signatures */ parseCodeTemplates(templates) { const signatures = {}; if (templates.javascript) { const params = templates.javascript.params .split(',') .map(p => p.trim()) .filter(p => p) .map(p => ({ name: p, type: 'any' })); signatures.javascript = { name: templates.javascript.name, params: params, returnType: 'any' }; } return signatures; } /** * Generate function name from problem title */ generateFunctionName(title) { return title .replace(/[^a-zA-Z0-9\s]/g, '') .split(' ') .map((word, index) => { if (index === 0) { return word.toLowerCase(); } return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(); }) .join(''); } /** * Generate comprehensive test cases */ generateTestCases(examples, constraints) { const testCases = []; // Add example-based test cases examples.forEach((example, index) => { testCases.push({ input: [example.input], expected: example.output, description: `Example ${index + 1}: ${example.explanation || 'Basic test case'}`, category: 'basic' }); }); // Generate edge cases based on constraints constraints.forEach(constraint => { const edgeCases = this.generateEdgeCasesFromConstraint(constraint); testCases.push(...edgeCases); }); // Add some generic edge cases testCases.push( { input: [null], expected: null, description: 'Null input edge case', category: 'edge' }, { input: [[]], expected: null, description: 'Empty array edge case', category: 'edge' }, { input: [''], expected: null, description: 'Empty string edge case', category: 'edge' } ); // Add stress test testCases.push({ input: [Array(1000).fill(1)], expected: null, description: 'Large input stress test', category: 'stress' }); return testCases.slice(0, 12); // Limit to 12 test cases } /** * Generate edge cases from constraint */ generateEdgeCasesFromConstraint(constraint) { const edgeCases = []; // Look for numeric ranges const rangeMatch = constraint.match(/(\d+)\s*<=\s*\w+\s*<=\s*(\d+)/); if (rangeMatch) { const min = parseInt(rangeMatch[1]); const max = parseInt(rangeMatch[2]); edgeCases.push({ input: [min], expected: null, description: `Minimum value: ${min}`, category: 'edge' }); edgeCases.push({ input: [max], expected: null, description: `Maximum value: ${max}`, category: 'edge' }); } return edgeCases; } /** * Clean HTML content */ cleanHTML(html) { return html .replace(/<[^>]+>/g, '') // Remove HTML tags .replace(/&lt;/g, '<') .replace(/&gt;/g, '>') .replace(/&amp;/g, '&') .replace(/&quot;/g, '"') .replace(/&#39;/g, "'") .replace(/&nbsp;/g, ' ') .replace(/\\n/g, '\n') .replace(/\\"/g, '"') .replace(/\s+/g, ' ') .trim(); } /** * Get problem by ID (convert to slug first) */ async getProblemById(problemId) { // First, we need to get the problem slug from the ID // This requires scraping the problems list or using a mapping const problemSlug = await this.getProblemSlugById(problemId); return await this.scrapeProblem(problemSlug); } /** * Get problem slug by ID (simplified mapping for common problems) */ async getProblemSlugById(problemId) { const commonProblems = { 1: 'two-sum', 2: 'add-two-numbers', 3: 'longest-substring-without-repeating-characters', 4: 'median-of-two-sorted-arrays', 5: 'longest-palindromic-substring', 7: 'reverse-integer', 8: 'string-to-integer-atoi', 9: 'palindrome-number', 11: 'container-with-most-water', 13: 'roman-to-integer', 14: 'longest-common-prefix', 15: '3sum', 20: 'valid-parentheses', 21: 'merge-two-sorted-lists', 26: 'remove-duplicates-from-sorted-array', 27: 'remove-element', 35: 'search-insert-position', 53: 'maximum-subarray', 70: 'climbing-stairs', 121: 'best-time-to-buy-and-sell-stock', 125: 'valid-palindrome', 136: 'single-number', 141: 'linked-list-cycle', 169: 'majority-element', 206: 'reverse-linked-list', 217: 'contains-duplicate', 226: 'invert-binary-tree', 242: 'valid-anagram', 268: 'missing-number', 283: 'move-zeroes', 344: 'reverse-string', 387: 'first-unique-character-in-a-string', 412: 'fizz-buzz', 448: 'find-all-numbers-disappeared-in-an-array', 461: 'hamming-distance', 463: 'island-perimeter', 476: 'number-complement', 496: 'next-greater-element-i', 500: 'keyboard-row', 509: 'fibonacci-number', 520: 'detect-capital', 557: 'reverse-words-in-a-string-iii', 561: 'array-partition-i', 566: 'reshape-the-matrix', 575: 'distribute-candies', 589: 'n-ary-tree-preorder-traversal', 590: 'n-ary-tree-postorder-traversal', 594: 'longest-harmonious-subsequence', 598: 'range-addition-ii', 599: 'minimum-index-sum-of-two-lists', 605: 'can-place-flowers', 606: 'construct-string-from-binary-tree', 617: 'merge-two-binary-trees', 628: 'maximum-product-of-three-numbers', 637: 'average-of-levels-in-binary-tree', 643: 'maximum-average-subarray-i', 645: 'set-mismatch', 653: 'two-sum-iv-input-is-a-bst', 657: 'robot-return-to-origin', 661: 'image-smoother', 665: 'non-decreasing-array', 669: 'trim-a-binary-search-tree', 671: 'second-minimum-node-in-a-binary-tree', 674: 'longest-continuous-increasing-subsequence', 680: 'valid-palindrome-ii', 682: 'baseball-game', 686: 'repeated-string-match', 687: 'longest-univalue-path', 690: 'employee-importance', 693: 'binary-number-with-alternating-bits', 696: 'count-binary-substrings', 697: 'degree-of-an-array', 700: 'search-in-a-binary-search-tree', 703: 'kth-largest-element-in-a-stream', 704: 'binary-search', 705: 'design-hashset', 706: 'design-hashmap', 707: 'design-linked-list', 709: 'to-lower-case', 717: '1-bit-and-2-bit-characters', 720: 'longest-word-in-dictionary', 724: 'find-pivot-index', 728: 'self-dividing-numbers', 733: 'flood-fill', 734: 'sentence-similarity', 744: 'find-smallest-letter-greater-than-target', 746: 'min-cost-climbing-stairs', 747: 'largest-number-at-least-twice-of-others', 748: 'shortest-completing-word', 762: 'prime-number-of-set-bits-in-binary-representation', 766: 'toeplitz-matrix', 771: 'jewels-and-stones', 783: 'minimum-distance-between-bst-nodes', 784: 'letter-case-permutation', 788: 'rotated-digits', 796: 'rotate-string', 804: 'unique-morse-code-words', 806: 'number-of-lines-to-write-string', 811: 'subdomain-visit-count', 812: 'largest-triangle-area', 819: 'most-common-word', 821: 'shortest-distance-to-a-character', 824: 'goat-latin', 830: 'positions-of-large-groups', 832: 'flipping-an-image', 836: 'rectangle-overlap', 840: 'magic-squares-in-grid', 844: 'backspace-string-compare', 849: 'maximize-distance-to-closest-person', 852: 'peak-index-in-a-mountain-array', 859: 'buddy-strings', 860: 'lemonade-change', 867: 'transpose-matrix', 868: 'binary-gap', 872: 'leaf-similar-trees', 874: 'walking-robot-simulation', 876: 'middle-of-the-linked-list', 883: 'projection-area-of-3d-shapes', 884: 'uncommon-words-from-two-sentences', 888: 'fair-candy-swap', 892: 'surface-area-of-3d-shapes', 893: 'groups-of-special-equivalent-strings', 896: 'monotonic-array', 897: 'increasing-order-search-tree', 905: 'sort-array-by-parity', 908: 'smallest-range-i', 914: 'x-of-a-kind-in-a-deck-of-cards', 917: 'reverse-only-letters', 922: 'sort-array-by-parity-ii', 925: 'long-pressed-name', 929: 'unique-email-addresses', 933: 'number-of-recent-calls', 937: 'reorder-data-in-log-files', 938: 'range-sum-of-bst', 941: 'valid-mountain-array', 942: 'di-string-match', 944: 'delete-columns-to-make-sorted', 949: 'largest-time-for-given-digits', 953: 'verifying-an-alien-dictionary', 961: 'n-repeated-element-in-size-2n-array', 965: 'univalued-binary-tree', 970: 'powerful-integers', 976: 'largest-perimeter-triangle', 977: 'squares-of-a-sorted-array', 985: 'sum-of-even-numbers-after-queries', 989: 'add-to-array-form-of-integer', 993: 'cousins-in-binary-tree', 997: 'find-the-town-judge', 999: 'available-captures-for-rook' }; return commonProblems[problemId] || `problem-${problemId}`; } } module.exports = { LeetCodeScraper };