
obsidian-cyber-utils


Utility functions for developing Obsidian plugins, focused on cybersecurity note-taking.
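The single-line sourcemap below bundles the package's TypeScript sources (cyberPlugin.ts, vaultUtils.ts, regex.ts, textUtils.ts, matcher.ts, the OCR providers, and the API clients). Those modules export helpers for defanging and refanging indicators, extracting IOCs from note text, and a CyberPlugin base class for security-focused plugins. A minimal usage sketch follows, assuming these utilities are re-exported from the package root; the import path is an assumption, and the commented outputs follow the JSDoc examples and logic visible in the source rather than a verified run.

// Sketch only: assumes defangIp, defangDomain, refangIoc, and getMatches are
// re-exported from the package root (not verified against the published entry point).
import { defangIp, defangDomain, refangIoc, getMatches } from "obsidian-cyber-utils";

// Defang indicators before writing them into a note (outputs per the source JSDoc).
const safeIp = defangIp("8.8.8.8");                 // "8.8.8[.]8"
const safeUrl = defangDomain("https://google.com"); // "hxxps[://]google[.]com"

// Restore the original form when an indicator needs to be used again.
const fanged = refangIoc(safeUrl);                  // "https://google.com"

// Extract grouped IOCs (public/private IPs, IPv6, domains, hashes) from note text.
const groups = getMatches("Beacon to 10.0.0.5, C2 at evil[.]example via hxxps");
for (const group of groups) {
  console.log(group.title, group.items); // e.g. "IPs (Private)" ["10.0.0.5"]
}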

{"version":3,"sources":["../src/cyberPlugin.ts","../src/vaultUtils.ts","../src/regex.ts","../src/iocParser.ts","../src/textUtils.ts","../src/matcher.ts","../src/ocr.ts","../src/ocr/utils.ts","../src/ocr/ocrProvider.ts","../src/ocr/baseProcessor.ts","../src/ocr/tesseractProcessor.ts","../src/domUtils.ts","../src/editorUtils.ts","../src/modal.ts","../src/workspaceUtils.ts","../src/safeStorageProvider.ts","../src/searchSites.ts","../src/api/virusTotal.ts","../src/api/gemini.ts","../src/api/openai.ts","../src/api/ollama.ts","../src/ocr/geminiProcessor.ts"],"sourcesContent":["import { App, EventRef, Plugin, PluginManifest, TAbstractFile, TFile, WorkspaceLeaf } from \"obsidian\";\nimport { EventEmitter } from \"events\";\n\nimport { SearchSite } from \"./searchSites\";\nimport { getAttachments } from \"./vaultUtils\";\nimport { getIndicatorMatches, getMatches } from \"./matcher\";\nimport { initializeWorker } from \"./ocr\";\nimport { validateDomains } from \"./textUtils\";\nimport { filterExclusions, Indicator, IndicatorExclusion, IndicatorSource, ParsedIndicators } from \"./iocParser\";\nimport { IEventEmitter, IOcrProvider, OcrCompletePayload, OcrErrorPayload, OcrJobData, OcrProgressPayload, OcrProvider, OcrProviderEvent } from \"./ocr/ocrProvider\";\nimport { readImageFile } from \"./ocr/utils\";\nimport { TesseractOcrProcessor } from \"./ocr/tesseractProcessor\";\nimport { IOcrProcessor } from \"./ocr/baseProcessor\";\n\nexport interface CyberPluginSettings {\n validTld: string[];\n searchSites: SearchSite[];\n}\n\nexport interface IndicatorExclusions {\n ipv4Exclusions: IndicatorExclusion[];\n ipv6Exclusions: IndicatorExclusion[];\n hashExclusions: IndicatorExclusion[];\n domainExclusions: IndicatorExclusion[];\n}\n\nexport type CyberPluginEvent = 'settings-change' | 'file-opened' | 'file-modified' | 'attachments-changed' | 'indicators-changed';\n\n/**\n * An Obsidian plugin class focused on Cybersecurity use cases.\n */\nexport class CyberPlugin extends Plugin {\n private isDebugging: boolean = false;\n\n settings: CyberPluginSettings | undefined;\n validTld: string[] | null | undefined;\n sidebarContainers: Map<string, WorkspaceLeaf> | undefined;\n protected emitter: EventEmitter;\n protected indicators: Indicator[];\n protected ocrIndicators: Indicator[] | null = null;\n protected ocrCache: Map<string, Map<string, Indicator[]>> = new Map();\n protected exclusions: IndicatorExclusions;\n private fileOpenRef: EventRef | null = null;\n private fileModifyRef: EventRef | null = null;\n private vaultCacheRef: EventRef | null = null;\n protected activeFile: TFile | null = null;\n protected activeFileContent: string | null = null;\n protected activeFileAttachments: TFile[] | null = null;\n\n protected ocrProvider: IOcrProvider | null = null;\n private ocrProcessingEmitter: IEventEmitter<OcrProviderEvent>;\n protected uiNotifier: IEventEmitter<{ type: CyberPluginEvent; payload: unknown }>;\n\n private ocrProgressRef?: (payload: OcrProgressPayload) => void;\n private ocrCompleteRef?: (payload: OcrCompletePayload) => Promise<void>;\n private ocrErrorRef?: (payload: OcrErrorPayload) => void;\n\n protected worker: Tesseract.Worker | null = null;\n // private fileProgressRef: () => void;\n // private fileCompleteRef: () => void;\n\n constructor(app: App, manifest: PluginManifest, enableDebug: boolean = false) {\n super(app, manifest);\n this.isDebugging = enableDebug;\n \n // Initialize emitter\n this.emitter = new EventEmitter();\n\n // Initialize indicators and exclusions\n this.indicators = [];\n 
this.exclusions = {\n ipv4Exclusions: [],\n ipv6Exclusions: [],\n hashExclusions: [],\n domainExclusions: []\n }\n\n this.uiNotifier = new EventEmitter() as IEventEmitter<{ type: CyberPluginEvent; payload: unknown }>;\n this.ocrProcessingEmitter = new EventEmitter() as IEventEmitter<OcrProviderEvent>;\n const cacheChecker = this.hasCachedOcrResult.bind(this);\n this.ocrProvider = new OcrProvider([], cacheChecker);\n\n this.registerObsidianListeners();\n }\n\n async onload(): Promise<void> {\n if (!this.ocrProvider) {\n await this.initializeOcrSystem();\n }\n\n this.worker = await initializeWorker();\n this.ocrProvider?.addProcessor(new TesseractOcrProcessor(this.worker));\n\n this.setupOcrEventListeners();\n }\n\n async refreshIndicators() {\n // Refresh static indicators from Markdown content\n if (this.activeFile && this.activeFileContent) {\n this.indicators = getIndicatorMatches(this.activeFileContent, IndicatorSource.TEXT);\n this.debug(`Refreshed ${this.indicators.length} indicators from Markdown text.`);\n } else {\n this.indicators = [];\n }\n\n const collectedOcrIndicators: Indicator[] = [];\n if (this.activeFileAttachments && this.activeFileAttachments.length > 0) {\n this.debug(`Checking OCR cache for ${this.activeFileAttachments.length} attachments.`);\n for (const att of this.activeFileAttachments) {\n const fileCache = this.ocrCache.get(att.path);\n if (fileCache) {\n for (const ind of fileCache.values()) {\n collectedOcrIndicators.push(...ind);\n }\n this.debug(`Found cached OCR indicators for ${att.path}`);\n } else {\n this.debug(`No cached OCR indicators for ${att.path}`);\n }\n }\n }\n this.ocrIndicators = collectedOcrIndicators;\n\n this.emitter.emit('indicators-changed');\n }\n\n getFileContent(): string | null {\n return this.activeFileContent;\n }\n\n getFileAttachments(): TFile[] {\n return this.activeFileAttachments ?? [];\n }\n\n getIocExclusions(): IndicatorExclusions {\n return this.exclusions;\n }\n\n /**\n * Compare attachments for the current file against the plugin's attachment list.\n * @param file the file to evaluate\n * @returns true if attachments are unchanged, false if attachments have changed\n */\n private compareAttachments(file: TFile): boolean {\n const currentAttachments = getAttachments(file.path, this.app);\n const existingAttachments = this.activeFileAttachments ?? [];\n \n const currentPaths = new Set(currentAttachments.map(f => f.path));\n const existingPaths = new Set(existingAttachments.map(f => f.path));\n\n const unchanged = currentPaths.size === existingPaths.size &&\n [...currentPaths].every(path => existingPaths.has(path));\n\n if (!unchanged) {\n this.debug(`Attachments changed for ${file.path}. 
New count: ${currentPaths.size}, Old count: ${existingPaths.size}`);\n this.activeFileAttachments = currentAttachments;\n this.emitter.emit('attachments-changed');\n }\n return unchanged;\n }\n\n registerObsidianListeners(): void {\n this.fileOpenRef = this.app.workspace.on('file-open', this.handleFileOpen.bind(this));\n this.fileModifyRef = this.app.vault.on('modify', this.handleFileModify.bind(this));\n this.vaultCacheRef = this.app.metadataCache.on('resolve', this.handleMetadataResolve.bind(this));\n }\n\n async handleFileOpen(file: TFile | null): Promise<void> {\n if (file && file instanceof TFile) {\n this.activeFile = file;\n this.activeFileContent = await this.app.vault.cachedRead(file);\n this.compareAttachments(file);\n this.emitter.emit('file-opened');\n await this.refreshIndicators();\n this.triggerOcrProcessing();\n } else {\n this.activeFile = null;\n this.activeFileContent = null;\n this.activeFileAttachments = null;\n this.indicators = [];\n this.ocrIndicators = [];\n await this.refreshIndicators();\n this.debug(\"Active file closed or is not a TFile, indicators cleared.\");\n }\n }\n\n async handleFileModify(file: TAbstractFile): Promise<void> {\n if (file === this.activeFile && file instanceof TFile) {\n this.activeFileContent = await this.app.vault.cachedRead(file);\n const attachmentsChanged = this.compareAttachments(file);\n this.emitter.emit('file-modified');\n await this.refreshIndicators();\n\n if (attachmentsChanged) {\n this.debug(\"Attachments changed during file modify, re-triggering OCR processing.\");\n this.triggerOcrProcessing();\n }\n }\n }\n\n async handleMetadataResolve(file: TFile): Promise<void> {\n if (file === this.activeFile) {\n const attachmentsChanged = this.compareAttachments(file);\n await this.refreshIndicators();\n\n if (attachmentsChanged) {\n this.debug(\"Attachments changed during metadata resolve, re-triggering OCR processing.\");\n this.triggerOcrProcessing();\n }\n }\n }\n\n protected async triggerOcrProcessing(): Promise<void> {\n if (!this.ocrProvider || !this.activeFileAttachments || this.activeFileAttachments.length === 0) {\n return;\n }\n\n this.debug(`Triggering OCR for ${this.activeFileAttachments.length} new attachment(s)...`);\n\n const ocrJobs: OcrJobData[] = [];\n for (const att of this.activeFileAttachments) {\n try {\n const content = await readImageFile(this.app, att);\n ocrJobs.push({\n fileId: att.path,\n imageData: content\n });\n this.debug(`Added Job for ${att.path}`);\n } catch (error) {\n console.error(`Failed to read or encode attachment ${att.path}`, error);\n this.handleOcrError({\n fileId: att.path,\n processorId: 'plugin',\n error: `Failed to read file: ${error}`,\n canRetry: false\n })\n }\n }\n\n if (ocrJobs.length > 0) {\n this.ocrProvider.processAttachments(this.activeFile?.path ?? 
'unknown', ocrJobs)\n .catch((error: any) => {\n console.error(`Error occurred during OCR Provider processAttachments call:`, error);\n });\n }\n }\n\n /**\n * Add search sites to a set of ParsedIndicators.\n */\n protected applySearchSites(indicators: ParsedIndicators[]): ParsedIndicators[] {\n if (!this.settings?.searchSites) return indicators;\n return indicators.map(indicator => {\n const indicatorCopy = { ...indicator };\n\n switch (indicator.title) {\n case \"IPs (Public)\":\n case \"IPs (Private)\":\n case \"IPv6\":\n indicatorCopy.sites = this.settings?.searchSites.filter(\n (x: SearchSite) => x.enabled && x.ip\n );\n break;\n case \"Domains\":\n indicatorCopy.sites = this.settings?.searchSites.filter(\n (x: SearchSite) => x.enabled && x.domain\n );\n break;\n case \"Hashes\":\n indicatorCopy.sites = this.settings?.searchSites.filter(\n (x: SearchSite) => x.enabled && x.hash\n );\n default:\n // No sites for unknown indicator types\n indicatorCopy.sites = [];\n }\n\n return indicatorCopy;\n });\n }\n\n /**\n * Validate that domains end with a valid TLD\n */\n protected validateDomains(indicators: ParsedIndicators[]): ParsedIndicators[] {\n if (!this.validTld) return indicators;\n return indicators.map(indicator => {\n const indicatorCopy = { ...indicator };\n\n // Only validate domains\n if (this.validTld && indicator.title === \"Domain\" && indicator.items.length > 0) {\n indicatorCopy.items = validateDomains(indicator.items, this.validTld);\n }\n\n return indicatorCopy;\n });\n }\n\n protected processExclusions(indicators: ParsedIndicators[]): ParsedIndicators[] {\n return indicators.map(iocs => {\n const processed = { ...iocs };\n\n switch(processed.title) {\n case \"IPs\":\n case \"IPs (Public)\":\n case \"IPs (Private)\":\n processed.exclusions = this.exclusions.ipv4Exclusions;\n break;\n case \"IPv6\":\n processed.exclusions = this.exclusions.ipv6Exclusions;\n break;\n case \"Domains\":\n processed.exclusions = this.exclusions.domainExclusions;\n break;\n case \"Hashes\":\n processed.exclusions = this.exclusions.hashExclusions;\n break;\n default:\n processed.exclusions = [];\n break;\n }\n\n if (processed.exclusions.length > 0) {\n processed.items = filterExclusions(processed.items, processed.exclusions);\n }\n\n return processed;\n });\n }\n\n async initializeOcrSystem(): Promise<void> {\n const processors: IOcrProcessor[] = [];\n\n if (processors.length === 0) {\n console.warn(\"No OCR processors were successfully initialized.\");\n return;\n }\n const cacheChecker = this.hasCachedOcrResult.bind(this);\n\n this.ocrProvider = new OcrProvider(processors, cacheChecker);\n this.debug(`OCR Provider initialized with ${processors.length} processors`);\n }\n\n public hasCachedOcrResult(fileId: string, processorId: string): boolean {\n const hasResult = this.ocrCache.get(fileId)?.has(processorId) ?? 
false;\n if (hasResult) {\n this.debug(`[Cache Check] Cache hit for ${fileId} using ${processorId}.`);\n }\n return hasResult;\n }\n\n setupOcrEventListeners(): void {\n if (!this.ocrProvider) return;\n\n this.ocrCompleteRef = this.handleOcrComplete.bind(this);\n this.ocrErrorRef = this.handleOcrError.bind(this);\n this.ocrProgressRef = this.handleOcrProgress.bind(this);\n\n this.ocrProvider.emitter.on('ocr-complete', this.ocrCompleteRef);\n this.ocrProvider.emitter.on('ocr-error', this.ocrErrorRef);\n this.ocrProvider.emitter.on('ocr-progress', this.ocrProgressRef);\n }\n\n async handleOcrComplete(payload: OcrCompletePayload): Promise<void> {\n this.debug(`OCR Complete: ${payload.fileId} by ${payload.processorId}`);\n\n // Check if the completed OCR belongs to an attachment of the current file\n const isRelevantAttachment = this.activeFileAttachments?.some(att => att.path === payload.fileId);\n if (!isRelevantAttachment) {\n this.debug(`Received OCR result for ${payload.fileId} which is not an attachment of the active file`);\n }\n\n const indicators = getIndicatorMatches(payload.extractedText, IndicatorSource.OCR, {'processor': payload.processorId});\n this.debug(`Extracted ${indicators.length} indicators via OCR from ${payload.fileId}.`);\n\n if (!this.ocrCache.has(payload.fileId)) {\n this.ocrCache.set(payload.fileId, new Map<string, Indicator[]>());\n }\n const fileCache = this.ocrCache.get(payload.fileId);\n if (fileCache) {\n fileCache.set(payload.processorId, indicators);\n this.debug(`Cached OCR results from ${payload.processorId} for ${payload.fileId}`);\n }\n\n if (isRelevantAttachment) {\n this.debug(`OCR result is for a relevant attachment (${payload.fileId}), refreshing indicators.`);\n await this.refreshIndicators();\n }\n }\n\n handleOcrError(payload: OcrErrorPayload): void {\n console.error(`OCR error: ${payload.fileId} by ${payload.processorId}:`, payload.error);\n }\n\n handleOcrProgress(payload: OcrProgressPayload): void {\n this.debug(`OCR Progress: ${payload.fileId} by ${payload.processorId}: ${payload.status} ${payload.progressPercent ?? ''}% ${payload.message ?? ''}`);\n }\n\n /**\n * Activate a view of the given type in the right sidebar.\n * @param type a view type\n */\n async activateView(type: string): Promise<void> {\n const { workspace } = this.app;\n let leaf = await workspace.ensureSideLeaf(type, 'right', { active: true });\n this.sidebarContainers?.set(type, leaf);\n }\n\n on(event: CyberPluginEvent, callback: (...args: any[]) => void) {\n this.emitter.on(event, callback);\n return () => this.emitter.off(event, callback);\n }\n\n async saveSettings() {\n await this.saveData(this.settings);\n this.emitter.emit('settings-change');\n }\n\n async onunload(): Promise<void> {\n if (this.fileOpenRef) this.app.workspace.offref(this.fileOpenRef);\n if (this.fileModifyRef) this.app.vault.offref(this.fileModifyRef);\n if (this.vaultCacheRef) this.app.metadataCache.offref(this.vaultCacheRef);\n \n this.worker?.terminate();\n }\n\n public setDebugging(enabled: boolean): void {\n const changed = this.isDebugging !== enabled;\n this.isDebugging = enabled;\n if (changed) {\n this.debug(`Debugging ${enabled ? 
'enabled' : 'disabled'}.`);\n }\n }\n\n protected debug(...args: any[]): void {\n if (this.isDebugging) {\n console.log(`[${this.manifest.name}]`, ...args);\n }\n }\n}","import { App, TFile, Vault } from 'obsidian';\n\nexport { \n checkFolderExistsRecursive,\n createFolderIfNotExists,\n createNote,\n getAttachments,\n getAttachmentFiles,\n getBacklinks,\n noteAppend,\n noteReplace,\n openNote,\n removeDotObsidian\n};\n\n/**\n * Check if a given folder exists\n * @param rootPath the folder to start searching from\n * @returns folder name, blank if not exists\n */\nasync function checkFolderExistsRecursive(vault: Vault, folderName: string): Promise<string> {\n async function searchFolder(rootPath: string): Promise<string> {\n const checkVal = rootPath + \"/\" + folderName;\n const folderExists = await vault.adapter.exists(checkVal, true);\n if (folderExists) return folderName;\n const subFolders = (await vault.adapter.list(rootPath)).folders;\n // skip .obsidian config folder\n const i = subFolders.indexOf('.obsidian');\n //i > -1 ? subFolders.splice(i, 1) : {};\n if (i > -1) {\n subFolders.splice(i, 1);\n }\n for (const subFolder of subFolders) {\n const isSubFolder = await vault.adapter.exists(subFolder, true);\n if (isSubFolder) {\n const found = await searchFolder(subFolder);\n if (found && !found.startsWith(subFolder)) {\n return `${subFolder}/${found}`;\n } \n else if (found) return found;\n }\n }\n\n return \"\";\n }\n\n return await searchFolder(\"\");\n}\n\n\n/**\n * Remove .obsidian config folder, .DS_Store file from a list of file/folder names\n * @param files an array of file/folder names\n * @returns the array with unnecessary files removed\n */\nfunction removeDotObsidian(files: string[]): string[] {\n const removals = ['.obsidian', '.DS_Store'];\n removals.forEach((value) => {\n const i = files.indexOf(value);\n if (i > -1) {\n files.splice(i, 1);\n }\n })\n return files;\n}\n\n/**\n * Creates a folder if it does not already exist.\n * @param vault\n */\nasync function createFolderIfNotExists(vault: Vault, folderName: string) {\n const folder = await checkFolderExistsRecursive(vault, folderName);\n if (!folder) {\n await vault.createFolder(folderName);\n }\n}\n\n/**\n * Creates a note within the given vault.\n * @param vault\n * @param folderName\n * @param noteTitle\n * @returns the newly created note\n */\nasync function createNote(vault: Vault, folderName: string, noteTitle: string): Promise<TFile> {\n return await vault.create(`${folderName}/${noteTitle}.md`, '');\n}\n\n/**\n * Get an array of the unresolved backlinks in a note.\n * @param notePath the note to check\n * @param app the current App class instance\n * @param resolved whether or not you want resolved links\n * @returns an array of strings\n */\nfunction getBacklinks(notePath: string, app: App, resolved = false): Array<string> {\n let backlinks = null;\n if (resolved) {\n backlinks = app.metadataCache.resolvedLinks[notePath];\n } else {\n backlinks = app.metadataCache.unresolvedLinks[notePath];\n }\n const retval = [];\n for (const i in backlinks) {\n retval.push(i);\n }\n return retval;\n}\n\n/**\n * Get an array of linked non-markdown (presumably attachment) files in a note.\n * @param notePath the path of the note to check for attachment links\n * @param app the current App class instance\n */\nfunction getAttachments(notePath: string, app: App): Array<TFile> {\n const links = getBacklinks(notePath, app, true);\n const attachments = new Set<TFile>();\n links.forEach((link) => {\n const file = 
app.vault.getAbstractFileByPath(link);\n if (file && file instanceof TFile && file.extension !== \"md\") {\n attachments.add(file);\n }\n });\n return Array.from(attachments);\n}\n\n/**\n * Get an array of linked file objects from a note.\n * @param note the note to check for linked attachment files\n * @param app the current App class instance\n */\nfunction getAttachmentFiles(note: TFile, app: App): TFile[] {\n const links = getBacklinks(note.path, app, true);\n const attachments = new Set<TFile>();\n links.forEach((link) => {\n const file = app.vault.getAbstractFileByPath(link);\n if (file && file instanceof TFile && file.extension !== \"md\") {\n attachments.add(file);\n }\n });\n return Array.from(attachments);\n}\n\n/**\n * Append to the end of a note\n * @param vault the current vault\n * @param note the note to append to\n * @param content the content to append\n * @returns the modified content\n */\nfunction noteAppend(vault: Vault, note: TFile, content: string): Promise<string> {\n return vault.process(note, (data) => {\n return data + content;\n });\n}\n\n/**\n * Replace content in a note by regular expression\n * @param vault the current vault\n * @param note the note to append to\n * @param regex the pattern to match for replacement\n * @param content the content to replace with \n * @returns the modified content\n */\nfunction noteReplace(vault: Vault, note: TFile, regex: RegExp, content: string): Promise<string> {\n return vault.process(note, (data) => {\n return data.replace(regex, content);\n });\n}\n\n/**\n * Opens the note in a new tab\n * @param app the current App class instance\n * @param note the file you would like to open\n */\nfunction openNote(app: App, note: TFile) {\n if (!note || !app) return;\n const view = app.workspace.getLeaf();\n view.openFile(note);\n}","// regex for possibly defanged values\nconst ipv4Octet = \"(?:25[0-5]|\" + // 250-255\n \"(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\"; // 0-249\nconst ipv6Octet = \"[0-9a-fA-F]{1,4}\"\nexport const IP_REGEX = new RegExp(\n // match a possibly url-encoded character preceding, or\n // a word boundary\n String.raw`(?:%[0-9a-fA-F]{2})?(?=\\b|^)(` +\n `(?:${ipv4Octet + possiblyDefangedVal(String.raw`\\.`)}){3}` +\n ipv4Octet +\n \")\",\n \"g\" // flags\n);\nexport { IP_REGEX as IPv4_REGEX };\nexport const IPv6_REGEX = new RegExp(\n `((?:${ipv6Octet}${possiblyDefangedVal(\":\")}){7}${ipv6Octet}|` + // 8 segments\n `(?:(?:${ipv6Octet}${possiblyDefangedVal(\":\")})*${ipv6Octet})?${possiblyDefangedVal(\"::\")}` + // zero or more segments followed by ::\n `(?:(?:${ipv6Octet}${possiblyDefangedVal(\":\")})*${ipv6Octet})?)`, // zero or more segments\n \"gi\" // flags\n)\nexport const LOCAL_IP_REGEX = /^((127\\.)|(10\\.)|(172\\.1[6-9]\\.)|(172\\.2[0-9]\\.)|(172\\.3[0-1]\\.)|(192\\.168\\.))/g;\nexport const MACRO_REGEX = /({{([^}]+)}})/g;\nconst DOMAIN_REGEX_OLD = /(?:%[0-9a-f]{2})?((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(\\.|\\[\\.\\]))+[a-z][a-z0-9-]{0,61}[a-z](?=\\.?)\\b)/gi;\nexport const DOMAIN_REGEX = new RegExp(\n String.raw`(?:%[0-9a-f]{2})?((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?` +\n possiblyDefangedVal(String.raw`\\.`) + `)+` + \n String.raw`[a-z][a-z0-9-]{0,61}[a-z](?=\\.?)\\b)`,\n \"gi\" // flags\n);\nconst hash_start = \"(?:%[0-9a-f]{2})?(?<=^|[^a-f0-9]+)\"; // beginning of string or non-hex character\nconst hash_end = \"(?=$|[^a-f0-9]+)\"; // end of string or non-hex character\nexport const HASH_REGEX = new RegExp(\n hash_start +\n \"([a-f0-9]{64}|[a-f0-9]{40}|[a-f0-9]{32})\" + // standard hash length (SHA256, 
MD5, SHA1)\n hash_end,\n \"gi\" // flags\n);\nexport const SHA256_REGEX = new RegExp(\n hash_start +\n \"([a-f0-9]{64})\" + // SHA256 hash length\n hash_end,\n \"gi\" // flags\n);\nexport const MD5_REGEX = new RegExp(\n hash_start +\n \"([a-f0-9]{32})\" + // SHA256 hash length\n hash_end,\n \"gi\" // flags\n);\nexport const SHA1_REGEX = new RegExp(\n hash_start +\n \"([a-f0-9]{40})\" + // SHA256 hash length\n hash_end,\n \"gi\" // flags\n);\nexport const FILE_REGEX = new RegExp(\n String.raw`(?:%[0-9a-f]{2})?(?<=^|\\s|\")` + // beginning of string, space, or open quote\n \"(\" +\n String.raw`(?:\\w:\\\\|[\\\\/])` + // drive letter or leading slash\n String.raw`(?:[^\\\\/:][\\\\/]?)+` + // \n String.raw`[^\\\\/\\n\"|]+\\.\\w+` + // filename with extension\n \")\",\n \"gi\"\n)\n\nfunction possiblyDefangedVal(val: string): string {\n return String.raw`[\\[\\(\\\\]?${val}[\\]\\)]?`;\n}","import { SearchSite } from \"./searchSites\";\n\nexport type IndicatorExclusion = string | RegExp;\n\nexport interface Indicator {\n value: string;\n type: IndicatorType;\n source?: IndicatorSource;\n metadata?: Record<string, any>;\n}\n\nexport type GroupedIndicators = {\n [key in IndicatorSource]: {\n [key in IndicatorType]: Indicator[]\n };\n};\n\nexport enum IndicatorType {\n IPv4 = 'ipv4',\n PRIVATE_IPv4 = 'private_ipv4',\n IPv6 = 'ipv6',\n PRIVATE_IPv6 = 'private_ipv6',\n DOMAIN = 'domain',\n HASH = 'hash',\n EMAIL = 'email'\n}\n\nexport enum IndicatorSource {\n TEXT = 'text',\n OCR = 'ocr'\n}\n\nexport interface ParsedIndicators {\n title: string;\n items: string[];\n sites?: SearchSite[];\n exclusions?: IndicatorExclusion[];\n}\n\n/**\n * Filter a list of indicators based on the provided exclusions.\n * @param items a list of indicators\n * @param exclusions a list of indicator exclusions\n * @returns the list of indicators with exclusions filtered\n */\nexport function filterExclusions(items: string[], exclusions: IndicatorExclusion[]): string[] {\n return items.filter(item => \n !exclusions.some(exclusion => \n typeof exclusion === 'string'\n ? 
item === exclusion\n : exclusion.test(item)\n )\n );\n}\n\nexport function groupIndicators(indicators: Indicator[]): GroupedIndicators {\n const grouped: GroupedIndicators = {\n [IndicatorSource.TEXT]: {\n [IndicatorType.IPv4]: [],\n [IndicatorType.PRIVATE_IPv4]: [],\n [IndicatorType.IPv6]: [],\n [IndicatorType.PRIVATE_IPv6]: [],\n [IndicatorType.DOMAIN]: [],\n [IndicatorType.HASH]: [],\n [IndicatorType.EMAIL]: []\n },\n [IndicatorSource.OCR]: {\n [IndicatorType.IPv4]: [],\n [IndicatorType.PRIVATE_IPv4]: [],\n [IndicatorType.IPv6]: [],\n [IndicatorType.PRIVATE_IPv6]: [],\n [IndicatorType.DOMAIN]: [],\n [IndicatorType.HASH]: [],\n [IndicatorType.EMAIL]: []\n }\n };\n\n const seenIndicators = new Set<string>();\n \n for (const indicator of indicators) {\n // Default to text if not specified\n const source = indicator.source || IndicatorSource.TEXT;\n const uniqueKey = `${source}:${indicator.value}`;\n \n // If we've already seen this indicator in this source, skip it\n if (seenIndicators.has(uniqueKey)) {\n continue;\n }\n \n // Mark the indicator as seen and add to the appropriate group\n seenIndicators.add(uniqueKey);\n grouped[source][indicator.type].push(indicator);\n }\n\n return grouped;\n}","import { type RequestUrlParam, TFile, request } from \"obsidian\";\nimport type { Code } from \"./modal\";\nexport {\n addUniqueValuesToArray,\n constructMacroRegex,\n convertTime,\n defangIp,\n defangDomain,\n defangEmail,\n extractMacros,\n extractMatches,\n findFirstByRegex,\n friendlyDatetime,\n getValidTld,\n getIocType,\n isLocalIpv4,\n localDateTime,\n lowerSha256,\n lowerMd5,\n parseCodeBlocks,\n refangIoc,\n removeArrayDuplicates,\n replaceMacros,\n replaceTemplateText,\n todayLocalDate,\n todayFolderStructure,\n validateDomain,\n validateDomains\n}\nimport { DOMAIN_REGEX, HASH_REGEX, IP_REGEX, IPv6_REGEX, LOCAL_IP_REGEX, MACRO_REGEX } from \"./regex\";\n\nexport const TLD_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt';\n\n/**\n * Get a list of valid top-level domains from IANA.\n * @returns a promise with the list of valid strings, or null if failed.\n */\nasync function getValidTld(): Promise<string[] | null> {\n const tldParams = {url: 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt', throw: true} as RequestUrlParam;\n try {\n const data = await request(tldParams);\n const tlds = data.split('\\n');\n if (tlds[0].startsWith('#')) tlds.shift(); // first line comment\n if (!tlds.slice(-1)[0]) tlds.pop(); // last line empty string\n return tlds;\n } catch (e) {\n console.error('failed to get valid TLDs');\n console.error(e);\n return null;\n }\n}\n\n/**\n * @returns current local date as a string in format YYYY-MM-DD\n */\nfunction todayLocalDate(): string {\n const tzoffset = (new Date()).getTimezoneOffset() * 60000; //offset in milliseconds\n const date = (new Date(Date.now() - tzoffset)).toISOString().slice(0, 10);\n return date;\n}\n\n/**\n * @returns the local date/time in format `YYYY-MM-DD HH:SS`\n */\nfunction localDateTime() {\n return `${todayLocalDate()} ${new Date(Date.now()).toString().slice(16, 21)}`\n}\n\nexport interface folderPrefs {\n year: boolean,\n month: boolean,\n quarter: boolean,\n day: boolean\n}\n\n/**\n * Returns a string array with the folder structure for the current date based on user preferences\n * Format: `YYYY/YYYY-QQ/YYYY-MM/YYYY-MM-DD`\n * \n * @param prefs booleans specifying whether to include certain portions in the structure\n * @returns the folder structure for the current date\n */\nfunction todayFolderStructure(prefs: 
folderPrefs): Array<string> {\n const date = todayLocalDate();\n const year = date.slice(0,4);\n const month = Number(date.slice(5,7));\n const yearMonth = date.slice(0,7);\n const currentQuarter = Math.floor((month + 2) / 3);\n const folderArray = [];\n if (prefs.year) folderArray.push(year);\n if (prefs.quarter) folderArray.push(`${year}-Q${currentQuarter}`);\n if (prefs.month) folderArray.push(yearMonth);\n if (prefs.day) folderArray.push(date);\n return folderArray;\n}\n\n/**\n * Defangs IP addresses, e.g. `8.8.8.8` becomes `8.8.8[.]8`\n * @param text a string containing IP addresses\n * @returns input string with IP addresses defanged\n */\nfunction defangIp(text: string): string {\n return text.replaceAll(/(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\\.(\\d{1,3})/g, \"$1[.]$2\");\n}\n\n/**\n * Defangs domains preceded with http(s), e.g. `https://google.com` \n * becomes `hxxps[://]google[.]com`\n * @param text a string containing domain names\n * @returns input string with domains defanged\n */\nfunction defangDomain(text: string): string {\n const httpString = /http(s?):\\/\\//gi;\n const anyDomain = /(([\\w-]\\.?)+)\\.((xn--)?([a-z][a-z0-9-]{1,60}|[a-z][a-z0-9-]{1,29}\\.[a-z]{2,}))/gi;\n let retval = text.replaceAll(httpString, \"hxxp$1[://]\");\n retval = retval.replaceAll(anyDomain, \"$1[.]$3\");\n return retval;\n}\n\n/**\n * Defangs email addresses\n * @param text a string containing email addresses\n * @returns input string with email addresses defanged\n */\nfunction defangEmail(text: string): string {\n const emailString = /([^\\s]+)@([^\\s]+)\\.([^\\s]+)/gi;\n const retval = text.replaceAll(emailString, \"$1[@]$2[.]$3\");\n return retval;\n}\n\n/**\n * refang an IOC (domain, URL, IP, email address)\n * @param text a string with defanged IOC(s)\n * @returns the string with IOCs re-fanged\n */\nfunction refangIoc(text: string): string {\n let retval = text.replaceAll('[.]', '.');\n retval = retval.replaceAll('(.)', '.');\n retval = retval.replaceAll(String.raw`\\.`, '.');\n retval = retval.replaceAll('[/]', '/');\n retval = retval.replaceAll('[//]', '/');\n retval = retval.replaceAll('[@]', '@');\n retval = retval.replaceAll('[at]', '@');\n retval = retval.replaceAll('hxxp', 'http');\n retval = retval.replaceAll('[:]', ':');\n retval = retval.replaceAll('[::]', '::');\n retval = retval.replaceAll('[://]', '://');\n retval = retval.toLowerCase();\n return retval;\n}\n\n/**\n * Converts SHA256 hashes (or any 64 character hex string) to lowercase\n * @param text a string\n * @returns input string with SHA256 hashes converted to lowercase\n */\nfunction lowerSha256(hash: string): string {\n return hash.replace(/([0-9a-fA-F]{64})/g, function(match) {\n return match.toLowerCase();\n });\n}\n\n/**\n * Converts MD5 hashes (or any 32 character hex string) to lowercase\n * @param text a string\n * @returns input string with MD5 hashes converted to lowercase\n */\nfunction lowerMd5(text: string): string {\n return text.replace(/([0-9a-fA-F]{32})/g, function(match) {\n return match.toLowerCase();\n });\n}\n\nexport const dateTimeRegex = /(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}:\\d{2}:\\d{2}\\s+UTC)/g;\n/**\n * Converts a datetime string in the format `YYYY-MM-DD HH:MM:SS UTC`\n * to the following: `YYYY-MM-DD at HH:MM:SS UTC`\n * @returns input string with datetimes converted to \"{date} at {time}\"\n */\nfunction friendlyDatetime(text: string): string {\n return text.replace(dateTimeRegex, \"$1 at $2\");\n}\n\n/**\n * Find the first match of a regex in the given string.\n * @param text the text to 
search\n * @param regex the regular expression to match\n * @returns first match of a regex in the given string\n */\nfunction findFirstByRegex(text: string, regex: RegExp): string | null {\n const result = regex.exec(text);\n if (!result) {\n return null;\n } else {\n return result[1]\n }\n}\n\n/**\n * Put a template around the given content.\n * Supported macros: \n * - {{title}} the note title\n * - {{date}} the date in format YYYY-MM-DD\n * - {{time}} the time in format HH:SS\n * - {{content}} the content you want to replace'\n * @param template the template\n * @param content the content\n * @param note the note to which it will be inserted\n * @param contentMacro the string to replace content with @default \"{{content}}\"\n */\nfunction replaceTemplateText(template: string, content: string, note: TFile, contentMacro = \"{{content}}\") {\n let template_replaced = template.replaceAll(\"{{title}}\", note.name.slice(0, -3));\n const dateTime = localDateTime().split(\" \");\n template_replaced = template_replaced.replaceAll(\"{{date}}\", dateTime[0]);\n template_replaced = template_replaced.replaceAll(\"{{time}}\", dateTime[1]);\n template_replaced = template_replaced.replaceAll(contentMacro, content);\n return template_replaced;\n}\n\n/**\n * Extract macros in the format {{macro}}\n * @param text\n * @returns a unique list of macros in the text\n */\nfunction extractMacros(text: string): string[] {\n const regexTest = new RegExp(MACRO_REGEX.source, MACRO_REGEX.flags);\n const matches = text.matchAll(regexTest);\n return addUniqueValuesToArray([], matches);\n}\n\n/**\n * Extracts matches for all of the given regular expressions.\n * @param text the text to check against\n * @param pattern the regex pattern(s) to evaluate\n * @returns an array of strings that matched the given regex\n */\nfunction extractMatches(text: string, pattern: RegExp | RegExp[]): string[] {\n if (Array.isArray(pattern)) {\n const matches: string[] = [];\n pattern.forEach((value) => {\n addUniqueValuesToArray(matches, text.matchAll(value));\n });\n return matches;\n } else {\n const matches = text.matchAll(pattern);\n return addUniqueValuesToArray([], matches);\n }\n}\n\n/**\n * Replace (1:1) keys with their associated values in the provided text.\n * @param text the text in which to replace\n * @param replacements the map of keys to values\n * @returns the input with replaced text\n */\nfunction replaceMacros(text: string, replacements: Map<string, string>): string {\n let retval = text;\n replacements.forEach((value, key) => {\n retval = retval.replaceAll(key, value);\n });\n return retval;\n}\n\n/**\n * Add unique values from the passed RegExpMatchArray to the given array of strings\n * @param array an array of strings\n * @param values a set of regex matches\n * @returns the passed array with unique values added\n */\nfunction addUniqueValuesToArray(array: string[], values: IterableIterator<RegExpMatchArray>): string[] {\n const valueArray = [...values];\n valueArray.forEach((match) => {\n if (!array.includes(match[1])) {\n array.push(match[1]);\n }\n });\n return array;\n}\n\n\n/**\n * Parse code blocks and the headers before them\n * @param content file content\n * @returns a mapping of headers to code block content\n */\nfunction parseCodeBlocks(content: string): Map<string, Code> {\n const retval = new Map();\n const codeBlockRegex = /#+\\s+(.+)$\\n+```([\\w-_\\s]*)\\n(((?!^```\\n).|\\n)*)\\n^```$/gm;\n const matches = content.matchAll(codeBlockRegex);\n const matchArray = [...matches];\n 
matchArray.forEach((match) => {\n if (!retval.has(match[1])) {\n const code: Code = {\n content: match[3],\n lang: match[2]\n };\n retval.set(match[1], code);\n }\n });\n return retval;\n}\n\nexport const macroSeparator = \"(?:\\\\s*[:=]\\\\s*|\\\\s+)\";\nexport const macroValue = \"(((?:[^}\\\\s]*\\\\w[^}\\\\s]*)+))\";\n/**\n * Constructs a regular expression to match values in the note based on the passed values\n * and separator constants above\n * @param macroRegex the macro name (like file, user, etc)\n * @returns the constructed regular expression\n */\nfunction constructMacroRegex(macroRegex: string | RegExp): RegExp {\n if (macroRegex instanceof RegExp) macroRegex = macroRegex.source;\n const retval = new RegExp(macroRegex + macroSeparator + macroValue, \"gi\");\n return retval;\n}\n\n/**\n * Validate a domain against a list of valid top-level domains (TLD)\n * @param domain the domain to validate\n * @param validTld an array of valid TLD strings in uppercase\n * @returns the boolean representation of the domain's validity\n */\nfunction validateDomain(domain: string, validTld: string[]): boolean {\n let tld = domain.split('.').pop()?.toUpperCase();\n if (tld && validTld.includes(tld)) return true;\n tld = domain.split('[.]').pop()?.toUpperCase();\n if (tld && validTld.includes(tld)) return true;\n return false;\n}\n\n/**\n * Removes duplicates from the passed array.\n * @param array an array of strings\n * @returns the array with duplicates removed\n */\nfunction removeArrayDuplicates(array: string[]): string[] {\n return array.filter((item, index) => {\n return array.indexOf(item) === index;\n });\n}\n\nfunction convertTime(timeString: string): number {\n return Date.parse(timeString);\n}\n\n/**\n * Validate a list of domains against a list of valid top-level domains (TLD)\n * @param domains a list of domains to validate\n * @param validTld a list of valid TLDs\n * @returns domains with valid TLDs\n */\nfunction validateDomains(domains: string[], validTld: string[]): string[] {\n let index = domains.length - 1;\n while (index >= 0) {\n const domain = domains[index];\n if (!validateDomain(domain, validTld)) {\n domains.splice(index, 1);\n }\n index -= 1;\n }\n return domains;\n}\n\n/**\n * Checks an IP address is local/private per RFC 1918\n * @param ip an IPv4 address\n * @returns a boolean representing whether the IP is local or not\n */\nfunction isLocalIpv4(ip: string): boolean {\n const localIpTest = new RegExp(LOCAL_IP_REGEX.source, LOCAL_IP_REGEX.flags);\n if (localIpTest.exec(ip)) return true;\n else return false;\n}\n\n\nexport type IocType = 'hash' | 'ip' | 'domain';\n/**\n * Get the type of an IOC (hash, IP, domain)\n * @param val an IOC value\n * @returns a string representation of the IOC type (hash, ip, domain) or null\n */\nfunction getIocType(val: string): IocType | null {\n val = val.trim().toLowerCase();\n const ipTest = new RegExp(IP_REGEX.source, IP_REGEX.flags);\n if (ipTest.exec(val)) return 'ip';\n const ipv6Test = new RegExp(IPv6_REGEX.source, IPv6_REGEX.flags);\n if (ipv6Test.exec(val)) return 'ip';\n const domainTest = new RegExp(DOMAIN_REGEX.source, DOMAIN_REGEX.flags);\n if (domainTest.exec(val)) return 'domain';\n const hashTest = new RegExp(HASH_REGEX.source, HASH_REGEX.flags);\n if (hashTest.exec(val)) return 'hash';\n return null;\n}","import { DOMAIN_REGEX, FILE_REGEX, IP_REGEX, IPv6_REGEX, LOCAL_IP_REGEX, MACRO_REGEX, MD5_REGEX, SHA1_REGEX, SHA256_REGEX } from \"./regex\";\nimport { filterExclusions, Indicator, IndicatorSource, IndicatorType, 
ParsedIndicators } from \"./iocParser\";\nimport { isLocalIpv4, refangIoc } from \"./textUtils\";\nimport { CyberPlugin } from \"./cyberPlugin\";\n\nexport const PATTERN_KEYS = ['IPv6', 'IP', 'IPv4', 'LocalIP', 'Domain', 'SHA256', 'MD5', 'SHA1', 'File'];\nexport type PatternKey = typeof PATTERN_KEYS[number];\n\nexport class Matcher {\n private static readonly Patterns: Record<PatternKey, string> = {\n IPv6: IPv6_REGEX.source,\n IPv4: IP_REGEX.source,\n IP: IP_REGEX.source,\n LocalIP: LOCAL_IP_REGEX.source,\n LocalIPv4: LOCAL_IP_REGEX.source,\n Domain: DOMAIN_REGEX.source,\n SHA256: SHA256_REGEX.source,\n MD5: MD5_REGEX.source,\n SHA1: SHA1_REGEX.source,\n File: FILE_REGEX.source,\n Macro: MACRO_REGEX.source\n } as const;\n\n static getAvailablePatterns(): readonly PatternKey[] {\n return PATTERN_KEYS;\n }\n\n static getExactMatcher(pattern: PatternKey): RegExp {\n return new RegExp(`^${Matcher.Patterns[pattern]}$`, 'i');\n }\n\n static getGlobalMatcher(pattern: PatternKey): RegExp {\n return new RegExp(Matcher.Patterns[pattern], 'gi');\n }\n\n static findAll(text: string, pattern: PatternKey): string[] {\n return Array.from(text.matchAll(this.getGlobalMatcher(pattern)), m => m[0]);\n }\n\n static isMatch(text: string, pattern: PatternKey): boolean {\n return this.getExactMatcher(pattern).test(text);\n }\n\n static findFirst(text: string, pattern: PatternKey): string | null {\n const match = text.match(Matcher.Patterns[pattern]);\n return match ? match[0] : null;\n }\n}\n\n\n/**\n * Extract IOCs from the given file content.\n * @param fileContent content from which to extract IOCs\n * @returns an array of ParsedIndicators objects for each IOC type\n */\nexport function getMatches(fileContent: string): ParsedIndicators[] {\n if (!fileContent) return [];\n const ips: ParsedIndicators = {\n title: \"IPs\",\n items: Matcher.findAll(fileContent, 'IPv4')\n }\n const domains: ParsedIndicators = {\n title: \"Domains\",\n items: Matcher.findAll(fileContent, 'Domain')\n }\n const hashes: ParsedIndicators = {\n title: \"Hashes\",\n items: Matcher.findAll(fileContent, 'SHA256')\n }\n const privateIps: ParsedIndicators = {\n title: \"IPs (Private)\",\n items: []\n }\n const ipv6: ParsedIndicators = {\n title: \"IPv6\",\n items: Matcher.findAll(fileContent, 'IPv6')\n }\n ips.title = \"IPs (Public)\";\n for (let i = 0; i < ips.items.length; i++) {\n const item = ips.items[i];\n if(isLocalIpv4(item)) {\n ips.items.splice(i, 1);\n i--;\n privateIps.items.push(item);\n }\n }\n const retval = [ips, privateIps, domains, hashes, ipv6];\n\n // refang and de-duplicate IOCs\n retval.forEach((iocList, index, array) => {\n const refangedItems = iocList.items.map((x) => refangIoc(x));\n \n // remove duplicates using a set\n iocList.items = [...new Set(refangedItems)];\n array[index] = iocList;\n });\n return retval;\n}\n\nexport function getIndicatorMatches(fileContent: string, source: IndicatorSource = IndicatorSource.TEXT, metadata?: Record<string, any>): Indicator[] {\n const indicators: Indicator[] = [];\n if (!fileContent) return indicators;\n\n const ipv4Addresses = Matcher.findAll(fileContent, 'IPv4');\n for (const ip of ipv4Addresses) {\n const refangedIp = refangIoc(ip);\n if (isLocalIpv4(refangedIp)) {\n indicators.push({\n value: refangedIp,\n type: IndicatorType.PRIVATE_IPv4,\n source,\n metadata\n });\n } else {\n indicators.push({\n value: refangedIp,\n type: IndicatorType.IPv4,\n source,\n metadata\n });\n }\n }\n\n const ipv6Addresses = Matcher.findAll(fileContent, 'IPv6');\n for (const ip of 
ipv6Addresses) {\n indicators.push({\n value: refangIoc(ip),\n type: IndicatorType.IPv6,\n source,\n metadata\n });\n }\n\n const domains = Matcher.findAll(fileContent, 'Domain');\n for (const domain of domains) {\n indicators.push({\n value: refangIoc(domain),\n type: IndicatorType.DOMAIN,\n source,\n metadata\n });\n }\n\n const sha256Hashes = Matcher.findAll(fileContent, 'SHA256');\n for (const hash of sha256Hashes) {\n indicators.push({\n value: hash,\n type: IndicatorType.HASH,\n source,\n metadata\n });\n }\n\n return indicators;\n}\n\n/**\n * Process exclusions for a list of IOCs.\n * @param plugin a CyberPlugin\n * @param indicators a list of parsed indicators\n * @returns indicators with exclusions applied\n */\nexport function processExclusions(iocs: ParsedIndicators[], plugin: CyberPlugin | undefined): ParsedIndicators[] {\n if (!iocs || !plugin) return iocs;\n const exclusions = plugin.getIocExclusions();\n \n return iocs.map(indicatorList => {\n // create a copy to avoid modifying the original\n const processed = { ...indicatorList };\n \n switch(processed.title) {\n case \"IPs\":\n case \"IPs (Public)\":\n case \"IPs (Private)\":\n processed.exclusions = exclusions.ipv4Exclusions || [];\n break;\n case \"IPv6\":\n processed.exclusions = exclusions.ipv6Exclusions || [];\n break;\n case \"Domains\":\n processed.exclusions = exclusions.domainExclusions || [];\n break;\n case \"Hashes\":\n processed.exclusions = exclusions.hashExclusions || [];\n break;\n default:\n processed.exclusions = [];\n break;\n }\n\n if (processed.exclusions && processed.exclusions.length > 0) {\n processed.items = filterExclusions(processed.items, processed.exclusions);\n }\n\n return processed;\n });\n}","import { App, TFile } from \"obsidian\";\nimport { createWorker, type Worker } from \"tesseract.js\";\nimport { readImageFile } from \"./ocr/utils\";\n\nexport { initializeWorker, ocr, ocrMultiple };\n\ninterface OcrQueueItem {\n image: Buffer;\n resolve: (value: string | PromiseLike<string>) => void;\n reject: (reason?: any) => void;\n}\n\nclass OcrQueue {\n