UNPKG

@mieweb/wikigdrive

Version:

Google Drive to MarkDown synchronization

591 lines (521 loc) 22 kB
import winston from 'winston';
import Transport from 'winston-transport';

import {Container, ContainerConfig, ContainerConfigArr, ContainerEngine} from '../../ContainerEngine.ts';
import {FileContentService} from '../../utils/FileContentService.ts';
import {appendConflict, DirectoryScanner, stripConflict} from './DirectoryScanner.ts';
import {GoogleFilesScanner} from './GoogleFilesScanner.ts';
import {convertToRelativeMarkDownPath, convertToRelativeSvgPath} from '../../LinkTranslator.ts';
import {LocalFilesGenerator} from './LocalFilesGenerator.ts';
import {QueueTransformer} from './QueueTransformer.ts';
import {ConflictFile, LocalFile, RedirFile} from '../../model/LocalFile.ts';
import {TaskLocalFileTransform} from './TaskLocalFileTransform.ts';
import {MimeTypes} from '../../model/GoogleFile.ts';
import {generateDirectoryYaml, parseDirectoryYaml} from './frontmatters/generateDirectoryYaml.ts';
import {getContentFileService, removeMarkDownsAndImages} from './utils.ts';
import {LocalLog} from './LocalLog.ts';
import {LocalLinks} from './LocalLinks.ts';
import {TaskRedirFileTransform} from './TaskRedirFileTransform.ts';
import {TocGenerator} from './frontmatters/TocGenerator.ts';
import {FileId} from '../../model/model.ts';
import {MarkdownTreeProcessor} from './MarkdownTreeProcessor.ts';
import {LunrIndexer} from '../search/LunrIndexer.ts';
import {JobManagerContainer} from '../job/JobManagerContainer.ts';
import {UserConfigService} from '../google_folder/UserConfigService.ts';
import {getUrlHash} from '../../utils/idParsers.ts';
import {TaskGoogleMarkdownTransform} from './TaskGoogleMarkdownTransform.ts';
import {frontmatter} from './frontmatters/frontmatter.ts';

const __filename = import.meta.filename;

/**
 * Tells whether `googleFolderFiles` contains an entry with the same `id` as `localFile`.
 */
function doesExistIn(googleFolderFiles: LocalFile[], localFile: LocalFile) {
  return googleFolderFiles.some(file => file.id === localFile.id);
}

/** Files to generate, grouped by the file name they all want to use. */
interface NameToConflicts {
  [fileName: string]: LocalFile[];
}

/**
 * Assigns a unique real file name to every file that is going to be generated.
 *
 * Files are grouped by `fileName`. A group of one keeps its name. For a larger group:
 * - when the first member is of type `'md'`, a `ConflictFile` takes the plain name and
 *   lists every member of the group in `conflicting`;
 * - a member that already exists in `destinationFiles` (matched by `id`) under a name
 *   whose `stripConflict()` form equals the group name keeps that existing name;
 * - every other member gets a name produced by `appendConflict()` with an increasing
 *   counter, skipping names that are already taken.
 *
 * @param filesToGenerate files that should exist after the transform
 * @param destinationFiles files currently present in the destination, keyed by real file name
 * @returns map from real file name to the file that will be written under it
 */
export function solveConflicts(filesToGenerate: LocalFile[], destinationFiles: { [realFileName: string]: LocalFile }) {
  const nameToConflictGroups: NameToConflicts = {};
  for (const file of filesToGenerate) {
    if (!nameToConflictGroups[file.fileName]) {
      nameToConflictGroups[file.fileName] = [];
    }
    nameToConflictGroups[file.fileName].push(file);
  }

  const realFileNameToGenerated: { [realFileName: string]: LocalFile } = {};

  for (const fileName in nameToConflictGroups) {
    const group = nameToConflictGroups[fileName];

    if (group.length === 1) {
      realFileNameToGenerated[fileName] = group[0];
      continue;
    }

    // Only markdown groups get a conflict page; other types are just renamed.
    const conflictFile: ConflictFile | null = group[0].type === 'md'
      ? {
        conflicting: [],
        fileName: fileName,
        id: 'conflict:' + fileName,
        mimeType: MimeTypes.MARKDOWN,
        modifiedTime: new Date().toISOString(),
        title: 'Conflict: ' + group[0].title,
        type: 'conflict'
      }
      : null;

    if (conflictFile) {
      realFileNameToGenerated[fileName] = conflictFile;
    }

    // First pass: keep the names of files that already sit in the destination.
    const conflictsToAssign: LocalFile[] = [];
    for (const fileToGenerate of group) {
      const destinationEntry = Object.entries(destinationFiles).find(f => f[1].id === fileToGenerate.id);
      if (destinationEntry) {
        const realFileName = destinationEntry[0];
        const destinationFile = destinationEntry[1];
        if (stripConflict(realFileName) === fileName) {
          realFileNameToGenerated[realFileName] = destinationFile;
          if (conflictFile) {
            conflictFile.conflicting.push({
              realFileName,
              id: destinationFile.id,
              title: destinationFile.title
            });
          }
          continue;
        }
      }
      conflictsToAssign.push(fileToGenerate);
    }

    // Second pass: hand out fresh conflict names to the remaining files.
    let counter = 1;
    for (const destinationFile of conflictsToAssign) {
      let realFileName = appendConflict(destinationFile.fileName, counter++);
      while (realFileNameToGenerated[realFileName]) {
        realFileName = appendConflict(destinationFile.fileName, counter++);
      }
      realFileNameToGenerated[realFileName] = destinationFile;
      if (conflictFile) {
        conflictFile.conflicting.push({
          realFileName,
          id: destinationFile.id,
          title: destinationFile.title
        });
      }
    }
  }

  return realFileNameToGenerated;
}

/**
 * Appends a log entry for a file that is about to be generated.
 *
 * The event is `'created'` when no destination file has the same `id`, `'renamed'` when
 * one exists under a different real file name, and `'touched'` when the name is unchanged.
 *
 * @param realFileName name the file will be written under
 * @param fileToGenerate file being generated
 * @param destinationFiles files currently present in the destination, keyed by real file name
 * @param localLog log to append to
 * @param prefix string prepended to `realFileName` to build the logged `filePath`
 */
function processLogExisting(realFileName: string, fileToGenerate: LocalFile, destinationFiles: { [realFileName: string]: LocalFile }, localLog: LocalLog, prefix: string) {
  const destinationEntry = Object.entries(destinationFiles).find(item => item[1].id === fileToGenerate.id);

  if (!destinationEntry) {
    localLog.append({
      filePath: prefix + realFileName,
      id: fileToGenerate.id,
      type: fileToGenerate.type,
      event: 'created',
    });
    return;
  }

  if (destinationEntry[0] !== realFileName) {
    localLog.append({
      filePath: prefix + realFileName,
      id: fileToGenerate.id,
      type: fileToGenerate.type,
      event: 'renamed',
    });
  } else {
    localLog.append({
      filePath: prefix + realFileName,
      id: fileToGenerate.id,
      type: fileToGenerate.type,
      event: 'touched',
    });
  }
}

/**
 * Appends a `'removed'` log entry for `realFileName` when no entry of `destinationFiles`
 * carries the same `id` as the file stored under that name.
 *
 * NOTE(review): the lookup searches the same map the file was read from, so the file
 * always matches itself and the `'removed'` branch looks unreachable. Left unchanged
 * because enabling it would alter what `createRedirs` and `rewriteLinks` read from the
 * log - confirm the intended map before changing.
 *
 * @param realFileName key of the destination file being checked
 * @param destinationFiles files currently present in the destination, keyed by real file name
 * @param localLog log to append to
 * @param prefix string prepended to `realFileName` to build the logged `filePath`
 */
function processLogRemoved(realFileName: string, destinationFiles: { [realFileName: string]: LocalFile }, localLog: LocalLog, prefix: string) {
  const destinationFile = destinationFiles[realFileName];
  const entryToGenerate = Object.entries(destinationFiles).find(item => item[1].id === destinationFile.id);
  if (!entryToGenerate) {
    localLog.append({
      filePath: prefix + realFileName,
      id: destinationFile.id,
      type: destinationFile.type,
      event: 'removed',
    });
  }
}

/**
 * Fills in `fileName`, `id` and `modifiedTime` of destination files whose `id` is the
 * placeholder `'TO_FILL'`, using the `fileMap` stored in `.wgd-directory.yaml`.
 * Files without a matching `fileMap` entry are left untouched.
 *
 * @param destinationFiles files to patch in place, keyed by real file name
 * @param destinationDirectory directory that may contain `.wgd-directory.yaml`
 */
async function addBinaryMetaData(destinationFiles: { [realFileName: string]: LocalFile }, destinationDirectory: FileContentService) {
  let yamlContent = '';
  if (await destinationDirectory.exists('.wgd-directory.yaml')) {
    yamlContent = await destinationDirectory.readFile('.wgd-directory.yaml');
  }

  const props = parseDirectoryYaml(yamlContent);
  const map = props?.fileMap || {};

  for (const realFileName in destinationFiles) {
    const destinationFile = destinationFiles[realFileName];
    if (destinationFile.id !== 'TO_FILL') {
      continue;
    }

    const mapData = map[realFileName];
    if (!mapData) {
      continue;
    }

    destinationFile.fileName = mapData.fileName;
    destinationFile.id = mapData.id;
    destinationFile.modifiedTime = mapData.modifiedTime;
  }
}

/**
 * Winston transport that collects `error` and `warn` messages per markdown file.
 *
 * Only log entries carrying `errorMdFile` are recorded; `errorMdMsg`, when present,
 * is appended to that file's list.
 */
export class TransformLog extends Transport {
  /** Collected messages keyed by the `errorMdFile` value of the log entry. */
  public errors: Record<string, string[]> = {};

  constructor(options = {}) {
    super(options);
  }

  /**
   * Records the entry (see class description) and then calls `next`, if given.
   */
  log(info: { level: string, errorMdFile: string, errorMdMsg: string }, next: () => void) {
    const isProblem = info.level === 'error' || info.level === 'warn';
    if (isProblem && info.errorMdFile) {
      if (!this.errors[info.errorMdFile]) {
        this.errors[info.errorMdFile] = [];
      }
      if (info.errorMdMsg) {
        this.errors[info.errorMdFile].push(info.errorMdMsg);
      }
    }

    if (next) {
      next();
    }
  }
}

/**
 * Container that turns the downloaded Google folder tree into generated content:
 * it syncs directories, queues per-file transform tasks, writes redirects, the TOC,
 * an `_errors.md` report, rewrites `gdoc:` links and rebuilds the tree and search index.
 */
export class TransformContainer extends Container {
  private logger: winston.Logger;
  private generatedFileService: FileContentService;
  private localLog: LocalLog;
  private localLinks: LocalLinks;
  private filterFilesIds: FileId[];
  private userConfigService: UserConfigService;
  private progressNotifyCallback: ({total, completed, warnings, failed}: { total?: number; completed?: number; warnings?: number; failed?: number }) => void;
  private transformLog: TransformLog;
  private isFailed = false;
  private useGoogleMarkdowns = false;
  private globalHeadersMap: {[key: string]: string} = {};
  private globalInvisibleBookmarks: {[key: string]: number} = {};

  /**
   * @param params container configuration
   * @param paramsArr array-valued configuration; `filesIds`, when present, limits which
   *   files get transform tasks
   */
  constructor(public readonly params: ContainerConfig, public readonly paramsArr: ContainerConfigArr = {}) {
    super(params, paramsArr);
    this.filterFilesIds = paramsArr['filesIds'] || [];
  }

  /**
   * Stores the source and destination file services and loads the user config
   * from the source file service.
   */
  async mount2(fileService: FileContentService, destFileService: FileContentService): Promise<void> {
    this.filesService = fileService;
    this.generatedFileService = destFileService;
    this.userConfigService = new UserConfigService(this.filesService);
    await this.userConfigService.load();
  }

  /**
   * Creates a child logger for this container and attaches a `TransformLog` transport to it.
   */
  async init(engine: ContainerEngine): Promise<void> {
    await super.init(engine);
    this.logger = engine.logger.child({ filename: __filename, driveId: this.params.folderId, jobId: this.params.jobId });
    this.transformLog = new TransformLog();
    this.logger.add(this.transformLog);
  }

  /**
   * Synchronizes one Google folder with its destination directory, recursing into
   * sub-directories, and writes `.wgd-directory.yaml` for the directory.
   *
   * Does nothing when `googleFolder` has no `.folder.json`.
   *
   * @param googleFolder source folder
   * @param destinationDirectory directory to generate into
   * @param queueTransformer queue that receives the per-file transform tasks
   */
  async syncDir(googleFolder: FileContentService, destinationDirectory: FileContentService, queueTransformer: QueueTransformer) {
    const googleScanner = new GoogleFilesScanner();

    if (!await googleFolder.exists('.folder.json')) {
      return;
    }

    const googleFolderData = await googleFolder.readJson('.folder.json') || {};
    const googleFolderFiles = await googleScanner.scan(googleFolder);

    const destinationScanner = new DirectoryScanner();
    const destinationFiles = await destinationScanner.scan(destinationDirectory);
    await addBinaryMetaData(destinationFiles, destinationDirectory);

    const localFilesGenerator = new LocalFilesGenerator();
    const filesToGenerate: LocalFile[] = await localFilesGenerator.generateLocalFiles(googleFolderFiles);

    const realFileNameToGenerated = solveConflicts(filesToGenerate, destinationFiles);

    for (const realFileName in destinationFiles) {
      if (realFileName.startsWith('.')) {
        continue;
      }

      processLogRemoved(realFileName, destinationFiles, this.localLog, destinationDirectory.getVirtualPath());

      const fileInDirectory = destinationFiles[realFileName];

      // A destination entry is cleared when it is no longer generated, is a redir or
      // conflict placeholder, or its name is not in use anymore. The three conditions
      // used to trigger separate removals of the same file; one call is enough.
      const shouldRemove =
        !doesExistIn(filesToGenerate, fileInDirectory) ||
        fileInDirectory.type === 'redir' ||
        fileInDirectory.type === 'conflict' ||
        !realFileNameToGenerated[realFileName];

      if (shouldRemove) {
        await removeMarkDownsAndImages(realFileName, destinationDirectory);
      }
    }

    for (const realFileName in realFileNameToGenerated) {
      const localFile: LocalFile = realFileNameToGenerated[realFileName];

      processLogExisting(realFileName, localFile, destinationFiles, this.localLog, destinationDirectory.getVirtualPath());

      if (localFile.type === 'directory') {
        await destinationDirectory.mkdir(realFileName);
        const googleFolderFile = googleFolderFiles.find(f => f.id === localFile.id);
        if (googleFolderFile) {
          const googleSubFolder = await googleFolder.getSubFileService(googleFolderFile.id);
          await this.syncDir(googleSubFolder, await destinationDirectory.getSubFileService(realFileName), queueTransformer);
        }
        continue;
      }

      const googleFile = googleFolderFiles.find(f => f.id === localFile.id);

      // When a file filter is set, only the listed files get a transform task.
      if (this.filterFilesIds.length > 0 && !this.filterFilesIds.includes(localFile.id)) {
        continue;
      }

      const jobManagerContainer = <JobManagerContainer>this.engine.getContainer('job_manager');

      if (!this.useGoogleMarkdowns) {
        const task = new TaskLocalFileTransform(
          this.logger,
          jobManagerContainer,
          realFileName,
          googleFolder,
          googleFile,
          destinationDirectory,
          localFile,
          this.localLinks,
          this.userConfigService.config,
          this.globalHeadersMap,
          this.globalInvisibleBookmarks
        );
        queueTransformer.addTask(task);
      } else {
        const task = new TaskGoogleMarkdownTransform(
          this.logger,
          jobManagerContainer,
          realFileName,
          googleFolder,
          googleFile,
          destinationDirectory,
          localFile,
          this.localLinks,
          this.userConfigService.config
        );
        queueTransformer.addTask(task);
      }
    }

    const dirNames = destinationDirectory.getVirtualPath().replace(/\/$/, '').split('/');
    const yaml = generateDirectoryYaml(stripConflict(dirNames[dirNames.length - 1]), googleFolderData, realFileNameToGenerated);

    await destinationDirectory.writeFile('.wgd-directory.yaml', yaml);
  }

  /**
   * Runs the whole transform for the drive.
   *
   * Logs a warning and returns early when `transform_subdir` is not set or does not start
   * with `/`. Otherwise it syncs the tree (repeating for back-linked files when a file
   * filter is active), writes `_errors.md` when problems were collected, creates redirects,
   * the TOC, rewrites links, saves the log and links, and regenerates the tree and the
   * lunr index.
   *
   * @param rootFolderId id passed to the tree regeneration and used in log messages
   */
  async run(rootFolderId: FileId) {
    if (!(this.userConfigService.config.transform_subdir || '').startsWith('/')) {
      this.logger.warn('Content subdirectory must be set and start with /');
      return;
    }

    const contentFileService = await getContentFileService(this.generatedFileService, this.userConfigService);

    const queueTransformer = new QueueTransformer(this.logger);
    queueTransformer.onProgressNotify(({ total, completed, warnings, failed }) => {
      if (failed > 0) {
        this.isFailed = true;
      }
      if (this.progressNotifyCallback) {
        this.progressNotifyCallback({ total, completed, warnings, failed });
      }
    });

    this.logger.info('Start transforming: ' + rootFolderId);

    this.localLog = new LocalLog(contentFileService);
    await this.localLog.load();
    this.localLinks = new LocalLinks(contentFileService);
    await this.localLinks.load();

    const processed = new Set<string>();
    const previouslyFailed = new Set<string>();

    let retry = true;
    while (retry) {
      retry = false;

      await this.syncDir(this.filesService, contentFileService, queueTransformer);
      await queueTransformer.finished();

      if (this.filterFilesIds.length > 0) {
        // Collect the not-yet-processed files that link back to the files just transformed.
        const filterFilesIds = new Set<string>();
        for (const fileId of this.filterFilesIds) {
          processed.add(fileId);
          const backLinks = this.localLinks.getBackLinks(fileId);
          for (const backLink of backLinks) {
            if (processed.has(backLink.fileId)) {
              continue;
            }
            filterFilesIds.add(backLink.fileId);
          }
        }

        if (filterFilesIds.size > 0) {
          // NOTE(review): the loop stops here only when the new set has the same size as
          // the previous one and shares no id with it - confirm this is the intended
          // termination rule.
          if (previouslyFailed.size === filterFilesIds.size) {
            let shouldBreak = true;
            for (const fileId of previouslyFailed) {
              if (filterFilesIds.has(fileId)) {
                shouldBreak = false;
                break;
              }
            }
            if (shouldBreak) {
              break;
            }
          }

          this.filterFilesIds = Array.from(filterFilesIds);
          previouslyFailed.clear();
          for (const fileId of filterFilesIds) {
            previouslyFailed.add(fileId);
          }
          retry = true;
        }
      }
    }

    await queueTransformer.finished();

    // Rebuild the error report from scratch on every run.
    await contentFileService.remove('_errors.md');
    if (Object.keys(this.transformLog.errors).length > 0) {
      let errorLog = '';
      errorLog += '---\n';
      errorLog += 'type: \'page\'\n';
      errorLog += '---\n';
      for (const mdFile in this.transformLog.errors) {
        errorLog += `\n* [${mdFile}](${mdFile})\n`;
        for (const mdMsg of this.transformLog.errors[mdFile]) {
          errorLog += ` ${mdMsg}\n`;
        }
      }
      await contentFileService.writeFile('_errors.md', errorLog);
    }

    await this.createRedirs(contentFileService);
    await this.writeToc(contentFileService);
    await this.rewriteLinks(contentFileService);

    await this.localLog.save();
    await this.localLinks.save();

    this.logger.info('Regenerate tree: ' + rootFolderId + ` to: ${contentFileService.getRealPath()}/.tree.json`);

    const markdownTreeProcessor = new MarkdownTreeProcessor(contentFileService);
    await markdownTreeProcessor.regenerateTree(rootFolderId);
    await markdownTreeProcessor.save();

    const indexer = new LunrIndexer();
    await markdownTreeProcessor.walkTree((page) => {
      indexer.addPage(page);
      return false;
    });
    await this.generatedFileService.mkdir('/.private');
    await this.generatedFileService.writeJson('/.private/lunr.json', indexer.getJson());
  }

  /** Returns `true` once the transform queue of `run` has reported at least one failed task. */
  public failed() {
    return this.isFailed;
  }

  /**
   * Recursively rewrites `.md` and `.svg` files under `destinationDirectory`:
   * - removes `<a id="..."></a>` anchors registered in `globalInvisibleBookmarks`
   *   (files whose frontmatter has an `id` only);
   * - replaces `gdoc:<id>[#hash]` links with a relative path to the last logged location
   *   of the file, or with a Google Drive URL when the file is unknown or removed.
   *
   * A file is written back only when its content changed.
   */
  async rewriteLinks(destinationDirectory: FileContentService) {
    const files = await destinationDirectory.list();

    for (const fileName of files) {
      if (await destinationDirectory.isDirectory(fileName)) {
        await this.rewriteLinks(await destinationDirectory.getSubFileService(fileName));
        continue;
      }

      if (!(fileName.endsWith('.md') || fileName.endsWith('.svg'))) {
        continue;
      }

      const content = await destinationDirectory.readFile(fileName);
      const parsed = frontmatter(content);
      const props = parsed.data;

      let newContent = content;

      if (props?.id) {
        newContent = newContent.replace(/\n? ?<a id="([^"]*)"><\/a>\n?/igm, (str: string, hash: string) => {
          const fullLink = 'gdoc:' + props.id + '#' + hash;
          if (this.globalInvisibleBookmarks[fullLink]) {
            // Drop the anchor and tidy up the whitespace that surrounded it.
            const retVal = str.replace(`<a id="${hash}"></a>`, '');
            if (retVal === '\n \n') {
              return '\n';
            }
            if (retVal === '\n\n') {
              return '\n';
            }
            if (retVal.endsWith(' \n')) {
              return retVal.substring(0, retVal.length - 2) + '\n';
            }
            if (retVal.startsWith('\n ')) {
              return '\n' + retVal.substring(1);
            }
            if (retVal === ' ') {
              return '';
            }
            return retVal;
          } else {
            this.logger.warn(`In ${fileName} there is a link to ${fullLink} which can't be translated into bookmark link`);
          }
          return str;
        });
      }

      newContent = newContent.replace(/(gdoc:[A-Z0-9_-]+)(#[^'")\s]*)?/ig, (str: string) => {
        let fileId = str.substring('gdoc:'.length).replace(/#.*/, '');
        let hash = getUrlHash(str) || '';

        if (hash) {
          if (this.globalHeadersMap[str]) {
            // The headers map may point the link at a different file and hash.
            const idx = this.globalHeadersMap[str].indexOf('#');
            if (idx >= 0) {
              fileId = this.globalHeadersMap[str].substring('gdoc:'.length, idx);
              hash = this.globalHeadersMap[str].substring(idx);
            }
          } else {
            const fullLink = str;
            this.logger.warn(`In ${fileName} there is a link to ${fullLink} which can't be translated into bookmark link`);
          }
        }

        const lastLog = this.localLog.findLastFile(fileId);
        if (lastLog && lastLog.event !== 'removed') {
          if (fileName.endsWith('.svg')) {
            return convertToRelativeSvgPath(lastLog.filePath, destinationDirectory.getVirtualPath() + fileName);
          } else {
            return convertToRelativeMarkDownPath(lastLog.filePath, destinationDirectory.getVirtualPath() + fileName) + hash;
          }
        } else {
          return 'https://drive.google.com/open?id=' + fileId + hash.replace('#_', '#heading=h.');
        }
      });

      if (content !== newContent) {
        await destinationDirectory.writeFile(fileName, newContent);
      }
    }
  }

  /**
   * Queues a redirect file for every logged markdown path that no longer exists while the
   * file with the same id still exists at its last logged path, then waits for the queue.
   *
   * Rows are walked from newest to oldest. A row is skipped when the last logged file is
   * missing or cannot be parsed, or when the last log entry for the row's own path is
   * `'removed'`.
   */
  async createRedirs(contentFileService: FileContentService) {
    const rows = this.localLog.getLogs();

    const markDownScanner = new DirectoryScanner();
    const transformerQueue = new QueueTransformer(this.logger);
    transformerQueue.onProgressNotify(({ total, completed, warnings, failed }) => {
      if (this.progressNotifyCallback) {
        this.progressNotifyCallback({ total, completed, warnings, failed });
      }
    });

    for (let rowNo = rows.length - 1; rowNo >= 0; rowNo--) {
      const row = rows[rowNo];

      if (row.type === 'md' && !await contentFileService.exists(row.filePath)) {
        const lastLog = this.localLog.findLastFile(row.id);
        if (lastLog) {
          const parts = row.filePath.split('/');
          const fileName = parts.pop();
          const dirName = parts.join('/');

          if (!await contentFileService.exists(lastLog.filePath)) {
            continue;
          }

          const localFileContent = await contentFileService.readFile(lastLog.filePath);
          const localFile = markDownScanner.parseMarkdown(localFileContent, lastLog.filePath);
          if (!localFile) {
            continue;
          }

          const lastLogRedir = this.localLog.findLastFileByPath(dirName ? dirName + '/' + fileName : fileName);
          if (lastLogRedir?.event === 'removed') {
            continue;
          }

          const redirFile: RedirFile = {
            type: 'redir',
            fileName,
            id: row.id,
            mimeType: MimeTypes.MARKDOWN,
            modifiedTime: new Date(row.mtime).toISOString(),
            redirectTo: lastLog.id,
            title: 'Redirect to: ' + localFile.title,
          };

          const task = new TaskRedirFileTransform(
            this.logger,
            fileName,
            dirName ? await contentFileService.getSubFileService(dirName) : contentFileService,
            redirFile,
            localFile
          );
          transformerQueue.addTask(task);
        }
      }
    }

    await transformerQueue.finished();
  }

  /** Generates the table of contents and writes it to `toc.md`. */
  async writeToc(contentFileService: FileContentService) {
    const tocGenerator = new TocGenerator();
    const md = await tocGenerator.generate(contentFileService);
    await contentFileService.writeFile('toc.md', md);
  }

  /** Intentionally a no-op. */
  // eslint-disable-next-line @typescript-eslint/no-empty-function
  async destroy(): Promise<void> {
  }

  /** Registers the callback that receives progress updates from the transform queues. */
  onProgressNotify(callback: ({total, completed, warnings, failed}: { total?: number; completed?: number, warnings?: number, failed?: number }) => void) {
    this.progressNotifyCallback = callback;
  }

  /** Selects `TaskGoogleMarkdownTransform` (true) or `TaskLocalFileTransform` (false) for file tasks. */
  setUseGoogleMarkdowns(value: boolean) {
    this.useGoogleMarkdowns = value;
  }
}