UNPKG

twin-scanner-cli

Version:

Find duplicate files in multiple folders scanning .txt and .torrent files.

192 lines (177 loc) 6.72 kB
import path from 'node:path'

import * as A from 'fp-ts/lib/Array'
import { pipe } from 'fp-ts/lib/function'
import * as TE from 'fp-ts/lib/TaskEither'

import {
  areAllTextFiles,
  generateCombinationFolderName,
  getDuplicateStoragePath,
  mergeFileMapsExtraction,
} from './helpers'
import type { ExtractorFileExtensions, TUserChoices } from './types'

import {
  appendIntoTxtFileEffect,
  createFolderEffect,
  removeEmptyFoldersInFolderEffect,
} from '@/files/effects'
import { logExtractionStatistics, logUniversalStatistics } from '@/strategies/formatters'
import { torrentDuplicateStrategy } from '@/strategies/torrent'
import type { TDuplicateFormatTorrent, TDuplicateFormatTxt } from '@/strategies/torrent/types'
import { txtDuplicateStrategy } from '@/strategies/txt'
import { convertTorrentFilenameToURL } from '@/strategies/txt/helpers'
import type { TApplyFileExtractionEffect } from '@/strategies/txt/types'
import type { TExtensionsRemoveDuplicatesStrategies } from '@/strategies/types'

/**
 * Builds one handler per supported file extension for a single duplicate.
 *
 * - `torrent`: moves the given file to `<absolutePathToCommonStorageCur>/<duplicateFilename>`.
 * - `txt`: removes the value produced by `convertTorrentFilenameToURL(duplicateFilename)`
 *   from the given txt file.
 *
 * @param strategies - Per-extension strategies providing the move/remove effects.
 * @param absolutePathToCommonStorageCur - Folder that receives the moved torrent file.
 * @param duplicateFilename - Name of the duplicated file being processed.
 * @returns A record keyed by extension whose values are lazy `TaskEither` effects.
 */
const processFileTypeHandlers = (
  strategies: TExtensionsRemoveDuplicatesStrategies,
  absolutePathToCommonStorageCur: string,
  duplicateFilename: string
): Record<ExtractorFileExtensions, (dAbsPath: AbsolutePath) => TE.TaskEither<Error, void>> => ({
  torrent: (dAbsPath: AbsolutePath): TE.TaskEither<Error, void> =>
    strategies.torrent.moveFileEffect(
      dAbsPath,
      path.join(absolutePathToCommonStorageCur, duplicateFilename)
    ),
  txt: (duplicateAbsolutePath: AbsolutePath): TE.TaskEither<Error, void> =>
    strategies.txt.removeContentFromFileEffect(
      duplicateAbsolutePath,
      convertTorrentFilenameToURL(duplicateFilename)
    ),
})

/**
 * Handles a duplicate whose occurrences are not all text files.
 * All paths are processed in parallel (`TE.ApplicativePar`).
 *
 * @param strategies - Per-extension strategies.
 * @param absolutePathToCommonStorageCur - Storage folder for this duplicate.
 * @param duplicateFilename - Name of the duplicated file.
 * @param paths - Absolute paths where the duplicate was found.
 * @returns A `TaskEither` resolving to one `void` per processed path.
 */
const handleMixedFilesEffect = (
  strategies: TExtensionsRemoveDuplicatesStrategies,
  absolutePathToCommonStorageCur: string,
  duplicateFilename: string,
  paths: string[]
): TE.TaskEither<Error, void[]> => {
  // The handler table depends only on the arguments above, so build it once
  // instead of once (or twice) per path.
  const handlers = processFileTypeHandlers(
    strategies,
    absolutePathToCommonStorageCur,
    duplicateFilename
  )

  return pipe(
    paths,
    A.traverse(TE.ApplicativePar)(dAbsPath =>
      // 2a. If torrent => move the file to the duplicate folder
      // 2b. Since there is a real file => just remove from the source txt file
      dAbsPath.endsWith('.torrent') ? handlers.torrent(dAbsPath) : handlers.txt(dAbsPath)
    )
  )
}

/**
 * Handles a duplicate that occurs only in text files: first appends the value
 * of `convertTorrentFilenameToURL(duplicateFilename)` followed by a newline to
 * `common.txt` inside the storage folder, then removes that value from every
 * source path in parallel.
 *
 * @param strategies - Per-extension strategies (only `txt` is used here).
 * @param absolutePathToCommonStorageCur - Storage folder holding `common.txt`.
 * @param duplicateFilename - Name of the duplicated file.
 * @param paths - Absolute paths of the txt files containing the duplicate.
 * @returns A `TaskEither` resolving to one `void` per cleaned file.
 */
const handleTextFilesEffect = (
  strategies: TExtensionsRemoveDuplicatesStrategies,
  absolutePathToCommonStorageCur: string,
  duplicateFilename: string,
  paths: string[]
): TE.TaskEither<Error, void[]> =>
  pipe(
    appendIntoTxtFileEffect(
      path.join(absolutePathToCommonStorageCur, 'common.txt'),
      convertTorrentFilenameToURL(duplicateFilename).concat('\n')
    ),
    TE.flatMap(() =>
      pipe(
        paths,
        A.traverse(TE.ApplicativePar)(duplicatePath =>
          strategies.txt.removeContentFromFileEffect(
            duplicatePath,
            convertTorrentFilenameToURL(duplicateFilename)
          )
        )
      )
    )
  )

/**
 * Processes a single duplicate: creates its storage folder, then dispatches to
 * the text-only or mixed handler depending on `areAllTextFiles(dAbsPaths)`.
 *
 * @param strategies - Per-extension strategies.
 * @param storagePath - Folder created for (and passed on as storage of) this duplicate.
 * @param dFilename - Name of the duplicated file.
 * @param dAbsPaths - Absolute paths where the duplicate was found.
 * @returns A `TaskEither` resolving to one `void` per processed path.
 */
const processDuplicateEffect = (
  strategies: TExtensionsRemoveDuplicatesStrategies,
  storagePath: AbsolutePath,
  dFilename: string,
  dAbsPaths: string[]
): TE.TaskEither<Error, void[]> =>
  pipe(
    storagePath,
    createFolderEffect,
    TE.flatMap(() =>
      areAllTextFiles(dAbsPaths)
        ? handleTextFilesEffect(strategies, storagePath, dFilename, dAbsPaths)
        : handleMixedFilesEffect(strategies, storagePath, dFilename, dAbsPaths)
    )
  )

/**
 * Processes every entry of the merged duplicate map, one entry at a time
 * (`TE.ApplicativeSeq`). Each duplicate gets a storage sub-folder named by
 * `generateCombinationFolderName` under the given storage root.
 * NOTE(review): sequential traversal presumably avoids concurrent writes to
 * the same txt / common.txt files — confirm before switching to parallel.
 *
 * @param mergedFileMapsExtraction - Map of duplicate filename to the absolute paths holding it.
 * @param strategies - Per-extension strategies.
 * @returns A function taking the storage root and returning the combined effect.
 */
const processDuplicatesEffect =
  (
    mergedFileMapsExtraction: Record<string, string[]>,
    strategies: TExtensionsRemoveDuplicatesStrategies
  ) =>
  (absolutePathToStorageFolder: string): TE.TaskEither<Error, void[][]> =>
    pipe(
      Object.entries(mergedFileMapsExtraction),
      A.traverse(TE.ApplicativeSeq)(([duplicateFilename, duplicateAbsolutePaths]) => {
        const storageFolderName = generateCombinationFolderName(duplicateAbsolutePaths)
        const absolutePathToCommonStorageCur = path.join(
          absolutePathToStorageFolder,
          storageFolderName
        )

        return processDuplicateEffect(
          strategies,
          absolutePathToCommonStorageCur,
          duplicateFilename,
          duplicateAbsolutePaths
        )
      })
    )

/**
 * Merges the per-extension file maps, logs extraction statistics and, unless
 * `options.readonly` is set, extracts the duplicates into the storage folder
 * returned by `getDuplicateStoragePath(options)`, cleans up empty folders there
 * and logs the destination.
 *
 * In readonly mode nothing on disk is touched and the effect resolves to `undefined`.
 */
export const applyFilesExtractionEffect: TApplyFileExtractionEffect =
  (strategies, options) => fileMapsExtraction =>
    pipe(fileMapsExtraction, mergeFileMapsExtraction, mergedFileMapsExtraction =>
      pipe(mergedFileMapsExtraction, logExtractionStatistics(options.readonly), () =>
        options.readonly
          ? TE.right(undefined)
          : pipe(options, getDuplicateStoragePath, absolutePathToStorageFolder =>
              pipe(
                absolutePathToStorageFolder,
                processDuplicatesEffect(mergedFileMapsExtraction, strategies),
                // flatMap, not map: with `map` the returned TaskEither would only be
                // nested and then dropped by the next step, so the cleanup never ran.
                // NOTE(review): assumes removeEmptyFoldersInFolderEffect returns a
                // TaskEither like the other *Effect helpers used in this file — confirm.
                TE.flatMap(() => removeEmptyFoldersInFolderEffect(absolutePathToStorageFolder)),
                TE.map(() =>
                  console.log('Duplicates were extracted to', absolutePathToStorageFolder)
                )
              )
            )
      )
    )

/**
 * Entry point: collects duplicate maps for every selected extension in
 * parallel, runs the matching removal effect for each map that was produced,
 * then logs the universal statistics.
 *
 * NOTE(review): the duplicate maps come from the injected `strategies`, while
 * removal goes through the module-level `txtDuplicateStrategy` /
 * `torrentDuplicateStrategy` — verify this asymmetry is intentional.
 *
 * @param folderList - Folders to scan.
 * @param strategies - Per-extension strategies used to build the duplicate maps.
 * @param options - User choices (`fileExtensions`, `readonly`, ...).
 * @returns A `TaskEither` that resolves once statistics have been logged.
 */
export const getRidOfDuplicatesInFoldersEffect = (
  folderList: string[],
  strategies: TExtensionsRemoveDuplicatesStrategies,
  options: TUserChoices
): TE.TaskEither<Error, void> =>
  pipe(
    options.fileExtensions as ('txt' | 'torrent')[],
    A.traverse(TE.ApplicativePar)(
      ext =>
        strategies[ext].getDuplicateMap(folderList) as TE.TaskEither<
          Error,
          TDuplicateFormatTxt | TDuplicateFormatTorrent
        >
    ),
    TE.flatMap((duplicateMaps) => {
      // duplicateMaps is ordered like options.fileExtensions; an extension that
      // was not selected yields index -1 and therefore an undefined map.
      const torrentIdx = options.fileExtensions.findIndex(ext => ext === 'torrent')
      const txtIdx = options.fileExtensions.findIndex(ext => ext === 'txt')

      const txtFilesMapDuplicates = duplicateMaps[txtIdx]
      const torrentFileDuplicates = duplicateMaps[torrentIdx]

      return pipe(
        [
          txtFilesMapDuplicates
            ? txtDuplicateStrategy.removeDuplicatesEffect(
                txtFilesMapDuplicates as TDuplicateFormatTxt,
                options.readonly
              )
            : TE.right(undefined),
          torrentFileDuplicates
            ? torrentDuplicateStrategy.removeDuplicatesEffect(
                torrentFileDuplicates as TDuplicateFormatTorrent,
                options.readonly
              )
            : TE.right(undefined),
        ] as TE.TaskEither<Error, void | void[] | undefined>[],
        A.sequence(TE.ApplicativePar),
        TE.flatMap(() => TE.fromIO(() => logUniversalStatistics(duplicateMaps, options)))
      )
    })
  )