gmail-mbox-stats
Version:
A handy tool for analyzing Gmail MBOX files
540 lines (537 loc) • 25.1 kB
JavaScript
;
// Module-interop helper. Reuses an `__importDefault` already present on `this`
// when there is one; otherwise wraps a required module so that its value is
// reachable as `.default`. Modules flagged with `__esModule` are handed back
// untouched.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateGeneralStats = exports.writeStatsIntoFiles = exports.writeStatsOfSpecificSenderCategoryIntoFiles = exports.prepareOutputFolderStructure = exports.prepareResultsFolderForSenderCategory = void 0;
const node_path_1 = __importDefault(require("node:path"));
const node_fs_1 = require("node:fs");
const sync_1 = require("csv-stringify/sync");
const gloAccu_1 = require("../gloAccu");
const constants_1 = require("../constants");
const sweetUtils_1 = require("./sweetUtils");
const groundFolderMaker_1 = require("./groundFolderMaker");
const basicStarter_1 = require("../basicStarter");
/**
 * Creates the results folder of one sender category inside the stats folder
 * and an empty file for each of its inner files.
 *
 * Every location that was created successfully is remembered on the matching
 * descriptor as `pathAbsOrRel`. File-system failures are only logged with
 * `console.error`; they never abort the remaining work.
 *
 * @param {string} mboxStatsFolderPath - folder in which the category folder is created
 * @param {object} folderOfSpecificSenderCategory - descriptor providing `folderName`
 *   and `innerFiles` (each inner file providing `fileName`)
 * @returns {void}
 */
const prepareResultsFolderForSenderCategory = (mboxStatsFolderPath, folderOfSpecificSenderCategory) => {
    const categoryFolderPath = node_path_1.default.join(mboxStatsFolderPath, folderOfSpecificSenderCategory.folderName);
    // Make the folder unless it is already there.
    try {
        const folderIsThere = (0, node_fs_1.existsSync)(categoryFolderPath);
        if (!folderIsThere) {
            (0, node_fs_1.mkdirSync)(categoryFolderPath);
        }
        folderOfSpecificSenderCategory.pathAbsOrRel = categoryFolderPath;
    }
    catch (err) {
        console.error(err);
    }
    // Create (or truncate) every results file; the path is recorded only
    // when the write went through.
    Object.values(folderOfSpecificSenderCategory.innerFiles).forEach((fileDescriptor) => {
        const emptyFilePath = node_path_1.default.join(categoryFolderPath, fileDescriptor.fileName);
        try {
            (0, node_fs_1.writeFileSync)(emptyFilePath, "");
            fileDescriptor.pathAbsOrRel = emptyFilePath;
        }
        catch (err) {
            console.error(err);
        }
    });
};
exports.prepareResultsFolderForSenderCategory = prepareResultsFolderForSenderCategory;
/**
 * Lays out the output structure next to the given MBOX file: the stats
 * folder, an empty general-stats file, and the results folders of both
 * sender categories.
 *
 * Created locations are stored on the `groundFolder` descriptors as
 * `pathAbsOrRel`. File-system errors are logged and do not stop the run.
 *
 * @param {string} mboxFilePath - path of the MBOX file; must be a non-empty string
 * @returns {void}
 * @throws {Error} when `mboxFilePath` is empty or not a string
 */
const prepareOutputFolderStructure = (mboxFilePath) => {
    const isUsablePath = Boolean(mboxFilePath) && typeof mboxFilePath === "string";
    if (!isUsablePath) {
        throw new Error("mboxPath notation is not valid");
    }
    const statsFolder = groundFolderMaker_1.groundFolder.innerFolders.mboxStats;
    const mboxStatsFolderPath = node_path_1.default.join(node_path_1.default.dirname(mboxFilePath), statsFolder.folderName);
    // The stats folder lives in the same directory as the MBOX file.
    try {
        const folderIsThere = (0, node_fs_1.existsSync)(mboxStatsFolderPath);
        if (!folderIsThere) {
            (0, node_fs_1.mkdirSync)(mboxStatsFolderPath);
        }
        statsFolder.pathAbsOrRel = mboxStatsFolderPath;
    }
    catch (err) {
        console.error(err);
    }
    // The general-stats file starts out empty.
    const generalStatsFile = statsFolder.innerFiles.generalStats;
    const generalStatsFilePath = node_path_1.default.join(mboxStatsFolderPath, generalStatsFile.fileName);
    try {
        (0, node_fs_1.writeFileSync)(generalStatsFilePath, "");
        generalStatsFile.pathAbsOrRel = generalStatsFilePath;
    }
    catch (err) {
        console.error(err);
    }
    // TODO: maybe in the future also create the main index csv file
    // (innerFiles.allMailList_csv) here, the same way as the general-stats file.
    const senderCategoryFolderKeys = [
        "forMailsWhereSenderIsMe",
        "forMailsWhereSenderIsNotMeOrIsUnknown",
    ];
    for (const folderKey of senderCategoryFolderKeys) {
        (0, exports.prepareResultsFolderForSenderCategory)(mboxStatsFolderPath, statsFolder.innerFolders[folderKey]);
    }
};
exports.prepareOutputFolderStructure = prepareOutputFolderStructure;
/**
 * Splits the occurrences stored in one file's frequency map into
 * legit / empty / hidden buckets.
 *
 * - "empty" is the frequency stored under the empty-address key (for
 *   "frequencySenderAddressAndName": under the combined empty address + name key);
 * - "hidden" sums the frequencies of every key containing the STRANGE marker;
 * - "legit" is whatever remains of the full sum.
 *
 * @param {string} propName - key of the file inside `theFilesObj`
 * @param {object} theFilesObj - inner files of a sender-category folder; the
 *   selected file must provide `freqMap` (a Map of string key -> number)
 * @returns {{fullSumOfNumbers: number, countOfLegitValues: number,
 *   countOfEmptyValues: number, countOfHiddenValues: number,
 *   uniqueCountOfHiddenValues: number}}
 */
const generateCoolCounts = (propName, theFilesObj) => {
    const { freqMap } = theFilesObj[propName];
    const entriesByFreqDesc = Array.from(freqMap);
    entriesByFreqDesc.sort((left, right) => right[1] - left[1]);
    const fullSumOfNumbers = (0, sweetUtils_1.getSumOfAllValuesOfMap)(freqMap);
    // The "address and name" file stores its empty entry under a combined key.
    let emptyKey = constants_1.str.EMPTY_ADDR;
    if (propName === "frequencySenderAddressAndName") {
        emptyKey = (0, sweetUtils_1.combineAddressAndName)(constants_1.str.EMPTY_ADDR, constants_1.str.EMPTY_NAME);
    }
    const countOfEmptyValues = freqMap.get(emptyKey) || 0;
    let countOfHiddenValues = 0;
    let uniqueCountOfHiddenValues = 0;
    for (const [entryKey, entryFreq] of entriesByFreqDesc) {
        if (!entryKey.includes(constants_1.str.STRANGE)) {
            continue;
        }
        uniqueCountOfHiddenValues += 1;
        countOfHiddenValues += entryFreq;
    }
    const countOfLegitValues = fullSumOfNumbers - (countOfEmptyValues + countOfHiddenValues);
    return {
        fullSumOfNumbers,
        countOfLegitValues,
        countOfEmptyValues,
        countOfHiddenValues,
        uniqueCountOfHiddenValues,
    };
};
/**
 * Stores the headline numbers of one sender category on the global
 * accumulator (`gloAccu_1.step`) and returns the counts they came from.
 *
 * Runs twice: once for "me" and once for "notMeOrUnknown".
 *
 * Messages are counted through the sender-address frequency file
 * (`keyForMessageCountBySender`), because a message most likely has just one
 * sender.
 *
 * @param {object} theFilesObj - inner files of the category folder; must
 *   contain `attachmentsBySender` with its three attachment maps
 * @param {string} senderCategory - "me"; any other value is handled as
 *   "not me or unknown"
 * @returns {object} the counts produced by `generateCoolCounts`
 */
const recordMajorCounts = (theFilesObj, senderCategory) => {
    const majorCountsForCurrCategory = generateCoolCounts(groundFolderMaker_1.keyForMessageCountBySender, theFilesObj);
    const senderIsMe = senderCategory === "me";
    const messageCounts = gloAccu_1.step.countOfMessagesWithSenderCategory;
    if (senderIsMe) {
        // In the "me" category every sender address is my own address, so the
        // full sum and the legit count are expected to be the same number.
        messageCounts[senderCategory] = majorCountsForCurrCategory.fullSumOfNumbers;
    }
    else {
        // Not me: split the senders into empty / hidden (strange) / legit.
        messageCounts["empty"] = majorCountsForCurrCategory.countOfEmptyValues;
        messageCounts["hidden"] = majorCountsForCurrCategory.countOfHiddenValues;
        messageCounts["notMe"] = majorCountsForCurrCategory.countOfLegitValues;
    }
    // Attachment totals are only tracked as "me" or "notMe"; there is no
    // separate slot for empty or hidden senders.
    const attachmCategory = senderIsMe ? senderCategory : "notMe";
    const attachmFile = theFilesObj.attachmentsBySender;
    gloAccu_1.step.totalSizeOfAttachmentsWithSenderCategory[attachmCategory] =
        (0, sweetUtils_1.getSumOfAllValuesOfMap)(attachmFile.attachmTotalSizeMap);
    gloAccu_1.step.totalCountOfAttachmentsWithSenderCategory[attachmCategory] =
        (0, sweetUtils_1.getSumOfAllValuesOfMap)(attachmFile.attachmTotalCountMap);
    gloAccu_1.step.countOfMailsWithAtLeastOneAttachmentWithSenderCategory[attachmCategory] =
        (0, sweetUtils_1.getSumOfAllValuesOfMap)(attachmFile.mailCountWithNonZeroCountOfAttachmentsMap);
    return majorCountsForCurrCategory;
};
/**
 * Builds the CSV table of every inner file of one sender-category folder and
 * appends it to that file.
 *
 * Two table layouts exist:
 * - frequency files: one row per key of `freqMap` (value, frequency, percent
 *   of all occurrences), sorted by frequency, highest first;
 * - attachment files (key contains "attachm"): one row per key of
 *   `attachmTotalSizeMap`, sorted by total size, highest first.
 * In both layouts a small summary / legend panel is attached to the right of
 * the first rows, separated by one empty column.
 *
 * Side effects:
 * - calls `recordMajorCounts`, which updates the global accumulator;
 * - stores `legitCount`, `hiddenCount` and `emptyCount` on every file descriptor;
 * - appends (flag "a+") the CSV text to `pathAbsOrRel` of every descriptor;
 *   a descriptor without a path is reported with `console.log` and skipped.
 *
 * @param {object} folderOfSpecificSenderCategory - folder descriptor whose
 *   `innerFiles` are processed
 * @param {string} senderCategory - "me"; any other value is handled as
 *   "not me or unknown"
 * @returns {void}
 * @throws {Error} when an attachment file that has rows has no matching
 *   frequency file to read the all-mails count from
 */
const writeStatsOfSpecificSenderCategoryIntoFiles = (folderOfSpecificSenderCategory, senderCategory) => {
    const theFilesObj = folderOfSpecificSenderCategory.innerFiles;
    const majorCounts = recordMajorCounts(theFilesObj, senderCategory);
    // Picks the counts for one file (the sender-address file reuses the counts
    // already computed above) and remembers them on the file descriptor.
    const resolveAndStampCounts = (propName, currFile) => {
        const counts = propName === groundFolderMaker_1.keyForMessageCountBySender
            ? majorCounts
            : generateCoolCounts(propName, theFilesObj);
        currFile.legitCount = counts.countOfLegitValues;
        currFile.hiddenCount = counts.countOfHiddenValues;
        currFile.emptyCount = counts.countOfEmptyValues;
        return counts;
    };
    // File name without its last four characters (the extension, e.g. ".csv").
    const nameWithoutExtension = (currFile) => currFile.fileName.slice(0, currFile.fileName.length - 4);
    // Pads `rows` with blank rows until the side panel fits, then attaches the
    // side panel to the right of the first rows behind one empty column.
    const attachSidePanel = (rows, sidePanel, blankRowWidth) => {
        while (rows.length < sidePanel.length) {
            rows.push(Array.from({ length: blankRowWidth }, () => ""));
        }
        sidePanel.forEach((tupleItem, index) => {
            rows[index].push("", ...tupleItem);
        });
    };
    const fnForFreqFiles = (propName, currFile) => {
        const { fullSumOfNumbers, countOfEmptyValues, countOfHiddenValues, countOfLegitValues } = resolveAndStampCounts(propName, currFile);
        // Relies on the "me" message count being recorded already
        // (writeStatsIntoFiles processes the "me" category first).
        const messagesWhereWeSearched = senderCategory === "me"
            ? gloAccu_1.step.countOfMessagesWithSenderCategory["me"]
            : gloAccu_1.step.v - gloAccu_1.step.countOfMessagesWithSenderCategory["me"];
        const isKeyForSenders = groundFolderMaker_1.keysForSenders.includes(propName);
        // When the sender is me, the sender files show "-" for empty/hidden.
        const notApplicable_hiddEmpt = senderCategory === "me" && isKeyForSenders;
        const sideArr = [
            ["File name:", nameWithoutExtension(currFile)],
            ["100% (All Occurrences):", fullSumOfNumbers],
            ["Legit Occurrences:", countOfLegitValues],
            ["Empty:", notApplicable_hiddEmpt ? "-" : countOfEmptyValues],
            ["Hidden:", notApplicable_hiddEmpt ? "-" : countOfHiddenValues],
            ["Unique Count:", currFile.freqMap.size],
            ["Messages Where We Searched:", messagesWhereWeSearched],
            ["Messages Where Found:", currFile.messagesWhereRelevantValuesFound],
            [
                "Messages Where Not Found:",
                messagesWhereWeSearched - currFile.messagesWhereRelevantValuesFound,
            ],
        ];
        const final2dArr = [...currFile.freqMap]
            .sort((a, b) => b[1] - a[1])
            .map((line) => {
            const percentage = (100 * line[1]) / fullSumOfNumbers;
            return [...line, `${percentage.toFixed(12)}%`];
        });
        attachSidePanel(final2dArr, sideArr, 3);
        return { final2dArr };
    };
    const fnForAttachmStatFiles = (propName, currFile) => {
        resolveAndStampCounts(propName, currFile);
        const isForDomain = propName.toLowerCase().includes("domain");
        // Frequency file that holds the count of all mails per address/domain.
        const allMailCountPropFinder = {
            attachmentsBySender: "frequencySenderAddress",
            attachmentsByDomain: "frequencySenderDomain",
            attachmentsByReceiver: "frequencyReceiverAddress",
        };
        const mapFinderKey = allMailCountPropFinder[propName];
        const sideArr = [
            ["File name:", nameWithoutExtension(currFile)],
            ["", ""],
            ["Legend:", ""],
            ["", ""],
            ["A:", isForDomain ? "domain" : "address"],
            ["", ""],
            ["B:", "total size of attachments (MB => million Bytes)"],
            ["C:", "percent"],
            ["", ""],
            ["D:", "count of attachments"],
            ["", ""],
            ["E:", "count of mails with at least one attachment"],
            ["", ""],
            ["F:", "count of all mails"],
            ["G:", "percent"],
        ];
        // The total of a map is the same for every row, so it is computed
        // once per map (on first use) instead of once per row and map.
        const totalsByMap = new Map();
        const totalOf = (currMap) => {
            if (!totalsByMap.has(currMap)) {
                totalsByMap.set(currMap, (0, sweetUtils_1.getSumOfAllValuesOfMap)(currMap));
            }
            return totalsByMap.get(currMap);
        };
        // [value of the key in the map (0 when absent), its percent string]
        const buildCoolLinePair = (currMap, currAddressOrDomain) => {
            const num = currMap.get(currAddressOrDomain) || 0;
            return [num, (0, sweetUtils_1.generatePercentStr)(totalOf(currMap), num)];
        };
        const final2dArr = [...currFile.attachmTotalSizeMap]
            .sort((a, b) => b[1] - a[1])
            .map(([currAddressOrDomain]) => {
            const pair_totalSizeOfAttachments = buildCoolLinePair(currFile.attachmTotalSizeMap, currAddressOrDomain);
            const pair_countOfAttachments = buildCoolLinePair(currFile.attachmTotalCountMap, currAddressOrDomain);
            const pair_countOfMailsWithAtLeastOneAttachment = buildCoolLinePair(currFile.mailCountWithNonZeroCountOfAttachmentsMap, currAddressOrDomain);
            if (!mapFinderKey) {
                throw new Error(`!mapFinderKey --- fnForAttachmStatFiles --- propName: ${propName}`);
            }
            const pair_countOfAllMails = buildCoolLinePair(theFilesObj[mapFinderKey].freqMap, currAddressOrDomain);
            // Columns A..G of the legend; only B and F carry a percent column.
            return [
                currAddressOrDomain,
                ...pair_totalSizeOfAttachments,
                pair_countOfAttachments[0],
                pair_countOfMailsWithAtLeastOneAttachment[0],
                ...pair_countOfAllMails,
            ];
        });
        // Blank padding rows are as wide as a data row (empty when there is none).
        attachSidePanel(final2dArr, sideArr, (final2dArr[0] || []).length);
        return { final2dArr };
    };
    Object.keys(theFilesObj).forEach((propName) => {
        const currFile = theFilesObj[propName];
        const isAttachmentStatFile = propName.toLowerCase().includes("attachm");
        const { final2dArr } = isAttachmentStatFile
            ? fnForAttachmStatFiles(propName, currFile)
            : fnForFreqFiles(propName, currFile);
        const csvStringBy2dArr = (0, sync_1.stringify)(final2dArr, {
            header: false,
            columns: undefined,
        });
        if (!currFile.pathAbsOrRel) {
            console.log(`Something's wrong --- path of ${currFile.fileName} not found`);
            return;
        }
        (0, node_fs_1.writeFileSync)(currFile.pathAbsOrRel, csvStringBy2dArr, {
            flag: "a+",
        });
    });
};
exports.writeStatsOfSpecificSenderCategoryIntoFiles = writeStatsOfSpecificSenderCategoryIntoFiles;
/**
 * Writes the stats of both sender-category folders into their files:
 * first the "me" folder, then the "not me or unknown" folder.
 *
 * @returns {void}
 */
const writeStatsIntoFiles = () => {
    // [key of the category folder, sender category passed to the writer]
    const jobs = [
        ["forMailsWhereSenderIsMe", "me"],
        ["forMailsWhereSenderIsNotMeOrIsUnknown", "notMeOrUnknown"],
    ];
    for (const [folderKey, senderCategory] of jobs) {
        (0, exports.writeStatsOfSpecificSenderCategoryIntoFiles)(groundFolderMaker_1.groundFolder.innerFolders.mboxStats.innerFolders[folderKey], senderCategory);
    }
};
exports.writeStatsIntoFiles = writeStatsIntoFiles;
/**
 * Prints a summary of the collected numbers to the console and builds the
 * rows of the general-stats table.
 *
 * Everything is read from the global accumulator (`gloAccu_1.step`), the
 * `groundFolder` descriptors and the start parameters (`basicStarter_1`);
 * nothing is written to disk here.
 *
 * @returns {{generalStats2dArr: Array<Array<string|number>>}} table rows:
 *   headline counts, a legend, then one section per sender category
 */
const generateGeneralStats = () => {
    const { step } = gloAccu_1;
    const statsFolder = groundFolderMaker_1.groundFolder.innerFolders.mboxStats;
    // Inner files of the "me" folder or of the "not me or unknown" folder.
    const innerFilesFor = (senderIsMe) => {
        const folderKey = senderIsMe
            ? "forMailsWhereSenderIsMe"
            : "forMailsWhereSenderIsNotMeOrIsUnknown";
        return statsFolder.innerFolders[folderKey].innerFiles;
    };
    const blankRow = () => ["", ""];
    //
    // ---------- console summary ----------
    //
    console.log("\n");
    if (step.v === step.succeededV) {
        console.log("\n\n\nSuccess.");
    }
    console.log(`Full count of messages: ${step.v}\n`);
    const consoleLabels = new Map([
        ["me", "--> me"],
        ["notMe", "not me"],
    ]);
    // Continuation lines of a console block are indented by 33 spaces.
    const consoleLineBreak = `\n${" ".repeat(33)}`;
    const describeCategoryForConsole = (senderCateg) => {
        const files = innerFilesFor(senderCateg === "me");
        const label = consoleLabels.get(senderCateg) ?? senderCateg;
        const lines = [
            `Messages where sender is ${label}: ${step.countOfMessagesWithSenderCategory[senderCateg]}`,
            `Count of mails with at least one attachment: ${step.countOfMailsWithAtLeastOneAttachmentWithSenderCategory[senderCateg]}`,
            `Total count of attachments: ${step.totalCountOfAttachmentsWithSenderCategory[senderCateg]}`,
            `Total size of attachments: ${step.totalSizeOfAttachmentsWithSenderCategory[senderCateg]} MB => Million Bytes`,
            `Unique sender addresses: ${files.frequencySenderAddress.freqMap.size}`,
            `Unique sender domains: ${files.frequencySenderDomain.freqMap.size}`,
            `Unique receiver addresses: ${files.frequencyReceiverAddress.freqMap.size}`,
        ];
        return `${lines.join(consoleLineBreak)}\n`;
    };
    // Only "me" and "notMe" are printed; "hidden" and "empty" are not.
    for (const senderCateg of ["me", "notMe"]) {
        console.log(describeCategoryForConsole(senderCateg));
    }
    console.log(`\nCreated new folder "${statsFolder.folderName}"`);
    //
    // ---------- table: headline rows ----------
    //
    const { mboxFilePath, myEmail } = basicStarter_1;
    // Last path segment (both "/" and "\" separate); the full path when that
    // segment is empty.
    const mboxFileName = mboxFilePath.split(/[/\\]/).at(-1) || mboxFilePath;
    const messageCounts = step.countOfMessagesWithSenderCategory;
    const headlineRows = [
        ["MBOX file name:", mboxFileName],
        ["My mail:", myEmail],
        ["Full count of messages:", step.v],
        ["Messages where sender is me:", messageCounts.me],
        ["Messages where sender is not me:", messageCounts.notMe],
        ["Messages where sender is empty:", messageCounts.empty],
        ["Messages where sender is hidden:", messageCounts.hidden],
    ];
    const legendRows = [
        blankRow(),
        ["Participant --> Sender/Receiver/CC/BCC"],
        [
            "Hidden address --> Participant exists but address value is other kind of text instead of email address",
        ],
        [
            "Empty address --> Participant exists but address is empty. Or: participant (only for sender/receiver) does not exist at all.",
        ],
        ["Unknown address -> hidden or empty address"],
    ];
    //
    // ---------- table: one section per sender category ----------
    //
    // [row title, key of the frequency file the row is read from]
    const participantRows = [
        ["Sender addresses", "frequencySenderAddress"],
        ["Sender domains", "frequencySenderDomain"],
        ["SenderPlusNames", "frequencySenderAddressAndName"],
        ["Receiver addresses", "frequencyReceiverAddress"],
        ["CC addresses", "frequencyCcAddress"],
        ["BCC addresses", "frequencyBccAddress"],
    ];
    // Cells: unique, legit, hidden, empty, messages where found.
    const statCellsOf = (files, fileKey, senderIsMe) => {
        const file = files[fileKey];
        const isKeyForSenders = groundFolderMaker_1.keysForSenders.includes(fileKey);
        // When the sender is me, the sender files show "-" for hidden/empty.
        const notApplicable = senderIsMe && isKeyForSenders;
        return [
            file.freqMap.size,
            file.legitCount,
            notApplicable ? "-" : file.hiddenCount,
            notApplicable ? "-" : file.emptyCount,
            file.messagesWhereRelevantValuesFound,
        ];
    };
    const buildCategorySection = (categoryTitle, senderIsMe) => {
        const files = innerFilesFor(senderIsMe);
        const stepKey = senderIsMe ? "me" : "notMe";
        return [
            blankRow(),
            [`For messages where sender is ${categoryTitle}`],
            ["", "Unique", "Legit", "Hidden", "Empty", "Messages Where Found"],
            ...participantRows.map(([rowTitle, fileKey]) => [
                rowTitle,
                ...statCellsOf(files, fileKey, senderIsMe),
            ]),
            blankRow(),
            blankRow(),
            [
                "Count of mails with at least one attachment",
                step.countOfMailsWithAtLeastOneAttachmentWithSenderCategory[stepKey],
            ],
            ["Total count of attachments", step.totalCountOfAttachmentsWithSenderCategory[stepKey]],
            [
                "Total size of attachments (MB => million Bytes)",
                step.totalSizeOfAttachmentsWithSenderCategory[stepKey],
            ],
            ["Unique sender addresses", files.frequencySenderAddress.freqMap.size],
            ["Unique sender domains", files.frequencySenderDomain.freqMap.size],
            ["Unique receiver addresses", files.frequencyReceiverAddress.freqMap.size],
            blankRow(),
            blankRow(),
            blankRow(),
            blankRow(),
        ];
    };
    return {
        generalStats2dArr: [
            ...headlineRows,
            ...legendRows,
            ...buildCategorySection("me", true),
            ...buildCategorySection("not me or unknown", false),
        ],
    };
};
exports.generateGeneralStats = generateGeneralStats;