@stackmemoryai/stackmemory
Version:
Project-scoped memory for AI coding tools. Durable context across sessions with MCP integration, frames, smart retrieval, Claude Code skills, and automatic hooks.
287 lines (242 loc) โข 8.39 kB
text/typescript
/**
* Cleanup duplicate tasks from local JSONL storage
*
* This script:
* 1. Identifies duplicate tasks by title/Linear ID
* 2. Keeps the most recent version of each task
* 3. Removes redundant Linear-imported duplicates
* 4. Cleans up orphaned task mappings
*/
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { join } from 'path';
interface TaskEvent {
id: string;
type: 'task_create' | 'task_update';
timestamp: number;
title: string;
status?: string;
frame_id?: string;
description?: string;
priority?: string;
[key: string]: unknown;
}
interface TaskMapping {
stackmemoryId: string;
linearId: string;
linearIdentifier: string;
lastSyncTimestamp: number;
}
const projectRoot = process.cwd();
const tasksPath = join(projectRoot, '.stackmemory', 'tasks.jsonl');
const mappingsPath = join(projectRoot, '.stackmemory', 'linear-mappings.json');
function loadTasks(): TaskEvent[] {
if (!existsSync(tasksPath)) {
console.log('No tasks file found');
return [];
}
const content = readFileSync(tasksPath, 'utf8');
const lines = content
.trim()
.split('\n')
.filter((l) => l.trim());
return lines
.map((line) => {
try {
return JSON.parse(line) as TaskEvent;
} catch {
console.warn('Failed to parse line:', line.substring(0, 50));
return null;
}
})
.filter((t): t is TaskEvent => t !== null);
}
function loadMappings(): TaskMapping[] {
if (!existsSync(mappingsPath)) {
return [];
}
try {
return JSON.parse(readFileSync(mappingsPath, 'utf8'));
} catch {
return [];
}
}
function extractLinearId(title: string): string | null {
// Match patterns like [STA-123], [ENG-123], etc.
const match = title.match(/\[([A-Z]+-\d+)\]/);
return match ? match[1] : null;
}
function normalizeTitle(title: string): string {
// Remove Linear ID prefix for comparison
return title
.replace(/^\[[A-Z]+-\d+\]\s*/, '')
.trim()
.toLowerCase();
}
function cleanupTasks(): void {
console.log('๐งน Starting task cleanup...\n');
const events = loadTasks();
const mappings = loadMappings();
console.log(`๐ Found ${events.length} task events`);
console.log(`๐ Found ${mappings.length} Linear mappings\n`);
// First, deduplicate events:
// 1. Remove exact duplicates (same id + type + timestamp)
// 2. Keep only one task_create per task ID (the earliest one)
const seenExact = new Set<string>();
const seenTaskCreate = new Map<string, TaskEvent>(); // id -> earliest create event
const dedupedEvents: TaskEvent[] = [];
let exactDupes = 0;
let createDupes = 0;
// First pass: identify the earliest task_create for each task
for (const event of events) {
if (event.type === 'task_create') {
const existing = seenTaskCreate.get(event.id);
if (!existing || (event.timestamp || 0) < (existing.timestamp || 0)) {
seenTaskCreate.set(event.id, event);
}
}
}
// Second pass: build deduped list
for (const event of events) {
const exactKey = `${event.id}:${event.type}:${event.timestamp}`;
// Skip exact duplicates
if (seenExact.has(exactKey)) {
exactDupes++;
continue;
}
seenExact.add(exactKey);
// For task_create, only keep the earliest one per ID
if (event.type === 'task_create') {
const earliest = seenTaskCreate.get(event.id);
if (
earliest &&
earliest !== event &&
earliest.timestamp !== event.timestamp
) {
createDupes++;
continue;
}
}
dedupedEvents.push(event);
}
if (exactDupes > 0) {
console.log(`๐ Removed ${exactDupes} exact duplicate events`);
}
if (createDupes > 0) {
console.log(`๐ Removed ${createDupes} duplicate task_create events\n`);
}
// Group events by task ID
const taskById = new Map<string, TaskEvent[]>();
for (const event of dedupedEvents) {
const existing = taskById.get(event.id) || [];
existing.push(event);
taskById.set(event.id, existing);
}
// Find the latest state for each task
const latestTaskState = new Map<string, TaskEvent>();
for (const [id, taskEvents] of taskById) {
// Get the latest event (highest timestamp)
const latest = taskEvents.reduce((a, b) =>
(a.timestamp || 0) > (b.timestamp || 0) ? a : b
);
latestTaskState.set(id, latest);
}
console.log(`๐ Found ${latestTaskState.size} unique tasks\n`);
// Identify duplicates by normalized title
const tasksByNormalizedTitle = new Map<string, TaskEvent[]>();
for (const task of latestTaskState.values()) {
const normalized = normalizeTitle(task.title);
const existing = tasksByNormalizedTitle.get(normalized) || [];
existing.push(task);
tasksByNormalizedTitle.set(normalized, existing);
}
// Find duplicate groups
const duplicateGroups: TaskEvent[][] = [];
for (const [, tasks] of tasksByNormalizedTitle) {
if (tasks.length > 1) {
duplicateGroups.push(tasks);
}
}
console.log(
`๐ Found ${duplicateGroups.length} groups of duplicate tasks:\n`
);
// Determine which tasks to keep
const tasksToRemove = new Set<string>();
for (const group of duplicateGroups) {
// Sort by: prefer non-linear-import frame, then by timestamp (newest first)
group.sort((a, b) => {
const aIsImport = a.frame_id === 'linear-import';
const bIsImport = b.frame_id === 'linear-import';
if (aIsImport !== bIsImport) {
return aIsImport ? 1 : -1; // Prefer non-import
}
return (b.timestamp || 0) - (a.timestamp || 0); // Prefer newer
});
// Keep the first (best) task, remove the rest
const keeper = group[0];
console.log(` โ Keeping: ${keeper.title.substring(0, 60)}...`);
console.log(` ID: ${keeper.id}, Frame: ${keeper.frame_id}`);
for (let i = 1; i < group.length; i++) {
const dupe = group[i];
tasksToRemove.add(dupe.id);
console.log(` โ Removing: ${dupe.id} (${dupe.frame_id})`);
}
console.log('');
}
// Also identify Linear-imported tasks that duplicate each other
const linearIdCounts = new Map<string, TaskEvent[]>();
for (const task of latestTaskState.values()) {
const linearId = extractLinearId(task.title);
if (linearId) {
const existing = linearIdCounts.get(linearId) || [];
existing.push(task);
linearIdCounts.set(linearId, existing);
}
}
// Find tasks with same Linear ID
for (const [linearId, tasks] of linearIdCounts) {
if (tasks.length > 1) {
// Sort by timestamp (newest first)
tasks.sort((a, b) => (b.timestamp || 0) - (a.timestamp || 0));
console.log(`๐ Duplicate Linear ID ${linearId}:`);
console.log(` โ Keeping: ${tasks[0].id}`);
for (let i = 1; i < tasks.length; i++) {
tasksToRemove.add(tasks[i].id);
console.log(` โ Removing: ${tasks[i].id}`);
}
console.log('');
}
}
// Filter out removed tasks (use dedupedEvents to include exact-dupe removal)
const cleanedEvents = dedupedEvents.filter((e) => !tasksToRemove.has(e.id));
// Write cleaned tasks
const cleanedContent =
cleanedEvents.map((e) => JSON.stringify(e)).join('\n') + '\n';
// Backup original
const backupPath = tasksPath + '.backup.' + Date.now();
writeFileSync(backupPath, readFileSync(tasksPath));
console.log(`๐พ Backed up original to: ${backupPath}\n`);
// Write cleaned file
writeFileSync(tasksPath, cleanedContent);
// Clean up orphaned mappings
const validTaskIds = new Set(cleanedEvents.map((e) => e.id));
const cleanedMappings = mappings.filter((m) =>
validTaskIds.has(m.stackmemoryId)
);
if (cleanedMappings.length < mappings.length) {
writeFileSync(mappingsPath, JSON.stringify(cleanedMappings, null, 2));
console.log(
`๐๏ธ Removed ${mappings.length - cleanedMappings.length} orphaned mappings`
);
}
console.log('\nโ
Cleanup complete!');
console.log(` Original events: ${events.length}`);
console.log(` After exact dedup: ${dedupedEvents.length}`);
console.log(` Final cleaned: ${cleanedEvents.length}`);
console.log(
` Total removed: ${events.length - cleanedEvents.length} events`
);
console.log(` Unique tasks removed: ${tasksToRemove.size}`);
}
// Run cleanup
cleanupTasks();