UNPKG

eggi-ai-db-schema

Version:

Type-safe database schema and ORM client for Eggi.AI with direct RDS connection

370 lines (350 loc) 15.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.findCloseConnections = findCloseConnections; exports.getReferenceUsers = getReferenceUsers; exports.generateAnalysisReport = generateAnalysisReport; exports.extractLinkedInIdentifier = extractLinkedInIdentifier; exports.findUsersByLinkedInIdentifiers = findUsersByLinkedInIdentifiers; const drizzle_orm_1 = require("drizzle-orm"); const LINKEDIN_URL_PREFIX = "https://www.linkedin.com/in/"; /** * Find close connections for given LinkedIn identifiers * This function implements the complex bidirectional relationship analysis * with 50% filtering and max 50 connections per person */ async function findCloseConnections(db, identifiers) { if (identifiers.length === 0) { return []; } // Convert array to PostgreSQL array format for IN clause const identifierList = identifiers.map(id => `'${id}'`).join(","); const query = (0, drizzle_orm_1.sql) ` WITH target_social_accounts AS ( -- Find social accounts for our target LinkedIn identifiers SELECT DISTINCT sa.id as social_account_id, sa.user_id, u.given_name, u.family_name, COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier FROM social_accounts sa INNER JOIN users u ON sa.user_id = u.id WHERE sa.platform = 'linkedin' AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)}) OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)})) ), all_relationships AS ( -- Get all relationships bidirectionally using UNION -- Direction A->B: where target is social_account_id_a SELECT tsa.social_account_id as source_social_account_id, tsa.user_id as source_user_id, tsa.given_name as source_given_name, tsa.family_name as source_family_name, tsa.linkedin_identifier as source_linkedin_identifier, rs.social_account_id_b as connected_social_account_id, rs.score, rs.model_version, rs.created_at FROM target_social_accounts tsa INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_a UNION ALL -- Direction B->A: where target is social_account_id_b SELECT tsa.social_account_id as source_social_account_id, tsa.user_id as source_user_id, tsa.given_name as source_given_name, tsa.family_name as source_family_name, tsa.linkedin_identifier as source_linkedin_identifier, rs.social_account_id_a as connected_social_account_id, rs.score, rs.model_version, rs.created_at FROM target_social_accounts tsa INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_b ), latest_relationships AS ( -- Get only the latest model version for each relationship pair SELECT DISTINCT ON (source_social_account_id, connected_social_account_id) * FROM all_relationships ORDER BY source_social_account_id, connected_social_account_id, model_version DESC, created_at DESC ), ranked_relationships AS ( -- Rank relationships by score for each source person SELECT lr.*, ROW_NUMBER() OVER (PARTITION BY lr.source_social_account_id ORDER BY lr.score DESC) as score_rank, COUNT(*) OVER (PARTITION BY lr.source_social_account_id) as total_connections FROM latest_relationships lr ), filtered_relationships AS ( -- Apply 50% filter with max 50 limit SELECT * FROM ranked_relationships rr WHERE rr.score_rank <= LEAST( GREATEST(CEIL(rr.total_connections * 0.5), 1), -- At least 1, max 50% of total 50 -- Hard limit of 50 ) ) -- Final output with connection details SELECT -- Source person info (for grouping/debugging) COALESCE(fr.source_given_name || ' ' || fr.source_family_name, 'Unknown User') as source_user_name, fr.source_linkedin_identifier, -- Connection info (the suggestions) COALESCE(cu.given_name || ' ' || cu.family_name, 'Unknown User') as suggestion_for_user_name, CASE WHEN csa.public_identifier IS NOT NULL THEN ${LINKEDIN_URL_PREFIX} || csa.public_identifier || '/' ELSE 'LinkedIn ID: ' || COALESCE(csa.internal_identifier, csa.internal_identifier_regular) END as linkedin_url, fr.score, -- Additional context fr.score_rank, fr.total_connections FROM filtered_relationships fr -- Join to get connected person's details INNER JOIN social_accounts csa ON fr.connected_social_account_id = csa.id INNER JOIN users cu ON csa.user_id = cu.id ORDER BY fr.source_linkedin_identifier, fr.score DESC; `; const result = await db.execute(query); return result; } /** * Get reference users information (the users we're analyzing) */ async function getReferenceUsers(db, identifiers) { if (identifiers.length === 0) { return []; } // Convert array to PostgreSQL array format for IN clause const identifierList = identifiers.map(id => `'${id}'`).join(","); const query = (0, drizzle_orm_1.sql) ` WITH target_social_accounts AS ( -- Find social accounts for our target LinkedIn identifiers SELECT DISTINCT sa.id as social_account_id, sa.user_id, u.given_name, u.family_name, COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier, sa.public_identifier FROM social_accounts sa INNER JOIN users u ON sa.user_id = u.id WHERE sa.platform = 'linkedin' AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)}) OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)})) ), all_relationships AS ( -- Get all relationships bidirectionally using UNION to count total connections -- Direction A->B: where target is social_account_id_a SELECT tsa.social_account_id as source_social_account_id, rs.social_account_id_b as connected_social_account_id, rs.score, rs.model_version, rs.created_at FROM target_social_accounts tsa INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_a UNION ALL -- Direction B->A: where target is social_account_id_b SELECT tsa.social_account_id as source_social_account_id, rs.social_account_id_a as connected_social_account_id, rs.score, rs.model_version, rs.created_at FROM target_social_accounts tsa INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_b ), latest_relationships AS ( -- Get only the latest model version for each relationship pair SELECT DISTINCT ON (source_social_account_id, connected_social_account_id) * FROM all_relationships ORDER BY source_social_account_id, connected_social_account_id, model_version DESC, created_at DESC ), connection_counts AS ( -- Count total connections per user SELECT source_social_account_id, COUNT(*) as connections_count FROM latest_relationships GROUP BY source_social_account_id ) SELECT DISTINCT COALESCE(tsa.given_name || ' ' || tsa.family_name, 'Unknown User') as full_name, lp.profile_picture_url as profile_image_url, tsa.linkedin_identifier as internal_identifier, CASE WHEN tsa.public_identifier IS NOT NULL THEN ${LINKEDIN_URL_PREFIX} || tsa.public_identifier || '/' ELSE 'LinkedIn ID: ' || tsa.linkedin_identifier END as linkedin_identifier, COALESCE(cc.connections_count, 0) as connections_count FROM target_social_accounts tsa LEFT JOIN linkedin.profiles lp ON tsa.social_account_id = lp.social_account_id LEFT JOIN connection_counts cc ON tsa.social_account_id = cc.source_social_account_id ORDER BY full_name; `; const result = await db.execute(query); return result; } /** * Generate comprehensive analysis report for missing accounts and relationships */ async function generateAnalysisReport(db, identifiers) { if (identifiers.length === 0) { return { found_accounts: [], missing_accounts: [], accounts_with_no_relationships: [], total_requested: 0, total_found: 0, total_with_relationships: 0, }; } // Convert array to PostgreSQL array format for IN clause const identifierList = identifiers.map(id => `'${id}'`).join(","); const query = (0, drizzle_orm_1.sql) ` WITH requested_identifiers AS ( SELECT unnest(ARRAY[${drizzle_orm_1.sql.raw(identifierList)}]) as linkedin_identifier ), found_social_accounts AS ( SELECT DISTINCT ri.linkedin_identifier, sa.id as social_account_id, sa.user_id, u.given_name, u.family_name, COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as found_linkedin_identifier, sa.public_identifier FROM requested_identifiers ri LEFT JOIN social_accounts sa ON ( sa.internal_identifier = ri.linkedin_identifier OR sa.internal_identifier_regular = ri.linkedin_identifier ) LEFT JOIN users u ON sa.user_id = u.id WHERE sa.platform = 'linkedin' OR sa.platform IS NULL ), accounts_with_relationships AS ( -- Check which found accounts have relationships SELECT DISTINCT fsa.linkedin_identifier, fsa.social_account_id, COUNT(DISTINCT rs1.social_account_id_b) + COUNT(DISTINCT rs2.social_account_id_a) as relationship_count FROM found_social_accounts fsa LEFT JOIN relationship_scores rs1 ON fsa.social_account_id = rs1.social_account_id_a LEFT JOIN relationship_scores rs2 ON fsa.social_account_id = rs2.social_account_id_b WHERE fsa.social_account_id IS NOT NULL GROUP BY fsa.linkedin_identifier, fsa.social_account_id ) SELECT ri.linkedin_identifier as requested_identifier, fsa.social_account_id, COALESCE(fsa.given_name || ' ' || fsa.family_name, 'Unknown User') as full_name, fsa.found_linkedin_identifier, fsa.public_identifier, COALESCE(awr.relationship_count, 0) as relationship_count, CASE WHEN fsa.social_account_id IS NULL THEN 'not_found' WHEN COALESCE(awr.relationship_count, 0) = 0 THEN 'no_relationships' ELSE 'found_with_relationships' END as status FROM requested_identifiers ri LEFT JOIN found_social_accounts fsa ON ri.linkedin_identifier = fsa.linkedin_identifier LEFT JOIN accounts_with_relationships awr ON fsa.social_account_id = awr.social_account_id ORDER BY ri.linkedin_identifier; `; const result = await db.execute(query); // Process results const found_accounts = []; const missing_accounts = []; const accounts_with_no_relationships = []; result.forEach((row) => { if (row.status === "not_found") { missing_accounts.push(row.requested_identifier); } else if (row.status === "no_relationships") { accounts_with_no_relationships.push(row.requested_identifier); } else if (row.status === "found_with_relationships") { found_accounts.push({ full_name: row.full_name, profile_image_url: null, // Will be filled by getReferenceUsers if needed internal_identifier: row.found_linkedin_identifier, linkedin_identifier: row.public_identifier ? `${LINKEDIN_URL_PREFIX}${row.public_identifier}/` : `LinkedIn ID: ${row.found_linkedin_identifier}`, connections_count: row.relationship_count, }); } }); return { found_accounts, missing_accounts, accounts_with_no_relationships, total_requested: identifiers.length, total_found: found_accounts.length + accounts_with_no_relationships.length, total_with_relationships: found_accounts.length, }; } /** * Extract LinkedIn identifier from URL */ function extractLinkedInIdentifier(linkedinUrl) { try { // Handle different LinkedIn URL formats: // https://www.linkedin.com/in/username // https://linkedin.com/in/username // https://www.linkedin.com/profile/view?id=ACoAAA... const url = new URL(linkedinUrl); // Format: /in/username if (url.pathname.startsWith("/in/")) { const username = url.pathname.replace("/in/", "").replace("/", ""); return username || null; } // Format: /profile/view?id=ACoAAA... if (url.pathname.startsWith("/profile/view")) { const id = url.searchParams.get("id"); return id || null; } return null; } catch (error) { console.error("Failed to parse LinkedIn URL:", linkedinUrl, error); return null; } } /** * Find users by LinkedIn identifiers (for Supabase sync) */ async function findUsersByLinkedInIdentifiers(db, identifiers) { if (identifiers.length === 0) { return []; } // Convert array to PostgreSQL array format for IN clause const identifierList = identifiers.map(id => `'${id}'`).join(","); const query = (0, drizzle_orm_1.sql) ` SELECT DISTINCT u.id, COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier_regular, sa.public_identifier as linkedin_identifier, COALESCE(u.given_name || ' ' || u.family_name, 'Unknown User') as full_name, lp.profile_picture_url as profile_image_url FROM social_accounts sa INNER JOIN users u ON sa.user_id = u.id LEFT JOIN linkedin.profiles lp ON sa.id = lp.social_account_id WHERE sa.platform = 'linkedin' AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)}) OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)})) ORDER BY full_name; `; const result = await db.execute(query); return result; } //# sourceMappingURL=close-connections.js.map