eggi-ai-db-schema
Version:
Type-safe database schema and ORM client for Eggi.AI with direct RDS connection
370 lines (350 loc) • 15.1 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.findCloseConnections = findCloseConnections;
exports.getReferenceUsers = getReferenceUsers;
exports.generateAnalysisReport = generateAnalysisReport;
exports.extractLinkedInIdentifier = extractLinkedInIdentifier;
exports.findUsersByLinkedInIdentifiers = findUsersByLinkedInIdentifiers;
const drizzle_orm_1 = require("drizzle-orm");
const LINKEDIN_URL_PREFIX = "https://www.linkedin.com/in/";
/**
* Find close connections for given LinkedIn identifiers
* This function implements the complex bidirectional relationship analysis
* with 50% filtering and max 50 connections per person
*/
async function findCloseConnections(db, identifiers) {
if (identifiers.length === 0) {
return [];
}
// Convert array to PostgreSQL array format for IN clause
const identifierList = identifiers.map(id => `'${id}'`).join(",");
const query = (0, drizzle_orm_1.sql) `
WITH target_social_accounts AS (
-- Find social accounts for our target LinkedIn identifiers
SELECT DISTINCT
sa.id as social_account_id,
sa.user_id,
u.given_name,
u.family_name,
COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier
FROM social_accounts sa
INNER JOIN users u ON sa.user_id = u.id
WHERE sa.platform = 'linkedin'
AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)})
OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)}))
),
all_relationships AS (
-- Get all relationships bidirectionally using UNION
-- Direction A->B: where target is social_account_id_a
SELECT
tsa.social_account_id as source_social_account_id,
tsa.user_id as source_user_id,
tsa.given_name as source_given_name,
tsa.family_name as source_family_name,
tsa.linkedin_identifier as source_linkedin_identifier,
rs.social_account_id_b as connected_social_account_id,
rs.score,
rs.model_version,
rs.created_at
FROM target_social_accounts tsa
INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_a
UNION ALL
-- Direction B->A: where target is social_account_id_b
SELECT
tsa.social_account_id as source_social_account_id,
tsa.user_id as source_user_id,
tsa.given_name as source_given_name,
tsa.family_name as source_family_name,
tsa.linkedin_identifier as source_linkedin_identifier,
rs.social_account_id_a as connected_social_account_id,
rs.score,
rs.model_version,
rs.created_at
FROM target_social_accounts tsa
INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_b
),
latest_relationships AS (
-- Get only the latest model version for each relationship pair
SELECT DISTINCT ON (source_social_account_id, connected_social_account_id) *
FROM all_relationships
ORDER BY source_social_account_id, connected_social_account_id, model_version DESC, created_at DESC
),
ranked_relationships AS (
-- Rank relationships by score for each source person
SELECT
lr.*,
ROW_NUMBER() OVER (PARTITION BY lr.source_social_account_id ORDER BY lr.score DESC) as score_rank,
COUNT(*) OVER (PARTITION BY lr.source_social_account_id) as total_connections
FROM latest_relationships lr
),
filtered_relationships AS (
-- Apply 50% filter with max 50 limit
SELECT *
FROM ranked_relationships rr
WHERE rr.score_rank <= LEAST(
GREATEST(CEIL(rr.total_connections * 0.5), 1), -- At least 1, max 50% of total
50 -- Hard limit of 50
)
)
-- Final output with connection details
SELECT
-- Source person info (for grouping/debugging)
COALESCE(fr.source_given_name || ' ' || fr.source_family_name, 'Unknown User') as source_user_name,
fr.source_linkedin_identifier,
-- Connection info (the suggestions)
COALESCE(cu.given_name || ' ' || cu.family_name, 'Unknown User') as suggestion_for_user_name,
CASE
WHEN csa.public_identifier IS NOT NULL THEN
${LINKEDIN_URL_PREFIX} || csa.public_identifier || '/'
ELSE
'LinkedIn ID: ' || COALESCE(csa.internal_identifier, csa.internal_identifier_regular)
END as linkedin_url,
fr.score,
-- Additional context
fr.score_rank,
fr.total_connections
FROM filtered_relationships fr
-- Join to get connected person's details
INNER JOIN social_accounts csa ON fr.connected_social_account_id = csa.id
INNER JOIN users cu ON csa.user_id = cu.id
ORDER BY
fr.source_linkedin_identifier,
fr.score DESC;
`;
const result = await db.execute(query);
return result;
}
/**
* Get reference users information (the users we're analyzing)
*/
async function getReferenceUsers(db, identifiers) {
if (identifiers.length === 0) {
return [];
}
// Convert array to PostgreSQL array format for IN clause
const identifierList = identifiers.map(id => `'${id}'`).join(",");
const query = (0, drizzle_orm_1.sql) `
WITH target_social_accounts AS (
-- Find social accounts for our target LinkedIn identifiers
SELECT DISTINCT
sa.id as social_account_id,
sa.user_id,
u.given_name,
u.family_name,
COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier,
sa.public_identifier
FROM social_accounts sa
INNER JOIN users u ON sa.user_id = u.id
WHERE sa.platform = 'linkedin'
AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)})
OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)}))
),
all_relationships AS (
-- Get all relationships bidirectionally using UNION to count total connections
-- Direction A->B: where target is social_account_id_a
SELECT
tsa.social_account_id as source_social_account_id,
rs.social_account_id_b as connected_social_account_id,
rs.score,
rs.model_version,
rs.created_at
FROM target_social_accounts tsa
INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_a
UNION ALL
-- Direction B->A: where target is social_account_id_b
SELECT
tsa.social_account_id as source_social_account_id,
rs.social_account_id_a as connected_social_account_id,
rs.score,
rs.model_version,
rs.created_at
FROM target_social_accounts tsa
INNER JOIN relationship_scores rs ON tsa.social_account_id = rs.social_account_id_b
),
latest_relationships AS (
-- Get only the latest model version for each relationship pair
SELECT DISTINCT ON (source_social_account_id, connected_social_account_id) *
FROM all_relationships
ORDER BY source_social_account_id, connected_social_account_id, model_version DESC, created_at DESC
),
connection_counts AS (
-- Count total connections per user
SELECT
source_social_account_id,
COUNT(*) as connections_count
FROM latest_relationships
GROUP BY source_social_account_id
)
SELECT DISTINCT
COALESCE(tsa.given_name || ' ' || tsa.family_name, 'Unknown User') as full_name,
lp.profile_picture_url as profile_image_url,
tsa.linkedin_identifier as internal_identifier,
CASE
WHEN tsa.public_identifier IS NOT NULL THEN
${LINKEDIN_URL_PREFIX} || tsa.public_identifier || '/'
ELSE
'LinkedIn ID: ' || tsa.linkedin_identifier
END as linkedin_identifier,
COALESCE(cc.connections_count, 0) as connections_count
FROM target_social_accounts tsa
LEFT JOIN linkedin.profiles lp ON tsa.social_account_id = lp.social_account_id
LEFT JOIN connection_counts cc ON tsa.social_account_id = cc.source_social_account_id
ORDER BY full_name;
`;
const result = await db.execute(query);
return result;
}
/**
* Generate comprehensive analysis report for missing accounts and relationships
*/
async function generateAnalysisReport(db, identifiers) {
if (identifiers.length === 0) {
return {
found_accounts: [],
missing_accounts: [],
accounts_with_no_relationships: [],
total_requested: 0,
total_found: 0,
total_with_relationships: 0,
};
}
// Convert array to PostgreSQL array format for IN clause
const identifierList = identifiers.map(id => `'${id}'`).join(",");
const query = (0, drizzle_orm_1.sql) `
WITH requested_identifiers AS (
SELECT unnest(ARRAY[${drizzle_orm_1.sql.raw(identifierList)}]) as linkedin_identifier
),
found_social_accounts AS (
SELECT DISTINCT
ri.linkedin_identifier,
sa.id as social_account_id,
sa.user_id,
u.given_name,
u.family_name,
COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as found_linkedin_identifier,
sa.public_identifier
FROM requested_identifiers ri
LEFT JOIN social_accounts sa ON (
sa.internal_identifier = ri.linkedin_identifier
OR sa.internal_identifier_regular = ri.linkedin_identifier
)
LEFT JOIN users u ON sa.user_id = u.id
WHERE sa.platform = 'linkedin' OR sa.platform IS NULL
),
accounts_with_relationships AS (
-- Check which found accounts have relationships
SELECT DISTINCT
fsa.linkedin_identifier,
fsa.social_account_id,
COUNT(DISTINCT rs1.social_account_id_b) + COUNT(DISTINCT rs2.social_account_id_a) as relationship_count
FROM found_social_accounts fsa
LEFT JOIN relationship_scores rs1 ON fsa.social_account_id = rs1.social_account_id_a
LEFT JOIN relationship_scores rs2 ON fsa.social_account_id = rs2.social_account_id_b
WHERE fsa.social_account_id IS NOT NULL
GROUP BY fsa.linkedin_identifier, fsa.social_account_id
)
SELECT
ri.linkedin_identifier as requested_identifier,
fsa.social_account_id,
COALESCE(fsa.given_name || ' ' || fsa.family_name, 'Unknown User') as full_name,
fsa.found_linkedin_identifier,
fsa.public_identifier,
COALESCE(awr.relationship_count, 0) as relationship_count,
CASE
WHEN fsa.social_account_id IS NULL THEN 'not_found'
WHEN COALESCE(awr.relationship_count, 0) = 0 THEN 'no_relationships'
ELSE 'found_with_relationships'
END as status
FROM requested_identifiers ri
LEFT JOIN found_social_accounts fsa ON ri.linkedin_identifier = fsa.linkedin_identifier
LEFT JOIN accounts_with_relationships awr ON fsa.social_account_id = awr.social_account_id
ORDER BY ri.linkedin_identifier;
`;
const result = await db.execute(query);
// Process results
const found_accounts = [];
const missing_accounts = [];
const accounts_with_no_relationships = [];
result.forEach((row) => {
if (row.status === "not_found") {
missing_accounts.push(row.requested_identifier);
}
else if (row.status === "no_relationships") {
accounts_with_no_relationships.push(row.requested_identifier);
}
else if (row.status === "found_with_relationships") {
found_accounts.push({
full_name: row.full_name,
profile_image_url: null, // Will be filled by getReferenceUsers if needed
internal_identifier: row.found_linkedin_identifier,
linkedin_identifier: row.public_identifier
? `${LINKEDIN_URL_PREFIX}${row.public_identifier}/`
: `LinkedIn ID: ${row.found_linkedin_identifier}`,
connections_count: row.relationship_count,
});
}
});
return {
found_accounts,
missing_accounts,
accounts_with_no_relationships,
total_requested: identifiers.length,
total_found: found_accounts.length + accounts_with_no_relationships.length,
total_with_relationships: found_accounts.length,
};
}
/**
* Extract LinkedIn identifier from URL
*/
function extractLinkedInIdentifier(linkedinUrl) {
try {
// Handle different LinkedIn URL formats:
// https://www.linkedin.com/in/username
// https://linkedin.com/in/username
// https://www.linkedin.com/profile/view?id=ACoAAA...
const url = new URL(linkedinUrl);
// Format: /in/username
if (url.pathname.startsWith("/in/")) {
const username = url.pathname.replace("/in/", "").replace("/", "");
return username || null;
}
// Format: /profile/view?id=ACoAAA...
if (url.pathname.startsWith("/profile/view")) {
const id = url.searchParams.get("id");
return id || null;
}
return null;
}
catch (error) {
console.error("Failed to parse LinkedIn URL:", linkedinUrl, error);
return null;
}
}
/**
* Find users by LinkedIn identifiers (for Supabase sync)
*/
async function findUsersByLinkedInIdentifiers(db, identifiers) {
if (identifiers.length === 0) {
return [];
}
// Convert array to PostgreSQL array format for IN clause
const identifierList = identifiers.map(id => `'${id}'`).join(",");
const query = (0, drizzle_orm_1.sql) `
SELECT DISTINCT
u.id,
COALESCE(sa.internal_identifier, sa.internal_identifier_regular) as linkedin_identifier_regular,
sa.public_identifier as linkedin_identifier,
COALESCE(u.given_name || ' ' || u.family_name, 'Unknown User') as full_name,
lp.profile_picture_url as profile_image_url
FROM social_accounts sa
INNER JOIN users u ON sa.user_id = u.id
LEFT JOIN linkedin.profiles lp ON sa.id = lp.social_account_id
WHERE sa.platform = 'linkedin'
AND (sa.internal_identifier IN (${drizzle_orm_1.sql.raw(identifierList)})
OR sa.internal_identifier_regular IN (${drizzle_orm_1.sql.raw(identifierList)}))
ORDER BY full_name;
`;
const result = await db.execute(query);
return result;
}
//# sourceMappingURL=close-connections.js.map