donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
248 lines (245 loc) • 10.3 kB
JavaScript
;
// CommonJS / ES-module interop: flag this module's exports object with `__esModule: true`.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front as undefined (`void 0`); the class is assigned to it
// after its definition further down in this file.
exports.ExtractPublicFacebookEntityDataTool = void 0;
const Tool_1 = require("./Tool");
const DonobuException_1 = require("../exceptions/DonobuException");
const InvalidParamValueException_1 = require("../exceptions/InvalidParamValueException");
const MiscUtils_1 = require("../utils/MiscUtils");
const JsonUtils_1 = require("../utils/JsonUtils");
const Logger_1 = require("../utils/Logger");
/**
 * JSON schema handed to the LLM's structured-output call to describe the
 * entity record extracted from the scraped Facebook page text.
 *
 * Every property is listed in `required`; scalar fields admit `null` so the
 * model can report "not found" instead of inventing a value. `categories` is
 * the only field that is a (non-nullable) array. No extra properties are
 * allowed.
 *
 * The `description` strings are part of the prompt the model sees, so any
 * field name mentioned inside them must match the actual property name.
 */
const FACEBOOK_ENTITY_PAGE_INFO_SCHEMA = {
    type: 'object',
    properties: {
        pageId: {
            type: ['string', 'null'],
            description: "The entity's Facebook page ID.",
        },
        companyLegalName: {
            type: ['string', 'null'],
            // The last sentence must name this very property (`companyLegalName`);
            // referring to a field that does not exist in the schema gives the model
            // an instruction it cannot follow.
            description: 'The legal name of the entity AS FOUND ON THE GIVEN FACEBOOK WEBPAGE TEXT! This is not the marketing name! ' +
                'The legal name is in the section of the Facebook website that says "<some-legal-entity-name> is responsible for this Page." ' +
                'IMPORTANT: If this string pattern is not found, then the `companyLegalName` must be set to null.',
        },
        businessName: {
            type: ['string', 'null'],
            description: "The entity's business name. This is not necessarily the legal name of the entity. This could be the name used in marketing, for example.",
        },
        topCountryOfPageManagers: {
            type: ['string', 'null'],
            description: "The country with the most number of page managers for this entity's Facebook page.",
        },
        managerIsVerified: {
            type: ['boolean', 'null'],
            description: 'The person that manages the Facebook page for this entity has been verified.',
        },
        companyIsVerified: {
            type: ['boolean', 'null'],
            description: 'The company itself has a verified Facebook status.',
        },
        creationDate: {
            type: ['string', 'null'],
            description: 'Creation date of the page in the format yyyy-mm-dd.',
        },
        isRunningAds: {
            type: ['boolean', 'null'],
            description: 'Is set to true if the entity reports that it is running ads. Otherwise false.',
        },
        physicalAddress: {
            type: ['string', 'null'],
            description: "The entity's physical address.",
        },
        categories: {
            type: 'array',
            items: {
                type: 'string',
            },
            description: "The entity's categories.",
        },
        numberOfReviews: {
            type: ['integer', 'null'],
            description: 'The number of reviews for the entity.',
        },
        percentRecommended: {
            type: ['integer', 'null'],
            description: 'The percent recommended based on reviews.',
        },
        numberOfLikes: {
            type: ['integer', 'null'],
            description: 'The number of likes for the entity.',
        },
        numberOfFollowers: {
            type: ['integer', 'null'],
            description: 'The number of followers for the entity.',
        },
        phone: {
            type: ['string', 'null'],
            description: "The entity's phone number.",
        },
        websiteUrl: {
            type: ['string', 'null'],
            description: "The entity's website URL.",
        },
    },
    required: [
        'pageId',
        'companyLegalName',
        'businessName',
        'topCountryOfPageManagers',
        'managerIsVerified',
        'companyIsVerified',
        'creationDate',
        'isRunningAds',
        'physicalAddress',
        'categories',
        'numberOfReviews',
        'percentRecommended',
        'numberOfLikes',
        'numberOfFollowers',
        'phone',
        'websiteUrl',
    ],
    additionalProperties: false,
};
/**
 * Tool that drives a browser page through a Facebook entity's public
 * "about" sections, collects the visible page text, and asks the LLM client
 * to turn that text into a record shaped by FACEBOOK_ENTITY_PAGE_INFO_SCHEMA.
 */
class ExtractPublicFacebookEntityDataTool extends Tool_1.Tool {
    /**
     * Registers the tool with its name, a human-readable description and the
     * names of its two parameter schemas.
     * NOTE(review): the meaning of the trailing `true` flag is defined by the
     * base `Tool` constructor, which is not visible here - confirm there.
     */
    constructor() {
        super(ExtractPublicFacebookEntityDataTool.NAME, 'Source Facebook transparency data for a given Facebook entity.', 'ExtractPublicFacebookEntityDataToolCoreParameters', 'ExtractPublicFacebookEntityDataToolGptParameters', true);
    }
    /**
     * Runs the extraction.
     *
     * Flow: open the entity's `about_profile_transparency` section, bail out if
     * the page reports missing content, open the extended transparency dialog
     * and capture the body text, then open `about_contact_and_basic_info`,
     * capture the body text again, and send both texts to the LLM for
     * structured extraction.
     *
     * This method never throws: every failure is converted into a result with
     * `isSuccessful: false`, and `context.metadata.nextState` is set to
     * `'FAILED'` (or `'SUCCESS'` on the happy path).
     *
     * @param context Tool call context; this method uses `context.page`,
     *   `context.gptClient` and `context.metadata`.
     * @param parameters Object carrying `facebookEntityName` and/or
     *   `facebookEntityUrl`; the name takes precedence when both are set.
     * @returns {Promise<{isSuccessful: boolean, forLlm: string, metadata: *}>}
     *   On success `metadata` is the structured output returned by the LLM client.
     */
    async call(context, parameters) {
        try {
            const initialUrl = this.getInitialFacebookPageUrl(parameters);
            const page = context.page;
            const aboutProfileTransparencyPageUrl = this.getAboutProfileTransparencyPageUrl(initialUrl);
            await page.goto(aboutProfileTransparencyPageUrl);
            await page.waitForLoadState();
            // Detect if the given entity exists
            const bodyText = await page.evaluate(() => document.body.innerText);
            if (bodyText.includes("This content isn't available right now")) {
                context.metadata.nextState = 'FAILED';
                // Blame whichever parameter was actually used to build the URL.
                if (parameters.facebookEntityName) {
                    throw new InvalidParamValueException_1.InvalidParamValueException('facebookEntityName', parameters.facebookEntityName);
                }
                throw new InvalidParamValueException_1.InvalidParamValueException('facebookEntityUrl', parameters.facebookEntityUrl ?? null);
            }
            // Close the login modal
            await this.closeModal(page);
            // Click into modal to see extended transparency information
            await page
                .locator('div[aria-label="See all transparency information"]')
                .click({
                delay: MiscUtils_1.MiscUtils.generateHumanLikeClickDurationInMs(),
                timeout: 5000,
            });
            // Fixed pause after the click, before the dialog's text is read.
            await page.waitForTimeout(1500);
            await page.waitForLoadState();
            const transparencyInfoRawText = await page.evaluate(() => document.body.innerText);
            // Click out of the extended transparency information modal
            await this.closeModal(page);
            // Navigate to remaining public profile details. The page's current URL
            // (rather than the initial one) is used as the base.
            const basicInfoUrl = this.getAboutContactAndBasicInfoPageUrl(page.url());
            await page.goto(basicInfoUrl);
            await page.waitForLoadState();
            // Close the login modal
            await this.closeModal(page);
            await page.waitForLoadState();
            const basicInfoRawText = await page.evaluate(() => document.body.innerText);
            // The literal's lines are kept flush-left on purpose: any leading
            // whitespace inside the template would become part of the prompt.
            const prompt = `
\`\`\`
${transparencyInfoRawText}
${basicInfoRawText}
\`\`\`
Above is the raw textual content of a business/organization entity's Facebook webpage.
Note that since this is the raw textual content, the text may be a bit jumbled, have its styling lost,
careful positioning lost, etc.
Extract this entity's information from the given raw textual content. Only use the supplied raw textual
content. DO NOT use your internal model knowledge to fill in gaps of information. ONLY use the supplied
text!`;
            const resp = await context.gptClient.getStructuredOutput([
                {
                    type: 'user',
                    items: [{ type: 'text', text: prompt }],
                },
            ], FACEBOOK_ENTITY_PAGE_INFO_SCHEMA);
            MiscUtils_1.MiscUtils.updateTokenCounts(resp, context.metadata);
            context.metadata.nextState = 'SUCCESS';
            return {
                isSuccessful: true,
                forLlm: 'Successfully extracted Facebook data.',
                metadata: resp.output,
            };
        }
        catch (error) {
            // Known, user-facing failures: surface their message and serialized form.
            if (error instanceof DonobuException_1.DonobuException) {
                context.metadata.nextState = 'FAILED';
                return {
                    isSuccessful: false,
                    forLlm: error.userFacingMessage,
                    metadata: JsonUtils_1.JsonUtils.objectToJson(error),
                };
            }
            // Anything else is unexpected: log it and report the raw message.
            Logger_1.appLogger.error('Failed to extract Facebook data due to unexpected exception!', error);
            context.metadata.nextState = 'FAILED';
            return {
                isSuccessful: false,
                forLlm: error instanceof Error ? error.message : String(error),
                metadata: null,
            };
        }
    }
    /**
     * Entry point used when the tool is invoked by the LLM; simply delegates to
     * {@link call} with the same arguments.
     */
    async callFromGpt(context, parameters) {
        return this.call(context, parameters);
    }
    /**
     * Resolves the entity's starting URL from the tool parameters.
     *
     * - `facebookEntityName` (checked first) is appended to
     *   `https://www.facebook.com/`.
     * - `facebookEntityUrl` is used as given when it already starts with an
     *   `http://` or `https://` scheme (case-insensitive); otherwise `https://`
     *   is prepended. Checking for the full scheme, instead of the bare prefix
     *   "http", keeps scheme-less hosts that merely begin with those letters
     *   from being mistaken for absolute URLs.
     *
     * @param parameters Object with optional `facebookEntityName` / `facebookEntityUrl`.
     * @returns {string} The URL to start from.
     * @throws InvalidParamValueException when neither parameter is set.
     */
    getInitialFacebookPageUrl(parameters) {
        if (parameters.facebookEntityName) {
            return `https://www.facebook.com/${parameters.facebookEntityName}`;
        }
        if (parameters.facebookEntityUrl) {
            const rawUrl = parameters.facebookEntityUrl;
            return /^https?:\/\//i.test(rawUrl) ? rawUrl : `https://${rawUrl}`;
        }
        throw new InvalidParamValueException_1.InvalidParamValueException('facebookEntityUrl', null);
    }
    /**
     * Returns `url` with its `sk` query parameter set to
     * `about_profile_transparency`.
     *
     * @param {string} url Absolute URL of the entity's page.
     * @returns {string}
     * @throws {Error} `Invalid URL: <url>` when `url` cannot be parsed.
     */
    getAboutProfileTransparencyPageUrl(url) {
        return this.getAboutSectionPageUrl(url, 'about_profile_transparency');
    }
    /**
     * Returns `url` with its `sk` query parameter set to
     * `about_contact_and_basic_info`.
     *
     * @param {string} url Absolute URL of the entity's page.
     * @returns {string}
     * @throws {Error} `Invalid URL: <url>` when `url` cannot be parsed.
     */
    getAboutContactAndBasicInfoPageUrl(url) {
        return this.getAboutSectionPageUrl(url, 'about_contact_and_basic_info');
    }
    /**
     * Shared helper for the two section-URL builders: drops every existing `sk`
     * query parameter from `url` and appends a single `sk=<section>`; all other
     * query parameters are kept.
     *
     * @param {string} url Absolute URL to rewrite.
     * @param {string} section Value for the `sk` query parameter.
     * @returns {string} The rewritten URL.
     * @throws {Error} `Invalid URL: <url>`, with the parser's error as `cause`.
     */
    getAboutSectionPageUrl(url, section) {
        try {
            const uri = new URL(url);
            const params = new URLSearchParams(uri.search);
            params.delete('sk');
            params.append('sk', section);
            uri.search = params.toString();
            return uri.toString();
        }
        catch (error) {
            throw new Error(`Invalid URL: ${url}`, { cause: error });
        }
    }
    /**
     * Best-effort dismissal of an overlay: clicks the first
     * `div[aria-label="Close"]` on the page. Any failure (including the
     * 5-second click timeout) is deliberately downgraded to a warning so the
     * surrounding flow continues whether or not a modal was shown.
     *
     * @param page The browser page to act on.
     */
    async closeModal(page) {
        try {
            await page.locator('div[aria-label="Close"]').first().click({
                delay: MiscUtils_1.MiscUtils.generateHumanLikeClickDurationInMs(),
                timeout: 5000,
            });
        }
        catch (_error) {
            Logger_1.appLogger.warn('Failed to detect Facebook modal to close, but continuing anyway...');
        }
    }
}
// Publish the class on the CommonJS exports object.
exports.ExtractPublicFacebookEntityDataTool = ExtractPublicFacebookEntityDataTool;
// Static tool name; the constructor reads it and passes it as the first argument
// to the base `Tool` constructor.
ExtractPublicFacebookEntityDataTool.NAME = 'extractPublicFacebookEntityData';
//# sourceMappingURL=ExtractPublicFacebookEntityDataTool.js.map