@eeacms/volto-chatbot
Version:
@eeacms/volto-chatbot: Volto add-on
236 lines (213 loc) • 7.97 kB
JavaScript
// /**
// * Represents a claim object with all relevant information.
// */
// export interface GenerativeClaim {
// claimId: number;
// claimString: string;
// subclaims: string[];
// segments: number[];
// explanation: string;
// supported: boolean;
// probabilities: Map<string, number>;
// }
//
// export interface OpenAITokenLogProb {
// token: string;
// bytes: number[];
// logprob: number;
// }
//
// export interface OpenAILogProb {
// token: string;
// bytes: number[];
// logprob: number;
// top_logprobs: OpenAITokenLogProb[];
// }
//
/**
* Gets the claim id from a subsegment.
* @param subsegment A subsegment string of the form "<|r1|".
* @returns The numeric claim id.
*/
function getClaimIdFromSubsegment(subsegment) {
const textClaimId = subsegment.split('|')[1];
const integerClaimId = parseInt(textClaimId.split('r')[1]);
return integerClaimId;
}
/**
* Gets the citations from a subsegment.
* @param subsegment A subsegment string of the form "|s1|,|s2|,|s3|,|s4|".
* @returns The list of numeric citations.
*/
function getClaimCitationsFromSubsegment(subsegment) {
const citationSegments = subsegment.split(',');
const segments = [];
for (const citationSegment of citationSegments) {
const segment = citationSegment.replaceAll('|', '').replaceAll('s', '');
if (segment.includes('-')) {
const segmentRange = segment.split('-');
for (
let i = parseInt(segmentRange[0].trim());
i <= parseInt(segmentRange[1].trim());
i++
) {
segments.push(i);
}
} else if (segment.includes('to')) {
const segmentRange = segment.split('to');
for (
let i = parseInt(segmentRange[0].trim());
i <= parseInt(segmentRange[1].trim());
i++
) {
segments.push(i);
}
} else {
const segmentInt = parseInt(segment);
if (!isNaN(segmentInt)) {
segments.push(parseInt(segment));
}
}
}
return segments;
}
/**
* Gets the support status from a subsegment.
* @param subsegment A subsegment string of the form "|supported|" or "|unsupported|".
* @returns True if the claim is supported, false otherwise.
*/
function getSupportStatusFromSubsegment(subsegment) {
return subsegment.startsWith('|supported|');
}
/**
* Gets a claim from a segment string.
* @param segment A segment string containing all information for claim verification.
* @returns The claim object with all relevant information.
*/
function getClaimFromSegment(segment) {
const claim_segments = segment.split('><');
const claimId = getClaimIdFromSubsegment(claim_segments[0]);
const claimString = claim_segments[1];
const subclaims = [];
let claimProgressIndex = 3; // Start at 3 to skip the claim id, claim string and the subclaims tag
for (let i = claimProgressIndex; i < claim_segments.length; i++) {
const subsegment = claim_segments[i];
if (subsegment.startsWith('end||subclaims')) {
claimProgressIndex = i + 1;
break;
} else {
subclaims.push(subsegment);
}
}
let cite_tag_index = -1;
let explanation_index = -1;
let label_index = -1;
for (let i = claimProgressIndex; i < claim_segments.length; i++) {
const subsegment = claim_segments[i];
if (subsegment.startsWith('|cite|')) {
cite_tag_index = i;
} else if (subsegment.startsWith('|explain|')) {
explanation_index = i + 1;
} else if (
subsegment.startsWith('|supported|') ||
subsegment.startsWith('|unsupported|')
) {
label_index = i;
}
}
const segments = getClaimCitationsFromSubsegment(
claim_segments[cite_tag_index + 1],
);
const explanation = claim_segments[explanation_index];
const supported = getSupportStatusFromSubsegment(claim_segments[label_index]);
const claim = {
claimId,
claimString,
subclaims,
segments,
explanation,
supported,
probabilities: new Map(),
};
return claim;
}
/**
* Gets all claims from a response.
* @param response A string containing all claims and their information.
* @returns A list of claim objects.
*/
export function getClaimsFromResponse(response) {
// Example response: <|r1|><There is no information about the average lifespan of a giant squid in the deep waters of the Pacific Ocean in the provided document.><|subclaims|><The document contains information about the average lifespan of a giant squid.><The information about giant squid lifespan is related to the Pacific Ocean.><end||subclaims><|cite|><|s1 to s49|><end||cite><|explain|><Upon reviewing the entire document, there is no mention of giant squid or any related topic, including their average lifespan or the Pacific Ocean. The document is focused on international relations, diplomacy, and conflict resolution.><end||explain><|supported|><end||r><|r2|><The document is focused on international relations, diplomacy, and conflict resolution, and does not mention giant squid or any related topic.><|subclaims|><The document is focused on international relations, diplomacy, and conflict resolution.><The document does not mention giant squid or any related topic.><end||subclaims><|cite|><|s1|,|s2|,|s3|,|s4|><end||cite><|explain|><The first four sentences clearly establish the document's focus on international relations, diplomacy, and conflict resolution, and there is no mention of giant squid or any related topic throughout the document.><end||explain><|supported|><end||r><|r3|><The document mentions cats.><|subclaims|><The document makes some mention of cats.><end||subclaims><|cite|><None><end||cite><|explain|><There is no mention of cats anywhere in the document.><end||explain><|unsupported|><end||r>
// eslint-disable-next-line no-console
// console.log('getClaimsFromResponse', response);
let segments = response.split('<end||r>');
const claims = [];
for (const segment of segments) {
if (segment.length === 0) {
continue;
}
const claim = getClaimFromSegment(segment);
claims.push(claim);
}
return claims;
}
function exp(x) {
return Math.pow(Math.E, x);
}
function softmax(logits) {
const softmaxes = [];
const exp_values = [];
let total = 0;
for (let i = 0; i < logits.length; i++) {
const exponential = exp(logits[i]);
total += exponential;
exp_values.push(exponential);
}
for (let i = 0; i < exp_values.length; i++) {
softmaxes.push(exp_values[i] / total);
}
return softmaxes;
}
function getTokenProbabilitiesFromLogit(logit, tokenChoices) {
const tokenLogits = new Map();
let smallestLogit = 1000000;
for (const token of tokenChoices) {
const tokenLogit = logit.top_logprobs.find(
(logit) => logit.token === token,
);
if (tokenLogit !== undefined) {
smallestLogit = Math.min(smallestLogit, tokenLogit.logprob);
tokenLogits.set(token, tokenLogit.logprob);
}
}
// If any class tokens aren't present, set them below the smallest logit.
// Their true value is definitely <= the smallest logit value.
for (const token of tokenChoices) {
if (!tokenLogits.has(token)) {
tokenLogits.set(token, smallestLogit - 1e-6);
}
}
const classTokens = Array.from(tokenLogits.keys());
const logitValues = Array.from(tokenLogits.values());
const softmaxValues = softmax(logitValues);
const tokenProbabilities = new Map();
for (let i = 0; i < classTokens.length; i++) {
tokenProbabilities.set(classTokens[i], softmaxValues[i]);
}
return tokenProbabilities;
}
export function getTokenProbabilitiesFromLogits(logits, tokenChoices) {
const tokenProbabilities = [];
for (const logit of logits) {
const tokenPresent = tokenChoices.has(logit.token);
if (!tokenPresent) {
continue;
}
const tokenProbability = getTokenProbabilitiesFromLogit(
logit,
tokenChoices,
);
tokenProbabilities.push(tokenProbability);
}
return tokenProbabilities;
}