@huggingface/inference
TypeScript client for the Hugging Face Inference Providers and Inference Endpoints
import type {
  VisualQuestionAnsweringInput,
  VisualQuestionAnsweringInputData,
  VisualQuestionAnsweringOutput,
} from "@huggingface/tasks";
import { resolveProvider } from "../../lib/getInferenceProviderMapping";
import { getProviderHelper } from "../../lib/getProviderHelper";
import type { BaseArgs, Options, RequestArgs } from "../../types";
import { base64FromBytes } from "../../utils/base64FromBytes";
import { innerRequest } from "../../utils/request";
/// Override the type to properly set inputs.image as Blob
export type VisualQuestionAnsweringArgs = BaseArgs &
  VisualQuestionAnsweringInput & { inputs: VisualQuestionAnsweringInputData & { image: Blob } };
/**
 * Answers a question on an image. Recommended model: dandelin/vilt-b32-finetuned-vqa.
 */
export async function visualQuestionAnswering(
  args: VisualQuestionAnsweringArgs,
  options?: Options
): Promise<VisualQuestionAnsweringOutput[number]> {
  // Resolve the target inference provider for this model, then get its task-specific helper.
  const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
  const providerHelper = getProviderHelper(provider, "visual-question-answering");
  const reqArgs: RequestArgs = {
    ...args,
    inputs: {
      question: args.inputs.question,
      // convert Blob or ArrayBuffer to base64
      image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer())),
    },
  } as RequestArgs;
  const { data: res } = await innerRequest<VisualQuestionAnsweringOutput>(reqArgs, providerHelper, {
    ...options,
    task: "visual-question-answering",
  });
  // Let the provider helper normalize the raw provider response into the common output shape.
  return providerHelper.getResponse(res);
}
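
For reference, a minimal usage sketch (not part of this file): the access token, image URL, and question are placeholder assumptions, and the model is the one recommended in the doc comment above.

import { visualQuestionAnswering } from "@huggingface/inference";

// Any Blob source works, e.g. a fetched image or a File from an <input> element.
const image = await (await fetch("https://example.com/cat.jpg")).blob();

const result = await visualQuestionAnswering({
  accessToken: "hf_...", // placeholder Hugging Face token
  model: "dandelin/vilt-b32-finetuned-vqa",
  inputs: {
    image,
    question: "How many cats are in the picture?",
  },
});

console.log(result.answer, result.score);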