Bug 1897117 - Change the request structure for the ML image-to-text pipeline - r=gregtatum

Differential Revision: https://phabricator.services.mozilla.com/D210659
Tarek Ziadé 2024-05-16 16:56:18 +00:00
parent acb920569c
commit 175bc1a8f6
2 changed files with 16 additions and 18 deletions

@@ -62,9 +62,11 @@ async function echo(request, _model, _tokenizer, _processor) {
  *
  * @async
  * @param {object} request - The request object containing image data.
- * @param {string} [request.imageUrl] - The URL of the image to process. Either `imageUrl` or `data` must be provided, but not both.
- * @param {ArrayBuffer} [request.data] - The raw image data to process. Either `data` or `imageUrl` must be provided, but not both.
- * @param {string} request.mimeType - The MIME type of the image data.
+ * @param {string} [request.url] - The URL of the image to process. If `url` is not provided, other fields are used.
+ * @param {ArrayBuffer} [request.data] - The raw image data to process. Ignored if `url` is provided.
+ * @param {number} [request.width] - The image width. Ignored if `url` is provided.
+ * @param {number} [request.height] - The image height. Ignored if `url` is provided.
+ * @param {number} [request.channels] - The image channels. Can be 1, 2, 3 or 4. Defaults to 4. Ignored if `url` is provided.
  * @param {object} model - The model used for inference.
  * @param {object} tokenizer - The tokenizer used for decoding.
  * @param {object} processor - The processor used for preparing image data.
@@ -80,11 +82,15 @@ async function imageToText(request, model, tokenizer, processor) {
   let start = Date.now();
   let rawImage;
-  if ("imageUrl" in request) {
-    rawImage = await RawImage.fromUrl(request.imageUrl);
+  if ("url" in request) {
+    rawImage = await RawImage.fromURL(request.url);
   } else {
-    const blob = new Blob([request.data], { type: request.mimeType });
-    rawImage = await RawImage.fromBlob(blob);
+    rawImage = new RawImage(
+      request.data,
+      request.width,
+      request.height,
+      request.channels || 4
+    );
   }
   debug("Image loaded in ", Date.now() - start);

@@ -21,18 +21,10 @@ In the example below, an image is converted to text using the `image-to-text` ta
 // We then create the engine object, using the options
 const engine = engineParent.getEngine(options);
-// At this point we are ready to do some inference.
-// We need to get the image as an array buffer and wrap it into a request object
-const response = await fetch("https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg");
-const buffer = await response.arrayBuffer();
-const mimeType = response.headers.get('Content-Type');
-const request = {
-  data: buffer,
-  mimeType: mimeType
-};
+// Preparing a request
+const request = {url: "https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg"};
 // Finally, we run the engine with the request object
 const res = await engine.run(request);
 // The result is a string containing the text extracted from the image
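
For anyone migrating existing callers, the request now takes one of two shapes, summarized in the sketch below; the `pixels` buffer and the 640x480 dimensions are placeholders for illustration, not values from this patch.

// Shape 1: pass a URL and let the worker fetch and decode the image.
const byUrl = {
  url: "https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg",
};

// Shape 2: pass already-decoded pixels; mimeType is no longer part of the request.
// `pixels` is assumed to be an ArrayBuffer of RGBA data; 640x480 are placeholders.
const byData = { data: pixels, width: 640, height: 480, channels: 4 };

const result = await engine.run(byUrl); // or: await engine.run(byData)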