forked from mirrors/gecko-dev
Bug 1897117 - Change the request structure for the ML image-to-text pipeline - r=gregtatum
Differential Revision: https://phabricator.services.mozilla.com/D210659
This commit is contained in:
parent
acb920569c
commit
175bc1a8f6
2 changed files with 16 additions and 18 deletions
|
|
@ -62,9 +62,11 @@ async function echo(request, _model, _tokenizer, _processor) {
|
|||
*
|
||||
* @async
|
||||
* @param {object} request - The request object containing image data.
|
||||
* @param {string} [request.imageUrl] - The URL of the image to process. Either `imageUrl` or `data` must be provided, but not both.
|
||||
* @param {ArrayBuffer} [request.data] - The raw image data to process. Either `data` or `imageUrl` must be provided, but not both.
|
||||
* @param {string} request.mimeType - The MIME type of the image data.
|
||||
* @param {string} [request.url] - The URL of the image to process. If `url` is not provided, the image is built from `data`, `width`, `height` and `channels` instead.
|
||||
* @param {ArrayBuffer} [request.data] - The raw image data to process. Ignored if `url` is provided.
|
||||
* @param {number} [request.width] - The image width. Ignored if `url` is provided.
|
||||
* @param {number} [request.height] - The image height. Ignored if `url` is provided.
|
||||
* @param {number} [request.channels] - The image channels. Can be 1, 2, 3 or 4. Defaults to 4. Ignored if `url` is provided.
|
||||
* @param {object} model - The model used for inference.
|
||||
* @param {object} tokenizer - The tokenizer used for decoding.
|
||||
* @param {object} processor - The processor used for preparing image data.
|
||||
|
|
@ -80,11 +82,15 @@ async function imageToText(request, model, tokenizer, processor) {
|
|||
let start = Date.now();
|
||||
let rawImage;
|
||||
|
||||
if ("imageUrl" in request) {
|
||||
rawImage = await RawImage.fromUrl(request.imageUrl);
|
||||
if ("url" in request) {
|
||||
rawImage = await RawImage.fromURL(request.url);
|
||||
} else {
|
||||
const blob = new Blob([request.data], { type: request.mimeType });
|
||||
rawImage = await RawImage.fromBlob(blob);
|
||||
rawImage = new RawImage(
|
||||
request.data,
|
||||
request.width,
|
||||
request.height,
|
||||
request.channels || 4
|
||||
);
|
||||
}
|
||||
|
||||
debug("Image loaded in ", Date.now() - start);
|
||||
|
|
|
|||
|
|
@ -21,18 +21,10 @@ In the example below, an image is converted to text using the `image-to-text` ta
|
|||
// We then create the engine object, using the options
|
||||
const engine = engineParent.getEngine(options);
|
||||
|
||||
// Preparing a request
|
||||
const request = {url: "https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg"};
|
||||
|
||||
// At this point we are ready to do some inference.
|
||||
|
||||
// We need to get the image as an array buffer and wrap it into a request object
|
||||
const response = await fetch("https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg");
|
||||
const buffer = await response.arrayBuffer();
|
||||
const mimeType = response.headers.get('Content-Type');
|
||||
const request = {
|
||||
data: buffer,
|
||||
mimeType: mimeType
|
||||
};
|
||||
|
||||
// Finally, we run the engine with the request object
|
||||
const res = await engine.run(request);
|
||||
|
||||
// The result is a string containing the text extracted from the image
|
||||
|
|
|
|||
Loading…
Reference in a new issue