forked from mirrors/gecko-dev
171 lines
5.5 KiB
JavaScript
171 lines
5.5 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
// The WebIDL binder places static methods on the prototype, rather than
|
|
// on the constructor, which is a bit clumsy, and is definitely not
|
|
// idiomatic.
|
|
// Alias the prototype's detectLanguage on the constructor itself so that
// it can be invoked as LanguageInfo.detectLanguage(...) without an instance.
LanguageInfo.detectLanguage = LanguageInfo.prototype.detectLanguage;
|
|
|
|
// Closure is overzealous in its function call optimization, and tries
|
|
// to turn these singleton methods into unbound function calls.
|
|
// Pin `this` to ensureCache so alloc still works when it is invoked as a
// plain (unbound) function.
ensureCache.alloc = ensureCache.alloc.bind(ensureCache);
|
|
// Pin `this` to ensureCache so prepare still works when it is invoked as a
// plain (unbound) function.
ensureCache.prepare = ensureCache.prepare.bind(ensureCache);
|
|
|
|
// From public/encodings.h. Unfortunately, the WebIDL binder doesn't
|
|
// allow us to define or automatically derive these in the IDL.
|
|
// Lookup table from encoding name to its numeric identifier.
//
// The table is assembled from rows of the form [value, name, alias...]:
// every name in a row maps to that row's numeric value. Rows are listed in
// ascending value order, and the full name precedes its short alias.
var Encodings = (function() {
  var rows = [
    [0, "ISO_8859_1"],
    [1, "ISO_8859_2"],
    [2, "ISO_8859_3"],
    [3, "ISO_8859_4"],
    [4, "ISO_8859_5"],
    [5, "ISO_8859_6"],
    [6, "ISO_8859_7"],
    [7, "ISO_8859_8"],
    [8, "ISO_8859_9"],
    [9, "ISO_8859_10"],
    [10, "JAPANESE_EUC_JP", "EUC_JP"],
    [11, "JAPANESE_SHIFT_JIS", "SHIFT_JIS"],
    [12, "JAPANESE_JIS", "JIS"],
    [13, "CHINESE_BIG5", "BIG5"],
    [14, "CHINESE_GB"],
    [15, "CHINESE_EUC_CN", "EUC_CN"],
    [16, "KOREAN_EUC_KR", "EUC_KR"],
    [17, "UNICODE_UNUSED"],
    [18, "CHINESE_EUC_DEC", "EUC_DEC"],
    [19, "CHINESE_CNS", "CNS"],
    [20, "CHINESE_BIG5_CP950", "BIG5_CP950"],
    [21, "JAPANESE_CP932", "CP932"],
    [22, "UTF8"],
    [23, "UNKNOWN_ENCODING"],
    [24, "ASCII_7BIT"],
    [25, "RUSSIAN_KOI8_R", "KOI8_R"],
    [26, "RUSSIAN_CP1251", "CP1251"],
    [27, "MSFT_CP1252", "CP1252"],
    [28, "RUSSIAN_KOI8_RU", "KOI8_RU"],
    [29, "MSFT_CP1250", "CP1250"],
    [30, "ISO_8859_15"],
    [31, "MSFT_CP1254", "CP1254"],
    [32, "MSFT_CP1257", "CP1257"],
    [33, "ISO_8859_11"],
    [34, "MSFT_CP874", "CP874"],
    [35, "MSFT_CP1256", "CP1256"],
    [36, "MSFT_CP1255", "CP1255"],
    [37, "ISO_8859_8_I"],
    [38, "HEBREW_VISUAL"],
    [39, "CZECH_CP852", "CP852"],
    [40, "CZECH_CSN_369103", "CSN_369103"],
    [41, "MSFT_CP1253", "CP1253"],
    [42, "RUSSIAN_CP866", "CP866"],
    [43, "ISO_8859_13"],
    [44, "ISO_2022_KR"],
    [45, "GBK"],
    [46, "GB18030"],
    [47, "BIG5_HKSCS"],
    [48, "ISO_2022_CN"],
    [49, "TSCII"],
    [50, "TAMIL_MONO"],
    [51, "TAMIL_BI"],
    [52, "JAGRAN"],
    [53, "MACINTOSH_ROMAN"],
    [54, "UTF7"],
    [55, "BHASKAR"],
    [56, "HTCHANAKYA"],
    [57, "UTF16BE"],
    [58, "UTF16LE"],
    [59, "UTF32BE"],
    [60, "UTF32LE"],
    [61, "BINARYENC"],
    [62, "HZ_GB_2312"],
    [63, "UTF8UTF8"],
    [64, "TAM_ELANGO"],
    [65, "TAM_LTTMBARANI"],
    [66, "TAM_SHREE"],
    [67, "TAM_TBOOMIS"],
    [68, "TAM_TMNEWS"],
    [69, "TAM_WEBTAMIL"],
    [70, "KDDI_SHIFT_JIS"],
    [71, "DOCOMO_SHIFT_JIS"],
    [72, "SOFTBANK_SHIFT_JIS"],
    [73, "KDDI_ISO_2022_JP", "ISO_2022_JP"],
    [74, "SOFTBANK_ISO_2022_JP"],
  ];

  var table = {};
  rows.forEach(function(row) {
    // row[0] is the numeric value; every remaining element is a name for it.
    for (var i = 1; i < row.length; i++) {
      table[row[i]] = row[0];
    }
  });
  return table;
})();
|
|
|
|
// Accept forms both with and without underscores/hyphens.
|
|
// For every table name that contains an underscore, also register the same
// value under the name with all underscores removed (for example,
// "ISO_8859_1" becomes reachable as "ISO88591" too). The key list is
// captured before any alias is added, so new aliases are not revisited.
Object.keys(Encodings).forEach(function(name) {
  var squashed = name.split("_").join("");
  if (squashed !== name) {
    Encodings[squashed] = Encodings[name];
  }
});
|
|
|
|
addOnPreMain(function() {

  /**
   * Handles one detection request posted to this worker.
   *
   * The request payload (aMsg["data"]) is read for these properties:
   *   - "text":     the text handed to LanguageInfo.detectLanguage.
   *   - "isHTML":   negated and passed as the second detectLanguage argument.
   *   - "tld", "encoding", "language": optional hints. When all three are
   *     null/undefined the two-argument form of detectLanguage is used;
   *     otherwise all are forwarded, with missing "tld"/"language" values
   *     passed as null and "encoding" translated through the Encodings table.
   *
   * The reply posted back has the shape:
   *   { "language": string, "confident": boolean,
   *     "languages": [ { "languageCode": string, "percent": number }, ... ] }
   *
   * The detection result is always released with Module.destroy, even if
   * building or posting the reply throws.
   */
  onmessage = function(aMsg) {
    let data = aMsg["data"];

    let langInfo;
    if (data["tld"] == undefined && data["encoding"] == undefined && data["language"] == undefined) {
      langInfo = LanguageInfo.detectLanguage(data["text"], !data["isHTML"]);
    } else {
      // Do our best to find the given encoding in the encodings table.
      // Otherwise, just fall back to unknown.
      let enc = String(data["encoding"]).toUpperCase().replace(/[_-]/g, "");

      let encoding;
      if (Encodings.hasOwnProperty(enc)) {
        encoding = Encodings[enc];
      } else {
        encoding = Encodings["UNKNOWN_ENCODING"];
      }

      langInfo = LanguageInfo.detectLanguage(data["text"], !data["isHTML"],
                                             data["tld"] || null,
                                             encoding,
                                             data["language"] || null);
    }

    // From here on langInfo must be destroyed no matter what happens;
    // otherwise a failure while reading or posting the result would leak it
    // for the lifetime of the worker.
    try {
      let result = {
        "language": langInfo.getLanguageCode(),
        "confident": langInfo.getIsReliable(),
        "languages": [],
      };

      // The result exposes three per-language slots (indices 0..2).
      for (let index = 0; index < 3; index++) {
        let lang = langInfo.get_languages(index);
        let entry = {
          "languageCode": lang.getLanguageCode(),
          "percent": lang.getPercent(),
        };

        // Ignore empty results: slots reporting "un" with a zero percent.
        if (entry["languageCode"] != "un" || entry["percent"] > 0) {
          result["languages"].push(entry);
        }
      }

      postMessage(result);
    } finally {
      Module.destroy(langInfo);
    }
  };

  // Tell the owner of this worker that the message handler is installed.
  postMessage("ready");
});
|