--- Convert 8-bit encodings to UTF-8 --- @module luaxml-encodings -- this table is generated automatically by this command: -- texlua encodings/make_encodings.lua local encodings = { ['ibm866'] = 'ÐБВГДЕЖЗИЙКЛМÐОПРСТУФХЦЧШЩЪЫЬÐЮЯабвгдежзийклмноп░▒▓│┤╡╢╖╕╣║╗â•╜╛â”└┴┬├─┼╞╟╚╔╩╦╠â•╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌â–▀рÑтуфхцчшщъыьÑÑŽÑÐёЄєЇїЎў°∙·√№¤■ ', ['iso-8859-5'] = '�������������������������������� ÐЂЃЄЅІЇЈЉЊЋЌÂÐŽÐÐБВГДЕЖЗИЙКЛМÐОПРСТУФХЦЧШЩЪЫЬÐЮЯабвгдежзийклмнопрÑтуфхцчшщъыьÑÑŽÑ№ёђѓєѕіїјљњћќ§ўџ', ['macintosh'] = 'ÄÅÇÉÑÖÜáà âäãåçéèêëÃìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑âˆÏ€âˆ«ÂªÂºÎ©Ã¦Ã¸Â¿Â¡Â¬âˆšÆ’≈∆«»… ÀÃÕŒœ–—“â€â€˜â€™Ã·â—ŠÃ¿Å¸â„€‹›ï¬ï¬‚‡·‚„‰ÂÊÃËÈÃÃŽÃÌÓÔÒÚÛÙıˆ˜¯˘˙˚¸Ë˛ˇ', ['windows-1251'] = 'ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋÐђ‘’“â€â€¢â€“—�™љ›њќћџ ЎўЈ¤Ò¦§Ð©Є«¬Â®Ї°±Ііґµ¶·ё№є»јЅѕїÐБВГДЕЖЗИЙКЛМÐОПРСТУФХЦЧШЩЪЫЬÐЮЯабвгдежзийклмнопрÑтуфхцчшщъыьÑÑŽÑ', ['iso-8859-13'] = '�������������������������������� â€Â¢Â£Â¤â€žÂ¦Â§Ã˜Â©Å–«¬Â®Æ°±²³“µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲÅŚŪÜŻŽßąįÄćäåęēÄéźėģķīļšńņóÅõö÷ųłśūüżž’', ['iso-8859-6'] = '�������������������������������� ���¤�������،Âï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½Ø›ï¿½ï¿½ï¿½ØŸï¿½Ø¡Ø¢Ø£Ø¤Ø¥Ø¦Ø§Ø¨Ø©ØªØ«Ø¬ØØ®Ø¯Ø°Ø±Ø²Ø³Ø´ØµØ¶Ø·Ø¸Ø¹Øºï¿½ï¿½ï¿½ï¿½ï¿½Ù€ÙقكلمنهوىيًٌÙÙŽÙÙّْ', ['iso-8859-8'] = '�������������������������������� �¢£¤¥¦§¨©×«¬Â®¯°±²³´µ¶·¸¹÷»¼½¾��������������������������������‗×בגדהוזחטיךכל××ž×Ÿ× ×¡×¢×£×¤×¥×¦×§×¨×©×ªï¿½ï¿½â€Žâ€', ['iso-8859-4'] = '�������������������������������� ĄĸŖ¤Ĩϧ¨ŠĒĢŦÂޝ°ą˛ŗ´ĩšēģŧŊžŋĀÃÂÃÄÅÆĮČÉĘËĖÃÎĪÄŅŌĶÔÕÖרŲÚÛÜŨŪßÄáâãäåæįÄéęëėÃîīđņÅķôõö÷øųúûüũū˙', ['koi8-r'] = '─│┌â”└┘├┤┬┴┼▀▄█▌â–░▒▓⌠■∙√≈≤≥ ⌡°²·÷â•║╒ё╓╔╕╖╗╘╙╚╛╜â•╞╟╠╡Ð╢╣╤╥╦╧╨╩╪╫╬©юабцдефгхийклмнопÑÑ€ÑтужвьызшÑщчъЮÐБЦДЕФГХИЙКЛМÐОПЯРСТУЖВЬЫЗШÐЩЧЪ', ['iso-8859-7'] = '�������������������������������� ‘’£€₯¦§¨©ͺ«¬Â�―°±²³΄΅Ά·ΈΉΊ»Ό½ΎÎÎΑΒΓΔΕΖΗΘΙΚΛΜÎΞΟΠΡ�ΣΤΥΦΧΨΩΪΫάÎήίΰαβγδεζηθικλμνξοπÏςστυφχψωϊϋόÏÏŽ', ['iso-8859-3'] = '�������������������������������� Ħ˘£¤�Ĥ§¨İŞĞĴÂ�ݰħ²³´µĥ·¸ışğĵ½�żÀÃÂ�ÄĊĈÇÈÉÊËÌÃÃŽÃï¿½Ã‘Ã’Ã“Ã”Ä Ã–Ã—ÄœÃ™ÃšÃ›ÃœÅ¬ÅœÃŸÃ Ã¡Ã¢ï¿½Ã¤Ä‹Ä‰Ã§Ã¨Ã©ÃªÃ«Ã¬Ãîï�ñòóôġö÷ÄùúûüÅÅË™', ['windows-1256'] 
= '€پ‚ƒ„…†‡ˆ‰ٹ‹Œچژڈگ‘’“â€â€¢â€“—ک™ڑ›œ‌â€ÚºÂ ،¢£¤¥¦§¨©ھ«¬ÂÂ®Â¯Â°Â±Â²Â³Â´ÂµÂ¶Â·Â¸Â¹Ø›Â»Â¼Â½Â¾ØŸÛØ¡Ø¢Ø£Ø¤Ø¥Ø¦Ø§Ø¨Ø©ØªØ«Ø¬ØØ®Ø¯Ø°Ø±Ø²Ø³Ø´ØµØ¶Ã—طظعغـÙقكà لâمنهوçèéêëىيîïًٌÙَôÙÙ÷ّùْûü‎â€Û’', ['windows-1258'] = '€�‚ƒ„…†‡ˆ‰�‹Œ����‘’“â€â€¢â€“—˜™�›œ��Ÿ ¡¢£¤¥¦§¨©ª«¬Â®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÃÂĂÄÅÆÇÈÉÊË̀ÃÃŽÃÄÃ‘Ì‰Ã“Ã”Æ Ã–Ã—Ã˜Ã™ÃšÃ›ÃœÆ¯ÌƒÃŸÃ Ã¡Ã¢ÄƒÃ¤Ã¥Ã¦Ã§Ã¨Ã©ÃªÃ«ÌÃîïđṇ̃óôơö÷øùúûüư₫ÿ', ['iso-8859-14'] = '�������������������������������� Ḃḃ£ĊċḊ§Ẁ©ẂḋỲÂÂ®Å¸á¸žá¸ŸÄ Ä¡á¹€á¹Â¶á¹–áºá¹—ẃṠỳẄẅṡÀÃÂÃÄÅÆÇÈÉÊËÌÃÃŽÃŴÑÒÓÔÕÖṪØÙÚÛÜÃŶßà áâãäåæçèéêëìÃîïŵñòóôõöṫøùúûüýŷÿ', ['windows-1252'] = '€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“â€â€¢â€“—˜™š›œ�žŸ ¡¢£¤¥¦§¨©ª«¬Â®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÃÂÃÄÅÆÇÈÉÊËÌÃÃŽÃÃÑÒÓÔÕÖרÙÚÛÜÃÞßà áâãäåæçèéêëìÃîïðñòóôõö÷øùúûüýþÿ', ['iso-8859-15'] = '�������������������������������� ¡¢£€¥Š§š©ª«¬Â®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÃÂÃÄÅÆÇÈÉÊËÌÃÃŽÃÃÑÒÓÔÕÖרÙÚÛÜÃÞßà áâãäåæçèéêëìÃîïðñòóôõö÷øùúûüýþÿ', ['x-mac-cyrillic'] = 'ÐБВГДЕЖЗИЙКЛМÐОПРСТУФХЦЧШЩЪЫЬÐЮЯ†°Ò£§•¶І®©™Ђђ≠Ѓѓ∞±≤≥іµґЈЄєЇїЉљЊњјЅ¬√ƒ≈∆«»… ЋћЌќѕ–—“â€â€˜â€™Ã·â€žÐŽÑžÐÑŸâ„–ÐÑ‘ÑабвгдежзийклмнопрÑтуфхцчшщъыьÑю€', ['windows-1254'] = '€�‚ƒ„…†‡ˆ‰Š‹Œ����‘’“â€â€¢â€“—˜™š›œ��Ÿ ¡¢£¤¥¦§¨©ª«¬Â®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÃÂÃÄÅÆÇÈÉÊËÌÃÃŽÃĞÑÒÓÔÕÖרÙÚÛÜİŞßà áâãäåæçèéêëìÃîïğñòóôõö÷øùúûüışÿ', ['windows-1255'] = '€�‚ƒ„…†‡ˆ‰�‹�����‘’“â€â€¢â€“—˜™�›���� ¡¢£₪¥¦§¨©×«¬Â®¯°±²³´µ¶·¸¹÷»¼½¾¿ְֱֲֳִֵֶַָֹֺֻּֽ־ֿ׀×ׂ׃װױײ׳״�������×בגדהוזחטיךכל××ž×Ÿ× ×¡×¢×£×¤×¥×¦×§×¨×©×ªï¿½ï¿½â€Žâ€', ['windows-1250'] = '€�‚�„…†‡�‰Š‹ŚŤŽŹ�‘’“â€â€¢â€“—�™š›śťžź ˇ˘Å¤Ą¦§¨©Ş«¬Â®Ż°±˛ł´µ¶·¸ąş»ĽËľżŔÃÂĂÄĹĆÇČÉĘËĚÃÃŽÄŽÄŃŇÓÔÅÖ×ŘŮÚŰÜÃŢßŕáâăäĺćçÄéęëěÃîÄđńňóôőö÷řůúűüýţ˙', ['koi8-u'] = '─│┌â”└┘├┤┬┴┼▀▄█▌â–░▒▓⌠■∙√≈≤≥ ⌡°²·÷â•║╒ёє╔ії╗╘╙╚╛ґў╞╟╠╡ÐЄ╣ІЇ╦╧╨╩╪ÒЎ©юабцдефгхийклмнопÑÑ€ÑтужвьызшÑщчъЮÐБЦДЕФГХИЙКЛМÐОПЯРСТУЖВЬЫЗШÐЩЧЪ', ['iso-8859-10'] = '�������������������������������� ĄĒĢĪĨͧĻÄŠŦŽÂŪŊ°ąēģīĩķ·ļđšŧž―ūŋĀÃÂÃÄÅÆĮČÉĘËĖÃÃŽÃÃŅŌÓÔÕÖŨØŲÚÛÜÃÞßÄáâãäåæįÄéęëėÃîïðņÅóôõöũøųúûüýþĸ', ['windows-1257'] = '€�‚�„…†‡�‰�‹�¨ˇ¸�‘’“â€â€¢â€“—�™�›�¯˛� �¢£¤�¦§Ø©Ŗ«¬Â®Æ°±²³´µ¶·ø¹ŗ»¼½¾æĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲÅŚŪÜŻŽßąįÄćäåęēÄéźėģķīļšńņóÅõö÷ųłśūüżž˙', ['windows-1253'] = 
'€�‚ƒ„…†‡�‰�‹�����‘’“â€â€¢â€“—�™�›���� ΅Ά£¤¥¦§¨©�«¬Â®―°±²³΄µ¶·ΈΉΊ»Ό½ΎÎÎΑΒΓΔΕΖΗΘΙΚΛΜÎΞΟΠΡ�ΣΤΥΦΧΨΩΪΫάÎήίΰαβγδεζηθικλμνξοπÏςστυφχψωϊϋόÏÏŽ', ['iso-8859-2'] = '�������������������������������� Ą˘Å¤ĽŚ§¨ŠŞŤŹÂŽŻ°ą˛ł´ľśˇ¸šşťźËžżŔÃÂĂÄĹĆÇČÉĘËĚÃÃŽÄŽÄŃŇÓÔÅÖ×ŘŮÚŰÜÃŢßŕáâăäĺćçÄéęëěÃîÄđńňóôőö÷řůúűüýţ˙', ['windows-874'] = '€����…�����������‘’“â€â€¢â€“—�������� à¸à¸‚ฃคฅฆงจฉชซฌà¸à¸Žà¸à¸à¸‘ฒณดตถทธนบปผà¸à¸žà¸Ÿà¸ มยรฤลฦวศษสหฬà¸à¸®à¸¯à¸°à¸±à¸²à¸³à¸´à¸µà¸¶à¸·à¸¸à¸¹à¸ºï¿½ï¿½ï¿½ï¿½à¸¿à¹€à¹à¹‚ใไๅๆ็่้๊๋์à¹à¹Žà¹à¹à¹‘๒๓๔๕๖๗๘๙๚๛', ['iso-8859-16'] = '�������������������������������� ĄąÅ€„Чš©Ș«ŹÂźŻ°±ČłŽâ€Â¶Â·Å¾Äș»ŒœŸżÀÃÂĂÄĆÆÇÈÉÊËÌÃÃŽÃÄŃÒÓÔÅÖŚŰÙÚÛÜĘȚßà áâăäćæçèéêëìÃîïđńòóôőöśűùúûüęțÿ', } local utfchar = utf8.char --- Try to find an encoding in HTML string ---@param str string HTML document ---@param len number count of characters from the start of the string where it should search for the encoding metadata ---@return string encoding identifier, or nil and message if no encoding was found local function find_html_encoding(str, len) -- try to find encoding in the html document -- we limit search length, because encoding should be in the document head, ideally near the start local len = len or 4096 local sub = str:sub(1, len) for meta in sub:gmatch("<meta (.-)>") do local charset = meta:match("charset%s*=%s*[\"']?(.-)[\"']") if charset then return string.lower(charset) end end return nil, "Cannot find the document encoding" end local function load_mapping(enc_name) local enc_name = enc_name or "" local enc = encodings[enc_name] if not enc then return nil, "Cannot load encoding " .. 
enc_name
  end
  -- The table entry is a UTF-8 string; its code points are assigned, in
  -- order, to consecutive byte values starting at 128.
  local mapping, byte_value = {}, 128
  for _, codepoint in utf8.codes(enc) do
    mapping[byte_value] = utfchar(codepoint)
    byte_value = byte_value + 1
  end
  return mapping
end

--- Convert string to utf-8
---@param text string for converting
---@param mapping table indexed by byte value, holding the UTF-8 replacement for bytes above 127
---@return string converted string
local function recode(text, mapping)
  -- Only bytes above 127 are candidates for replacement, so the pattern
  -- skips ASCII instead of invoking the callback for every byte.
  -- When the lookup yields nil, gsub keeps the original byte unchanged.
  -- The outer parentheses drop gsub's second result (the match count) so
  -- that exactly one string is returned, as documented.
  return (text:gsub("[\128-\255]", function(char)
    return mapping[string.byte(char)]
  end))
end

return {
  encodings = encodings,
  find_html_encoding = find_html_encoding,
  load_mapping = load_mapping,
  recode = recode
}