UTF-8 utilities
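The utf8_utils module provides utilities for working with UTF-8 text: encoding codepoints to UTF-8, decoding UTF-8 to UTF-32, validating UTF-8 strings, detecting the byte order mark, and counting characters.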
All functions and symbols are in the “utf8_utils” module; use require to get access to it.
require daslib/utf8_utils
Constants
- s_utf8d = [[uint[364] 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x8; 0x8; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0xa; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x4; 0x3; 0x3; 0xb; 0x6; 0x6; 0x6; 0x5; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x0; 0xc; 0x18; 0x24; 0x3c; 0x60; 0x54; 0xc; 0xc; 0xc; 0x30; 0x48; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x0; 0xc; 0xc; 0xc; 0xc; 0xc; 0x0; 0xc; 0x0; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x24; 0xc; 0x24; 0xc; 0xc; 0xc; 0x24; 0xc; 0xc; 0xc; 0xc; 0xc; 0x24; 0xc; 0x24; 0xc; 0xc; 0xc; 0x24; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc]]
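Lookup table for the state-machine (DFA) based UTF-8 decoder. The first 256 entries map each byte value to a character class; the remaining 108 entries are the state transition table, with states stored as multiples of 12.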
- UTF8_ACCEPT = 0x0
Uncategorized
- utf16_to_utf32(high: uint; low: uint) : uint()
Converts a UTF-16 surrogate pair (high and low surrogates) to a single UTF-32 codepoint
- Arguments
high : uint
low : uint
- utf8_encode(dest_array: array<uint8>; ch: uint)
Converts a codepoint (UTF-32 symbol) to UTF-8 and appends it to the UTF-8 byte array
- Arguments
dest_array : array<uint8>
ch : uint
- utf8_encode(ch: uint) : array<uint8>()
Converts a codepoint (UTF-32 symbol) to the UTF-8 byte array
- Arguments
ch : uint
- utf8_encode(dest_array: array<uint8>; source_utf32_string: array<uint>)
Converts UTF-32 string to UTF-8 and appends it to the UTF-8 byte array
- Arguments
dest_array : array<uint8>
source_utf32_string : array<uint> implicit
- utf8_encode(source_utf32_string: array<uint>) : array<uint8>()
Converts UTF-32 string to UTF-8 and returns it as a UTF-8 byte array
- Arguments
source_utf32_string : array<uint> implicit
- utf8_length(utf8_string: array<uint8>) : int()
Returns the number of characters in the UTF-8 string
- Arguments
utf8_string : array<uint8> implicit
- utf8_length(utf8_string: string) : int()
Returns the number of characters in the UTF-8 string
- Arguments
utf8_string : string
- is_first_byte_of_utf8_char(ch: uint8) : bool()
Returns true if the given byte is the first byte of a UTF-8 encoded character (i.e. it is not a continuation byte)
- Arguments
ch : uint8
- contains_utf8_bom(utf8_string: array<uint8>) : bool()
Returns true if the UTF-8 byte array starts with the UTF-8 byte order mark (BOM: 0xEF 0xBB 0xBF)
- Arguments
utf8_string : array<uint8> implicit
- contains_utf8_bom(utf8_string: string) : bool()
Returns true if the UTF-8 string starts with the UTF-8 byte order mark (BOM: 0xEF 0xBB 0xBF)
- Arguments
utf8_string : string
- is_utf8_string_valid(utf8_string: array<uint8>) : bool()
Returns true if the byte array is a valid UTF-8 encoded string
- Arguments
utf8_string : array<uint8> implicit
- is_utf8_string_valid(utf8_string: string) : bool()
Returns true if the string is a valid UTF-8 encoded string
- Arguments
utf8_string : string
- utf8_decode(dest_utf32_string: array<uint>; source_utf8_string: array<uint8>)
Converts UTF-8 string to UTF-32 and appends it to the array of codepoints (UTF-32 string)
- Arguments
dest_utf32_string : array<uint>
source_utf8_string : array<uint8> implicit
- utf8_decode(source_utf8_string: array<uint8>) : array<uint>()
Converts UTF-8 string to UTF-32 and returns it as an array of codepoints (UTF-32 string)
- Arguments
source_utf8_string : array<uint8> implicit
- utf8_decode(source_utf8_string: string) : array<uint>()
Converts UTF-8 string to UTF-32 and returns it as an array of codepoints (UTF-32 string)
- Arguments
source_utf8_string : string
- utf8_decode(dest_utf32_string: array<uint>; source_utf8_string: string)
Converts UTF-8 string to UTF-32 and appends it to the array of codepoints (UTF-32 string)
- Arguments
dest_utf32_string : array<uint>
source_utf8_string : string
- decode_unicode_escape(str: string) : string()
Decodes Unicode escape sequences (such as \uXXXX) found in the string and returns the resulting UTF-8 string
- Arguments
str : string