UTF-8 utilities

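The utf8_utils module provides utilities for working with UTF-8 text: encoding and decoding between UTF-8 and UTF-32, counting characters, validating UTF-8 data, detecting the byte order mark, and decoding Unicode escape sequences.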

All functions and symbols are in the “utf8_utils” module; use require to get access to it.

require daslib/utf8_utils

Constants

s_utf8d = [[uint[364] 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x0; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x1; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x9; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x7; 0x8; 0x8; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0x2; 0xa; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x3; 0x4; 0x3; 0x3; 0xb; 0x6; 0x6; 0x6; 0x5; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x8; 0x0; 0xc; 0x18; 0x24; 0x3c; 0x60; 0x54; 0xc; 0xc; 0xc; 0x30; 0x48; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x0; 0xc; 0xc; 0xc; 0xc; 0xc; 0x0; 0xc; 0x0; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x18; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0x24; 0xc; 0x24; 0xc; 0xc; 0xc; 0x24; 0xc; 0xc; 0xc; 0xc; 0xc; 0x24; 0xc; 0x24; 0xc; 0xc; 0xc; 0x24; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc; 0xc]]

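Lookup table for the table-driven (DFA) UTF-8 decoder. The first 256 entries map each input byte to its character class; the remaining 108 entries form the state-transition table indexed by the current state plus the character class.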

UTF8_ACCEPT = 0x0

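Decoder state indicating that a complete, valid UTF-8 sequence has been consumed (the initial and accepting state of the decoder driven by s_utf8d).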

Uncategorized

utf16_to_utf32(high: uint; low: uint) : uint()

Converts a UTF-16 surrogate pair (high and low surrogate) to the corresponding UTF-32 codepoint

Arguments
  • high : uint

  • low : uint

utf8_encode(dest_array: array<uint8>; ch: uint)

Converts a codepoint (UTF-32 symbol) to UTF-8 and appends it to the UTF-8 byte array

Arguments
  • dest_array : array<uint8>

  • ch : uint

utf8_encode(ch: uint) : array<uint8>()

Converts a codepoint (UTF-32 symbol) to UTF-8 and returns it as a UTF-8 byte array

Arguments
  • ch : uint

utf8_encode(dest_array: array<uint8>; source_utf32_string: array<uint>)

Converts UTF-32 string to UTF-8 and appends it to the UTF-8 byte array

Arguments
  • dest_array : array<uint8>

  • source_utf32_string : array<uint> implicit

utf8_encode(source_utf32_string: array<uint>) : array<uint8>()

Converts UTF-32 string to UTF-8 and returns it as a UTF-8 byte array

Arguments
  • source_utf32_string : array<uint> implicit

utf8_length(utf8_string: array<uint8>) : int()

Returns the number of characters in the UTF-8 string

Arguments
  • utf8_string : array<uint8> implicit

utf8_length(utf8_string: string) : int()

Returns the number of characters in the UTF-8 string

Arguments
  • utf8_string : string

is_first_byte_of_utf8_char(ch: uint8) : bool()

Returns true if the given byte is the first byte of a UTF-8 character, i.e. it is not a continuation byte

Arguments
  • ch : uint8

contains_utf8_bom(utf8_string: array<uint8>) : bool()

Returns true if the UTF-8 byte array begins with the UTF-8 byte order mark (bytes 0xEF 0xBB 0xBF)

Arguments
  • utf8_string : array<uint8> implicit

contains_utf8_bom(utf8_string: string) : bool()

Returns true if the UTF-8 string begins with the UTF-8 byte order mark (bytes 0xEF 0xBB 0xBF)

Arguments
  • utf8_string : string

is_utf8_string_valid(utf8_string: array<uint8>) : bool()

Returns true if the byte array contains a well-formed UTF-8 sequence

Arguments
  • utf8_string : array<uint8> implicit

is_utf8_string_valid(utf8_string: string) : bool()

Returns true if the string contains a well-formed UTF-8 sequence

Arguments
  • utf8_string : string

utf8_decode(dest_utf32_string: array<uint>; source_utf8_string: array<uint8>)

Converts UTF-8 string to UTF-32 and appends it to the array of codepoints (UTF-32 string)

Arguments
  • dest_utf32_string : array<uint>

  • source_utf8_string : array<uint8> implicit

utf8_decode(source_utf8_string: array<uint8>) : array<uint>()

Converts UTF-8 string to UTF-32 and returns it as an array of codepoints (UTF-32 string)

Arguments
  • source_utf8_string : array<uint8> implicit

utf8_decode(source_utf8_string: string) : array<uint>()

Converts UTF-8 string to UTF-32 and returns it as an array of codepoints (UTF-32 string)

Arguments
  • source_utf8_string : string

utf8_decode(dest_utf32_string: array<uint>; source_utf8_string: string)

Converts UTF-8 string to UTF-32 and appends it to the array of codepoints (UTF-32 string)

Arguments
  • dest_utf32_string : array<uint>

  • source_utf8_string : string

decode_unicode_escape(str: string) : string()

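Decodes Unicode escape sequences of the form \uXXXX in the string (including UTF-16 surrogate pairs written as two consecutive escapes) and returns the resulting UTF-8 string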

Arguments
  • str : string