#include <stdlib.h>
#include <string.h>
#include "stringprep.h"
#include "gunidecomp.h"
#include "gunicomp.h"
#include <unistr.h>
#include <stdio.h>
Go to the source code of this file.
|
#define | gboolean int |
|
#define | gchar char |
|
#define | guchar unsigned char |
|
#define | gint int |
|
#define | guint unsigned int |
|
#define | gushort unsigned short |
|
#define | gint16 int16_t |
|
#define | guint16 uint16_t |
|
#define | gunichar uint32_t |
|
#define | gsize size_t |
|
#define | gssize ssize_t |
|
#define | g_malloc malloc |
|
#define | g_free free |
|
#define | g_return_val_if_fail(expr, val) |
|
#define | FALSE (0) |
|
#define | TRUE (!FALSE) |
|
#define | G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) |
|
#define | G_UNLIKELY(expr) (expr) |
|
#define | g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)]) |
|
#define | UTF8_COMPUTE(Char, Mask, Len) |
|
#define | UTF8_LENGTH(Char) |
|
#define | UTF8_GET(Result, Chars, Count, Mask, Len) |
|
#define | CC_PART1(Page, Char) |
|
#define | CC_PART2(Page, Char) |
|
#define | COMBINING_CLASS(Char) |
|
#define | SBase 0xAC00 |
|
#define | LBase 0x1100 |
|
#define | VBase 0x1161 |
|
#define | TBase 0x11A7 |
|
#define | LCount 19 |
|
#define | VCount 21 |
|
#define | TCount 28 |
|
#define | NCount (VCount * TCount) |
|
#define | SCount (LCount * NCount) |
|
#define | CI(Page, Char) |
|
#define | COMPOSE_INDEX(Char) (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) |
|
◆ CC_PART1
#define CC_PART1 |
( |
|
Page, |
|
|
|
Char |
|
) |
| |
Value:
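((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part1[Page]][Char]))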
#define G_UNICODE_MAX_TABLE_INDEX
Definition at line 530 of file nfkc.c.
◆ CC_PART2
#define CC_PART2 |
( |
|
Page, |
|
|
|
Char |
|
) |
| |
Value:
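((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part2[Page]][Char]))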
Definition at line 535 of file nfkc.c.
◆ CI
#define CI |
( |
|
Page, |
|
|
|
Char |
|
) |
| |
Value:
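((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (compose_data[compose_table[Page]][Char]))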
Definition at line 703 of file nfkc.c.
◆ COMBINING_CLASS
#define COMBINING_CLASS |
( |
|
Char | ) |
|
Value:
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
: 0))
#define G_UNICODE_LAST_CHAR
#define G_UNICODE_LAST_CHAR_PART1
#define CC_PART2(Page, Char)
#define CC_PART1(Page, Char)
Definition at line 540 of file nfkc.c.
◆ COMPOSE_INDEX
#define COMPOSE_INDEX |
( |
|
Char | ) |
(((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff)) |
◆ FALSE
◆ g_free
◆ g_malloc
◆ G_N_ELEMENTS
#define G_N_ELEMENTS |
( |
|
arr | ) |
(sizeof (arr) / sizeof ((arr)[0])) |
◆ g_return_val_if_fail
#define g_return_val_if_fail |
( |
|
expr, |
|
|
|
val |
|
) |
| |
Value: { \
if (!(expr)) \
return (val); \
}
Definition at line 53 of file nfkc.c.
◆ G_UNLIKELY
#define G_UNLIKELY |
( |
|
expr | ) |
(expr) |
◆ g_utf8_next_char
#define g_utf8_next_char |
( |
|
p | ) |
((p) + g_utf8_skip[*(const guchar *)(p)]) |
◆ gboolean
◆ gchar
◆ gint
◆ gint16
◆ gsize
◆ gssize
◆ guchar
#define guchar unsigned char |
◆ guint
#define guint unsigned int |
◆ guint16
◆ gunichar
#define gunichar uint32_t |
◆ gushort
#define gushort unsigned short |
◆ LBase
◆ LCount
◆ NCount
◆ SBase
◆ SCount
◆ TBase
◆ TCount
◆ TRUE
◆ UTF8_COMPUTE
#define UTF8_COMPUTE |
( |
|
Char, |
|
|
|
Mask, |
|
|
|
Len |
|
) |
| |
◆ UTF8_GET
#define UTF8_GET |
( |
|
Result, |
|
|
|
Chars, |
|
|
|
Count, |
|
|
|
Mask, |
|
|
|
Len |
|
) |
| |
Value: (Result) = (Chars)[0] & (Mask); \
for ((Count) = 1; (Count) < (Len); ++(Count)) \
{ \
if (((Chars)[(Count)] & 0xc0) != 0x80) \
{ \
(Result) = -1; \
break; \
} \
(Result) <<= 6; \
(Result) |= ((Chars)[(Count)] & 0x3f); \
}
Definition at line 193 of file nfkc.c.
◆ UTF8_LENGTH
#define UTF8_LENGTH |
( |
|
Char | ) |
|
Value: ((Char) < 0x80 ? 1 : \
((Char) < 0x800 ? 2 : \
((Char) < 0x10000 ? 3 : \
((Char) < 0x200000 ? 4 : \
((Char) < 0x4000000 ? 5 : 6)))))
Definition at line 186 of file nfkc.c.
◆ VBase
◆ VCount
◆ GNormalizeMode
Enumerator |
---|
G_NORMALIZE_DEFAULT | |
G_NORMALIZE_NFD | |
G_NORMALIZE_DEFAULT_COMPOSE | |
G_NORMALIZE_NFC | |
G_NORMALIZE_ALL | |
G_NORMALIZE_NFKD | |
G_NORMALIZE_ALL_COMPOSE | |
G_NORMALIZE_NFKC | |
Definition at line 114 of file nfkc.c.
◆ stringprep_ucs4_nfkc_normalize()
uint32_t* stringprep_ucs4_nfkc_normalize |
( |
const uint32_t * |
str, |
|
|
ssize_t |
len |
|
) |
| |
stringprep_ucs4_nfkc_normalize:
- Parameters
-
str | a Unicode string. |
len | length of @str array, or -1 if @str is nul-terminated. |
Converts a UCS-4 string into canonical form. See stringprep_utf8_nfkc_normalize() for more information.
Return value: a newly allocated Unicode string, that is the NFKC normalized form of @str.
Definition at line 1096 of file nfkc.c.
◆ stringprep_ucs4_to_utf8()
char* stringprep_ucs4_to_utf8 |
( |
const uint32_t * |
str, |
|
|
ssize_t |
len, |
|
|
size_t * |
items_read, |
|
|
size_t * |
items_written |
|
) |
| |
stringprep_ucs4_to_utf8:
- Parameters
-
str | a UCS-4 encoded string |
len | the maximum length of @str to use. If @len < 0, then the string is terminated with a 0 character. |
items_read | location to store the number of characters read, or NULL. |
items_written | location to store number of bytes written or NULL. The value here stored does not include the trailing 0 byte. |
Convert a string from a 32-bit fixed-width representation (UCS-4) to UTF-8. The result will be terminated with a 0 byte.
Return value: a pointer to a newly allocated UTF-8 string. This value must be deallocated by the caller. If an error occurs, NULL will be returned.
Definition at line 1039 of file nfkc.c.
◆ stringprep_unichar_to_utf8()
int stringprep_unichar_to_utf8 |
( |
uint32_t |
c, |
|
|
char * |
outbuf |
|
) |
| |
stringprep_unichar_to_utf8:
- Parameters
-
c | an ISO 10646 character code |
outbuf | output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to @outbuf. |
Converts a single character to UTF-8.
Return value: number of bytes written.
Definition at line 982 of file nfkc.c.
◆ stringprep_utf8_nfkc_normalize()
char* stringprep_utf8_nfkc_normalize |
( |
const char * |
str, |
|
|
ssize_t |
len |
|
) |
| |
stringprep_utf8_nfkc_normalize:
- Parameters
-
str | a UTF-8 encoded string. |
len | length of @str, in bytes, or -1 if @str is nul-terminated. |
Converts a string into canonical form, standardizing such issues as whether a character with an accent is represented as a base character and combining accent or as a single precomposed character.
The normalization mode is NFKC (ALL COMPOSE). It standardizes differences that do not affect the text content, such as the above-mentioned accent representation. It standardizes the "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE). Formatting information may be lost but for most text operations such characters should be considered the same. It returns a result with composed forms rather than a maximally decomposed form.
Return value: a newly allocated string, that is the NFKC normalized form of @str.
Definition at line 1068 of file nfkc.c.
◆ stringprep_utf8_to_ucs4()
uint32_t* stringprep_utf8_to_ucs4 |
( |
const char * |
str, |
|
|
ssize_t |
len, |
|
|
size_t * |
items_written |
|
) |
| |
stringprep_utf8_to_ucs4:
- Parameters
-
str | a UTF-8 encoded string |
len | the maximum length of @str to use. If @len < 0, then the string is nul-terminated. |
items_written | location to store the number of characters in the result, or NULL. |
Convert a string from UTF-8 to a 32-bit fixed-width representation (UCS-4). The function now performs error checking to verify that the input is valid UTF-8 (previously it was documented as not performing error checking).
Return value: a pointer to a newly allocated UCS-4 string. This value must be deallocated by the caller.
Definition at line 1006 of file nfkc.c.
◆ stringprep_utf8_to_unichar()
uint32_t stringprep_utf8_to_unichar |
( |
const char * |
p | ) |
|
stringprep_utf8_to_unichar:
- Parameters
-
p | a pointer to a Unicode character encoded as UTF-8 |
Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If @p does
not point to a valid UTF-8 encoded character, results are undefined.
Return value: the resulting character.
Definition at line 965 of file nfkc.c.