42 #define guchar unsigned char
44 #define guint unsigned int
45 #define gushort unsigned short
46 #define gint16 int16_t
47 #define guint16 uint16_t
48 #define gunichar uint32_t
50 #define gssize ssize_t
51 #define g_malloc malloc
53 #define g_return_val_if_fail(expr,val) { \
84 # define TRUE (!FALSE)
87 #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))
89 #define G_UNLIKELY(expr) (expr)
127 #define g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)])
152 #define UTF8_COMPUTE(Char, Mask, Len) \
158 else if ((Char & 0xe0) == 0xc0) \
163 else if ((Char & 0xf0) == 0xe0) \
168 else if ((Char & 0xf8) == 0xf0) \
173 else if ((Char & 0xfc) == 0xf8) \
178 else if ((Char & 0xfe) == 0xfc) \
186 #define UTF8_LENGTH(Char) \
187 ((Char) < 0x80 ? 1 : \
188 ((Char) < 0x800 ? 2 : \
189 ((Char) < 0x10000 ? 3 : \
190 ((Char) < 0x200000 ? 4 : \
191 ((Char) < 0x4000000 ? 5 : 6)))))
193 #define UTF8_GET(Result, Chars, Count, Mask, Len) \
194 (Result) = (Chars)[0] & (Mask); \
195 for ((Count) = 1; (Count) < (Len); ++(Count)) \
197 if (((Chars)[(Count)] & 0xc0) != 0x80) \
203 (Result) |= ((Chars)[(Count)] & 0x3f); \
206 static const gchar utf8_skip_data[256] = {
207 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
209 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
211 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
213 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
217 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
219 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5,
225 static const gchar *
const g_utf8_skip = utf8_skip_data;
241 g_utf8_strlen (
const gchar *p)
269 g_utf8_get_char (
const gchar *p)
271 int i, mask = 0, len;
273 unsigned char c = (
unsigned char) *p;
312 else if (c < 0x10000)
317 else if (c < 0x200000)
322 else if (c < 0x4000000)
335 for (i = len - 1; i > 0; --i)
337 outbuf[i] = (c & 0x3f) | 0x80;
340 outbuf[0] = c | first;
384 while (p < str + len && *p)
396 for (i = 0; i < n_chars; i++)
421 wc |= (
guchar) (*p++) & 0x3f;
424 while ((wc & mask) != 0);
463 g_ucs4_to_utf8 (
const gunichar *str,
467 gchar *result = NULL;
472 for (i = 0; i < len; i++)
477 if (str[i] >= 0x80000000)
483 result =
g_malloc (result_length + 1);
489 while (p < result + result_length)
490 p += g_unichar_to_utf8 (str[i++], p);
495 *items_written = p - result;
530 #define CC_PART1(Page, Char) \
531 ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
532 ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
533 : (cclass_data[combining_class_table_part1[Page]][Char]))
535 #define CC_PART2(Page, Char) \
536 ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
537 ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
538 : (cclass_data[combining_class_table_part2[Page]][Char]))
540 #define COMBINING_CLASS(Char) \
541 (((Char) <= G_UNICODE_LAST_CHAR_PART1) \
542 ? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
543 : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
544 ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
555 #define NCount (VCount * TCount)
556 #define SCount (LCount * NCount)
579 for (i = 0; i < len - 1; ++i)
582 if (next != 0 && last > next)
586 for (j = i + 1; j > 0; --j)
592 string[j] =
string[j - 1];
624 r[2] =
TBase + TIndex;
638 if (ch >= decomp_table[start].ch && ch <= decomp_table[end - 1].ch)
642 int half = (start + end) / 2;
643 if (ch == decomp_table[half].ch)
649 offset = decomp_table[half].compat_offset;
651 offset = decomp_table[half].canon_offset;
655 offset = decomp_table[half].canon_offset;
660 return &(decomp_expansion_string[offset]);
662 else if (half == start)
664 else if (ch > decomp_table[half].ch)
691 if ((SIndex %
TCount) == 0)
695 *result = a + TIndex;
703 #define CI(Page, Char) \
704 ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
705 ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
706 : (compose_data[compose_table[Page]][Char]))
708 #define COMPOSE_INDEX(Char) \
709 (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
716 if (combine_hangul (a, b, result))
778 while ((max_len < 0 || p < str + max_len) && *p)
786 decompose_hangul (wc, NULL, &result_len);
791 decomp = find_decomposition (wc, do_compat);
794 n_wc += g_utf8_strlen (decomp);
809 while ((max_len < 0 || p < str + max_len) && *p)
814 gsize old_n_wc = n_wc;
819 decompose_hangul (wc, wc_buffer + n_wc, &result_len);
824 decomp = find_decomposition (wc, do_compat);
830 wc_buffer[n_wc++] = g_utf8_get_char (pd);
833 wc_buffer[n_wc++] = wc;
842 g_unicode_canonical_ordering (wc_buffer + last_start,
844 last_start = old_n_wc;
853 g_unicode_canonical_ordering (wc_buffer + last_start,
862 if (do_compose && n_wc > 0)
868 for (i = 0; i < n_wc; i++)
873 (last_cc == 0 || last_cc != cc) &&
874 combine (wc_buffer[last_start], wc_buffer[i],
875 &wc_buffer[last_start]))
877 for (j = i + 1; j < n_wc; j++)
878 wc_buffer[j - 1] = wc_buffer[j];
941 gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
942 gchar *result = NULL;
945 result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL);
967 return g_utf8_get_char (p);
984 return g_unichar_to_utf8 (c, outbuf);
1015 if (u8_check ((
const uint8_t *) str, n))
1018 return g_utf8_to_ucs4_fast (str, len, items_written);
1040 size_t *items_read,
size_t *items_written)
1042 return g_ucs4_to_utf8 (str, len, items_read, items_written);
1077 if (u8_check ((
const uint8_t *) str, n))
1099 uint32_t *result_wc;
#define COMPOSE_SECOND_SINGLE_START
#define COMPOSE_SECOND_START
#define COMPOSE_FIRST_START
#define COMPOSE_FIRST_SINGLE_START
#define G_UNICODE_NOT_PRESENT_OFFSET
#define g_return_val_if_fail(expr, val)
#define UTF8_COMPUTE(Char, Mask, Len)
int stringprep_unichar_to_utf8(uint32_t c, char *outbuf)
#define COMPOSE_INDEX(Char)
#define G_N_ELEMENTS(arr)
char * stringprep_utf8_nfkc_normalize(const char *str, ssize_t len)
#define UTF8_LENGTH(Char)
#define g_utf8_next_char(p)
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
uint32_t stringprep_utf8_to_unichar(const char *p)
#define COMBINING_CLASS(Char)
@ G_NORMALIZE_DEFAULT_COMPOSE
@ G_NORMALIZE_ALL_COMPOSE
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)