#include <stdlib.h>
#include <string.h>
#include "stringprep.h"
#include "gunidecomp.h"
#include "gunicomp.h"
#include <unistr.h>
#include <stdio.h>

Macros
#define	gboolean int

#define	gchar char

#define	guchar unsigned char

#define	gint int

#define	guint unsigned int

#define	gushort unsigned short

#define	gint16 int16_t

#define	guint16 uint16_t

#define	gunichar uint32_t

#define	gsize size_t

#define	gssize ssize_t

#define	g_malloc malloc

#define	g_free free

#define	g_return_val_if_fail(expr, val)

#define	FALSE (0)

#define	TRUE (!FALSE)

#define	G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))

#define	G_UNLIKELY(expr) (expr)

#define	g_utf8_next_char(p) ((p) + g_utf8_skip[*(const guchar *)(p)])

#define	UTF8_COMPUTE(Char, Mask, Len)

#define	UTF8_LENGTH(Char)

#define	UTF8_GET(Result, Chars, Count, Mask, Len)

#define	CC_PART1(Page, Char)

#define	CC_PART2(Page, Char)

#define	COMBINING_CLASS(Char)

#define	SBase 0xAC00

#define	LBase 0x1100

#define	VBase 0x1161

#define	TBase 0x11A7

#define	LCount 19

#define	VCount 21

#define	TCount 28

#define	NCount (VCount * TCount)

#define	SCount (LCount * NCount)

#define	CI(Page, Char)

#define	COMPOSE_INDEX(Char) (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Enumerations
enum	GNormalizeMode { G_NORMALIZE_DEFAULT , G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT , G_NORMALIZE_DEFAULT_COMPOSE , G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE , G_NORMALIZE_ALL , G_NORMALIZE_NFKD = G_NORMALIZE_ALL , G_NORMALIZE_ALL_COMPOSE , G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE }

Functions
uint32_t	stringprep_utf8_to_unichar (const char *p)

int	stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)

uint32_t *	stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t *items_written)

char *	stringprep_ucs4_to_utf8 (const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)

char *	stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)

uint32_t *	stringprep_ucs4_nfkc_normalize (const uint32_t *str, ssize_t len)

Macro Definition Documentation

◆ CC_PART1

#define CC_PART1	(	Page,
		Char
	)

Value:

  ((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX)     \
   ? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX)    \
   : (cclass_data[combining_class_table_part1[Page]][Char]))

Definition at line 530 of file nfkc.c.

◆ CC_PART2

#define CC_PART2	(	Page,
		Char
	)

Value:

  ((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX)     \
   ? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX)    \
   : (cclass_data[combining_class_table_part2[Page]][Char]))

Definition at line 535 of file nfkc.c.

◆ CI

#define CI	(	Page,
		Char
	)

Value:

  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX)   \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX)  \
   : (compose_data[compose_table[Page]][Char]))

Definition at line 703 of file nfkc.c.

◆ COMBINING_CLASS

#define COMBINING_CLASS ( Char )

Value:

  (((Char) <= G_UNICODE_LAST_CHAR_PART1)                        \
   ? CC_PART1 ((Char) >> 8, (Char) & 0xff)                      \
   : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR)      \
      ? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff)       \
      : 0))

Definition at line 540 of file nfkc.c.

◆ COMPOSE_INDEX

#define COMPOSE_INDEX ( Char ) (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Definition at line 708 of file nfkc.c.

◆ FALSE

#define FALSE (0)

Definition at line 80 of file nfkc.c.

◆ g_free

#define g_free free

Definition at line 52 of file nfkc.c.

◆ g_malloc

#define g_malloc malloc

Definition at line 51 of file nfkc.c.

◆ G_N_ELEMENTS

#define G_N_ELEMENTS ( arr ) (sizeof (arr) / sizeof ((arr)[0]))

Definition at line 87 of file nfkc.c.

◆ g_return_val_if_fail

#define g_return_val_if_fail	(	expr,
		val
	)

Value:

    {           \
    if (!(expr))                                        \
      return (val);                                     \
  }

Definition at line 53 of file nfkc.c.

◆ G_UNLIKELY

#define G_UNLIKELY ( expr ) (expr)

Definition at line 89 of file nfkc.c.

◆ g_utf8_next_char

#define g_utf8_next_char ( p ) ((p) + g_utf8_skip[*(const guchar *)(p)])

Definition at line 127 of file nfkc.c.

◆ gboolean

#define gboolean int

Definition at line 40 of file nfkc.c.

◆ gchar

#define gchar char

Definition at line 41 of file nfkc.c.

◆ gint

#define gint int

Definition at line 43 of file nfkc.c.

◆ gint16

#define gint16 int16_t

Definition at line 46 of file nfkc.c.

◆ gsize

#define gsize size_t

Definition at line 49 of file nfkc.c.

◆ gssize

#define gssize ssize_t

Definition at line 50 of file nfkc.c.

◆ guchar

#define guchar unsigned char

Definition at line 42 of file nfkc.c.

◆ guint

#define guint unsigned int

Definition at line 44 of file nfkc.c.

◆ guint16

#define guint16 uint16_t

Definition at line 47 of file nfkc.c.

◆ gunichar

#define gunichar uint32_t

Definition at line 48 of file nfkc.c.

◆ gushort

#define gushort unsigned short

Definition at line 45 of file nfkc.c.

◆ LBase

#define LBase 0x1100

Definition at line 549 of file nfkc.c.

◆ LCount

#define LCount 19

Definition at line 552 of file nfkc.c.

◆ NCount

#define NCount (VCount * TCount)

Definition at line 555 of file nfkc.c.

◆ SBase

#define SBase 0xAC00

Definition at line 548 of file nfkc.c.

◆ SCount

#define SCount (LCount * NCount)

Definition at line 556 of file nfkc.c.

◆ TBase

#define TBase 0x11A7

Definition at line 551 of file nfkc.c.

◆ TCount

#define TCount 28

Definition at line 554 of file nfkc.c.

◆ TRUE

#define TRUE (!FALSE)

Definition at line 84 of file nfkc.c.

◆ UTF8_COMPUTE

#define UTF8_COMPUTE	(	Char,
		Mask,
		Len
	)

Definition at line 152 of file nfkc.c.

◆ UTF8_GET

#define UTF8_GET	(	Result,
		Chars,
		Count,
		Mask,
		Len
	)

Value:

  (Result) = (Chars)[0] & (Mask);                                             \
  for ((Count) = 1; (Count) < (Len); ++(Count))                               \
    {                                                                         \
      if (((Chars)[(Count)] & 0xc0) != 0x80)                                  \
        {                                                                     \
          (Result) = -1;                                                      \
          break;                                                              \
        }                                                                     \
      (Result) <<= 6;                                                         \
      (Result) |= ((Chars)[(Count)] & 0x3f);                                  \
    }

Definition at line 193 of file nfkc.c.

◆ UTF8_LENGTH

#define UTF8_LENGTH ( Char )

Value:

  ((Char) < 0x80 ? 1 :                          \
   ((Char) < 0x800 ? 2 :                        \
    ((Char) < 0x10000 ? 3 :                     \
     ((Char) < 0x200000 ? 4 :                   \
      ((Char) < 0x4000000 ? 5 : 6)))))

Definition at line 186 of file nfkc.c.

◆ VBase

#define VBase 0x1161

Definition at line 550 of file nfkc.c.

◆ VCount

#define VCount 21

Definition at line 553 of file nfkc.c.

Enumeration Type Documentation

◆ GNormalizeMode

enum GNormalizeMode

Enumerator
G_NORMALIZE_DEFAULT
G_NORMALIZE_NFD
G_NORMALIZE_DEFAULT_COMPOSE
G_NORMALIZE_NFC
G_NORMALIZE_ALL
G_NORMALIZE_NFKD
G_NORMALIZE_ALL_COMPOSE
G_NORMALIZE_NFKC

Definition at line 114 of file nfkc.c.

Function Documentation

◆ stringprep_ucs4_nfkc_normalize()

uint32_t* stringprep_ucs4_nfkc_normalize	(	const uint32_t *	str,
		ssize_t	len
	)

stringprep_ucs4_nfkc_normalize:

Parameters

str	a Unicode string.
len	length of @str array, or -1 if @str is nul-terminated.

Converts a UCS4 string into canonical form; see stringprep_utf8_nfkc_normalize() for more information.

Return value: a newly allocated Unicode string, that is the NFKC normalized form of @str.

Definition at line 1096 of file nfkc.c.

◆ stringprep_ucs4_to_utf8()

char* stringprep_ucs4_to_utf8	(	const uint32_t *	str,
		ssize_t	len,
		size_t *	items_read,
		size_t *	items_written
	)

stringprep_ucs4_to_utf8:

Parameters

str	a UCS-4 encoded string
len	the maximum length of @str to use. If @len < 0, then the string is terminated with a 0 character.
items_read	location to store number of characters read, or NULL.
items_written	location to store number of bytes written or NULL. The value here stored does not include the trailing 0 byte.

Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The result will be terminated with a 0 byte.

Return value: a pointer to a newly allocated UTF-8 string. This value must be deallocated by the caller. If an error occurs, NULL will be returned.

Definition at line 1039 of file nfkc.c.

◆ stringprep_unichar_to_utf8()

int stringprep_unichar_to_utf8	(	uint32_t	c,
		char *	outbuf
	)

stringprep_unichar_to_utf8:

Parameters

c	an ISO10646 character code
outbuf	output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to @outbuf.

Converts a single character to UTF-8.

Return value: number of bytes written.

Definition at line 982 of file nfkc.c.

◆ stringprep_utf8_nfkc_normalize()

char* stringprep_utf8_nfkc_normalize	(	const char *	str,
		ssize_t	len
	)

stringprep_utf8_nfkc_normalize:

Parameters

str	a UTF-8 encoded string.
len	length of @str, in bytes, or -1 if @str is nul-terminated.

Converts a string into canonical form, standardizing such issues as whether a character with an accent is represented as a base character and combining accent or as a single precomposed character.

The normalization mode is NFKC (ALL COMPOSE). It standardizes differences that do not affect the text content, such as the above-mentioned accent representation. It standardizes the "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE). Formatting information may be lost but for most text operations such characters should be considered the same. It returns a result with composed forms rather than a maximally decomposed form.

Return value: a newly allocated string, that is the NFKC normalized form of @str.

Definition at line 1068 of file nfkc.c.

◆ stringprep_utf8_to_ucs4()

uint32_t* stringprep_utf8_to_ucs4	(	const char *	str,
		ssize_t	len,
		size_t *	items_written
	)

stringprep_utf8_to_ucs4:

Parameters

str	a UTF-8 encoded string
len	the maximum length of @str to use. If @len < 0, then the string is nul-terminated.
items_written	location to store the number of characters in the result, or NULL.

Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4. The function now performs error checking to verify that the input is valid UTF-8 (before it was documented to not do error checking).

Return value: a pointer to a newly allocated UCS-4 string. This value must be deallocated by the caller.

Definition at line 1006 of file nfkc.c.

◆ stringprep_utf8_to_unichar()

uint32_t stringprep_utf8_to_unichar ( const char * p )

stringprep_utf8_to_unichar:

Parameters

p	a pointer to Unicode character encoded as UTF-8

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If @p does not point to a valid UTF-8 encoded character, results are undefined.

Return value: the resulting character.

Definition at line 965 of file nfkc.c.

Macros

Enumerations

Functions

Macro Definition Documentation

◆ CC_PART1

◆ CC_PART2

◆ CI

◆ COMBINING_CLASS

◆ COMPOSE_INDEX

◆ FALSE

◆ g_free

◆ g_malloc

◆ G_N_ELEMENTS

◆ g_return_val_if_fail

◆ G_UNLIKELY

◆ g_utf8_next_char

◆ gboolean

◆ gchar

◆ gint

◆ gint16

◆ gsize

◆ gssize

◆ guchar

◆ guint

◆ guint16

◆ gunichar

◆ gushort

◆ LBase

◆ LCount

◆ NCount

◆ SBase

◆ SCount

◆ TBase

◆ TCount

◆ TRUE

◆ UTF8_COMPUTE

◆ UTF8_GET

◆ UTF8_LENGTH

◆ VBase

◆ VCount

Enumeration Type Documentation

◆ GNormalizeMode

Function Documentation

◆ stringprep_ucs4_nfkc_normalize()

◆ stringprep_ucs4_to_utf8()

◆ stringprep_unichar_to_utf8()

◆ stringprep_utf8_nfkc_normalize()

◆ stringprep_utf8_to_ucs4()

◆ stringprep_utf8_to_unichar()