Introduction
GNU Libidn is an implementation of the Stringprep, Punycode and IDNA specifications defined by the IETF Internationalized Domain Names (IDN) working group, used for internationalized domain names. The package is available under the GNU Lesser General Public License.
The library contains a generic Stringprep implementation that does Unicode 3.2 NFKC normalization, mapping and prohibition of characters, and bidirectional character handling. Profiles for Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII Compatible Encoding (ACE) via IDNA are supported. A mechanism to define Top-Level Domain (TLD) specific validation tables, and to compare strings against those tables, is included. Default tables for some TLDs are also included.
The Stringprep API consists of two main functions, one for converting data from the system's native representation into UTF-8, and one function to perform the Stringprep processing. Adding a new Stringprep profile for your application within the API is straightforward. The Punycode API consists of one encoding function and one decoding function. The IDNA API consists of the ToASCII and ToUnicode functions, as well as a high-level interface for converting entire domain names to and from the ACE encoded form. The TLD API consists of one set of functions to extract the TLD name from a domain string, one set of functions to locate the proper TLD table to use based on the TLD name, and core functions to validate a string against a TLD table, and some utility wrappers to perform all the steps in one call.
The library is used by, e.g., GNU SASL and Shishi to process user names and passwords. Libidn can be built into GNU Libc to enable a new system-wide getaddrinfo() flag for IDN processing.
Libidn is developed for the GNU/Linux system, but runs on over 20 Unix platforms (including Solaris, IRIX, AIX, and Tru64) and Windows. Libidn is written in C and (parts of) the API is accessible from C, C++, Emacs Lisp, Python and Java.
The project web page:
https://www.gnu.org/software/libidn/
The software archive:
ftp://alpha.gnu.org/pub/gnu/libidn/
For more information see:
http://www.ietf.org/html.charters/idn-charter.html
http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)
http://www.ietf.org/rfc/rfc3490.txt (idna specification)
http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)
http://www.ietf.org/rfc/rfc3492.txt (punycode specification)
http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt
http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
Further information and paid contract development:
Simon Josefsson simon.nosp@m.@jos.nosp@m.efsso.nosp@m.n.or.nosp@m.g
Examples
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
int
main (void)
{
char buf[BUFSIZ];
char *p;
int rc;
size_t i;
setlocale (LC_ALL, "");
fflush (stdout);
if (!fgets (buf, BUFSIZ, stdin))
perror ("fgets");
buf[strlen (buf) - 1] = '\0';
printf ("Before locale2utf8 (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
if (p)
{
strcpy (buf, p);
free (p);
}
else
printf ("Could not convert string to UTF-8, continuing anyway...\n");
printf ("Before stringprep (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
else
{
printf ("After stringprep (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
}
return 0;
}
const char * stringprep_strerror(Stringprep_rc rc)
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
IDNAPI const char * stringprep_locale_charset(void)
IDNAPI char * stringprep_locale_to_utf8(const char *str)
#define stringprep_nameprep(in, maxlen)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
int
main (void)
{
char buf[BUFSIZ];
char *p;
int rc;
size_t i;
setlocale (LC_ALL, "");
fflush (stdout);
if (!fgets (buf, BUFSIZ, stdin))
perror ("fgets");
buf[strlen (buf) - 1] = '\0';
printf ("Read string (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
{
return EXIT_FAILURE;
}
printf ("ACE label (length %ld): '%s'\n", (long int) strlen (p), p);
for (i = 0; i < strlen (p); i++)
printf ("%02x ", (unsigned) p[i] & 0xFF);
printf ("\n");
free (p);
return 0;
}
int idna_to_ascii_lz(const char *input, char **output, int flags)
IDNAPI const char * idna_strerror(Idna_rc rc)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
int
main (void)
{
char buf[BUFSIZ];
char *p;
int rc;
size_t i;
setlocale (LC_ALL, "");
fflush (stdout);
if (!fgets (buf, BUFSIZ, stdin))
perror ("fgets");
buf[strlen (buf) - 1] = '\0';
printf ("Read string (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
{
printf (
"ToUnicode() failed (%d): %s\n", rc,
idna_strerror (rc));
return EXIT_FAILURE;
}
printf ("ACE label (length %ld): '%s'\n", (long int) strlen (p), p);
for (i = 0; i < strlen (p); i++)
printf ("%02x ", (unsigned) p[i] & 0xFF);
printf ("\n");
free (p);
return 0;
}
int idna_to_unicode_lzlz(const char *input, char **output, int flags)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
main (void)
{
char buf[BUFSIZ];
char *p;
uint32_t *r;
int rc;
size_t errpos, i;
fflush (stdout);
if (!fgets (buf, BUFSIZ, stdin))
perror ("fgets");
buf[strlen (buf) - 1] = '\0';
printf ("Read string (length %ld): ", (long int) strlen (buf));
for (i = 0; i < strlen (buf); i++)
printf ("%02x ", (unsigned) buf[i] & 0xFF);
printf ("\n");
if (p)
{
strcpy (buf, p);
free (p);
}
else
printf ("Could not convert string to UTF-8, continuing anyway...\n");
{
printf (
"idna_to_ascii_8z failed (%d): %s\n", rc,
idna_strerror (rc));
return 2;
}
printf ("ToASCII string (length %ld): %s\n", (long int) strlen (p), p);
free (p);
{
printf ("idna_to_unicode_8z4z failed (%d): %s\n",
return 2;
}
printf ("ToUnicode string: ");
for (i = 0; r[i]; i++)
printf ("U+%04x ", r[i]);
printf ("\n");
free (r);
{
printf ("Domain rejected by TLD check, Unicode position %ld\n",
(long int) errpos);
return 1;
}
{
printf (
"tld_check_4z() failed (%d): %s\n", rc,
tld_strerror (rc));
return 2;
}
printf ("Domain accepted by TLD check\n");
return 0;
}
int idna_to_ascii_8z(const char *input, char **output, int flags)
int idna_to_unicode_8z4z(const char *input, uint32_t **output, int flags)
const char * tld_strerror(Tld_rc rc)
int tld_check_4z(const uint32_t *in, size_t *errpos, const Tld_table **overrides)