// Gnash 0.8.10
00001 // utf8.h: utilities for converting to and from UTF-8 00002 // 00003 // Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. 00004 // 00005 // This program is free software; you can redistribute it and/or modify 00006 // it under the terms of the GNU General Public License as published by 00007 // the Free Software Foundation; either version 3 of the License, or 00008 // (at your option) any later version. 00009 // 00010 // This program is distributed in the hope that it will be useful, 00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 // GNU General Public License for more details. 00014 // 00015 // You should have received a copy of the GNU General Public License 00016 // along with this program; if not, write to the Free Software 00017 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00018 // 00019 // Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004 00020 00021 #ifndef UTF8_H 00022 #define UTF8_H 00023 00024 #include <string> 00025 #include <boost/cstdint.hpp> // for C99 int types 00026 #include <vector> 00027 00028 #include "dsodefs.h" // For DSOEXPORT 00029 00030 namespace gnash { 00031 00033 // 00055 // 00059 namespace utf8 { 00060 00062 // 00066 // 00069 DSOEXPORT std::wstring decodeCanonicalString(const std::string& str, int version); 00070 00072 // 00082 DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr, int version); 00083 00085 // 00090 DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it, 00091 const std::string::const_iterator& e); 00092 00095 DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character); 00096 00098 // 00101 DSOEXPORT std::string encodeLatin1Character(boost::uint32_t ucsCharacter); 00102 00103 enum TextEncoding { 00104 encUNSPECIFIED, 00105 encUTF8, 00106 encUTF16BE, 00107 encUTF16LE, 00108 
encUTF32BE, 00109 encUTF32LE, 00110 encSCSU, 00111 encUTF7, 00112 encUTFEBCDIC, 00113 encBOCU1 00114 }; 00115 00117 // 00140 DSOEXPORT char* stripBOM(char* in, size_t& size, TextEncoding& encoding); 00141 00143 DSOEXPORT const char* textEncodingName(TextEncoding enc); 00144 00145 enum EncodingGuess { 00146 ENCGUESS_UNICODE = 0, 00147 ENCGUESS_JIS = 1, 00148 ENCGUESS_OTHER = 2 00149 }; 00150 00152 // Shift-Jis, UTF8, and other. Puts the DisplayObject count in length, 00153 // and the offsets to the DisplayObjects in offsets, if offsets is not NULL. 00154 // If not NULL, offsets should be at least s.length(). 00155 // offsets are not accurate if the return value is GUESSENC_OTHER 00156 // 00159 DSOEXPORT EncodingGuess guessEncoding(const std::string& s, int& length, 00160 std::vector<int>& offsets); 00161 00162 00163 } // namespace utf8 00164 } // namespace gnash 00165 00166 #endif // UTF8_H 00167 00168 00169 // Local Variables: 00170 // mode: C++ 00171 // c-basic-offset: 8 00172 // tab-width: 8 00173 // indent-tabs-mode: t 00174 // End: