From 73f3fb396dc2ba089b4e32b0bd63dc3e615f8466 Mon Sep 17 00:00:00 2001 From: deva Date: Mon, 20 Apr 2009 09:50:30 +0000 Subject: Added utf8 decode. --- server/src/Makefile.am | 2 + server/src/queryparser.cc | 2 +- server/src/utf8.cc | 374 ++++++++++++++++++++++++++++++++++++++++++++++ server/src/utf8.h | 96 ++++++++++++ 4 files changed, 473 insertions(+), 1 deletion(-) create mode 100644 server/src/utf8.cc create mode 100644 server/src/utf8.h diff --git a/server/src/Makefile.am b/server/src/Makefile.am index 086905a..78c2fce 100644 --- a/server/src/Makefile.am +++ b/server/src/Makefile.am @@ -29,6 +29,7 @@ pracrod_SOURCES = \ templateparser.cc \ transactionparser.cc \ tcpsocket.cc \ + utf8.cc \ widgetgenerator.cc \ xml_encode_decode.cc @@ -57,6 +58,7 @@ EXTRA_DIST = \ templateparser.h \ transactionparser.h \ tcpsocket.h \ + utf8.h \ widgetgenerator.h \ xml_encode_decode.h diff --git a/server/src/queryparser.cc b/server/src/queryparser.cc index f8d4a09..76b24a6 100644 --- a/server/src/queryparser.cc +++ b/server/src/queryparser.cc @@ -59,7 +59,7 @@ void QueryParser::startTag(std::string name, std::map< std::string, std::string> } if(name == "value") { - stack.back()->values[attributes["name"]] = attributes["value"]; + stack.back()->values[attributes["name"]] = utf8.decode(attributes["value"]); } } diff --git a/server/src/utf8.cc b/server/src/utf8.cc new file mode 100644 index 0000000..2909a94 --- /dev/null +++ b/server/src/utf8.cc @@ -0,0 +1,374 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/*************************************************************************** + * utf8.cc + * + * Tue Feb 27 19:18:23 CET 2007 + * Copyright 2006 Bent Bisballe Nyeng + * deva@aasimon.org + ****************************************************************************/ + +/* + * This file is part of Artefact. 
+ *
+ * Artefact is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Artefact is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Artefact; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "utf8.h"
+
+#include "debug.h"
+
+#include <stdio.h>
+
+namespace {
+
+/**
+ * Unicode code points for the native bytes 0x80-0x9F.
+ * The hand-written table this replaces used the Windows-1252 characters
+ * (Euro sign, curly quotes, dashes, ...) for these bytes instead of the
+ * ISO-8859-1 C1 controls; that mapping is kept.
+ * NOTE(review): the five bytes Windows-1252 leaves unassigned (0x81, 0x8D,
+ * 0x8F, 0x90, 0x9D) are mapped to the C1 control with the same value, so that
+ * every byte stays encodable - confirm this matches the intended table.
+ */
+const unsigned int highBlockCodepoints[32] = {
+  0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
+  0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178
+};
+
+/**
+ * Serialize one code point as a UTF-8 byte sequence.
+ * Only code points up to U+FFFF are handled (1 to 3 bytes), which covers
+ * every entry needed by the tables built below.
+ */
+std::string codepointToUtf8(unsigned int cp)
+{
+  std::string out;
+
+  if(cp <= 0x7F) {
+    out += static_cast<char>(cp);
+  } else if(cp <= 0x7FF) {
+    out += static_cast<char>(0xC0 | (cp >> 6));
+    out += static_cast<char>(0x80 | (cp & 0x3F));
+  } else {
+    out += static_cast<char>(0xE0 | (cp >> 12));
+    out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+    out += static_cast<char>(0x80 | (cp & 0x3F));
+  }
+
+  return out;
+}
+
+} // namespace
+
+/**
+ * Build the native-byte <-> UTF-8 lookup tables.
+ * @param encoding Name of the native charset. Only "ISO-8859-1" is accepted;
+ * any other name throws UTF8CreateException.
+ */
+UTF8::UTF8(std::string encoding)
+  throw(UTF8CreateException)
+{
+  this->encoding = encoding;
+  if(encoding != "ISO-8859-1") throw UTF8CreateException("Encoding not implemented.");
+
+  // The tables are generated from code points rather than written as string
+  // literals containing raw 8-bit bytes, so they do not depend on the
+  // encoding the source file happens to be stored or transported in.
+  for(unsigned int byte = 0x80; byte <= 0xFF; byte++) {
+    // 0xA0-0xFF map to the code point with the same value.
+    const unsigned int cp = byte < 0xA0 ? highBlockCodepoints[byte - 0x80] : byte;
+
+    const std::string native(1, static_cast<char>(byte));
+    const std::string utf = codepointToUtf8(cp);
+
+    map_encode[native] = utf;
+    map_decode[utf] = native;
+  }
+}
+
+/**
+ * Convert a string in the native charset to UTF-8.
+ * Bytes 0x00-0x7F are copied unchanged; every other byte is looked up in
+ * map_encode.
+ * @param s The native string.
+ * @return The UTF-8 encoded string.
+ * Throws UTF8EncodeException if a byte has no table entry.
+ */
+std::string UTF8::encode(std::string s)
+  throw(UTF8EncodeException)
+{
+  std::string ret;
+
+  for(std::string::size_type i = 0; i < s.length(); i++) {
+    if(static_cast<unsigned char>(s[i]) <= 0x7F) {
+      ret.append(1, s[i]);
+      continue;
+    }
+
+    // find() instead of operator[]: a failed lookup must not insert an entry.
+    const std::map< std::string, std::string >::const_iterator hit =
+      map_encode.find(s.substr(i, 1));
+    if(hit == map_encode.end() || hit->second.length() == 0) {
+      throw UTF8EncodeException("Unknown character in string");
+    }
+
+    ret.append(hit->second);
+  }
+
+  return ret;
+}
+
+/**
+ * Convert a UTF-8 string to the native charset.
+ * The width of each sequence is taken from its lead byte; single bytes are
+ * copied unchanged and multi-byte sequences are looked up in map_decode.
+ * @param s The UTF-8 string.
+ * @return The decoded string.
+ * Throws UTF8DecodeException on a byte that cannot start a sequence, or on
+ * a sequence (including a truncated one) with no table entry.
+ */
+std::string UTF8::decode(std::string s)
+  throw(UTF8DecodeException)
+{
+  std::string ret;
+
+  std::string::size_type i = 0;
+  while(i < s.length()) {
+    const unsigned char lead = static_cast<unsigned char>(s[i]);
+
+    std::string::size_type width;
+    if(lead <= 0x7F) width = 1;                      // 00-7F 1 byte
+    else if(lead >= 0xC2 && lead <= 0xDF) width = 2; // C2-DF 2 bytes
+    else if(lead >= 0xE0 && lead <= 0xEF) width = 3; // E0-EF 3 bytes
+    else if(lead >= 0xF0 && lead <= 0xF4) width = 4; // F0-F4 4 bytes
+    else {
+      // 80-C1 and F5-FF never start a sequence. Previously the width of the
+      // preceding character was silently reused for these bytes.
+      throw UTF8DecodeException("Unknown character in string");
+    }
+
+    if(width == 1) {
+      ret.append(1, s[i]);
+    } else {
+      // substr() clips at the end of the string, so a truncated sequence
+      // simply fails the lookup. find() keeps unknown input from adding
+      // entries to the table.
+      const std::map< std::string, std::string >::const_iterator hit =
+        map_decode.find(s.substr(i, width));
+      if(hit == map_decode.end() || hit->second.length() == 0) {
+        throw UTF8DecodeException("Unknown character in string");
+      }
+      ret.append(hit->second);
+    }
+
+    i += width;
+  }
+
+  return ret;
+}
+
+#ifdef TEST_UTF8
+
+// Self test: encoding twice and then decoding twice must give back the input.
+int main()
+{
+  try {
+    UTF8 utf8("ISO-8859-1");
+
+    std::string a = "AaBb";
+    printf("a [%s]\n", a.c_str());
+    std::string b = utf8.encode(a);
+    printf("b [%s]\n", b.c_str());
+    b = utf8.encode(b);
+    printf("b [%s]\n", b.c_str());
+    std::string c = utf8.decode(b);
+    printf("c [%s]\n", c.c_str());
+    c = utf8.decode(c);
+    printf("c [%s]\n", c.c_str());
+
+    if(a == c) return 0;
+    else return 1;
+  } catch( Exception &e ) { // the UTF8 exceptions derive from the unqualified Exception
+    fprintf(stderr, "%s\n", e.what());
+    return 1;
+  }
+
+  return 0;
+}
+
+#endif//TEST_UTF8
diff --git a/server/src/utf8.h b/server/src/utf8.h
new file mode 100644
index 0000000..98f6ff9
--- /dev/null
+++ b/server/src/utf8.h
@@ -0,0 +1,96 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ * utf8.h
+ *
+ * Tue Feb 27 19:18:23 CET 2007
+ * Copyright 2006 Bent Bisballe Nyeng
+ * deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ * This file is part of Artefact.
+ *
+ * Artefact is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Artefact is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Artefact; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __ARTEFACT_UTF8_H__
+#define __ARTEFACT_UTF8_H__
+
+#include <string>
+#include <map>
+
+#include "exception.h"
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to initialize.
+ */
+class UTF8CreateException: public Exception {
+public:
+  UTF8CreateException(std::string reason) :
+    Exception("Error during creation of the UTF8 subsystem: " + reason) {}
+};
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to encode the given string.
+ */
+class UTF8EncodeException: public Exception {
+public:
+  UTF8EncodeException(std::string reason) :
+    Exception("Error during UTF8 encoding: " + reason) {}
+};
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to decode the given string.
+ */
+class UTF8DecodeException: public Exception {
+public:
+  UTF8DecodeException(std::string reason) :
+    Exception("Error during UTF8 decoding: " + reason) {}
+};
+
+/**
+ * UTF-8 handler class.\n
+ * It is used to convert between UTF-8 and some native charset. The default
+ * is ISO-8859-1. (Currently only the ISO-8859-1 charset is implemented!)
+ */
+class UTF8 {
+public:
+  /**
+   * Constructor. Throws UTF8CreateException if the charset is not implemented.
+   * @param encoding A string naming the native charset. Default is ISO-8859-1.
+   */
+  UTF8(std::string encoding = "ISO-8859-1") throw(UTF8CreateException);
+
+  /**
+   * Encode a string from native encoding to UTF-8 (throws UTF8EncodeException on failure).
+   * @param s The string to encode.
+   * @return The UTF-8 encoded string.
+   */
+  std::string encode(std::string s) throw(UTF8EncodeException);
+
+  /**
+   * Decode a string from UTF-8 to native encoding (throws UTF8DecodeException on failure).
+   * @param s The UTF-8 string to decode.
+   * @return The decoded string.
+   */
+  std::string decode(std::string s) throw(UTF8DecodeException);
+
+private:
+  std::string encoding; ///< Name of the native charset given to the constructor.
+
+  std::map< std::string, std::string > map_encode; ///< Native character -> UTF-8 sequence.
+  std::map< std::string, std::string > map_decode; ///< UTF-8 sequence -> native character.
+};
+
+#endif/*__ARTEFACT_UTF8_H__*/
-- 
cgit v1.2.3