summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordeva <deva>2009-04-20 09:50:30 +0000
committerdeva <deva>2009-04-20 09:50:30 +0000
commit73f3fb396dc2ba089b4e32b0bd63dc3e615f8466 (patch)
treefc7aba453c7dbec309aef32991684c91cd51083f
parent46983af88f19c184c9e0ac96ebcc62827d73b944 (diff)
Added utf8 decode.
-rw-r--r--server/src/Makefile.am2
-rw-r--r--server/src/queryparser.cc2
-rw-r--r--server/src/utf8.cc374
-rw-r--r--server/src/utf8.h96
4 files changed, 473 insertions, 1 deletions
diff --git a/server/src/Makefile.am b/server/src/Makefile.am
index 086905a..78c2fce 100644
--- a/server/src/Makefile.am
+++ b/server/src/Makefile.am
@@ -29,6 +29,7 @@ pracrod_SOURCES = \
templateparser.cc \
transactionparser.cc \
tcpsocket.cc \
+ utf8.cc \
widgetgenerator.cc \
xml_encode_decode.cc
@@ -57,6 +58,7 @@ EXTRA_DIST = \
templateparser.h \
transactionparser.h \
tcpsocket.h \
+ utf8.h \
widgetgenerator.h \
xml_encode_decode.h
diff --git a/server/src/queryparser.cc b/server/src/queryparser.cc
index f8d4a09..76b24a6 100644
--- a/server/src/queryparser.cc
+++ b/server/src/queryparser.cc
@@ -59,7 +59,7 @@ void QueryParser::startTag(std::string name, std::map< std::string, std::string>
}
if(name == "value") {
- stack.back()->values[attributes["name"]] = attributes["value"];
+ stack.back()->values[attributes["name"]] = utf8.decode(attributes["value"]);
}
}
diff --git a/server/src/utf8.cc b/server/src/utf8.cc
new file mode 100644
index 0000000..2909a94
--- /dev/null
+++ b/server/src/utf8.cc
@@ -0,0 +1,374 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ * utf8.cc
+ *
+ * Tue Feb 27 19:18:23 CET 2007
+ * Copyright 2006 Bent Bisballe Nyeng
+ * deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ * This file is part of Artefact.
+ *
+ * Artefact is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Artefact is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Artefact; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "utf8.h"
+
+#include "debug.h"
+
+#include <errno.h>
+
+/**
+ * Construct a converter between the native charset and UTF-8.
+ *
+ * Only ISO-8859-1 is implemented. The constructor fills map_encode
+ * (native byte -> UTF-8 sequence) and map_decode (UTF-8 sequence -> native
+ * byte) for the 128 non-ASCII bytes 0x80-0xFF; ASCII is handled directly by
+ * encode() and decode() and needs no table entry.
+ *
+ * @param encoding Name of the native charset; must be "ISO-8859-1".
+ * @throws UTF8CreateException if any other charset name is given.
+ */
+UTF8::UTF8(std::string encoding)
+  throw(UTF8CreateException)
+{
+  this->encoding = encoding;
+  if(encoding != "ISO-8859-1") throw UTF8CreateException("Encoding not implemented.");
+
+  // An ISO-8859-1 byte value is identical to its Unicode code point, and
+  // every code point in U+0080..U+00FF is encoded in UTF-8 as exactly two
+  // bytes: 110000xx 10xxxxxx.
+  //
+  // The tables are generated from that rule instead of being spelled out as
+  // string literals: literals holding raw high-bit bytes silently change
+  // meaning (or collapse to empty strings) as soon as the source file is
+  // stored or transferred in a different character encoding.
+  for(int code = 0x80; code <= 0xFF; code++) {
+    std::string native(1, static_cast<char>(code));
+
+    std::string sequence;
+    sequence += static_cast<char>(0xC0 | (code >> 6));   // lead byte
+    sequence += static_cast<char>(0x80 | (code & 0x3F)); // continuation byte
+
+    map_encode[native] = sequence;
+    map_decode[sequence] = native;
+  }
+}
+
+/**
+ * Encode a string from the native charset to UTF-8.
+ *
+ * Bytes in the range 0x00-0x7F are copied unchanged. Every other byte is
+ * replaced by the sequence stored for it in map_encode.
+ *
+ * @param s The native-encoded string.
+ * @return The UTF-8 encoded string.
+ * @throws UTF8EncodeException if a byte above 0x7F has no (or an empty)
+ *         entry in map_encode.
+ */
+std::string UTF8::encode(std::string s)
+  throw(UTF8EncodeException)
+{
+  std::string ret;
+  ret.reserve(s.length()); // lower bound; non-ASCII bytes grow the result
+
+  for(std::string::size_type i = 0; i < s.length(); i++) {
+    if(static_cast<unsigned char>(s[i]) <= 0x7F) {
+      ret.append(1, s[i]);
+      continue;
+    }
+
+    // find() instead of operator[]: a failed lookup must not insert an
+    // empty entry into the table as a side effect.
+    std::map< std::string, std::string >::const_iterator hit =
+      map_encode.find(s.substr(i, 1));
+
+    if(hit == map_encode.end() || hit->second.length() == 0) {
+      throw UTF8EncodeException("Unknown character in string");
+    }
+
+    ret.append(hit->second);
+  }
+
+  return ret;
+}
+
+/**
+ * Decode a string from UTF-8 to the native charset.
+ *
+ * The width of each character is taken from its lead byte. Single-byte
+ * (ASCII) characters are copied unchanged; every multi-byte sequence is
+ * replaced by the value stored for it in map_decode.
+ *
+ * @param s The UTF-8 string to decode.
+ * @return The decoded string.
+ * @throws UTF8DecodeException if a byte cannot start a UTF-8 sequence
+ *         (0x80-0xC1, 0xF5-0xFF), or if a multi-byte sequence (including one
+ *         truncated by the end of the string) has no entry in map_decode.
+ */
+std::string UTF8::decode(std::string s)
+  throw(UTF8DecodeException)
+{
+  std::string ret;
+  ret.reserve(s.length()); // the decoded string is never longer than s
+
+  std::string::size_type i = 0;
+  while(i < s.length()) {
+    const unsigned char lead = static_cast<unsigned char>(s[i]);
+
+    // The width is decided afresh for every character. A byte that matches
+    // none of the ranges is a stray continuation byte or an invalid lead
+    // byte and is rejected, rather than being processed with whatever width
+    // the previous character happened to have.
+    std::string::size_type width;
+    if(lead <= 0x7F) width = 1;                      // 00-7F 1 byte
+    else if(lead >= 0xC2 && lead <= 0xDF) width = 2; // C2-DF 2 bytes
+    else if(lead >= 0xE0 && lead <= 0xEF) width = 3; // E0-EF 3 bytes
+    else if(lead >= 0xF0 && lead <= 0xF4) width = 4; // F0-F4 4 bytes
+    else throw UTF8DecodeException("Unknown character in string");
+
+    if(width == 1) {
+      ret.append(1, s[i]);
+    } else {
+      // substr() clamps at the end of the string, so a truncated sequence
+      // simply fails the lookup. find() is used instead of operator[] so
+      // that unknown sequences do not get inserted into the table.
+      std::map< std::string, std::string >::const_iterator hit =
+        map_decode.find(s.substr(i, width));
+
+      if(hit == map_decode.end() || hit->second.length() == 0) {
+        throw UTF8DecodeException("Unknown character in string");
+      }
+
+      ret.append(hit->second);
+    }
+
+    i += width;
+  }
+
+  return ret;
+}
+
+#ifdef TEST_UTF8
+
+/**
+ * Self-test, compiled only when TEST_UTF8 is defined.
+ *
+ * Encodes an ASCII string twice, decodes the result twice, and checks that
+ * the round trip yields the original string.
+ *
+ * @return 0 if the round trip reproduces the input, 1 if it does not or if
+ *         any UTF8 operation throws.
+ */
+int main()
+{
+  try {
+    UTF8 utf8("ISO-8859-1");
+
+    std::string a = "AaBb";
+    printf("a [%s]\n", a.c_str());
+
+    std::string b = utf8.encode(a);
+    printf("b [%s]\n", b.c_str());
+    b = utf8.encode(b);
+    printf("b [%s]\n", b.c_str());
+
+    std::string c = utf8.decode(b);
+    printf("c [%s]\n", c.c_str());
+    c = utf8.decode(c);
+    printf("c [%s]\n", c.c_str());
+
+    return (a == c) ? 0 : 1;
+  } catch( Exception &e ) {
+    // The UTF8 exception classes derive from the unqualified Exception
+    // declared via utf8.h, so that is the type to catch here.
+    fprintf(stderr, "%s\n", e.what());
+    return 1;
+  }
+}
+
+#endif//TEST_UTF8
diff --git a/server/src/utf8.h b/server/src/utf8.h
new file mode 100644
index 0000000..98f6ff9
--- /dev/null
+++ b/server/src/utf8.h
@@ -0,0 +1,96 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/***************************************************************************
+ * utf8.h
+ *
+ * Tue Feb 27 19:18:23 CET 2007
+ * Copyright 2006 Bent Bisballe Nyeng
+ * deva@aasimon.org
+ ****************************************************************************/
+
+/*
+ * This file is part of Artefact.
+ *
+ * Artefact is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Artefact is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Artefact; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __ARTEFACT_UTF8_H__
+#define __ARTEFACT_UTF8_H__
+
+#include <string>
+#include <map>
+
+#include "exception.h"
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to initialize.
+ *
+ * The reason passed to the constructor is appended to the fixed prefix
+ * "Error during creation of the UTF8 subsystem: " and the combined message
+ * is handed to the Exception base class.
+ */
+class UTF8CreateException: public Exception {
+public:
+ UTF8CreateException(std::string reason) :
+ Exception("Error during creation of the UTF8 subsystem: " + reason) {}
+};
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to encode the
+ * given string.
+ *
+ * The reason passed to the constructor is appended to the fixed prefix
+ * "Error during UTF8 encoding: " and the combined message is handed to the
+ * Exception base class.
+ */
+class UTF8EncodeException: public Exception {
+public:
+ UTF8EncodeException(std::string reason) :
+ Exception("Error during UTF8 encoding: " + reason) {}
+};
+
+/**
+ * This exception is thrown by UTF8 when the subsystem fails to decode the
+ * given string.
+ *
+ * The reason passed to the constructor is appended to the fixed prefix
+ * "Error during UTF8 decoding: " and the combined message is handed to the
+ * Exception base class.
+ */
+class UTF8DecodeException: public Exception {
+public:
+ UTF8DecodeException(std::string reason) :
+ Exception("Error during UTF8 decoding: " + reason) {}
+};
+
+/**
+ * UTF-8 handler class.\n
+ * It is used to convert between UTF-8 and some native charset. The default
+ * is ISO-8859-1. (Currently only the ISO-8859-1 charset is implemented!)
+ *
+ * The object stores the charset name it was created with, plus two lookup
+ * tables: map_encode, consulted by encode(), and map_decode, consulted by
+ * decode().
+ */
+class UTF8 {
+public:
+ /**
+ * Constructor.
+ * @param encoding A string containing the native charset. Default is
+ * ISO-8859-1.
+ * @throws UTF8CreateException if the requested charset is not implemented.
+ */
+ UTF8(std::string encoding = "ISO-8859-1") throw(UTF8CreateException);
+
+ /**
+ * Encode a string from native encoding to UTF-8.
+ * @param s The string to encode.
+ * @return The UTF-8 encoded string.
+ * @throws UTF8EncodeException if the string contains a character that
+ * cannot be encoded.
+ */
+ std::string encode(std::string s) throw(UTF8EncodeException);
+
+ /**
+ * Decode a string from UTF-8 to native encoding.
+ * @param s The UTF-8 string to decode.
+ * @return The decoded string.
+ * @throws UTF8DecodeException if the string contains a character that
+ * cannot be decoded.
+ */
+ std::string decode(std::string s) throw(UTF8DecodeException);
+
+private:
+ std::string encoding;
+
+ std::map< std::string, std::string > map_encode;
+ std::map< std::string, std::string > map_decode;
+};
+
+#endif/*__ARTEFACT_UTF8_H__*/