From b8ea04e415d584803a240e9e18d24c895052f4b0 Mon Sep 17 00:00:00 2001 From: deva Date: Wed, 29 Jul 2009 15:04:13 +0000 Subject: Added testscripts for SAXParser. Added SAXParser documentation. --- server/src/saxparser.cc | 226 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 213 insertions(+), 13 deletions(-) (limited to 'server/src/saxparser.cc') diff --git a/server/src/saxparser.cc b/server/src/saxparser.cc index f728928..ee03de1 100644 --- a/server/src/saxparser.cc +++ b/server/src/saxparser.cc @@ -92,7 +92,7 @@ SAXParser::SAXParser() SAXParser::~SAXParser() { - XML_ParserFree(p); + if(p) XML_ParserFree(p); } int SAXParser::parse() @@ -113,6 +113,14 @@ int SAXParser::parse() return 0; } +static bool iswhitespace(char *buf, size_t size) +{ + for(size_t i = 0; i < size; i++) + if(buf[i] != ' ' && buf[i] != '\n' && buf[i] != '\t' && buf[i] != '\r') + return false; + return true; +} + bool SAXParser::parse(char *data, size_t size) { PRACRO_DEBUG(sax, "parse %d bytes\n", size); @@ -122,6 +130,8 @@ bool SAXParser::parse(char *data, size_t size) if(! XML_Parse(p, data, size, false) ) { if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true; + if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true; + if(done && XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true; parseError(data, size, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p)); return false; } @@ -129,6 +139,8 @@ bool SAXParser::parse(char *data, size_t size) if(done) { if(! XML_Parse(p, data, 0, true) ) { if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true; + if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true; + if(XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true; parseError(data, 0, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p)); return false; } @@ -155,17 +167,58 @@ unsigned int SAXParser::usedBytes() #ifdef TEST_SAXPARSER -/** - * Compile with: g++ -DTEST_SAXPARSER sax_parser.cc -lexpat -otext_saxparser - * Run with: ./test_saxparser [xmlfile] - */ + +#define XMLFILE "/tmp/saxparsertest.xml" + +#include "exception.h" + #include #include #include - -class MyParser :public SAXParser { +#include +#include +#include + +static char xml[] = +"\n" +"\n" +" \n" +" \n" +" \n" +" \n" +" \n" +"\n \t\n\r" + ; + +static char xml_notrailingwhitespace[] = +"\n" +"\n" +" \n" +" \n" +" \n" +" \n" +" \n" +"" + ; + +static char xml_fail[] = +"\n" +"\n" +" \n" +"\n" + ; + +static char xml_fail2[] = +"\n" +"\n" +" \n" +"\n" +"this is junk\n" + ; + +class MyFileParser :public SAXParser { public: - MyParser(char *file) { + MyFileParser(const char *file) { fd = open(file, O_RDONLY); } @@ -175,17 +228,164 @@ public: void startTag(std::string name, std::map< std::string, std::string> attributes) { - printf("<%s>\n", name.c_str()); + //printf("<%s>\n", name.c_str()); + } + + void parseError(char *buf, size_t len, std::string error, int lineno) + { + throw Exception(error); } private: int fd; }; -int main(int argc, char *argv[]) { - if(argc < 2) return 1; - MyParser parser(argv[1]); - parser.parse(); +class MyBufferParser :public SAXParser { +public: + void startTag(std::string name, std::map< std::string, std::string> attributes) + { + //printf("<%s>\n", name.c_str()); + } + + void parseError(char *buf, size_t len, std::string error, int lineno) + { + throw Exception(error); + } +}; + +int main(int argc, char *argv[]) +{ + FILE *fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml); + fclose(fp); + + // Test callback parser + { + MyFileParser parser(XMLFILE); + parser.parse(); + } + + // Test buffer parser + for(size_t sz = 1; sz < 1000; sz++) { + bool test = false; + MyBufferParser parser; + std::string buf = xml; + size_t pos = 0; + while(pos < buf.length()) { + std::string substr = buf.substr(pos, sz); + + try { + test |= parser.parse((char*)substr.c_str(), substr.length()); + } catch(Exception &e) { + printf("Buffer parser failed on size %d: %s [%s]\n", sz, e.what(), substr.c_str()); + } + pos += sz; + } + + if(!test) { + printf("Buffer parser failed on size %d\n", sz); + return 1; + } + } + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_notrailingwhitespace); + fprintf(fp, xml_notrailingwhitespace); + fclose(fp); + // Test buffer parser with multiple documents in the same buffer + { + fp = fopen(XMLFILE, "r"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + for(size_t sz = 1; sz < 1000; sz++) { + MyBufferParser *parser = NULL; + rewind(fp); + size_t numdocs = 0; + char *buf = new char[sz + 1]; + memset(buf, 0, sz + 1); + size_t size; + while( (size = fread(buf, 1, sz, fp)) > 0) { + while(size) { + if(parser == NULL) { + parser = new MyBufferParser(); + } + if(parser->parse(buf, size)) { + + // Got one + numdocs++; + + size = size - parser->usedBytes(); + strcpy(buf, buf + parser->usedBytes()); + delete parser; parser = NULL; + } else { + size = 0; + memset(buf, 0, sz + 1); + } + } + } + if(numdocs != 2) { + printf("Failed to parse two documents.\n"); + return 1; + } + if(parser) delete parser; parser = NULL; + delete[] buf; + } + fclose(fp); + } + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_fail); + fclose(fp); + + // Test failure + { + MyFileParser parser(XMLFILE); + try { + parser.parse(); + } catch(Exception &e) { + goto goon; + } + printf("This test should fail...\n"); + return 1; + } + goon: + + fp = fopen(XMLFILE, "w"); + if(!fp) { + printf("Could not write to %s\n", XMLFILE); + return 1; + } + fprintf(fp, xml_fail2); + fclose(fp); + + // Test failure + { + MyFileParser parser(XMLFILE); + try { + parser.parse(); + } catch(Exception &e) { + goto goonagain; + } + printf("This test should fail...\n"); + return 1; + } + goonagain: + + unlink(XMLFILE); } #endif/*TEST_SAXPARSER*/ -- cgit v1.2.3