summary | refs | log | tree | commit | diff
path: root/server/src/saxparser.cc
diff options
context:
space:
mode:
Diffstat (limited to 'server/src/saxparser.cc')
-rw-r--r-- server/src/saxparser.cc 226
1 files changed, 213 insertions, 13 deletions
diff --git a/server/src/saxparser.cc b/server/src/saxparser.cc
index f728928..ee03de1 100644
--- a/server/src/saxparser.cc
+++ b/server/src/saxparser.cc
@@ -92,7 +92,7 @@ SAXParser::SAXParser()
SAXParser::~SAXParser()
{
- XML_ParserFree(p);
+ if(p) XML_ParserFree(p); // Only hand the parser handle to expat when it is non-null.
}
int SAXParser::parse()
@@ -113,6 +113,14 @@ int SAXParser::parse()
return 0;
}
+/**
+ * Check whether a buffer holds nothing but whitespace.
+ *
+ * Whitespace here means space, newline, tab or carriage return.
+ * The buffer is only read, so it is taken as const.
+ *
+ * @param buf  Start of the bytes to inspect (need not be NUL terminated).
+ * @param size Number of bytes to inspect.
+ * @return true if every inspected byte is whitespace (also for size == 0),
+ *         false as soon as any other byte is found.
+ */
+static bool iswhitespace(const char *buf, size_t size)
+{
+  for(size_t i = 0; i < size; i++) {
+    switch(buf[i]) {
+    case ' ':
+    case '\n':
+    case '\t':
+    case '\r':
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
bool SAXParser::parse(char *data, size_t size)
{
PRACRO_DEBUG(sax, "parse %d bytes\n", size);
@@ -122,6 +130,8 @@ bool SAXParser::parse(char *data, size_t size)
if(! XML_Parse(p, data, size, false) ) {
if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true;
+ if(done && XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true;
parseError(data, size, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p));
return false;
}
@@ -129,6 +139,8 @@ bool SAXParser::parse(char *data, size_t size)
if(done) {
if(! XML_Parse(p, data, 0, true) ) {
if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true;
parseError(data, 0, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p));
return false;
}
@@ -155,17 +167,58 @@ unsigned int SAXParser::usedBytes()
#ifdef TEST_SAXPARSER
-/**
- * Compile with: g++ -DTEST_SAXPARSER sax_parser.cc -lexpat -otext_saxparser
- * Run with: ./test_saxparser [xmlfile]
- */
+
+#define XMLFILE "/tmp/saxparsertest.xml"
+
+#include "exception.h"
+
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-
-class MyParser :public SAXParser {
+#include <string.h>
+#include <stdio.h>
+#include <memory.h>
+
+static char xml[] = // Well-formed document followed by trailing whitespace after </pracro>.
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n"
+" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n"
+" <field value=\"Some docs\" name=\"referral.doctor\"/>\n"
+" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n"
+" <field value=\"Avs\" name=\"referral.diagnose\"/>\n"
+" </commit>\n"
+"</pracro>\n \t\n\r"
+ ;
+
+static char xml_notrailingwhitespace[] = // Same document as xml, but ending right after </pracro>.
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n"
+" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n"
+" <field value=\"Some docs\" name=\"referral.doctor\"/>\n"
+" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n"
+" <field value=\"Avs\" name=\"referral.diagnose\"/>\n"
+" </commit>\n"
+"</pracro>"
+ ;
+
+static char xml_fail[] = // Malformed input for the failure test: the user attribute lacks its '='.
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user\"testuser\" cpr=\"1505050505\">\n"
+" <request macro=\"test\" course=\"test\"/>\n"
+"</pracro>\n"
+ ;
+
+static char xml_fail2[] = // Same malformed document as xml_fail, plus a line of text after </pracro>.
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user\"testuser\" cpr=\"1505050505\">\n"
+" <request macro=\"test\" course=\"test\"/>\n"
+"</pracro>\n"
+"this is junk\n"
+ ;
+
+class MyFileParser :public SAXParser {
public:
- MyParser(char *file) {
+ MyFileParser(const char *file) {
fd = open(file, O_RDONLY);
}
@@ -175,17 +228,164 @@ public:
void startTag(std::string name, std::map< std::string, std::string> attributes)
{
- printf("<%s>\n", name.c_str());
+ //printf("<%s>\n", name.c_str());
+ }
+
+ void parseError(char *buf, size_t len, std::string error, int lineno)
+ {
+ throw Exception(error);
}
private:
int fd;
};
-int main(int argc, char *argv[]) {
- if(argc < 2) return 1;
- MyParser parser(argv[1]);
- parser.parse();
+/**
+ * SAXParser subclass driven through parse(char*, size_t) by the tests in
+ * main(). Start tags are ignored and every reported parse error is turned
+ * into an Exception carrying the error text.
+ */
+class MyBufferParser : public SAXParser
+{
+public:
+  void startTag(std::string name, std::map< std::string, std::string> attributes)
+  {
+    // Intentionally empty: nothing is done with the tags.
+  }
+
+  void parseError(char *buf, size_t len, std::string error, int lineno)
+  {
+    // Report the failure to the caller by throwing the error message.
+    throw Exception(error);
+  }
+};
+
+/**
+ * Overwrite XMLFILE with `copies` back-to-back copies of `contents`.
+ *
+ * @return true on success; false (after printing a message) if the file
+ *         could not be opened for writing.
+ */
+static bool writeTestFile(const char *contents, int copies = 1)
+{
+  FILE *fp = fopen(XMLFILE, "w");
+  if(!fp) {
+    printf("Could not write to %s\n", XMLFILE);
+    return false;
+  }
+  // fputs, not fprintf: the test data must never be used as a format string.
+  for(int i = 0; i < copies; i++) fputs(contents, fp);
+  fclose(fp);
+  return true;
+}
+
+/**
+ * Read `fp` from the start in chunks of at most `sz` bytes, feed the chunks
+ * to MyBufferParser instances and count how many times parse() reports
+ * success. A fresh parser is created after each success.
+ *
+ * @return the number of successful parses.
+ */
+static size_t countDocuments(FILE *fp, size_t sz)
+{
+  rewind(fp);
+
+  size_t numdocs = 0;
+  MyBufferParser *parser = NULL;
+  char *buf = new char[sz + 1];
+  memset(buf, 0, sz + 1);
+
+  size_t size;
+  while( (size = fread(buf, 1, sz, fp)) > 0) {
+    while(size) {
+      if(parser == NULL) parser = new MyBufferParser();
+
+      if(parser->parse(buf, size)) {
+        // Got one
+        numdocs++;
+
+        size_t used = parser->usedBytes();
+        // Never step past the bytes actually held in buf.
+        // NOTE(review): assumes usedBytes() is relative to this chunk - confirm.
+        if(used > size) used = size;
+        size -= used;
+
+        // Source and destination overlap, so memmove (not strcpy) is required.
+        memmove(buf, buf + used, size);
+
+        delete parser;
+        parser = NULL;
+      } else {
+        size = 0;
+        memset(buf, 0, sz + 1);
+      }
+    }
+  }
+
+  delete parser; // deleting NULL is a no-op
+  delete[] buf;
+  return numdocs;
+}
+
+/**
+ * Parse XMLFILE with MyFileParser and expect an Exception.
+ *
+ * @return true if parsing threw; false (after printing a message) if the
+ *         parse unexpectedly completed.
+ */
+static bool fileParseFails()
+{
+  MyFileParser parser(XMLFILE);
+  try {
+    parser.parse();
+  } catch(Exception &e) {
+    return true;
+  }
+  printf("This test should fail...\n");
+  return false;
+}
+
+/**
+ * Self test for SAXParser. Returns 0 when every check passes and 1 on the
+ * first failing check. XMLFILE is used as scratch file and removed on success.
+ */
+int main(int argc, char *argv[])
+{
+  if(!writeTestFile(xml)) return 1;
+
+  // Test callback parser
+  {
+    MyFileParser parser(XMLFILE);
+    parser.parse();
+  }
+
+  // Test buffer parser: feed the document in slices of every size 1..999.
+  for(size_t sz = 1; sz < 1000; sz++) {
+    bool test = false;
+    MyBufferParser parser;
+    std::string buf = xml;
+    size_t pos = 0;
+    while(pos < buf.length()) {
+      std::string substr = buf.substr(pos, sz);
+
+      try {
+        test |= parser.parse(const_cast<char*>(substr.c_str()), substr.length());
+      } catch(Exception &e) {
+        printf("Buffer parser failed on size %lu: %s [%s]\n",
+               (unsigned long)sz, e.what(), substr.c_str());
+      }
+      pos += sz;
+    }
+
+    if(!test) {
+      printf("Buffer parser failed on size %lu\n", (unsigned long)sz);
+      return 1;
+    }
+  }
+
+  // Test buffer parser with multiple documents in the same buffer
+  if(!writeTestFile(xml_notrailingwhitespace, 2)) return 1;
+  {
+    FILE *fp = fopen(XMLFILE, "r");
+    if(!fp) {
+      printf("Could not read from %s\n", XMLFILE);
+      return 1;
+    }
+
+    bool ok = true;
+    for(size_t sz = 1; ok && sz < 1000; sz++) {
+      ok = (countDocuments(fp, sz) == 2);
+    }
+    fclose(fp); // closed on the failure path as well
+
+    if(!ok) {
+      printf("Failed to parse two documents.\n");
+      return 1;
+    }
+  }
+
+  // Test failure
+  if(!writeTestFile(xml_fail)) return 1;
+  if(!fileParseFails()) return 1;
+
+  // Test failure
+  if(!writeTestFile(xml_fail2)) return 1;
+  if(!fileParseFails()) return 1;
+
+  unlink(XMLFILE);
+  return 0;
}
#endif/*TEST_SAXPARSER*/