summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--server/src/Makefile.am7
-rw-r--r--server/src/saxparser.cc226
-rw-r--r--server/src/saxparser.h82
3 files changed, 302 insertions, 13 deletions
diff --git a/server/src/Makefile.am b/server/src/Makefile.am
index dbc481a..f970117 100644
--- a/server/src/Makefile.am
+++ b/server/src/Makefile.am
@@ -102,6 +102,7 @@ EXTRA_DIST = \
################
TESTFILES = \
+ test_saxparser \
test_versionstr \
test_macrolist \
test_queryhandlerpentominos \
@@ -132,6 +133,12 @@ test: $(TESTFILES)
test_clean:
rm -f $(TESTFILES) $(TESTLOGS)
+TEST_SAXPARSER_FILES = \
+ saxparser.cc \
+ $(BASICFILES)
+test_saxparser: $(TEST_SAXPARSER_FILES)
+ @../../tools/test $(TEST_SAXPARSER_FILES) $(BASICFLAGS) $(PARSERFLAGS)
+
TEST_VERSIONSTR_FILES = \
versionstr.cc \
$(BASICFILES)
diff --git a/server/src/saxparser.cc b/server/src/saxparser.cc
index f728928..ee03de1 100644
--- a/server/src/saxparser.cc
+++ b/server/src/saxparser.cc
@@ -92,7 +92,7 @@ SAXParser::SAXParser()
SAXParser::~SAXParser()
{
- XML_ParserFree(p);
+ if(p) XML_ParserFree(p);
}
int SAXParser::parse()
@@ -113,6 +113,14 @@ int SAXParser::parse()
return 0;
}
+static bool iswhitespace(char *buf, size_t size)
+{
+ for(size_t i = 0; i < size; i++)
+ if(buf[i] != ' ' && buf[i] != '\n' && buf[i] != '\t' && buf[i] != '\r')
+ return false;
+ return true;
+}
+
bool SAXParser::parse(char *data, size_t size)
{
PRACRO_DEBUG(sax, "parse %d bytes\n", size);
@@ -122,6 +130,8 @@ bool SAXParser::parse(char *data, size_t size)
if(! XML_Parse(p, data, size, false) ) {
if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true;
+ if(done && XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true;
parseError(data, size, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p));
return false;
}
@@ -129,6 +139,8 @@ bool SAXParser::parse(char *data, size_t size)
if(done) {
if(! XML_Parse(p, data, 0, true) ) {
if(XML_GetErrorCode(p) == XML_ERROR_JUNK_AFTER_DOC_ELEMENT) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_FINISHED && iswhitespace(data, size)) return true;
+ if(XML_GetErrorCode(p) == XML_ERROR_UNCLOSED_TOKEN) return true;
parseError(data, 0, XML_ErrorString(XML_GetErrorCode(p)), (int)XML_GetCurrentLineNumber(p));
return false;
}
@@ -155,17 +167,58 @@ unsigned int SAXParser::usedBytes()
#ifdef TEST_SAXPARSER
-/**
- * Compile with: g++ -DTEST_SAXPARSER sax_parser.cc -lexpat -otext_saxparser
- * Run with: ./test_saxparser [xmlfile]
- */
+
+#define XMLFILE "/tmp/saxparsertest.xml"
+
+#include "exception.h"
+
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-
-class MyParser :public SAXParser {
+#include <string.h>
+#include <stdio.h>
+#include <memory.h>
+
+static char xml[] =
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n"
+" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n"
+" <field value=\"Some docs\" name=\"referral.doctor\"/>\n"
+" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n"
+" <field value=\"Avs\" name=\"referral.diagnose\"/>\n"
+" </commit>\n"
+"</pracro>\n \t\n\r"
+ ;
+
+static char xml_notrailingwhitespace[] =
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user=\"testuser\" cpr=\"1505050505\">\n"
+" <commit version=\"\" macro=\"referral\" course=\"amd_forunders\">\n"
+" <field value=\"Some docs\" name=\"referral.doctor\"/>\n"
+" <field value=\"DIMS\" name=\"referral.diagnosecode\"/>\n"
+" <field value=\"Avs\" name=\"referral.diagnose\"/>\n"
+" </commit>\n"
+"</pracro>"
+ ;
+
+static char xml_fail[] =
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user\"testuser\" cpr=\"1505050505\">\n"
+" <request macro=\"test\" course=\"test\"/>\n"
+"</pracro>\n"
+ ;
+
+static char xml_fail2[] =
+"<?xml version='1.0' encoding='UTF-8'?>\n"
+"<pracro version=\"1.0\" user\"testuser\" cpr=\"1505050505\">\n"
+" <request macro=\"test\" course=\"test\"/>\n"
+"</pracro>\n"
+"this is junk\n"
+ ;
+
+class MyFileParser :public SAXParser {
public:
- MyParser(char *file) {
+ MyFileParser(const char *file) {
fd = open(file, O_RDONLY);
}
@@ -175,17 +228,164 @@ public:
void startTag(std::string name, std::map< std::string, std::string> attributes)
{
- printf("<%s>\n", name.c_str());
+ //printf("<%s>\n", name.c_str());
+ }
+
+ void parseError(char *buf, size_t len, std::string error, int lineno)
+ {
+ throw Exception(error);
}
private:
int fd;
};
-int main(int argc, char *argv[]) {
- if(argc < 2) return 1;
- MyParser parser(argv[1]);
- parser.parse();
+class MyBufferParser :public SAXParser {
+public:
+ void startTag(std::string name, std::map< std::string, std::string> attributes)
+ {
+ //printf("<%s>\n", name.c_str());
+ }
+
+ void parseError(char *buf, size_t len, std::string error, int lineno)
+ {
+ throw Exception(error);
+ }
+};
+
+int main(int argc, char *argv[])
+{
+ FILE *fp = fopen(XMLFILE, "w");
+ if(!fp) {
+ printf("Could not write to %s\n", XMLFILE);
+ return 1;
+ }
+ fprintf(fp, xml);
+ fclose(fp);
+
+ // Test callback parser
+ {
+ MyFileParser parser(XMLFILE);
+ parser.parse();
+ }
+
+ // Test buffer parser
+ for(size_t sz = 1; sz < 1000; sz++) {
+ bool test = false;
+ MyBufferParser parser;
+ std::string buf = xml;
+ size_t pos = 0;
+ while(pos < buf.length()) {
+ std::string substr = buf.substr(pos, sz);
+
+ try {
+ test |= parser.parse((char*)substr.c_str(), substr.length());
+ } catch(Exception &e) {
+ printf("Buffer parser failed on size %d: %s [%s]\n", sz, e.what(), substr.c_str());
+ }
+ pos += sz;
+ }
+
+ if(!test) {
+ printf("Buffer parser failed on size %d\n", sz);
+ return 1;
+ }
+ }
+
+ fp = fopen(XMLFILE, "w");
+ if(!fp) {
+ printf("Could not write to %s\n", XMLFILE);
+ return 1;
+ }
+ fprintf(fp, xml_notrailingwhitespace);
+ fprintf(fp, xml_notrailingwhitespace);
+ fclose(fp);
+ // Test buffer parser with multiple documents in the same buffer
+ {
+ fp = fopen(XMLFILE, "r");
+ if(!fp) {
+ printf("Could not write to %s\n", XMLFILE);
+ return 1;
+ }
+ for(size_t sz = 1; sz < 1000; sz++) {
+ MyBufferParser *parser = NULL;
+ rewind(fp);
+ size_t numdocs = 0;
+ char *buf = new char[sz + 1];
+ memset(buf, 0, sz + 1);
+ size_t size;
+ while( (size = fread(buf, 1, sz, fp)) > 0) {
+ while(size) {
+ if(parser == NULL) {
+ parser = new MyBufferParser();
+ }
+ if(parser->parse(buf, size)) {
+
+ // Got one
+ numdocs++;
+
+ size = size - parser->usedBytes();
+ strcpy(buf, buf + parser->usedBytes());
+ delete parser; parser = NULL;
+ } else {
+ size = 0;
+ memset(buf, 0, sz + 1);
+ }
+ }
+ }
+ if(numdocs != 2) {
+ printf("Failed to parse two documents.\n");
+ return 1;
+ }
+ if(parser) delete parser; parser = NULL;
+ delete[] buf;
+ }
+ fclose(fp);
+ }
+
+ fp = fopen(XMLFILE, "w");
+ if(!fp) {
+ printf("Could not write to %s\n", XMLFILE);
+ return 1;
+ }
+ fprintf(fp, xml_fail);
+ fclose(fp);
+
+ // Test failure
+ {
+ MyFileParser parser(XMLFILE);
+ try {
+ parser.parse();
+ } catch(Exception &e) {
+ goto goon;
+ }
+ printf("This test should fail...\n");
+ return 1;
+ }
+ goon:
+
+ fp = fopen(XMLFILE, "w");
+ if(!fp) {
+ printf("Could not write to %s\n", XMLFILE);
+ return 1;
+ }
+ fprintf(fp, xml_fail2);
+ fclose(fp);
+
+ // Test failure
+ {
+ MyFileParser parser(XMLFILE);
+ try {
+ parser.parse();
+ } catch(Exception &e) {
+ goto goonagain;
+ }
+ printf("This test should fail...\n");
+ return 1;
+ }
+ goonagain:
+
+ unlink(XMLFILE);
}
#endif/*TEST_SAXPARSER*/
diff --git a/server/src/saxparser.h b/server/src/saxparser.h
index da33440..9f2faa2 100644
--- a/server/src/saxparser.h
+++ b/server/src/saxparser.h
@@ -31,21 +31,92 @@
#include <map>
#include <expat.h>
+/**
+ * This class implements a SAX Parser, utilising the eXpat XML parser library.
+ * It uses virtual methods for the callbacks, and transforms tagnames and
+ * attributes into C++ values (std::string and std::map).
+ */
class SAXParser {
public:
+ /**
+ * Constructor.
+ * It initialises the eXpat library.
+ */
SAXParser();
+
+ /**
+ * Destructor.
+ * It frees the eXpat library resources.
+ */
virtual ~SAXParser();
+ /**
+ * Call this method to use the reimplemented readData method for input.
+ * The entire document is parsed through this single call.
+ * @return An integer with value 0 on success, or 1 on failure.
+ * @see int readData(char *data, size_t size)
+ */
int parse();
+ /**
+ * Character data callback method.
+ * Reimplement this to get character callbacks.
+ * This callback might be called several times, if a character block is big. In
+ * that case it might be necessary to buffer the received bytes.
+ * @param data A std::string containing the character data.
+ */
virtual void characterData(std::string &data) {}
+
+ /**
+ * Start tag callback method.
+ * Reimplement this to get start tag callbacks.
+ * It is called each time a new start tag is seen.
+ * @param name A std::string containing the tag name.
+ * @param attributes A std::map of std::string to std::string containing all
+ * attributes for the tag.
+ */
virtual void startTag(std::string name, std::map< std::string, std::string> attributes) {}
+
+ /**
+ * End tag callback method.
+ * Reimplement this to get end tag callbacks.
+ * It is called each time an end tag is seen.
+ * @param name A std::string containing the tag name.
+ */
virtual void endTag(std::string name) {}
+ /**
+ * Error callback method.
+ * Reimplement this to handle error messages.
+ * A default implementation prints out the current buffer, linenumber and error
+ * message to the screen.
+ * @param buf A char* containing the current buffer being parsed.
+ * @param len A size_t containing the length of the current buffer being parsed.
+ * @param error A std::string containing the error message.
+ * @param lineno An integer containing the line number on which the error occurred.
+ */
virtual void parseError(char *buf, size_t len, std::string error, int lineno);
+ /**
+ * Buffer parse method.
+ * Use this method to parse an external buffer with xml data.
+ * This method can be called several times (ie. in a read loop).
+ * @param buf A char* containing the buffer to parse.
+ * @param size A size_t containing the size of the buffer to parse.
+ * @return A boolean with the value true if a complete document has been seen.
+ * false otherwise.
+ * @see unsigned int usedBytes()
+ */
bool parse(char *buf, size_t size);
+ /**
+ * Get the number of bytes used from the last buffer.
+ * If the buffer parse method is used, and the buffer comes from a stream of xml
+ * documents, this method can be used to figure out how many bytes from the stream
+ * should be replayed, to another parser.
+ * @return an integer containing the number of bytes used from the last buffer.
+ * @see bool parse(char *buf, size_t size)
+ */
unsigned int usedBytes();
// private stuff that needs to be public!
@@ -53,6 +124,17 @@ public:
bool done;
protected:
+ /**
+ * Read data callback method.
+ * This method is used when the parse() method is used.
+ * It can be used to connect the parser with eg. a file.
+ * @param data A char* containing the buffer to be filled.
+ * @param size A size_t containing the maximum number of bytes to be filled (ie.
+ * the size of data)
+ * @return An integer containing the actual number of bytes filled. 0 if no more
+ * bytes are available.
+ * @see int parse()
+ */
virtual int readData(char *data, size_t size) { return 0; }
XML_Parser p;