summaryrefslogtreecommitdiff
path: root/src/saxparser.h
blob: cdc108c46de6f293b8b1c4ad68ae49575bde20cc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/***************************************************************************
 *            saxparser.h
 *
 *  Mon Mar 24 14:40:15 CET 2008
 *  Copyright 2008 Bent Bisballe Nyeng
 *  deva@aasimon.org
 ****************************************************************************/

/*
 *  This file is part of Pracro.
 *
 *  Pracro is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  Pracro is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Pracro; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 */
#pragma once

#include <string>
#include <map>
#include <expat.h>

//! Attribute container passed to SAXParser::startTag(): a std::map from
//! std::string to std::string (presumably attribute name to attribute value).
typedef std::map< std::string, std::string> attributes_t;

//! This class implements a SAX Parser, utilising the eXpat XML parser library.
//! It uses virtual methods for the callbacks, and transforms tag names and
//! attributes into C++ values (std::string and std::map, see attributes_t).
//!
//! Input can be supplied in two ways: either by reimplementing readData() and
//! calling parse(), or by feeding external buffers to
//! parse(const char *buf, size_t size).
class SAXParser
{
public:
	//! Constructor.
	//! It initialises the eXpat library.
	SAXParser();

	//! Destructor.
	//! It frees the eXpat library resources.
	virtual ~SAXParser();

	//! Call this method to use the reimplemented readData method for input.
	//! The entire document is parsed through this single call.
	//! \return An integer with value 0 on success, or 1 on failure.
	//! \see int readData(char *data, size_t size)
	int parse();

	//! Character data callback method.
	//! Reimplement this to get character callbacks.
	//! This callback might be called several times, if a character block is big.
	//! In that case it might be necessary to buffer the received bytes.
	//! \param data A std::string containing the character data.
	virtual void characterData(const std::string &data);

	//! Start tag callback method.
	//! Reimplement this to get start tag callbacks.
	//! It is called each time a new start tag is seen.
	//! \param name A std::string containing the tag name.
	//! \param attr A std::map of std::string to std::string containing all
	//! attributes for the tag.
	virtual void startTag(std::string name, attributes_t &attr);

	//! End tag callback method.
	//! Reimplement this to get end tag callbacks.
	//! It is called each time an end tag is seen.
	//! \param name A std::string containing the tag name.
	virtual void endTag(std::string name);

	//! Error callback method.
	//! Reimplement this to handle error messages.
	//! A default implementation prints out the current buffer, line number and
	//! error message to the screen.
	//! \param buf A char* containing the current buffer being parsed.
	//! \param len A size_t containing the length of the current buffer being
	//!  parsed.
	//! \param error A std::string containing the error message.
	//! \param lineno An integer containing the line number on which the error
	//!  occurred.
	virtual void parseError(const char *buf, size_t len, std::string error,
	                        int lineno);

	//! Buffer parse method.
	//! Use this method to parse an external buffer with xml data.
	//! This method can be called several times (ie. in a read loop).
	//! \param buf A char* containing the buffer to parse.
	//! \param size A size_t containing the size of the buffer to parse.
	//! \return A boolean with the value true if a complete document has been
	//!  seen. false otherwise.
	//! \see unsigned int usedBytes()
	bool parse(const char *buf, size_t size);

	//! Get the number of bytes used from the last buffer.
	//! If the buffer parse method is used, and the buffer comes from a stream of
	//! xml documents, this method can be used to figure out how many bytes from
	//! the stream should be replayed, to another parser.
	//! \return an integer containing the number of bytes used from the last
	//!  buffer.
	//! \see bool parse(const char *buf, size_t size)
	unsigned int usedBytes();

	// private stuff that needs to be public!
	// NOTE(review): the semantics of these two members are not visible in this
	// header. Presumably 'outertag' holds the name of the outermost tag and
	// 'done' is set once a complete document has been seen - confirm against
	// the implementation file before relying on either.
	std::string outertag;
	bool done;

protected:
	//! Read data callback method.
	//! This method is used when the parse() method is used.
	//! It can be used to connect the parser with eg. a file.
	//! \param data A char* containing the buffer to be filled.
	//! \param size A size_t containing the maximum number of bytes to be filled
	//! (ie. the size of data)
	//! \return An integer containing the actual number of bytes filled. 0 if no
	//! more bytes are available.
	//! \see int parse()
	virtual int readData(char *data, size_t size);

	//! The eXpat parser handle (type declared in <expat.h>).
	//! NOTE(review): presumably created by the constructor and released by the
	//! destructor - confirm in the implementation file.
	XML_Parser p;

	// NOTE(review): presumably the byte counters backing usedBytes() (size of
	// the last buffer and running total) - confirm in the implementation file.
	unsigned int bufferbytes;
	unsigned int totalbytes;
};