libfilezilla
Loading...
Searching...
No Matches
xml.hpp
1#ifndef LIBFILEZILLA_XML_HEADER
2#define LIBFILEZILLA_XML_HEADER
3
4#include <functional>
5#include <string>
6#include <tuple>
7#include <vector>
8
9#include "buffer.hpp"
10#include "logger.hpp"
11
12namespace fz {
13
14namespace xml {
15
/// Kind of event handed to a parser callback. It is the first argument of both
/// parser::callback_t and namespace_parser::raw_callback_t.
///
/// NOTE(review): the per-enumerator notes below are inferred from the enumerator
/// names and the parser's private state names, not from visible implementation
/// code -- confirm against the implementation before relying on them.
17enum class callback_event
18{
   /// Presumably reported when an element is opened.
20 open,
21
   /// Presumably reported when an element is closed.
23 close,
24
   /// Presumably reported for an attribute of an element.
26 attribute,
27
   /// Presumably carries the textual value of an element.
30 value,
31
   /// Presumably reported for <?...?> instructions such as <?xml.
33 parsing_instruction,
34
   /// Presumably reported for a document type declaration.
35 doctype,
36};
37
/// A streaming XML parser.
///
/// Data is fed in through parse() and events are reported through a callback_t.
///
/// NOTE(review): this class uses std::string_view, size_t and uint32_t, while the
/// header directly includes only <functional>, <string>, <tuple> and <vector>.
/// Presumably the remaining declarations arrive through buffer.hpp / logger.hpp --
/// confirm.
47class FZ_PUBLIC_SYMBOL parser final
48{
49public:
   /// Callback signature: event type, path, name and value.
   /// The value is passed as an rvalue reference, so the callee may move from it.
   /// NOTE(review): presumably returning false stops parsing -- confirm.
56 typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string && value)> callback_t;
57
   /// Constructs a parser, optionally taking the callback by copy or by move.
58 parser();
59 parser(callback_t const& cb);
60 parser(callback_t && cb);
61
   /// Sets the callback from a copy of cb.
65 void set_callback(callback_t const& cb);
66
   /// Processes the block of data. Can be partial.
68 bool parse(std::string_view data);
69
   /// NOTE(review): presumably called once after the last block has been passed
   /// to parse() -- confirm against the implementation.
72 bool finalize();
73
   /// Returns an error description. Empty if parsing was stopped by a callback.
75 std::string get_error() const;
76
77private:
   // Internal helpers; their definitions are not part of this header.
78 bool FZ_PRIVATE_SYMBOL decode_ref();
79 bool FZ_PRIVATE_SYMBOL is_valid_tag_or_attr(std::string_view s) const;
80 bool FZ_PRIVATE_SYMBOL normalize_value();
81
82 bool FZ_PRIVATE_SYMBOL parse_valid_utf8(std::string_view data);
83 bool FZ_PRIVATE_SYMBOL parse(char const* const begin, char const* const end);
84 void FZ_PRIVATE_SYMBOL set_error(std::string_view msg, size_t offset);
85
   // Takes the data view by non-const reference, so it may adjust the caller's view.
86 bool FZ_PRIVATE_SYMBOL deduce_encoding(std::string_view & data);
87
   // States of the parser's internal state machine.
88 enum class state {
89 content,
90 tag_start, // Just after reading <
91 tag_name, // Reading tag name
92 tag_closing, // In a closing tag, matching the tag name
93 tag_end, // Just before reading >
94
95 attributes,
96 attribute_quote,
97 attribute_value,
98
99 // <?xml and other parsing instructions
100 pi,
101 pi_value,
102
103 // entity and character references
104 reference,
105 attrvalue_reference,
106
107 comment_start,
108 comment_end,
109
110 doctype_start,
111 doctype_name,
112 doctype_value,
113
114 cdata_start,
115 cdata_end,
116
117 done,
118 error
119 };
120
   // The callback that receives the events.
121 callback_t cb_;
122
123 std::string path_;
124 std::vector<size_t> nodes_;
125 std::string value_;
126 size_t processed_{};
127 std::string converted_{};
128
   // Anonymous union: both members share the same storage.
   // NOTE(review): presumably which one is meaningful depends on encoding_ -- confirm.
129 union {
130 size_t utf8_state_{};
131 uint32_t utf16_state_;
132 };
133
   // Current state; a new parser starts in state::content.
134 state s_{ state::content };
135
136 enum class encoding {
137 unknown,
138 utf8,
139 utf16le,
140 utf16be
141 };
   // Value-initialized, i.e. starts out as encoding::unknown (the first enumerator).
142 encoding encoding_{};
143
   // Anonymous union: the three members share the same storage.
   // NOTE(review): presumably each one is only used in the states it relates to
   // (closing-tag matching, attribute quoting, comment dashes) -- confirm.
144 union {
145 size_t tag_match_pos_{};
146 char quotes_;
147 unsigned char dashes_;
148 };
149
   // Flags, initially false, named after the XML declaration, doctype and element.
150 bool got_xmldecl_{};
151 bool got_doctype_{};
152 bool got_element_{};
153};
154
/// A streamable XML parser that resolves namespace declarations and namespace prefixes.
///
/// Holds a fz::xml::parser as a member (parser_) and offers the same
/// parse() / finalize() / get_error() interface.
167class FZ_PUBLIC_SYMBOL namespace_parser final
168{
169public:
   // NOTE(review): the embedded listing numbers skip from 169 to 173 here, so any
   // declarations on those lines (presumably the constructors) are not visible.
173
   /// Sets the callback, taking it by move or by copy.
174 void set_callback(parser::callback_t && cb);
175 void set_callback(parser::callback_t const& cb);
176
   /// Same signatures as parser::parse() and parser::finalize().
   /// NOTE(review): presumably forwarded to parser_ -- confirm.
177 bool parse(std::string_view data);
178 bool finalize();
179
   /// Returns an error description as a string.
180 std::string get_error() const;
181
   /// Additional raw callback to look at events before namespace processing takes place.
   /// Unlike parser::callback_t, the value is passed as a std::string_view.
183 typedef std::function<bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t;
184 void set_raw_callback(raw_callback_t && cb);
185 void set_raw_callback(raw_callback_t const& cb);
186private:
187 std::string_view FZ_PRIVATE_SYMBOL apply_namespaces(std::string_view in);
188 bool FZ_PRIVATE_SYMBOL apply_namespace_to_path();
189
   // Has the same parameter list as parser::callback_t.
   // NOTE(review): presumably installed as the callback of parser_ -- confirm.
190 bool FZ_PRIVATE_SYMBOL on_callback(callback_event type, std::string_view path, std::string_view name, std::string && value);
191
   // The underlying parser.
192 parser parser_;
193
   // NOTE(review): listing number 194 is skipped, so a member declared between
   // parser_ and raw_cb_ may be missing from this view.
195 raw_callback_t raw_cb_;
196
197 std::string path_;
198 fz::buffer applied_;
199 std::vector<size_t> nodes_;
   // Pairs of strings.
   // NOTE(review): presumably attribute name and value, buffered until namespaces
   // can be applied -- confirm.
200 std::vector<std::pair<std::string, std::string>> attributes_;
   // Tuples of a size_t and two strings.
   // NOTE(review): presumably a depth/index plus prefix and namespace name -- confirm.
201 std::vector<std::tuple<size_t, std::string, std::string>> namespaces_;
202 bool needs_namespace_expansion_{};
203 bool error_{};
204};
205
/// A slow pretty printer for XML as it is being parsed.
///
/// Abstract base class: derived classes implement on_line() to receive the output.
207class FZ_PUBLIC_SYMBOL pretty_printer
208{
209public:
210 pretty_printer() = default;
   /// Virtual, since the class is meant to be derived from.
211 virtual ~pretty_printer();
212
   /// Takes one parser event.
   /// The parameter list matches namespace_parser::raw_callback_t, except that it
   /// returns void and leaves the second (path) argument unnamed.
213 void log(callback_event type, std::string_view, std::string_view name, std::string_view value);
214
215protected:
   /// Pure virtual hook that derived classes must implement.
   /// NOTE(review): presumably called once per finished output line -- confirm.
216 virtual void on_line(std::string_view line) = 0;
217
218private:
219 void FZ_PRIVATE_SYMBOL finish_line();
220 void FZ_PRIVATE_SYMBOL print_line();
221
   // Starts at zero.
   // NOTE(review): presumably the current element nesting depth, used for indentation.
222 size_t depth_{};
223 std::string value_;
224 std::string line_;
225};
226
/// A slow pretty printer for XML as it is being parsed.
///
/// A pretty_printer that overrides on_line() and stores a logger reference and a
/// log message type.
228class FZ_PUBLIC_SYMBOL pretty_logger : public pretty_printer
229{
230public:
   // NOTE(review): listing number 231 is skipped, so the public declarations are
   // not visible in this view. Presumably it is a constructor that binds logger_
   // and sets level_; logger_ is a reference member and must be bound at
   // construction.
232
233protected:
   /// Overrides pretty_printer::on_line().
   /// NOTE(review): presumably writes the line to logger_ at level_ -- confirm.
234 virtual void on_line(std::string_view line) override;
235
236 logmsg::type level_;
   // Held by reference.
   // NOTE(review): the referenced logger presumably must outlive this object.
237 logger_interface & logger_;
238};
239
240
241}
242}
243
244#endif
Declares fz::buffer.
The buffer class is a simple buffer where data can be appended at the end and consumed at the front....
Definition: buffer.hpp:27
Abstract interface for logging strings.
Definition: logger.hpp:51
A streamable XML parser that resolves namespace declarations and namespace prefixes.
Definition: xml.hpp:168
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string_view value)> raw_callback_t
Additional raw callback to look at events before namespace processing takes place.
Definition: xml.hpp:183
A streaming XML parser.
Definition: xml.hpp:48
void set_callback(callback_t &&cb)
std::string get_error() const
Returns an error description. Empty if parsing was stopped by a callback.
bool parse(std::string_view data)
Processes the block of data. Can be partial.
std::function< bool(callback_event type, std::string_view path, std::string_view name, std::string &&value)> callback_t
Definition: xml.hpp:56
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:229
A slow pretty printer for XML as it is being parsed.
Definition: xml.hpp:208
Interface for logging.
type
Definition: logger.hpp:16
The namespace used by libfilezilla.
Definition: apply.hpp:17