comparison C++/modules/Ini/Ini.cpp @ 427:aa9cc55338be

Ini: rewrite the parse/analyze process
author David Demelier <markand@malikania.fr>
date Wed, 14 Oct 2015 15:08:45 +0200
parents cee5c74c1c83
children 31bddece9860
comparison
equal deleted inserted replaced
426:cee5c74c1c83 427:aa9cc55338be
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */ 17 */
18 18
19 #include <cctype> 19 #include <cctype>
20 #include <cerrno>
21 #include <cstring>
22 #include <fstream>
23 #include <iostream> 20 #include <iostream>
24 #include <iterator> 21 #include <iterator>
25 #include <memory> 22 #include <fstream>
26 #include <ostream>
27 #include <sstream> 23 #include <sstream>
28 #include <vector> 24 #include <stdexcept>
29 25
30 #if defined(_WIN32) 26 #if defined(_WIN32)
31 # include <Shlwapi.h> // for PathIsRelative 27 # include <Shlwapi.h> // for PathIsRelative
32 #endif 28 #endif
33 29
34 #include "Ini.h" 30 #include "Ini.h"
35 31
36 namespace ini {
37
38 namespace { 32 namespace {
39 33
40 /* -------------------------------------------------------- 34 using namespace ini;
41 * Tokens 35
42 * -------------------------------------------------------- */ 36 using Iterator = std::istreambuf_iterator<char>;
43 37 using TokenIterator = std::vector<Token>::const_iterator;
44 enum class TokenType { 38
45 Comment = '#', 39 #if defined(_WIN32)
46 SectionBegin = '[', 40 inline bool isAbsolute(const std::string &path) noexcept
47 SectionEnd = ']', 41 {
48 Escape = '\\', 42 return !PathIsRelative(path.c_str());
49 QuoteSimple = '\'', 43 }
50 QuoteDouble = '"', 44 #else
51 NewLine = '\n', 45 inline bool isAbsolute(const std::string &path) noexcept
52 Assign = '=', 46 {
53 Include = '@', 47 return path.size() > 0 && path[0] == '/';
54 Word, 48 }
55 Space 49 #endif
56 }; 50
57 51 inline bool isQuote(char c) noexcept
58 class Token { 52 {
59 private: 53 return c == '\'' || c == '"';
60 TokenType m_type; 54 }
61 int m_line; 55
62 int m_position; 56 inline bool isSpace(char c) noexcept
63 std::string m_value; 57 {
64 58 /* Custom version because std::isspace includes \n as space */
65 public: 59 return c == ' ' || c == '\t';
66 inline Token(TokenType type, int line, int position, std::string value = "") 60 }
67 : m_type(type) 61
68 , m_line(line) 62 inline bool isReserved(char c) noexcept
69 , m_position(position) 63 {
70 , m_value(std::move(value)) 64 return c == '[' || c == ']' || c == '@' || c == '#' || c == '=' || c == '\'' || c == '"';
71 { 65 }
72 } 66
73 67 void analyzeLine(Tokens &list, int &line, int &column, Iterator &it)
74 inline TokenType type() const noexcept 68 {
75 { 69 list.push_back({ Token::Line, line++, column });
76 return m_type; 70 ++ it;
77 } 71 column = 0;
78 72 }
79 inline int line() const noexcept 73
80 { 74 void analyzeComment(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
81 return m_line; 75 {
82 } 76 std::string value{1, *it};
83 77 int save = column;
84 inline int position() const noexcept 78
85 { 79 while (it != end && *it != '\n') {
86 return m_position; 80 ++ column;
87 } 81 value += *it++;
88 82 }
89 inline std::string value() const 83
90 { 84 list.push_back({ Token::Comment, line, save, std::move(value) });
91 switch (m_type) { 85 }
92 case TokenType::Comment: 86
93 return "#"; 87 void analyzeSection(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
94 case TokenType::SectionBegin: 88 {
95 return "["; 89 std::string value;
96 case TokenType::SectionEnd: 90 int save = column;
97 return "]"; 91
98 case TokenType::QuoteSimple: 92 /* Read section name */
99 return "'"; 93 ++ it;
100 case TokenType::QuoteDouble: 94 while (it != end && *it != ']') {
101 return "\""; 95 if (*it == '\n') {
102 case TokenType::NewLine: 96 throw Error{line, column, "section not terminated, missing ']'"};
103 return "\n"; 97 }
104 case TokenType::Assign: 98 if (isReserved(*it)) {
105 return "="; 99 throw Error{line, column, "section name expected after '[', got '" + std::string(1, *it) + "'"};
106 case TokenType::Include: 100 }
107 return "@"; 101 ++ column;
108 case TokenType::Space: 102 value += *it++;
109 return m_value; 103 }
110 case TokenType::Word: 104
111 return m_value; 105 if (it == end) {
106 throw Error{line, column, "section name expected after '[', got <EOF>"};
107 }
108
109 /* Remove ']' */
110 ++ it;
111
112 list.push_back({ Token::Section, line, save, std::move(value) });
113 }
114
115 void analyzeAssign(Tokens &list, int &line, int &column, Iterator &it)
116 {
117 list.push_back({ Token::Assign, line, column++ });
118 ++ it;
119 }
120
121 void analyzeSpaces(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
122 {
123 std::string value;
124 int save = column;
125
126 while (it != end && (*it == ' ' || *it == '\t')) {
127 ++ column;
128 value += *it++;
129 }
130
131 list.push_back({ Token::Space, line, save, std::move(value) });
132 }
133
134 void analyzeQuotedWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
135 {
136 std::string value;
137 int save = column;
138 char quote = *it++;
139
140 while (it != end && *it != quote) {
141 // TODO: escape sequence
142 ++ column;
143 value += *it++;
144 }
145
146 if (it == end) {
147 throw Error{line, column, "undisclosed '" + std::string(1, quote) + "', got <EOF>"};
148 }
149
150 /* Remove quote */
151 ++ it;
152
153 list.push_back({ Token::QuotedWord, line, save, std::move(value) });
154 }
155
156 void analyzeWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
157 {
158 std::string value;
159 int save = column;
160
161 while (it != end && !std::isspace(*it) && !isReserved(*it)) {
162 ++ column;
163 value += *it++;
164 }
165
166 list.push_back({ Token::Word, line, save, std::move(value) });
167 }
168
169 void analyzeInclude(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
170 {
171 std::string include;
172 int save = column;
173
174 /* Read include */
175 ++ it;
176 while (it != end && !isSpace(*it)) {
177 ++ column;
178 include += *it++;
179 }
180
181 if (include != "include") {
182 throw Error{line, column, "expected include after '@' token"};
183 }
184
185 list.push_back({ Token::Include, line, save });
186 }
187
188 Tokens analyze(std::istreambuf_iterator<char> &it, std::istreambuf_iterator<char> end)
189 {
190 Tokens list;
191 int line = 1;
192 int column = 0;
193
194 while (it != end) {
195 if (*it == '\n') {
196 analyzeLine(list, line, column, it);
197 } else if (*it == '#') {
198 analyzeComment(list, line, column, it, end);
199 } else if (*it == '[') {
200 analyzeSection(list, line, column, it, end);
201 } else if (*it == '=') {
202 analyzeAssign(list, line, column, it);
203 } else if (isSpace(*it)) {
204 analyzeSpaces(list, line, column, it, end);
205 } else if (*it == '@') {
206 analyzeInclude(list, line, column, it, end);
207 } else if (isQuote(*it)) {
208 analyzeQuotedWord(list, line, column, it, end);
209 } else {
210 analyzeWord(list, line, column, it, end);
211 }
212 }
213
214 return list;
215 }
216
217 void parseSpaces(TokenIterator &it, TokenIterator end)
218 {
219 while (it != end && it->type() == Token::Space) {
220 ++ it;
221 }
222 }
223
224 void parseOption(Section &sc, TokenIterator &it, TokenIterator end)
225 {
226 std::string key = it->value();
227 std::string value;
228
229 TokenIterator save = it;
230
231 /* Optional spaces before '=' */
232 parseSpaces(++it, end);
233
234 /* No '=' or something else? */
235 if (it == end) {
236 throw Error{save->line(), save->column(), "expected '=' assignment, got <EOF>"};
237 }
238 if (it->type() != Token::Assign) {
239 throw Error{it->line(), it->column(), "expected '=' assignment, got " + it->value()};
240 }
241
242 /* Optional spaces after '=' */
243 parseSpaces(++it, end);
244
245 /* Empty options are allowed so just test for words */
246 if (it != end) {
247 if (it->type() == Token::Word || it->type() == Token::QuotedWord) {
248 value = it++->value();
249 }
250 }
251
252 sc.emplace_back(std::move(key), std::move(value));
253 }
254
255 void parseInclude(Document &doc, TokenIterator &it, TokenIterator end)
256 {
257 TokenIterator save = it;
258
259 if (++it == end) {
260 throw Error{save->line(), save->column(), "expected file name after '@include' statement, got <EOF>"};
261 }
262
263 /* Get file name */
264 parseSpaces(it, end);
265
266 if (it->type() != Token::Word && it->type() != Token::QuotedWord) {
267 throw Error{it->line(), it->column(), "expected file name after '@include' statement, got " + it->value()};
268 }
269
270 if (doc.path().empty()) {
271 throw Error{it->line(), it->column(), "'@include' statement invalid with buffer documents"};
272 }
273
274 std::string value = (it++)->value();
275 std::string file;
276
277 if (!isAbsolute(value)) {
278 #if defined(_WIN32)
279 file = doc.path() + "\\" + value;
280 #else
281 file = doc.path() + "/" + value;
282 #endif
283 } else {
284 file = value;
285 }
286
287 Document child{File{file}};
288
289 for (const auto &sc : child) {
290 doc.push_back(sc);
291 }
292 }
293
294 void parseSection(Document &doc, TokenIterator &it, TokenIterator end)
295 {
296 Section sc{it->value()};
297
298 /* Skip [section] */
299 ++ it;
300
301 /* Read until next section */
302 while (it != end && it->type() != Token::Section) {
303 switch (it->type()) {
304 case Token::Line:
305 case Token::Comment:
306 case Token::Space:
307 it ++;
308 continue;
112 default: 309 default:
113 break; 310 break;
114 } 311 }
115 312
116 return ""; 313 if (it->type() != Token::Word) {
117 } 314 throw Error{it->line(), it->column(), "unexpected token '" + it->value() + "' in section definition"};
118 315 }
119 inline std::string toString() const 316
120 { 317 parseOption(sc, it, end);
121 switch (m_type) { 318 }
122 case TokenType::Comment: 319
123 return "'#'"; 320 doc.push_back(std::move(sc));
124 case TokenType::SectionBegin: 321 }
125 return "'['"; 322
126 case TokenType::SectionEnd: 323 void parse(Document &doc, const Tokens &tokens)
127 return "']'"; 324 {
128 case TokenType::QuoteSimple: 325 TokenIterator it = tokens.cbegin();
129 return "'"; 326 TokenIterator end = tokens.cend();
130 case TokenType::QuoteDouble: 327
131 return "\""; 328 while (it != end) {
132 case TokenType::NewLine: 329 /* Just ignore this */
133 return "<newline>"; 330 switch (it->type()) {
134 case TokenType::Assign: 331 case Token::Include:
135 return "="; 332 parseInclude(doc, it, end);
136 case TokenType::Include: 333 break;
137 return "@"; 334 case Token::Section:
138 case TokenType::Space: 335 parseSection(doc, it, end);
139 return "<blank>"; 336 break;
140 case TokenType::Word: 337 case Token::Comment:
141 return "`" + m_value + "'"; 338 case Token::Line:
339 case Token::Space:
340 ++ it;
341 break;
142 default: 342 default:
143 break; 343 throw Error{it->line(), it->column(), "unexpected '" + it->value() + "' on root document"};
144 } 344 }
145 345 }
146 return ""; 346 }
147 }
148 };
149
150 using TokenStack = std::vector<Token>;
151
152 /* --------------------------------------------------------
153 * Builder
154 * -------------------------------------------------------- */
155
156 class Builder {
157 private:
158 std::string m_path;
159 std::string m_base;
160 Document &m_ini;
161
162 private:
163 inline bool isReserved(char c) const noexcept
164 {
165 return c == '\n' || c == '#' || c == '"' || c == '\'' || c == '=' || c == '[' || c == ']' || c == '@';
166 }
167
168 std::string base(std::string path)
169 {
170 auto pos = path.find_last_of("/\\");
171
172 if (pos != std::string::npos) {
173 path.erase(pos);
174 } else {
175 path = ".";
176 }
177
178 return path;
179 }
180
181 #if defined(_WIN32)
182 bool isAbsolute(const std::string &path)
183 {
184 return !PathIsRelative(path.c_str());
185 }
186 #else
187 bool isAbsolute(const std::string &path)
188 {
189 return path.size() > 0 && path[0] == '/';
190 }
191 #endif
192
193 std::vector<Token> analyze(std::istream &stream) const
194 {
195 std::istreambuf_iterator<char> it(stream);
196 std::istreambuf_iterator<char> end;
197 std::vector<Token> tokens;
198
199 int lineno{1};
200 int position{0};
201
202 while (it != end) {
203 std::string value;
204
205 if (isReserved(*it)) {
206 while (it != end && isReserved(*it)) {
207 // Single character tokens
208 switch (*it) {
209 case '#':
210 /* Skip comments */
211 while (it != end && *it != '\n') {
212 ++ it;
213 }
214 tokens.push_back({ TokenType::Comment, lineno, position });
215 position = 0;
216 break;
217 case '\n':
218 ++lineno;
219 position = 0;
220 case '[':
221 case ']':
222 case '\'':
223 case '"':
224 case '=':
225 case '@':
226 tokens.push_back({ static_cast<TokenType>(*it), lineno, position });
227 ++it;
228 ++position;
229 default:
230 break;
231 }
232 }
233 } else if (std::isspace(*it)) {
234 while (it != end && std::isspace(*it) && *it != '\n') {
235 value.push_back(*it++);
236 ++position;
237 }
238
239 tokens.push_back({ TokenType::Space, lineno, position, std::move(value) });
240 } else {
241 while (it != end && !std::isspace(*it) && !isReserved(*it)) {
242 value.push_back(*it++);
243 ++position;
244 }
245
246 tokens.push_back({ TokenType::Word, lineno, position, std::move(value) });
247 }
248 }
249
250 return tokens;
251 }
252
253 void readComment(TokenStack::iterator &it, TokenStack::iterator)
254 {
255 ++ it;
256 }
257
258 void readSpace(TokenStack::iterator &it, TokenStack::iterator end)
259 {
260 while (it != end && it->type() == TokenType::Space) {
261 ++ it;
262 }
263 }
264
265 void readNewLine(TokenStack::iterator &it, TokenStack::iterator end)
266 {
267 while (it != end && it->type() == TokenType::NewLine) {
268 ++ it;
269 }
270 }
271
272 Section readSection(TokenStack::iterator &it, TokenStack::iterator end)
273 {
274 // Empty [
275 if (++it == end) {
276 throw Error{it[-1].line(), it[-1].position(), "section declaration expected, got <EOF>"};
277 }
278
279 // Get the section name
280 if (it->type() != TokenType::Word) {
281 throw Error{it->line(), it->position(), "word expected after [, got " + it->toString()};
282 }
283
284 Section section(it->value());
285
286 // [unterminated
287 if (++it == end) {
288 throw Error{it[-1].line(), it[-1].position(), "unterminated section"};
289 }
290
291 // Check if terminated
292 if (it->type() != TokenType::SectionEnd) {
293 throw Error{it->line(), it->position(), "] expected, got " + it->toString()};
294 }
295
296 // Remove ]
297 ++ it;
298
299 if (it == end) {
300 return section;
301 }
302
303 // Now read all that is allowed to be in a section
304 while (it != end && it->type() != TokenType::SectionBegin) {
305 if (it->type() == TokenType::Space) {
306 readSpace(it, end);
307 } else if (it->type() == TokenType::NewLine) {
308 readNewLine(it, end);
309 } else if (it->type() == TokenType::Comment) {
310 readComment(it, end);
311 } else if (it->type() == TokenType::Word) {
312 section.push_back(readOption(it, end));
313 } else {
314 throw Error{it->line(), it->position(), "unexpected token " + it->toString()};
315 }
316 }
317
318 return section;
319 }
320
321 Option readOption(TokenStack::iterator &it, TokenStack::iterator end)
322 {
323 std::string key = it++->value();
324
325 readSpace(it, end);
326
327 if (it == end) {
328 throw Error{it[-1].line(), it[-1].position(), "expected '=' after option declaration, got <EOF>"};
329 }
330
331 if (it->type() != TokenType::Assign) {
332 ++ it;
333 throw Error{it[-1].line(), it[-1].position(), "expected '=' after option declaration, got " + it[-1].toString()};
334 }
335
336 readSpace(++it, end);
337
338 std::ostringstream oss;
339
340 if (it->type() == TokenType::QuoteSimple || it->type() == TokenType::QuoteDouble) {
341 TokenStack::iterator save = it++;
342
343 while (it != end && it->type() != save->type()) {
344 oss << it++->value();
345 }
346
347 if (it == end) {
348 throw Error{save->line(), save->position(), "undisclosed quote: " + save->toString() + " expected"};
349 }
350
351 ++ it;
352 } else if (it->type() == TokenType::Word) {
353 oss << it++->value();
354 } else if (it->type() != TokenType::NewLine && it->type() != TokenType::Comment) {
355 // No value requested, must be NewLine or comment
356 throw Error{it->line(), it->position(), "expected option value after '=', got " + it->toString()};
357 }
358
359 return Option{std::move(key), oss.str()};
360 }
361
362 void readInclude(TokenStack::iterator &it, TokenStack::iterator end)
363 {
364 if (++it == end) {
365 throw Error{it[-1].line(), it[-1].position(), "expected `include` after '@' token, got <EOF>"};
366 }
367
368 if (it->type() != TokenType::Word && it->value() != "include") {
369 throw Error{it->line(), it->position(), "expected `include' after '@' token, got " + it->toString()};
370 }
371
372 readSpace(++it, end);
373
374 TokenStack::iterator save = it;
375
376 if (it == end) {
377 throw Error{it[-1].line(), it[-1].position(), "expected filename after @include statement, got <EOF>"};
378 }
379
380 // First quote
381 if (it->type() != TokenType::QuoteSimple && it->type() != TokenType::QuoteDouble) {
382 throw Error{it->line(), it->position(), "expected filename after @include statement"};
383 }
384
385 // Filename
386 if (++it == end) {
387 throw Error{it[-1].line(), it[-1].position(), "expected filename after @include statement, got <EOF>"};
388 }
389
390 if (it->type() != TokenType::Word) {
391 throw Error{it->line(), it->position(), "expected filename after @include statement"};
392 }
393
394 std::string value = it->value();
395 std::string fullpath;
396
397 if (isAbsolute(value)) {
398 fullpath = value;
399 } else {
400 fullpath = m_base + "/" + it->value();
401 }
402
403 // Must be closed with the same quote
404 if (++it == end) {
405 throw Error{save->line(), save->position(), "undiclosed quote: " + save->toString() + " expected, got <EOF>"};
406 }
407 if (it->type() != save->type()) {
408 throw Error{save->line(), save->position(), "undiclosed quote: " + save->toString() + " expected"};
409 }
410
411 // Remove quote
412 ++ it;
413
414 Builder(m_ini, fullpath);
415 }
416
417 public:
418 Builder(Document &ini, std::string path)
419 : m_path(path)
420 , m_base(base(std::move(path)))
421 , m_ini(ini)
422 {
423 std::ifstream file(m_path);
424
425 if (!file.is_open())
426 throw std::runtime_error(std::strerror(errno));
427
428 std::vector<Token> ts = analyze(file);
429
430 auto it = ts.begin();
431 auto end = ts.end();
432
433 while (it != end) {
434 if (it->type() == TokenType::Space) {
435 readSpace(it, end);
436 } else if (it->type() == TokenType::NewLine) {
437 readNewLine(it, end);
438 } else if (it->type() == TokenType::Comment) {
439 readComment(it, end);
440 } else if (it->type() == TokenType::Include) {
441 readInclude(it, end);
442 } else if (it->type() == TokenType::SectionBegin) {
443 m_ini.push_back(readSection(it, end));
444 } else {
445 throw Error(it->line(), it->position(), "unexpected " + it->toString() + " on root document");
446 }
447 }
448 }
449 };
450 347
451 } // !namespace 348 } // !namespace
452 349
453 /* -------------------------------------------------------- 350 namespace ini {
454 * Document 351
455 * -------------------------------------------------------- */ 352 Tokens Document::analyze(const File &file)
456 353 {
457 Document::Document(const std::string &path) 354 std::fstream stream{file.path};
458 { 355 std::istreambuf_iterator<char> it{stream};
459 Builder(*this, path); 356 std::istreambuf_iterator<char> end{};
357
358 return ::analyze(it, end);
359 }
360
361 Tokens Document::analyze(const Buffer &buffer)
362 {
363 std::istringstream stream{buffer.text};
364 std::istreambuf_iterator<char> it{stream};
365 std::istreambuf_iterator<char> end{};
366
367 return ::analyze(it, end);
368 }
369
370 Document::Document(const File &file)
371 : m_path{file.path}
372 {
373 /* Update path */
374 auto pos = m_path.find_last_of("/\\");
375
376 if (pos != std::string::npos) {
377 m_path.erase(pos);
378 } else {
379 m_path = ".";
380 }
381
382 parse(*this, analyze(file));
383 }
384
385 Document::Document(const Buffer &buffer)
386 {
387 dump(analyze(buffer));
388 parse(*this, analyze(buffer));
389 }
390
391 void Document::dump(const Tokens &tokens)
392 {
393 for (const Token &token: tokens) {
394 // TODO: type
395 std::cout << token.line() << ":" << token.column() << ": " << token.value() << std::endl;
396 }
460 } 397 }
461 398
462 } // !ini 399 } // !ini