code: C++/modules/Ini/Ini.cpp comparison

comparison C++/modules/Ini/Ini.cpp @ 427:aa9cc55338be

Ini: rewrite the parse/analyze process

author	David Demelier <markand@malikania.fr>
date	Wed, 14 Oct 2015 15:08:45 +0200
parents	cee5c74c1c83
children	31bddece9860

comparison

equal deleted inserted replaced

-:cee5c74c1c83
+:aa9cc55338be
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
 #include <cctype>
-#include <cerrno>
-#include <cstring>
-#include <fstream>
 #include <iostream>
 #include <iterator>
-#include <memory>
+#include <fstream>
-#include <ostream>
 #include <sstream>
-#include <vector>
+#include <stdexcept>
 #if defined(_WIN32)
 #  include <Shlwapi.h>	// for PathIsRelative
 #endif
 #include "Ini.h"
-namespace ini {
 namespace {
-/* --------------------------------------------------------
+using namespace ini;
-* Tokens
-* -------------------------------------------------------- */
+using Iterator = std::istreambuf_iterator<char>;
+using TokenIterator = std::vector<Token>::const_iterator;
-enum class TokenType {
-	Comment = '#',
+#if defined(_WIN32)
-	SectionBegin = '[',
+inline bool isAbsolute(const std::string &path) noexcept
-	SectionEnd = ']',
+{
-	Escape = '\\',
+	return !PathIsRelative(path.c_str());
-	QuoteSimple = '\'',
+}
-	QuoteDouble = '"',
+#else
-	NewLine = '\n',
+inline bool isAbsolute(const std::string &path) noexcept
-	Assign = '=',
+{
-	Include = '@',
+	return path.size() > 0 && path[0] == '/';
-	Word,
+}
-	Space
+#endif
-};
+inline bool isQuote(char c) noexcept
-class Token {
+{
-private:
+	return c == '\'' || c == '"';
-	TokenType m_type;
+}
-	int m_line;
-	int m_position;
+inline bool isSpace(char c) noexcept
-	std::string m_value;
+{
+	/* Custom version because std::isspace includes \n as space */
-public:
+	return c == ' ' || c == '\t';
-	inline Token(TokenType type, int line, int position, std::string value = "")
+}
-		: m_type(type)
-		, m_line(line)
+inline bool isReserved(char c) noexcept
-		, m_position(position)
+{
-		, m_value(std::move(value))
+	return c == '[' || c == ']' || c == '@' || c == '#' || c == '=' || c == '\'' || c == '"';
-	{
+}
-	}
+void analyzeLine(Tokens &list, int &line, int &column, Iterator &it)
-	inline TokenType type() const noexcept
+{
-	{
+	list.push_back({ Token::Line, line++, column });
-		return m_type;
+	++ it;
-	}
+	column = 0;
+}
-	inline int line() const noexcept
-	{
+/*
+ * Lex a comment: everything from the '#' under the cursor up to (but not
+ * including) the next '\n' or the end of input becomes one Comment token.
+ *
+ * list   - token list the Comment token is appended to
+ * line   - current line, stored in the token (not modified here)
+ * column - current column; advanced by one per consumed character, the
+ *          token keeps the column where the comment started
+ * it/end - input cursor and end of input; it is left on the '\n' or at end
+ */
+void analyzeComment(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
-		return m_line;
+{
-	}
+	/*
+	 * Start empty: the loop below copies the leading '#' itself. Brace
+	 * initialization with {1, *it} would select the initializer-list
+	 * constructor and store the two characters '\x01' and '#'.
+	 */
+	std::string value;
+	int save = column;
-	inline int position() const noexcept
-	{
+	while (it != end && *it != '\n') {
-		return m_position;
+		++ column;
-	}
+		value += *it++;
+	}
-	inline std::string value() const
-	{
+	list.push_back({ Token::Comment, line, save, std::move(value) });
-		switch (m_type) {
+}
-		case TokenType::Comment:
-			return "#";
+void analyzeSection(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
-		case TokenType::SectionBegin:
+{
-			return "[";
+	std::string value;
-		case TokenType::SectionEnd:
+	int save = column;
-			return "]";
-		case TokenType::QuoteSimple:
+	/* Read section name */
-			return "'";
+	++ it;
-		case TokenType::QuoteDouble:
+	while (it != end && *it != ']') {
-			return "\"";
+		if (*it == '\n') {
-		case TokenType::NewLine:
+			throw Error{line, column, "section not terminated, missing ']'"};
-			return "\n";
+		}
-		case TokenType::Assign:
+		if (isReserved(*it)) {
-			return "=";
+			throw Error{line, column, "section name expected after '[', got '" + std::string(1, *it) + "'"};
-		case TokenType::Include:
+		}
-			return "@";
+		++ column;
-		case TokenType::Space:
+		value += *it++;
-			return m_value;
+	}
-		case TokenType::Word:
-			return m_value;
+	if (it == end) {
+		throw Error{line, column, "section name expected after '[', got <EOF>"};
+	}
+	/* Remove ']' */
+	++ it;
+	list.push_back({ Token::Section, line, save, std::move(value) });
+}
+/*
+ * Emit the Assign token for the '=' under the cursor.
+ *
+ * The token is recorded at the current line and column; afterwards both
+ * the column counter and the input cursor move one character forward.
+ */
+void analyzeAssign(Tokens &list, int &line, int &column, Iterator &it)
+{
+	const int position = column;
+	list.push_back({ Token::Assign, line, position });
+	++ column;
+	++ it;
+}
+/*
+ * Collect a run of blanks into a single Space token.
+ *
+ * list   - token list the Space token is appended to
+ * line   - current line, stored in the token (not modified here)
+ * column - advanced by one per blank; the token keeps the starting column
+ * it/end - input cursor and end of input; it stops on the first non-blank
+ */
+void analyzeSpaces(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
+{
+	std::string value;
+	int save = column;
+	/* Rely on isSpace() so the lexer has one definition of a blank */
+	while (it != end && isSpace(*it)) {
+		++ column;
+		value += *it++;
+	}
+	list.push_back({ Token::Space, line, save, std::move(value) });
+}
+/*
+ * Lex a quoted word. The character under the cursor is the opening quote;
+ * everything up to the next identical quote becomes the value of one
+ * QuotedWord token (the quotes themselves are not part of the value).
+ *
+ * list   - token list the QuotedWord token is appended to
+ * line   - current line, stored in the token and in errors (not modified,
+ *          even when the quoted text contains '\n')
+ * column - advanced for every consumed character, quotes included, so the
+ *          tokens that follow on the same line get the right column
+ * it/end - input cursor and end of input; it ends just after the closing quote
+ *
+ * Throws Error when the input ends before the closing quote.
+ */
+void analyzeQuotedWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
+{
+	std::string value;
+	int save = column;
+	char quote = *it++;
+	/* The opening quote occupies a column as well */
+	++ column;
+	while (it != end && *it != quote) {
+		// TODO: escape sequence
+		++ column;
+		value += *it++;
+	}
+	if (it == end) {
+		throw Error{line, column, "undisclosed '" + std::string(1, quote) + "', got <EOF>"};
+	}
+	/* Remove quote */
+	++ it;
+	++ column;
+	list.push_back({ Token::QuotedWord, line, save, std::move(value) });
+}
+/*
+ * Lex a bare word: characters are consumed until a whitespace character,
+ * a reserved character (see isReserved) or the end of input is reached.
+ *
+ * list   - token list the Word token is appended to
+ * line   - current line, stored in the token (not modified here)
+ * column - advanced by one per consumed character; the token keeps the
+ *          starting column
+ * it/end - input cursor and end of input
+ *
+ * Nothing is consumed (and an empty Word is appended) when the cursor
+ * already sits on a whitespace or reserved character.
+ */
+void analyzeWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
+{
+	std::string value;
+	int save = column;
+	/* std::isspace requires a value representable as unsigned char */
+	while (it != end && !std::isspace(static_cast<unsigned char>(*it)) && !isReserved(*it)) {
+		++ column;
+		value += *it++;
+	}
+	list.push_back({ Token::Word, line, save, std::move(value) });
+}
+/*
+ * Lex an "@include" keyword. The character under the cursor is '@'; the
+ * characters that follow, up to the first blank, newline or end of input,
+ * must spell "include".
+ *
+ * list   - token list the Include token is appended to
+ * line   - current line, stored in the token and in errors (not modified)
+ * column - advanced for '@' and for every keyword character; the token
+ *          keeps the column of '@'
+ * it/end - input cursor and end of input; it stops right after the keyword
+ *
+ * Throws Error when the text after '@' is not exactly "include".
+ */
+void analyzeInclude(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
+{
+	std::string include;
+	int save = column;
+	/* Skip '@', which occupies a column of its own */
+	++ it;
+	++ column;
+	/* Read include; stop at the end of the line so following lines are not swallowed */
+	while (it != end && !isSpace(*it) && *it != '\n') {
+		++ column;
+		include += *it++;
+	}
+	if (include != "include") {
+		throw Error{line, column, "expected include after '@' token"};
+	}
+	list.push_back({ Token::Include, line, save });
+}
+/*
+ * Lexical analysis: turn the characters in [it, end) into a token list.
+ *
+ * Each iteration looks at the character under the cursor and delegates to
+ * the matching analyze* helper, which consumes the characters of one token
+ * and keeps line (starting at 1) and column (starting at 0) up to date.
+ *
+ * Returns the tokens in input order.
+ * Throws Error on a ']' that does not close a section, plus whatever the
+ * helpers throw.
+ */
+Tokens analyze(std::istreambuf_iterator<char> &it, std::istreambuf_iterator<char> end)
+{
+	Tokens list;
+	int line = 1;
+	int column = 0;
+	while (it != end) {
+		const char c = *it;
+		if (c == '\n') {
+			analyzeLine(list, line, column, it);
+		} else if (c == '#') {
+			analyzeComment(list, line, column, it, end);
+		} else if (c == '[') {
+			analyzeSection(list, line, column, it, end);
+		} else if (c == '=') {
+			analyzeAssign(list, line, column, it);
+		} else if (isSpace(c)) {
+			analyzeSpaces(list, line, column, it, end);
+		} else if (c == '@') {
+			analyzeInclude(list, line, column, it, end);
+		} else if (isQuote(c)) {
+			analyzeQuotedWord(list, line, column, it, end);
+		} else if (c == ']') {
+			/*
+			 * analyzeWord stops on reserved characters without consuming
+			 * them, so a stray ']' would keep this loop spinning forever.
+			 */
+			throw Error{line, column, "unexpected ']'"};
+		} else if (std::isspace(static_cast<unsigned char>(c))) {
+			/*
+			 * '\r', '\v' and '\f' are not blanks for isSpace() but
+			 * analyzeWord refuses them too; skip them to make progress.
+			 */
+			++ it;
+			++ column;
+		} else {
+			analyzeWord(list, line, column, it, end);
+		}
+	}
+	return list;
+}
+/*
+ * Move it past every consecutive Space token; it stops on the first token
+ * of another type, or on end when only Space tokens remain.
+ */
+void parseSpaces(TokenIterator &it, TokenIterator end)
+{
+	for (; it != end; ++ it) {
+		if (it->type() != Token::Space) {
+			return;
+		}
+	}
+}
+/*
+ * Parse one "key = value" option and append it to sc.
+ *
+ * On entry it designates the key token. Space tokens around '=' are
+ * skipped. The value is the Word or QuotedWord that follows; when there is
+ * none the option is stored with an empty value.
+ *
+ * Throws Error when the key is not followed by an Assign token (or by
+ * anything at all).
+ */
+void parseOption(Section &sc, TokenIterator &it, TokenIterator end)
+{
+	const TokenIterator keyToken = it;
+	std::string key = keyToken->value();
+	/* Optional spaces before '=' */
+	++ it;
+	parseSpaces(it, end);
+	/* No '=' or something else? */
+	if (it == end) {
+		throw Error{keyToken->line(), keyToken->column(), "expected '=' assignment, got <EOF>"};
+	}
+	if (it->type() != Token::Assign) {
+		throw Error{it->line(), it->column(), "expected '=' assignment, got " + it->value()};
+	}
+	/* Optional spaces after '=' */
+	++ it;
+	parseSpaces(it, end);
+	/* Empty options are allowed so just test for words */
+	std::string value;
+	const bool hasValue = it != end && (it->type() == Token::Word || it->type() == Token::QuotedWord);
+	if (hasValue) {
+		value = it->value();
+		++ it;
+	}
+	sc.emplace_back(std::move(key), std::move(value));
+}
+/*
+ * Parse an '@include' statement and merge the included file into doc.
+ *
+ * On entry it designates the Include token. The next non-Space token must
+ * be a Word or QuotedWord holding the file name. A relative name is
+ * resolved against doc.path(); every section of the included document is
+ * then appended to doc.
+ *
+ * Throws Error when the file name is missing or is not a word, or when
+ * doc.path() is empty (reported as a buffer document). Constructing the
+ * child Document may throw as well.
+ */
+void parseInclude(Document &doc, TokenIterator &it, TokenIterator end)
+{
+	TokenIterator save = it;
+	/* Get file name */
+	++ it;
+	parseSpaces(it, end);
+	/* Trailing blanks may have consumed everything: never dereference end */
+	if (it == end) {
+		throw Error{save->line(), save->column(), "expected file name after '@include' statement, got <EOF>"};
+	}
+	if (it->type() != Token::Word && it->type() != Token::QuotedWord) {
+		throw Error{it->line(), it->column(), "expected file name after '@include' statement, got " + it->value()};
+	}
+	if (doc.path().empty()) {
+		throw Error{it->line(), it->column(), "'@include' statement invalid with buffer documents"};
+	}
+	std::string value = (it++)->value();
+	std::string file;
+	if (!isAbsolute(value)) {
+#if defined(_WIN32)
+		file = doc.path() + "\\" + value;
+#else
+		file = doc.path() + "/" + value;
+#endif
+	} else {
+		file = value;
+	}
+	Document child{File{file}};
+	for (const auto &sc : child) {
+		doc.push_back(sc);
+	}
+}
+void parseSection(Document &doc, TokenIterator &it, TokenIterator end)
+{
+	Section sc{it->value()};
+	/* Skip [section] */
+	++ it;
+	/* Read until next section */
+	while (it != end && it->type() != Token::Section) {
+		switch (it->type()) {
+		case Token::Line:
+		case Token::Comment:
+		case Token::Space:
+			it ++;
+			continue;
 		default:
 			break;
 		}
-		return "";
+		if (it->type() != Token::Word) {
-	}
+			throw Error{it->line(), it->column(), "unexpected token '" + it->value() + "' in section definition"};
+		}
-	inline std::string toString() const
-	{
+		parseOption(sc, it, end);
-		switch (m_type) {
+	}
-		case TokenType::Comment:
-			return "'#'";
+	doc.push_back(std::move(sc));
-		case TokenType::SectionBegin:
+}
-			return "'['";
-		case TokenType::SectionEnd:
+void parse(Document &doc, const Tokens &tokens)
-			return "']'";
+{
-		case TokenType::QuoteSimple:
+	TokenIterator it = tokens.cbegin();
-			return "'";
+	TokenIterator end = tokens.cend();
-		case TokenType::QuoteDouble:
-			return "\"";
+	while (it != end) {
-		case TokenType::NewLine:
+		/* Just ignore this */
-			return "<newline>";
+		switch (it->type()) {
-		case TokenType::Assign:
+		case Token::Include:
-			return "=";
+			parseInclude(doc, it, end);
-		case TokenType::Include:
+			break;
-			return "@";
+		case Token::Section:
-		case TokenType::Space:
+			parseSection(doc, it, end);
-			return "<blank>";
+			break;
-		case TokenType::Word:
+		case Token::Comment:
-			return "`" + m_value + "'";
+		case Token::Line:
+		case Token::Space:
+			++ it;
+			break;
 		default:
-			break;
+			throw Error{it->line(), it->column(), "unexpected '" + it->value() + "' on root document"};
 		}
+	}
-		return "";
+}
-	}
-};
-using TokenStack = std::vector<Token>;
-/* --------------------------------------------------------
-* Builder
-* -------------------------------------------------------- */
-class Builder {
-private:
-	std::string m_path;
-	std::string m_base;
-	Document &m_ini;
-private:
-	inline bool isReserved(char c) const noexcept
-	{
-		return c == '\n' || c == '#' || c == '"' || c == '\'' || c == '=' || c == '[' || c == ']' || c == '@';
-	}
-	std::string base(std::string path)
-	{
-		auto pos = path.find_last_of("/\\");
-		if (pos != std::string::npos) {
-			path.erase(pos);
-		} else {
-			path = ".";
-		}
-		return path;
-	}
-#if defined(_WIN32)
-	bool isAbsolute(const std::string &path)
-	{
-		return !PathIsRelative(path.c_str());
-	}
-#else
-	bool isAbsolute(const std::string &path)
-	{
-		return path.size() > 0 && path[0] == '/';
-	}
-#endif
-	std::vector<Token> analyze(std::istream &stream) const
-	{
-		std::istreambuf_iterator<char> it(stream);
-		std::istreambuf_iterator<char> end;
-		std::vector<Token> tokens;
-		int lineno{1};
-		int position{0};
-		while (it != end) {
-			std::string value;
-			if (isReserved(*it)) {
-				while (it != end && isReserved(*it)) {
-					// Single character tokens
-					switch (*it) {
-					case '#':
-						/* Skip comments */
-						while (it != end && *it != '\n') {
-							++ it;
-						}
-						tokens.push_back({ TokenType::Comment, lineno, position });
-						position = 0;
-						break;
-					case '\n':
-						++lineno;
-						position = 0;
-					case '[':
-					case ']':
-					case '\'':
-					case '"':
-					case '=':
-					case '@':
-						tokens.push_back({ static_cast<TokenType>(*it), lineno, position });
-						++it;
-						++position;
-					default:
-						break;
-					}
-				}
-			} else if (std::isspace(*it)) {
-				while (it != end && std::isspace(*it) && *it != '\n') {
-					value.push_back(*it++);
-					++position;
-				}
-				tokens.push_back({ TokenType::Space, lineno, position, std::move(value) });
-			} else {
-				while (it != end && !std::isspace(*it) && !isReserved(*it)) {
-					value.push_back(*it++);
-					++position;
-				}
-				tokens.push_back({ TokenType::Word, lineno, position, std::move(value) });
-			}
-		}
-		return tokens;
-	}
-	void readComment(TokenStack::iterator &it, TokenStack::iterator)
-	{
-		++ it;
-	}
-	void readSpace(TokenStack::iterator &it, TokenStack::iterator end)
-	{
-		while (it != end && it->type() == TokenType::Space) {
-			++ it;
-		}
-	}
-	void readNewLine(TokenStack::iterator &it, TokenStack::iterator end)
-	{
-		while (it != end && it->type() == TokenType::NewLine) {
-			++ it;
-		}
-	}
-	Section readSection(TokenStack::iterator &it, TokenStack::iterator end)
-	{
-		// Empty [
-		if (++it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "section declaration expected, got <EOF>"};
-		}
-		// Get the section name
-		if (it->type() != TokenType::Word) {
-			throw Error{it->line(), it->position(), "word expected after [, got " + it->toString()};
-		}
-		Section section(it->value());
-		// [unterminated
-		if (++it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "unterminated section"};
-		}
-		// Check if terminated
-		if (it->type() != TokenType::SectionEnd) {
-			throw Error{it->line(), it->position(), "] expected, got " + it->toString()};
-		}
-		// Remove ]
-		++ it;
-		if (it == end) {
-			return section;
-		}
-		// Now read all that is allowed to be in a section
-		while (it != end && it->type() != TokenType::SectionBegin) {
-			if (it->type() == TokenType::Space) {
-				readSpace(it, end);
-			} else if (it->type() == TokenType::NewLine) {
-				readNewLine(it, end);
-			} else if (it->type() == TokenType::Comment) {
-				readComment(it, end);
-			} else if (it->type() == TokenType::Word) {
-				section.push_back(readOption(it, end));
-			} else {
-				throw Error{it->line(), it->position(), "unexpected token " + it->toString()};
-			}
-		}
-		return section;
-	}
-	Option readOption(TokenStack::iterator &it, TokenStack::iterator end)
-	{
-		std::string key = it++->value();
-		readSpace(it, end);
-		if (it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "expected '=' after option declaration, got <EOF>"};
-		}
-		if (it->type() != TokenType::Assign) {
-			++ it;
-			throw Error{it[-1].line(), it[-1].position(), "expected '=' after option declaration, got " + it[-1].toString()};
-		}
-		readSpace(++it, end);
-		std::ostringstream oss;
-		if (it->type() == TokenType::QuoteSimple || it->type() == TokenType::QuoteDouble) {
-			TokenStack::iterator save = it++;
-			while (it != end && it->type() != save->type()) {
-				oss << it++->value();
-			}
-			if (it == end) {
-				throw Error{save->line(), save->position(), "undisclosed quote: " + save->toString() + " expected"};
-			}
-			++ it;
-		} else if (it->type() == TokenType::Word) {
-			oss << it++->value();
-		} else if (it->type() != TokenType::NewLine && it->type() != TokenType::Comment) {
-			// No value requested, must be NewLine or comment
-			throw Error{it->line(), it->position(), "expected option value after '=', got " + it->toString()};
-		}
-		return Option{std::move(key), oss.str()};
-	}
-	void readInclude(TokenStack::iterator &it, TokenStack::iterator end)
-	{
-		if (++it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "expected `include` after '@' token, got <EOF>"};
-		}
-		if (it->type() != TokenType::Word && it->value() != "include") {
-			throw Error{it->line(), it->position(), "expected `include' after '@' token, got " + it->toString()};
-		}
-		readSpace(++it, end);
-		TokenStack::iterator save = it;
-		if (it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "expected filename after @include statement, got <EOF>"};
-		}
-		// First quote
-		if (it->type() != TokenType::QuoteSimple && it->type() != TokenType::QuoteDouble) {
-			throw Error{it->line(), it->position(), "expected filename after @include statement"};
-		}
-		// Filename
-		if (++it == end) {
-			throw Error{it[-1].line(), it[-1].position(), "expected filename after @include statement, got <EOF>"};
-		}
-		if (it->type() != TokenType::Word) {
-			throw Error{it->line(), it->position(), "expected filename after @include statement"};
-		}
-		std::string value = it->value();
-		std::string fullpath;
-		if (isAbsolute(value)) {
-			fullpath = value;
-		} else {
-			fullpath = m_base + "/" + it->value();
-		}
-		// Must be closed with the same quote
-		if (++it == end) {
-			throw Error{save->line(), save->position(), "undiclosed quote: " + save->toString() + " expected, got <EOF>"};
-		}
-		if (it->type() != save->type()) {
-			throw Error{save->line(), save->position(), "undiclosed quote: " + save->toString() + " expected"};
-		}
-		// Remove quote
-		++ it;
-		Builder(m_ini, fullpath);
-	}
-public:
-	Builder(Document &ini, std::string path)
-		: m_path(path)
-		, m_base(base(std::move(path)))
-		, m_ini(ini)
-	{
-		std::ifstream file(m_path);
-		if (!file.is_open())
-			throw std::runtime_error(std::strerror(errno));
-		std::vector<Token> ts = analyze(file);
-		auto it = ts.begin();
-		auto end = ts.end();
-		while (it != end) {
-			if (it->type() == TokenType::Space) {
-				readSpace(it, end);
-			} else if (it->type() == TokenType::NewLine) {
-				readNewLine(it, end);
-			} else if (it->type() == TokenType::Comment) {
-				readComment(it, end);
-			} else if (it->type() == TokenType::Include) {
-				readInclude(it, end);
-			} else if (it->type() == TokenType::SectionBegin) {
-				m_ini.push_back(readSection(it, end));
-			} else {
-				throw Error(it->line(), it->position(), "unexpected " + it->toString() + " on root document");
-			}
-		}
-	}
-};
 } // !namespace
-/* --------------------------------------------------------
+namespace ini {
-* Document
-* -------------------------------------------------------- */
+/*
+ * Tokenize the file designated by file.path using the file-local
+ * ::analyze().
+ *
+ * Throws std::runtime_error when the file cannot be opened for reading,
+ * plus whatever ::analyze() throws.
+ */
+Tokens Document::analyze(const File &file)
+{
-Document::Document(const std::string &path)
+	/* Input-only stream: std::fstream also asks for write access by default */
+	std::ifstream stream{file.path};
-{
+	if (!stream.is_open()) {
+		throw std::runtime_error{std::string("could not open ") + file.path};
+	}
+	std::istreambuf_iterator<char> it{stream};
-	Builder(*this, path);
+	std::istreambuf_iterator<char> end{};
+	return ::analyze(it, end);
+}
+/*
+ * Tokenize an in-memory document: buffer.text is wrapped in a string
+ * stream and handed to the file-local ::analyze().
+ */
+Tokens Document::analyze(const Buffer &buffer)
+{
+	std::istringstream input{buffer.text};
+	std::istreambuf_iterator<char> cursor{input};
+	return ::analyze(cursor, std::istreambuf_iterator<char>{});
+}
+/*
+ * Build a document from a file.
+ *
+ * m_path is first set to file.path and then reduced to its directory part
+ * (everything before the last '/' or '\\', or "." when the path has no
+ * separator). The file is then tokenized and parsed into this document.
+ */
+Document::Document(const File &file)
+	: m_path{file.path}
+{
+	/* Update path */
+	auto pos = m_path.find_last_of("/\\");
+	if (pos == std::string::npos) {
+		m_path = ".";
+	} else {
+		/*
+		 * Keep the separator when it is the very first character (a file
+		 * located in the root directory), otherwise the directory would
+		 * become an empty string.
+		 */
+		m_path.erase(pos == 0 ? 1 : pos);
+	}
+	parse(*this, analyze(file));
+}
+/*
+ * Build a document from an in-memory buffer: the text is tokenized once
+ * and parsed into this document. m_path is left default-initialized.
+ */
+Document::Document(const Buffer &buffer)
+{
+	parse(*this, analyze(buffer));
+}
+/*
+ * Debugging aid: write every token to std::cout, one per line, formatted
+ * as "line:column: value".
+ */
+void Document::dump(const Tokens &tokens)
+{
+	// TODO: type
+	for (auto entry = tokens.begin(); entry != tokens.end(); ++ entry) {
+		std::cout << entry->line() << ":" << entry->column() << ": " << entry->value() << std::endl;
+	}
 }
 } // !ini

Mercurial > code

comparison C++/modules/Ini/Ini.cpp @ 427:aa9cc55338be