view C++/modules/Ini/Ini.cpp @ 427:aa9cc55338be

Ini: rewrite the parse/analyze process
author David Demelier <markand@malikania.fr>
date Wed, 14 Oct 2015 15:08:45 +0200
parents cee5c74c1c83
children 31bddece9860
line wrap: on
line source

/*
 * Ini.cpp -- .ini file parsing
 *
 * Copyright (c) 2013-2015 David Demelier <markand@malikania.fr>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <cctype>
#include <iostream>
#include <iterator>
#include <fstream>
#include <sstream>
#include <stdexcept>

#if defined(_WIN32)
#  include <Shlwapi.h>	// for PathIsRelative
#endif

#include "Ini.h"

namespace {

/*
 * The helpers in this anonymous namespace use the ini names (Token, Tokens,
 * Error, Section, Document, ...) unqualified.
 */
using namespace ini;

/* Character input iterator consumed by the analyze* (lexer) functions. */
using Iterator = std::istreambuf_iterator<char>;
/* Read-only position in a token vector, consumed by the parse* functions. */
using TokenIterator = std::vector<Token>::const_iterator;

/*
 * Tell whether the path is absolute.
 *
 * On Windows the decision is delegated to PathIsRelative, elsewhere a path is
 * absolute when it is not empty and starts with '/'.
 */
inline bool isAbsolute(const std::string &path) noexcept
{
#if defined(_WIN32)
	return !PathIsRelative(path.c_str());
#else
	if (path.empty()) {
		return false;
	}

	return path.front() == '/';
#endif
}

/* Tell whether the character opens/closes a quoted word (' or "). */
inline bool isQuote(char c) noexcept
{
	switch (c) {
	case '\'':
	case '"':
		return true;
	default:
		return false;
	}
}

/*
 * Tell whether the character is a horizontal blank (space or tab).
 *
 * std::isspace is not used here because it also accepts '\n', which must
 * remain a distinct character for the lexer.
 */
inline bool isSpace(char c) noexcept
{
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}

/*
 * Tell whether the character has a special meaning for the lexer and therefore
 * cannot appear in an unquoted word or in a section name.
 */
inline bool isReserved(char c) noexcept
{
	switch (c) {
	case '[':
	case ']':
	case '@':
	case '#':
	case '=':
	case '\'':
	case '"':
		return true;
	default:
		return false;
	}
}

/*
 * Emit a Line token for the current character and move to the next line.
 *
 * The token records the line/column where the line break was found; afterwards
 * the line counter is one higher, the column is back to 0 and the iterator is
 * advanced by one character.
 */
void analyzeLine(Tokens &list, int &line, int &column, Iterator &it)
{
	const int row = line++;

	list.push_back({ Token::Line, row, column });
	++ it;
	column = 0;
}

/*
 * Read a comment up to (but not including) the end of line or end of input
 * and emit it as a Comment token.
 *
 * The token value is the comment exactly as written, starting with the
 * character the iterator points to on entry (the '#'). The token column is the
 * column on entry; the column counter is advanced by one per consumed
 * character. The line counter is not modified.
 */
void analyzeComment(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	/*
	 * Start from an empty string: the loop below already appends the leading
	 * '#'. Brace-initializing with {1, *it} would select the initializer_list
	 * constructor and prepend a '\x01' byte plus a duplicated '#'.
	 */
	std::string value;
	const int save = column;

	while (it != end && *it != '\n') {
		++ column;
		value += *it++;
	}

	list.push_back({ Token::Comment, line, save, std::move(value) });
}

/*
 * Read a "[name]" section header and emit a Section token holding the name.
 *
 * On entry the iterator points to the opening bracket. The token column is
 * the column on entry; the column counter is advanced once per character of
 * the name (the brackets themselves are not counted).
 *
 * Throws Error if the input ends, a line break is met or a reserved character
 * is found before the closing ']'.
 */
void analyzeSection(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	const int start = column;
	std::string name;

	/* Step over the opening bracket */
	++ it;

	for (;;) {
		if (it == end) {
			throw Error{line, column, "section name expected after '[', got <EOF>"};
		}

		const char c = *it;

		if (c == ']') {
			break;
		}
		if (c == '\n') {
			throw Error{line, column, "section not terminated, missing ']'"};
		}
		if (isReserved(c)) {
			throw Error{line, column, "section name expected after '[', got '" + std::string(1, c) + "'"};
		}

		name.push_back(c);
		++ column;
		++ it;
	}

	/* Step over the closing bracket */
	++ it;

	list.push_back({ Token::Section, line, start, std::move(name) });
}

/*
 * Emit an Assign token for the current character and step past it.
 *
 * The token records the column on entry; the column counter and the iterator
 * are both advanced by one.
 */
void analyzeAssign(Tokens &list, int &line, int &column, Iterator &it)
{
	const int at = column++;

	list.push_back({ Token::Assign, line, at });
	++ it;
}

/*
 * Collect a run of spaces and tabs and emit it as a single Space token.
 *
 * The token value is the run itself and its column is the column on entry;
 * the column counter is advanced by one per consumed character.
 */
void analyzeSpaces(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	const int start = column;
	std::string blanks;

	for (; it != end; ++it) {
		const char c = *it;

		if (c != ' ' && c != '\t') {
			break;
		}

		blanks.push_back(c);
		++ column;
	}

	list.push_back({ Token::Space, line, start, std::move(blanks) });
}

/*
 * Read a quoted word and emit it as a QuotedWord token (quotes stripped).
 *
 * On entry the iterator points to the opening quote; everything up to the
 * next occurrence of that same quote character is taken verbatim, line breaks
 * included. The token column is the column on entry and the column counter is
 * advanced once per character between the quotes.
 *
 * Throws Error if the input ends before the closing quote.
 */
void analyzeQuotedWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	const int start = column;
	const char quote = *it;
	std::string text;

	/* Step over the opening quote */
	++ it;

	// TODO: escape sequence
	for (; it != end && *it != quote; ++it) {
		text.push_back(*it);
		++ column;
	}

	if (it == end) {
		throw Error{line, column, "undisclosed '" + std::string(1, quote) + "', got <EOF>"};
	}

	/* Step over the closing quote */
	++ it;

	list.push_back({ Token::QuotedWord, line, start, std::move(text) });
}

/*
 * Read an unquoted word and emit it as a Word token.
 *
 * Characters are consumed until the end of input, a whitespace character (as
 * defined by std::isspace) or a reserved character. The token column is the
 * column on entry; the column counter is advanced by one per consumed
 * character.
 *
 * If the very first character is already whitespace or reserved, nothing is
 * consumed and an empty Word token is emitted.
 */
void analyzeWord(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	std::string value;
	const int save = column;

	/*
	 * std::isspace requires a value representable as unsigned char (or EOF);
	 * passing a plain char is undefined for bytes >= 0x80 where char is
	 * signed, so convert explicitly.
	 */
	while (it != end && !std::isspace(static_cast<unsigned char>(*it)) && !isReserved(*it)) {
		++ column;
		value += *it++;
	}

	list.push_back({ Token::Word, line, save, std::move(value) });
}

/*
 * Read the keyword following '@' and emit an Include token.
 *
 * On entry the iterator points to the '@'. The keyword extends up to the next
 * space, tab, line break or end of input and must be exactly "include". The
 * token column is the column on entry; the column counter is advanced by one
 * per keyword character.
 *
 * Throws Error if the keyword is anything other than "include".
 */
void analyzeInclude(Tokens &list, int &line, int &column, Iterator &it, Iterator end)
{
	std::string include;
	const int save = column;

	/* Step over '@' */
	++ it;

	/*
	 * Stop at the end of line too: otherwise a bare "@include" at the end of
	 * a line would swallow the following lines into the keyword without
	 * updating the line counter.
	 */
	while (it != end && !isSpace(*it) && *it != '\n') {
		++ column;
		include += *it++;
	}

	if (include != "include") {
		throw Error{line, column, "expected include after '@' token"};
	}

	list.push_back({ Token::Include, line, save });
}

/*
 * Lexical analysis: turn the characters in [it, end) into a list of tokens.
 *
 * Line numbering starts at 1 and column numbering at 0. Each character is
 * dispatched on its value to the matching analyze* helper, which consumes the
 * characters of one token and appends it to the list.
 *
 * Every branch must consume at least one character. analyzeWord stops
 * immediately on reserved and whitespace characters, so the ones that no other
 * helper handles (a stray ']' and whitespace other than space, tab and line
 * feed, e.g. '\r') are dealt with here; without that the loop would never
 * terminate on such input.
 *
 * Throws Error on a stray ']' and whatever the analyze* helpers throw.
 */
Tokens analyze(std::istreambuf_iterator<char> &it, std::istreambuf_iterator<char> end)
{
	Tokens list;
	int line = 1;
	int column = 0;

	while (it != end) {
		const char c = *it;

		if (c == '\n') {
			analyzeLine(list, line, column, it);
		} else if (c == '#') {
			analyzeComment(list, line, column, it, end);
		} else if (c == '[') {
			analyzeSection(list, line, column, it, end);
		} else if (c == '=') {
			analyzeAssign(list, line, column, it);
		} else if (isSpace(c)) {
			analyzeSpaces(list, line, column, it, end);
		} else if (c == '@') {
			analyzeInclude(list, line, column, it, end);
		} else if (isQuote(c)) {
			analyzeQuotedWord(list, line, column, it, end);
		} else if (c == ']') {
			/* Closing bracket outside of a section header */
			throw Error{line, column, "unexpected ']'"};
		} else if (std::isspace(static_cast<unsigned char>(c))) {
			/* Remaining whitespace ('\r', '\v', '\f'): skip, no token */
			++ it;
			++ column;
		} else {
			analyzeWord(list, line, column, it, end);
		}
	}

	return list;
}

/*
 * Advance the iterator past any consecutive Space tokens.
 *
 * On return the iterator is either at the end or on the first token that is
 * not a Space.
 */
void parseSpaces(TokenIterator &it, TokenIterator end)
{
	for (; it != end; ++it) {
		if (it->type() != Token::Space) {
			break;
		}
	}
}

/*
 * Parse one "key = value" option and append it to the section.
 *
 * On entry the iterator points to the token holding the key. Spaces around
 * '=' are optional, and so is the value: when the token after '=' (and any
 * spaces) is not a Word or QuotedWord, the option is stored with an empty
 * value and that token is left for the caller.
 *
 * Throws Error if the tokens end before '=' or if something other than '='
 * follows the key.
 */
void parseOption(Section &sc, TokenIterator &it, TokenIterator end)
{
	const TokenIterator first = it;
	std::string key = first->value();

	/* Move to what should be the '=' token */
	++ it;
	parseSpaces(it, end);

	if (it == end) {
		throw Error{first->line(), first->column(), "expected '=' assignment, got <EOF>"};
	}
	if (it->type() != Token::Assign) {
		throw Error{it->line(), it->column(), "expected '=' assignment, got " + it->value()};
	}

	/* Move to what may be the value */
	++ it;
	parseSpaces(it, end);

	std::string value;
	const bool hasValue = it != end && (it->type() == Token::Word || it->type() == Token::QuotedWord);

	if (hasValue) {
		value = it->value();
		++ it;
	}

	sc.emplace_back(std::move(key), std::move(value));
}

/*
 * Parse an "@include file" statement and append every section of the included
 * file to the document.
 *
 * On entry the iterator points to the Include token. The file name must be a
 * Word or QuotedWord; a name that is not absolute is resolved against
 * doc.path().
 *
 * Throws Error if the file name is missing or is not a word, or if the
 * document has an empty path (buffer documents). Anything thrown while
 * loading the included document propagates.
 */
void parseInclude(Document &doc, TokenIterator &it, TokenIterator end)
{
	const TokenIterator save = it;

	/* Skip '@include' and the spaces before the file name */
	++ it;
	parseSpaces(it, end);

	/*
	 * Check for the end only after the spaces have been skipped: an
	 * "@include" followed by nothing but spaces would otherwise dereference
	 * the end iterator below.
	 */
	if (it == end) {
		throw Error{save->line(), save->column(), "expected file name after '@include' statement, got <EOF>"};
	}

	if (it->type() != Token::Word && it->type() != Token::QuotedWord) {
		throw Error{it->line(), it->column(), "expected file name after '@include' statement, got " + it->value()};
	}

	if (doc.path().empty()) {
		throw Error{it->line(), it->column(), "'@include' statement invalid with buffer documents"};
	}

	std::string value = (it++)->value();
	std::string file;

	if (!isAbsolute(value)) {
#if defined(_WIN32)
		file = doc.path() + "\\" + value;
#else
		file = doc.path() + "/" + value;
#endif
	} else {
		file = value;
	}

	Document child{File{file}};

	for (const auto &sc : child) {
		doc.push_back(sc);
	}
}

/*
 * Parse one section and append it to the document.
 *
 * On entry the iterator points to the Section token, whose value names the
 * section. Tokens are then consumed up to the next Section token or the end:
 * Line, Comment and Space tokens are skipped and every Word starts an option.
 *
 * Throws Error on any other token, plus whatever parseOption throws.
 */
void parseSection(Document &doc, TokenIterator &it, TokenIterator end)
{
	Section sc{it->value()};

	/* Step over the [section] token itself */
	++ it;

	while (it != end) {
		const auto type = it->type();

		/* The next section is left for the caller */
		if (type == Token::Section) {
			break;
		}

		if (type == Token::Line || type == Token::Comment || type == Token::Space) {
			++ it;
			continue;
		}

		if (type != Token::Word) {
			throw Error{it->line(), it->column(), "unexpected token '" + it->value() + "' in section definition"};
		}

		parseOption(sc, it, end);
	}

	doc.push_back(std::move(sc));
}

/*
 * Syntactic analysis: fill the document from a token list.
 *
 * At the root of a document only '@include' statements and sections are
 * meaningful; Comment, Line and Space tokens are skipped.
 *
 * Throws Error on any other token, plus whatever parseInclude and
 * parseSection throw.
 */
void parse(Document &doc, const Tokens &tokens)
{
	const TokenIterator end = tokens.cend();

	for (TokenIterator it = tokens.cbegin(); it != end; ) {
		const auto type = it->type();

		if (type == Token::Include) {
			parseInclude(doc, it, end);
		} else if (type == Token::Section) {
			parseSection(doc, it, end);
		} else if (type == Token::Comment || type == Token::Line || type == Token::Space) {
			/* Nothing to do with these at root level */
			++ it;
		} else {
			throw Error{it->line(), it->column(), "unexpected '" + it->value() + "' on root document"};
		}
	}
}

} // !namespace

namespace ini {

/*
 * Run the lexical analysis on the content of a file.
 *
 * The file is opened read-only, so files without write permission can be
 * analyzed as well.
 *
 * Throws std::runtime_error if the file cannot be opened, instead of silently
 * producing an empty token list, plus whatever the lexer throws.
 */
Tokens Document::analyze(const File &file)
{
	std::ifstream stream{file.path};

	if (!stream.is_open()) {
		throw std::runtime_error{"could not open file: " + std::string{file.path}};
	}

	std::istreambuf_iterator<char> it{stream};
	std::istreambuf_iterator<char> end{};

	return ::analyze(it, end);
}

/*
 * Run the lexical analysis on an in-memory text.
 *
 * The buffer text is read through a string stream and handed to the file-local
 * lexer; anything the lexer throws propagates.
 */
Tokens Document::analyze(const Buffer &buffer)
{
	std::istringstream input{buffer.text};
	std::istreambuf_iterator<char> first{input};

	return ::analyze(first, std::istreambuf_iterator<char>{});
}

/*
 * Build a document from a file.
 *
 * m_path is set to the directory part of the file path (everything before the
 * last '/' or '\'), or to "." when the path has no separator. A path whose
 * only separator is its first character leaves m_path empty. The file is then
 * analyzed and parsed into this document.
 */
Document::Document(const File &file)
	: m_path{file.path}
{
	/* Keep only the parent directory */
	const auto separator = m_path.find_last_of("/\\");

	if (separator == std::string::npos) {
		m_path = ".";
	} else {
		m_path.erase(separator);
	}

	parse(*this, analyze(file));
}

/*
 * Build a document from an in-memory text.
 *
 * The path is left empty. The buffer is analyzed once and parsed into this
 * document; nothing is written to standard output (use dump explicitly to
 * inspect the tokens).
 */
Document::Document(const Buffer &buffer)
{
	parse(*this, analyze(buffer));
}

void Document::dump(const Tokens &tokens)
{
	for (const Token &token: tokens) {
		// TODO: type
		std::cout << token.line() << ":" << token.column() << ": " << token.value() << std::endl;
	}
}

} // !ini