view C++/Ini.cpp @ 326:fb6c42173634

Ini: - Add support for empty values - Add tests about multi definition
author David Demelier <markand@malikania.fr>
date Tue, 03 Mar 2015 19:43:23 +0100
parents d52a69f9f029
children 78e8f9a3b233
line wrap: on
line source

/*
 * Ini.cpp -- .ini file parsing
 *
 * Copyright (c) 2013, 2014 David Demelier <markand@malikania.fr>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <cctype>
#include <iostream>
#include <iterator>
#include <memory>
#include <ostream>
#include <sstream>
#include <utility>
#include <vector>

#include "Ini.h"

namespace {

/* --------------------------------------------------------
 * Tokens
 * -------------------------------------------------------- */

/*
 * Kind of a lexical token.
 *
 * Every punctuation token uses the character itself as its enumerator
 * value, which lets the lexer convert a reserved character to its
 * TokenType with a plain static_cast.
 *
 * Word and Space have no explicit value: they take the values that follow
 * Include, so their relative order after Include must be kept.
 */
enum class TokenType {
	Comment = '#',
	SectionBegin = '[',
	SectionEnd = ']',
	Escape = '\\',		// declared, but not a reserved character for the lexer in this file
	QuoteSimple = '\'',
	QuoteDouble = '"',
	NewLine = '\n',
	Assign = '=',
	Include = '@',
	Word,			// run of characters that are neither whitespace nor reserved
	Space			// run of whitespace other than '\n'
};

/*
 * Write the symbolic name of a token type (e.g. "SectionBegin") to a stream.
 *
 * A value that does not match any enumerator writes nothing at all.
 *
 * Returns the stream to allow chaining.
 */
std::ostream &operator<<(std::ostream &out, const TokenType &type)
{
	const char *label = nullptr;

	switch (type) {
	case TokenType::Comment:
		label = "Comment";
		break;
	case TokenType::SectionBegin:
		label = "SectionBegin";
		break;
	case TokenType::SectionEnd:
		label = "SectionEnd";
		break;
	case TokenType::Escape:
		label = "Escape";
		break;
	case TokenType::QuoteSimple:
		label = "QuoteSimple";
		break;
	case TokenType::QuoteDouble:
		label = "QuoteDouble";
		break;
	case TokenType::NewLine:
		label = "NewLine";
		break;
	case TokenType::Assign:
		label = "Assign";
		break;
	case TokenType::Include:
		label = "Include";
		break;
	case TokenType::Word:
		label = "Word";
		break;
	case TokenType::Space:
		label = "Space";
		break;
	default:
		break;
	}

	// Unknown values must not touch the stream, not even with an empty string.
	if (label != nullptr)
		out << label;

	return out;
}

/*
 * A single lexical token: its type, the line and position recorded by the
 * lexer and, for Word and Space tokens, the text that was read.
 */
class Token {
private:
	TokenType m_type;
	int m_line;
	int m_position;
	std::string m_value;

public:
	/*
	 * Build a token.
	 *
	 * type     - the token kind
	 * line     - line recorded for the token
	 * position - position recorded for the token
	 * value    - the text, only meaningful for Word and Space
	 */
	inline Token(TokenType type, int line, int position, std::string value = "")
		: m_type(type), m_line(line), m_position(position), m_value(std::move(value))
	{
	}

	/* Token kind. */
	inline TokenType type() const noexcept
	{
		return m_type;
	}

	/* Line recorded for the token. */
	inline int line() const noexcept
	{
		return m_line;
	}

	/* Position recorded for the token. */
	inline int position() const noexcept
	{
		return m_position;
	}

	/*
	 * Raw text of the token as it contributes to an option value.
	 *
	 * Word and Space give the stored text, punctuation tokens give their
	 * character, anything else (including Escape) gives an empty string.
	 */
	inline std::string value() const
	{
		switch (m_type) {
		case TokenType::Word:
		case TokenType::Space:
			return m_value;
		case TokenType::Comment:
		case TokenType::SectionBegin:
		case TokenType::SectionEnd:
		case TokenType::QuoteSimple:
		case TokenType::QuoteDouble:
		case TokenType::NewLine:
		case TokenType::Assign:
		case TokenType::Include:
			// The enumerator value of a punctuation token is its character.
			return std::string(1, static_cast<char>(m_type));
		default:
			return "";
		}
	}

	/*
	 * Human readable form of the token, used in diagnostics.
	 *
	 * Returns an empty string for types without a representation.
	 */
	inline std::string toString() const
	{
		if (m_type == TokenType::Word)
			return "`" + m_value + "'";
		if (m_type == TokenType::Space)
			return "<blank>";
		if (m_type == TokenType::NewLine)
			return "<newline>";

		switch (m_type) {
		case TokenType::Comment:
		case TokenType::SectionBegin:
		case TokenType::SectionEnd:
			// These three are shown between single quotes.
			return std::string("'") + static_cast<char>(m_type) + "'";
		case TokenType::QuoteSimple:
		case TokenType::QuoteDouble:
		case TokenType::Assign:
		case TokenType::Include:
			// These are shown as the bare character.
			return std::string(1, static_cast<char>(m_type));
		default:
			return "";
		}
	}
};

std::ostream &operator<<(std::ostream &out, const Token &token)
{
	out << token.type();

	if (token.type() == TokenType::Space)
		out << ": size = " << token.value().size();
	else if (token.type() == TokenType::Word)
		out << ": value = [" << token.value() << "]";

	return out;
}

/* Sequence of tokens; the parser functions walk it through TokenStack::iterator. */
using TokenStack = std::vector<Token>;

/* --------------------------------------------------------
 * IniBuilder
 * -------------------------------------------------------- */

/*
 * Parse error carrying the location of the offending token.
 *
 * The line and offset are copied from the token given to the constructor,
 * so the token does not need to outlive the exception.
 */
class Error : public std::exception {
private:
	int m_line;
	int m_offset;
	std::string m_error;

public:
	/*
	 * Build the error.
	 *
	 * token - the token where the problem was detected
	 * error - the message returned by what()
	 */
	inline Error(const Token &token, std::string error)
		: m_line(token.line())
		, m_offset(token.position())
		, m_error(std::move(error))	// by-value sink parameter: move, do not copy again
	{
	}

	/* Line of the token given at construction. */
	int line() const noexcept
	{
		return m_line;
	}

	/* Position of the token given at construction. */
	int offset() const noexcept
	{
		return m_offset;
	}

	/* The error message; the pointer is valid as long as this object lives. */
	const char *what() const noexcept override
	{
		return m_error.c_str();
	}
};

class IniBuilder {
private:
	inline bool isReserved(char c) const noexcept
	{
		return c == '\n' || c == '#' || c == '"' || c == '\'' || c == '=' || c == '[' || c == ']' || c == '@';
	}

	std::vector<Token> analyze(std::istream &stream) const
	{
		std::istreambuf_iterator<char> it(stream);
		std::istreambuf_iterator<char> end;
		std::vector<Token> tokens;

		int lineno{1};
		int position{0};

		while (it != end) {
			std::string value;

			if (isReserved(*it)) {
				while (it != end && isReserved(*it)) {
					// Single character tokens
					switch (*it) {
					case '\n':
						++lineno;
						position = 0;
					case '#':
					case '[':
					case ']':
					case '\'':
					case '"':
					case '=':
					case '@':
						tokens.push_back({ static_cast<TokenType>(*it), lineno, position });
						++it;
						++position;
					default:
						break;
					}
				}
			} else if (std::isspace(*it)) {
				while (it != end && std::isspace(*it) && *it != '\n')
					value.push_back(*it++);

				tokens.push_back({ TokenType::Space, lineno, position, std::move(value) });
			} else {
				while (it != end && !std::isspace(*it) && !isReserved(*it))
					value.push_back(*it++);

				tokens.push_back({ TokenType::Word, lineno, position, std::move(value) });
			}
		}

		return tokens;
	}

	void readComment(TokenStack::iterator &it, TokenStack::iterator end)
	{
		while (it != end && it->type() != TokenType::NewLine)
			++ it;

		// remove new line
		++ it;
	}

	void readSpace(TokenStack::iterator &it, TokenStack::iterator end)
	{
		while (it != end && it->type() == TokenType::Space)	
			++ it;
	}

	void readNewLine(TokenStack::iterator &it, TokenStack::iterator end)
	{
		while (it != end && it->type() == TokenType::NewLine)
			++ it;
	}

	IniSection readSection(TokenStack::iterator &it, TokenStack::iterator end)
	{
		if (++it == end || it->type() != TokenType::Word)
			throw Error(*it, "word expected after [, got " + it->toString());

		IniSection section(it->value());

		if (++it == end || it->type() != TokenType::SectionEnd)
			throw Error(*it, "] expected, got " + it->toString());

		// Remove ]
		++ it;

		if (it == end)
			return section;

		while (it != end && it->type() != TokenType::SectionBegin) {
			if (it->type() == TokenType::Space)
				readSpace(it, end);
			else if (it->type() == TokenType::NewLine)
				readNewLine(it, end);
			else if (it->type() == TokenType::Comment)
				readComment(it, end);
			else if (it->type() == TokenType::Word)
				section.push_back(readOption(it, end));
			else {
				throw Error(*it, "unexpected token " + it->toString());
			}
		}

		return section;
	}

	IniOption readOption(TokenStack::iterator &it, TokenStack::iterator end)
	{
		std::string key = it->value();

		if (++it == end) {
			throw Error(*it, "expected '=' after option declaration, got <EOF>");
		}

		readSpace(it, end);

		if (it == end || it++->type() != TokenType::Assign) {
			throw Error(*it, "expected '=' after option declaration, got " + it->toString());
		}

		readSpace(it, end);

		std::ostringstream oss;

		if (it->type() == TokenType::QuoteSimple || it->type() == TokenType::QuoteDouble) {
			TokenStack::iterator save = it++;
	
			while (it != end && it->type() != save->type()) {
				oss << it++->value();
			}

			if (it == end)
				throw Error(*save, "undisclosed quote: " + save->toString() + " expected");

			++ it;
		} else if (it->type() == TokenType::Word) {
			oss << it++->value();
		} else if (it->type() != TokenType::NewLine && it->type() != TokenType::Comment) {
			// No value requested, must be NewLine or comment
			throw Error(*it, "expected option value after '=', got " + it->toString());
		}
		

		return IniOption(std::move(key), oss.str());
	}

public:
	IniBuilder(Ini &ini, std::istream &stream)
	{
		std::vector<Token> ts = analyze(stream);

		auto it = ts.begin();
		auto end = ts.end();

		try {
			while (it != end) {
				if (it->type() == TokenType::Space)
					readSpace(it, end);
				else if (it->type() == TokenType::NewLine)
					readNewLine(it, end);
				else if (it->type() == TokenType::Comment)
					readComment(it, end);
				else if (it->type() == TokenType::SectionBegin)
					ini.push_back(readSection(it, end));
				else
					throw Error(*it, "unexpected " + it->toString() + " on root document");
			}
		} catch (const Error &ex) {
			std::cerr << ex.line() << ":" << ex.offset() << ":" << ex.what() << std::endl;
		}
	}
};

} // !namespace

/* --------------------------------------------------------
 * Ini
 * -------------------------------------------------------- */

/*
 * Build the document from a stream.
 *
 * The parsing is delegated to IniBuilder, which does all of its work in
 * its constructor and fills this object.
 */
Ini::Ini(std::istream &stream)
{
	const IniBuilder parser{*this, stream};

	// Only the construction matters, the object itself is not used.
	static_cast<void>(parser);
}