changeset 705:4b5dba257d81

Plugin links: brand new plugin, closes #872 @4h
author David Demelier <markand@malikania.fr>
date Fri, 06 Jul 2018 22:10:10 +0200
parents 13381b9b9215
children bd7feaa002cb
files CHANGES.md CMakeLists.txt irccd/main.cpp plugins/CMakeLists.txt plugins/links/links.cpp plugins/links/links.md
diffstat 6 files changed, 503 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGES.md	Fri Jul 06 21:01:15 2018 +0200
+++ b/CHANGES.md	Fri Jul 06 22:10:10 2018 +0200
@@ -41,7 +41,8 @@
 Plugins:
 
   - Introduce brand new joke plugin (#609),
-  - Introduce brand new tictactoe plugin (#393).
+  - Introduce brand new tictactoe plugin (#393),
+  - Introduce brand new links plugin (#872).
 
 irccd 2.2.0 2017-09-26
 ----------------------
--- a/CMakeLists.txt	Fri Jul 06 21:01:15 2018 +0200
+++ b/CMakeLists.txt	Fri Jul 06 22:10:10 2018 +0200
@@ -59,7 +59,7 @@
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${irccd_SOURCE_DIR}/cmake/packages)
 set(CMAKE_POSITION_INDEPENDENT_CODE On)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED On)
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS On)
 
--- a/irccd/main.cpp	Fri Jul 06 21:01:15 2018 +0200
+++ b/irccd/main.cpp	Fri Jul 06 22:10:10 2018 +0200
@@ -38,6 +38,7 @@
 #include <irccd/string_util.hpp>
 #include <irccd/system.hpp>
 
+#include <irccd/daemon/dynlib_plugin.hpp>
 #include <irccd/daemon/irccd.hpp>
 #include <irccd/daemon/logger.hpp>
 
@@ -254,6 +255,8 @@
     instance->plugins().add_loader(js_plugin_loader::defaults(*instance));
 #endif
 
+    instance->plugins().add_loader(std::make_unique<dynlib_plugin_loader>());
+
     try {
         instance->set_config(open(options));
         instance->load();
--- a/plugins/CMakeLists.txt	Fri Jul 06 21:01:15 2018 +0200
+++ b/plugins/CMakeLists.txt	Fri Jul 06 22:10:10 2018 +0200
@@ -19,7 +19,7 @@
 project(plugins)
 
 set(
-    IRCCD_PLUGINS
+    IRCCD_JAVASCRIPT_PLUGINS
     ask
     auth
     hangman
@@ -29,10 +29,21 @@
     plugin
     roulette
     tictactoe
+)
+
+set(
+    IRCCD_NATIVE_PLUGINS
+    links
+)
+
+set(
+    IRCCD_PLUGINS
+    ${IRCCD_JAVASCRIPT_PLUGINS}
+    ${IRCCD_NATIVE_PLUGINS}
     CACHE INTERNAL ""
 )
 
-foreach (plugin ${IRCCD_PLUGINS})
+foreach (plugin ${IRCCD_JAVASCRIPT_PLUGINS})
     irccd_define_plugin(
         NAME ${plugin}
         TYPE JS
@@ -40,3 +51,12 @@
         DOCS ${plugins_SOURCE_DIR}/${plugin}/${plugin}.md
     )
 endforeach ()
+
+foreach (plugin ${IRCCD_NATIVE_PLUGINS})
+    irccd_define_plugin(
+        NAME ${plugin}
+        TYPE NATIVE
+        SOURCES ${plugins_SOURCE_DIR}/${plugin}/${plugin}.cpp
+        DOCS ${plugins_SOURCE_DIR}/${plugin}/${plugin}.md
+    )
+endforeach ()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/links/links.cpp	Fri Jul 06 22:10:10 2018 +0200
@@ -0,0 +1,431 @@
+/*
+ * links.cpp -- links plugin
+ *
+ * Copyright (c) 2013-2018 David Demelier <markand@malikania.fr>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <memory>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <variant>
+
+#include <boost/algorithm/string/trim_all.hpp>
+
+#include <boost/dll.hpp>
+
+#include <boost/asio.hpp>
+#include <boost/asio/ssl.hpp>
+
+#include <boost/beast.hpp>
+
+#include <irccd/string_util.hpp>
+
+#include <irccd/daemon/irc.hpp>
+#include <irccd/daemon/irccd.hpp>
+#include <irccd/daemon/plugin.hpp>
+#include <irccd/daemon/server.hpp>
+
+using boost::asio::async_connect;
+using boost::asio::deadline_timer;
+using boost::asio::io_context;
+using boost::asio::ip::tcp;
+using boost::asio::ssl::context;
+using boost::asio::ssl::stream;
+using boost::asio::ssl::stream_base;
+
+using boost::beast::flat_buffer;
+using boost::beast::http::async_read;
+using boost::beast::http::async_write;
+using boost::beast::http::empty_body;
+using boost::beast::http::field;
+using boost::beast::http::request;
+using boost::beast::http::response;
+using boost::beast::http::string_body;
+using boost::beast::http::verb;
+using boost::beast::http::status;
+
+using boost::posix_time::seconds;
+using boost::system::error_code;
+
+using std::get;
+using std::enable_shared_from_this;
+using std::monostate;
+using std::move;
+using std::regex;
+using std::regex_match;
+using std::regex_search;
+using std::shared_ptr;
+using std::smatch;
+using std::string;
+using std::variant;
+
+namespace irccd {
+
+namespace {
+
+// {{{ globals
+
+// Plugin options, overridable through links_plugin::set_config.
+struct config {
+    // Delay in seconds armed on the requester timer before each network step.
+    static inline unsigned timeout = 30U;
+};
+
+// Message templates, overridable through links_plugin::set_formats.
+struct formats {
+    // Template of the message sent once a page title has been extracted.
+    static inline string info = "#{title}";
+};
+
+// }}}
+
+// {{{ url
+
+// Minimal decomposition of an http(s) link.
+struct url {
+    string protocol;    // "http" or "https"; empty when parsing failed
+    string host;        // everything between "://" and the first '/' or '?'
+    string path{"/"};   // remainder of the link, always starts with '/'
+
+    static auto parse(const string&) -> url;
+};
+
+/*
+ * Split a link into protocol, host and path.
+ *
+ * The whole string has to be a http:// or https:// link. When it is not, the
+ * returned object keeps an empty protocol and host and the default "/" path.
+ */
+auto url::parse(const string& link) -> url
+{
+    static const regex pattern("^(https?):\\/\\/([^\\/\\?]+)(.*)$");
+
+    url result;
+    smatch groups;
+
+    if (!regex_match(link, groups, pattern))
+        return result;
+
+    result.protocol = groups[1];
+    result.host = groups[2];
+
+    // Keep the default "/" when the link stops right after the host.
+    if (groups.length(3) > 0)
+        result.path = groups[3];
+
+    // A tail such as "?query" has no leading slash, add one.
+    if (result.path[0] != '/')
+        result.path.insert(result.path.begin(), '/');
+
+    return result;
+}
+
+// }}}
+
+// {{{ requester
+
+/*
+ * Asynchronous fetcher for a single link posted on a channel.
+ *
+ * Objects are only created through run(), which stores them in a shared_ptr;
+ * the member functions capture a copy of that pointer (shared_from_this) in
+ * every asynchronous handler they register.
+ */
+class requester : public enable_shared_from_this<requester> {
+private:
+    // Either nothing, a plain TCP socket or a TLS stream over a TCP socket.
+    using socket = variant<monostate, tcp::socket, stream<tcp::socket>>;
+
+    // Where the notification is sent and who posted the message.
+    shared_ptr<server> server_;
+    string channel_;
+    string origin_;
+
+    // State of the HTTP exchange.
+    url url_;
+    context ctx_{context::sslv23};
+    socket socket_;
+    flat_buffer buffer_;
+    request<empty_body> req_;
+    response<string_body> res_;
+    deadline_timer timer_;
+    tcp::resolver resolver_;
+
+    // Asynchronous steps, each one declared after its completion handler.
+    void notify(const string&);
+    void parse();
+    void handle_read(const error_code&);
+    void read();
+    void handle_write(const error_code&);
+    void write();
+    void handle_handshake(const error_code&);
+    void handshake();
+    void handle_connect(const error_code&);
+    void connect(const tcp::resolver::results_type&);
+    void handle_resolve(const error_code&, const tcp::resolver::results_type&);
+    void resolve();
+    void handle_timer(const error_code&);
+    void timer();
+    void start();
+
+    // Private because shared_from_this() requires shared_ptr ownership; use run().
+    requester(io_context&, shared_ptr<server>, string, string, url);
+
+public:
+    // Parse the event message as a link and, if it is one, start fetching it.
+    static void run(io_context&, const message_event&);
+};
+
+/*
+ * Format formats::info with the request keywords and the given title, then
+ * send the result to the channel the link came from.
+ */
+void requester::notify(const string& title)
+{
+    string_util::subst params;
+    auto& keywords = params.keywords;
+
+    keywords.emplace("channel", channel_);
+    keywords.emplace("nickname", irc::user::parse(origin_).nick());
+    keywords.emplace("origin", origin_);
+    keywords.emplace("server", server_->get_name());
+    keywords.emplace("title", title);
+
+    server_->message(channel_, format(formats::info, params));
+}
+
+/*
+ * Turn the raw content of a <title> element into a single line of text.
+ *
+ * Every run of spaces and control characters (bytes <= 0x20 and 0x7f) is
+ * folded into one space and leading/trailing runs are dropped. The text comes
+ * from a remote web page and ends up in an IRC message, so it must not carry
+ * line breaks.
+ */
+auto sanitize_title(const string& raw) -> string
+{
+    string result;
+    bool pending_space = false;
+
+    result.reserve(raw.size());
+
+    for (const char c : raw) {
+        const auto byte = static_cast<unsigned char>(c);
+
+        if (byte <= 0x20 || byte == 0x7f) {
+            // Only remember the separator once some text has been emitted.
+            pending_space = !result.empty();
+            continue;
+        }
+
+        if (pending_space) {
+            result.push_back(' ');
+            pending_space = false;
+        }
+
+        result.push_back(c);
+    }
+
+    return result;
+}
+
+/*
+ * Look for the page title in the response body and notify the channel when a
+ * non-empty one is found.
+ */
+void requester::parse()
+{
+    /*
+     * Use a regex because Boost's XML parser is strict and many web pages may
+     * have invalid or broken tags. HTML tag names are case insensitive.
+     */
+    static const regex pattern("<title>([^<]+)<\\/title>", regex::icase);
+
+    // Search the body in place: no copy and no truncation at an embedded NUL.
+    const string& body = res_.body();
+    smatch match;
+
+    if (!regex_search(body, match, pattern))
+        return;
+
+    const auto title = sanitize_title(match[1]);
+
+    if (!title.empty())
+        notify(title);
+}
+
+/*
+ * Completion handler of read().
+ *
+ * Stops the timer, gives up on error, follows a 301 redirection or otherwise
+ * extracts the title from the response.
+ */
+void requester::handle_read(const error_code& code)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    if (res_.result() != status::moved_permanently) {
+        parse();
+        return;
+    }
+
+    /*
+     * The field value is a string view: it is not null terminated, so the
+     * string must be built from its explicit length rather than from data()
+     * alone.
+     */
+    const auto location = res_[field::location];
+    const string target(location.data(), location.size());
+
+    // Trimming is kept as a defensive measure against stray whitespace.
+    auto next = url::parse(boost::algorithm::trim_all_copy(target));
+
+    // Missing, relative or non http(s) locations cannot be followed.
+    if (next.protocol.empty() || next.host.empty())
+        return;
+
+    url_ = move(next);
+
+    // Start the next exchange from an empty response and an empty buffer.
+    res_ = {};
+    buffer_.consume(buffer_.size());
+
+    // NOTE(review): the number of redirections is not bounded.
+    start();
+}
+
+/*
+ * Arm the timer and asynchronously read the HTTP response from whichever
+ * socket kind is active; does nothing more when no socket is set.
+ */
+void requester::read()
+{
+    const auto self = shared_from_this();
+    const auto on_read = [self] (auto code, auto) {
+        self->handle_read(code);
+    };
+
+    timer();
+
+    if (const auto kind = socket_.index(); kind == 1)
+        async_read(get<1>(socket_), buffer_, res_, on_read);
+    else if (kind == 2)
+        async_read(get<2>(socket_), buffer_, res_, on_read);
+}
+
+// Completion handler of write(): stop the timer and read the response on success.
+void requester::handle_write(const error_code& code)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    read();
+}
+
+/*
+ * Fill the HTTP/1.1 GET request for the current url, arm the timer and send
+ * the request asynchronously on whichever socket kind is active.
+ */
+void requester::write()
+{
+    req_.version(11);
+    req_.method(verb::get);
+    req_.target(url_.path);
+    req_.set(field::host, url_.host);
+    req_.set(field::user_agent, BOOST_BEAST_VERSION_STRING);
+
+    const auto self = shared_from_this();
+    const auto on_write = [self] (auto code, auto) {
+        self->handle_write(code);
+    };
+
+    timer();
+
+    if (const auto kind = socket_.index(); kind == 1)
+        async_write(get<1>(socket_), req_, on_write);
+    else if (kind == 2)
+        async_write(get<2>(socket_), req_, on_write);
+}
+
+// Completion handler of handshake(): stop the timer and send the request on success.
+void requester::handle_handshake(const error_code& code)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    write();
+}
+
+/*
+ * Arm the timer and perform the TLS client handshake when the socket is a TLS
+ * stream. A plain TCP socket has nothing to negotiate, so its handler is
+ * invoked immediately with a success code.
+ */
+void requester::handshake()
+{
+    const auto self = shared_from_this();
+
+    timer();
+
+    const auto kind = socket_.index();
+
+    if (kind == 1) {
+        handle_handshake(error_code());
+    } else if (kind == 2) {
+        auto& tls = get<2>(socket_);
+
+        tls.async_handshake(stream_base::client, [self] (auto code) {
+            self->handle_handshake(code);
+        });
+    }
+}
+
+// Completion handler of connect(): stop the timer and go on with the handshake on success.
+void requester::handle_connect(const error_code& code)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    handshake();
+}
+
+/*
+ * Arm the timer and asynchronously connect the active socket (the lowest
+ * layer in case of a TLS stream) to the resolved endpoints.
+ */
+void requester::connect(const tcp::resolver::results_type& eps)
+{
+    const auto self = shared_from_this();
+    const auto on_connect = [self] (auto code, auto) {
+        self->handle_connect(code);
+    };
+
+    timer();
+
+    if (const auto kind = socket_.index(); kind == 1)
+        async_connect(get<1>(socket_), eps, on_connect);
+    else if (kind == 2)
+        async_connect(get<2>(socket_).lowest_layer(), eps, on_connect);
+}
+
+// Completion handler of resolve(): stop the timer and connect to the endpoints on success.
+void requester::handle_resolve(const error_code& code, const tcp::resolver::results_type& eps)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    connect(eps);
+}
+
+/*
+ * Arm the timer and asynchronously resolve the url host, using the protocol
+ * name ("http" or "https") as service.
+ */
+void requester::resolve()
+{
+    auto self = shared_from_this();
+    auto on_resolve = [self] (auto code, auto eps) {
+        self->handle_resolve(code, eps);
+    };
+
+    timer();
+    resolver_.async_resolve(url_.host, url_.protocol, on_resolve);
+}
+
+/*
+ * Completion handler of the timer armed by timer().
+ *
+ * A wait that really expired completes with a success code; a wait that was
+ * cancelled or re-armed completes with an error (operation_aborted). Only the
+ * former is a timeout, in which case the pending operations are aborted.
+ */
+void requester::handle_timer(const error_code& code)
+{
+    if (code)
+        return;
+
+    error_code ignored;
+
+    // Abort a name resolution that may still be in progress.
+    resolver_.cancel();
+
+    /*
+     * Close the socket rather than destroying it: the pending operations
+     * complete with an error while the stream object they were started on
+     * stays alive.
+     */
+    switch (socket_.index()) {
+    case 1:
+        get<1>(socket_).close(ignored);
+        break;
+    case 2:
+        get<2>(socket_).lowest_layer().close(ignored);
+        break;
+    default:
+        break;
+    }
+}
+
+// (Re)arm the timer for config::timeout seconds from now.
+void requester::timer()
+{
+    const auto self = shared_from_this();
+    const auto on_expire = [self] (auto code) {
+        self->handle_timer(code);
+    };
+
+    timer_.expires_from_now(seconds(config::timeout));
+    timer_.async_wait(on_expire);
+}
+
+/*
+ * Create the socket matching the url protocol (plain TCP for "http", TLS
+ * otherwise) and start the request with the name resolution.
+ */
+void requester::start()
+{
+    auto& service = resolver_.get_io_service();
+
+    if (url_.protocol == "http")
+        socket_.emplace<tcp::socket>(service);
+    else {
+        socket_.emplace<stream<tcp::socket>>(service, ctx_);
+
+        /*
+         * Send the host name during the handshake (SNI), servers hosting
+         * several names on one address need it to select a certificate.
+         */
+        if (!SSL_set_tlsext_host_name(get<2>(socket_).native_handle(), url_.host.c_str())) {
+            // Nothing is pending on the stream yet, it can be dropped safely.
+            socket_.emplace<monostate>();
+            return;
+        }
+    }
+
+    resolve();
+}
+
+/*
+ * Store the notification target and the link to fetch, and bind the timer and
+ * the resolver to the given I/O context. Nothing is started until start().
+ */
+requester::requester(io_context& io, shared_ptr<server> server, string channel, string origin, url url)
+    : server_(move(server)),
+      channel_(move(channel)),
+      origin_(move(origin)),
+      url_(move(url)),
+      timer_(io),
+      resolver_(io)
+{
+}
+
+/*
+ * Parse the event message as a link and start a request for it. Messages that
+ * are not a http(s) link are ignored.
+ */
+void requester::run(io_context& io, const message_event& ev)
+{
+    auto link = url::parse(ev.message);
+
+    if (link.protocol.empty() || link.host.empty())
+        return;
+
+    // The constructor is private, hence the explicit new.
+    const shared_ptr<requester> self(new requester(io, ev.server, ev.channel, ev.origin, move(link)));
+
+    self->start();
+}
+
+// }}}
+
+// {{{ links_plugin
+
+// Plugin class: stores the user settings and hands received messages to requester.
+class links_plugin : public plugin {
+public:
+    using plugin::plugin;
+
+    // Read the "timeout" option into config::timeout.
+    void set_config(plugin_config) override;
+    // Read the "info" template into formats::info.
+    void set_formats(plugin_formats) override;
+    // Pass the message event to requester::run.
+    void handle_message(irccd&, const message_event&) override;
+};
+
+// Update config::timeout when the "timeout" option converts to an unsigned integer.
+void links_plugin::set_config(plugin_config conf)
+{
+    const auto parsed = string_util::to_uint(conf["timeout"]);
+
+    if (parsed)
+        config::timeout = *parsed;
+}
+
+// Update formats::info when the user defined an "info" format.
+void links_plugin::set_formats(plugin_formats formats)
+{
+    const auto it = formats.find("info");
+
+    if (it == formats.end())
+        return;
+
+    // The qualified name designates the formats struct, not the parameter.
+    formats::info = it->second;
+}
+
+// Try to fetch the title of the link carried by the message, using the daemon I/O service.
+void links_plugin::handle_message(irccd& irccd, const message_event& ev)
+{
+    auto& service = irccd.get_service();
+
+    requester::run(service, ev);
+}
+
+// }}}
+
+} // !namespace
+
+/*
+ * The plugin object, declared with C linkage and exported from the shared
+ * object under the name irccd_plugin_links.
+ * NOTE(review): presumably the symbol looked up by the dynlib plugin loader
+ * for a plugin named "links" -- confirm against dynlib_plugin.hpp.
+ */
+extern "C" BOOST_SYMBOL_EXPORT links_plugin irccd_plugin_links;
+
+// NOTE(review): the meaning of the second (empty) constructor argument is not visible here.
+links_plugin irccd_plugin_links("links", "");
+
+} // !irccd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/links/links.md	Fri Jul 06 22:10:10 2018 +0200
@@ -0,0 +1,44 @@
+The plugin **links** is used to analyze links sent on channels. It loads the
+web page and posts its title on the same channel.
+
+## Installation
+
+The plugin **links** is distributed with irccd. To enable it add the following
+to your `plugins` section:
+
+```ini
+[plugins]
+links = ""
+```
+
+## Usage
+
+The plugin will automatically fetch web page titles on messages that contain
+either *http://something* or *https://something*.
+
+Example of possible output:
+
+```nohighlight
+markand: http://example.org
+irccd: Example Domain
+```
+
+## Configuration
+
+The following options are available under the `[plugin.links]` section:
+
+- **timeout**: (int) timeout in seconds before dropping a request (default: 30).
+
+## Formats
+
+The **links** plugin supports the following formats in the `[format.links]`
+section:
+
+- **info**: message written when the title was parsed correctly.
+
+### Keywords supported
+
+The following keywords are supported:
+
+| Format | Keywords                                                  | Notes                   |
+|--------|-----------------------------------------------------------|-------------------------|
+| info   | channel, nickname, origin, server, title                  | title is the page title |