diff plugins/links/requester.cpp @ 768:d8bf53170fb6

Plugin links: fix various errors, closes #912 @2h Import uriparser to decode URLs correctly with fragment and query support for a better link decomposition. Improve pre-check message to allow people writing text after the link, e.g: http://example.org <- you should check this Improve relocation. Split code for better style.
author David Demelier <markand@malikania.fr>
date Wed, 24 Oct 2018 21:05:00 +0200
parents
children 8c44bbcbbab9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plugins/links/requester.cpp	Wed Oct 24 21:05:00 2018 +0200
@@ -0,0 +1,312 @@
+/*
+ * requester.cpp -- convenient HTTP get requester
+ *
+ * Copyright (c) 2013-2018 David Demelier <markand@malikania.fr>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <regex>
+
+#include <irccd/daemon/server.hpp>
+
+#include <irccd/string_util.hpp>
+
+#include "links.hpp"
+#include "requester.hpp"
+#include "uri.hpp"
+
+using std::error_code;
+using std::get;
+using std::monostate;
+using std::move;
+using std::regex;
+using std::regex_match;
+using std::regex_search;
+using std::shared_ptr;
+using std::smatch;
+using std::string;
+using std::variant;
+
+using boost::beast::flat_buffer;
+using boost::beast::http::async_read;
+using boost::beast::http::async_write;
+using boost::beast::http::empty_body;
+using boost::beast::http::field;
+using boost::beast::http::request;
+using boost::beast::http::response;
+using boost::beast::http::status;
+using boost::beast::http::string_body;
+using boost::beast::http::verb;
+
+using boost::asio::async_connect;
+using boost::asio::deadline_timer;
+using boost::asio::io_context;
+using boost::asio::ip::tcp;
+using boost::asio::ssl::context;
+using boost::asio::ssl::stream;
+using boost::asio::ssl::stream_base;
+
+using boost::posix_time::seconds;
+
+namespace irccd {
+
+using string_util::subst;
+using string_util::format;
+
+void requester::notify(const string& title)
+{
+    subst subst;
+
+    subst.keywords.emplace("channel", channel_);
+    subst.keywords.emplace("nickname", irc::user::parse(origin_).nick);
+    subst.keywords.emplace("origin", origin_);
+    subst.keywords.emplace("server", server_->get_id());
+    subst.keywords.emplace("title", title);
+
+    server_->message(channel_, format(links_plugin::format_info, subst));
+}
+
+void requester::parse()
+{
+    /*
+     * Use a regex because Boost's XML parser is strict and many web pages may
+     * have invalid or broken tags.
+     */
+    static const regex regex("<title>([^<]+)<\\/title>");
+
+    string data(res_.body().data());
+    smatch match;
+
+    if (regex_search(data, match, regex))
+        notify(match[1]);
+}
+
+void requester::handle_read(const error_code& code)
+{
+    timer_.cancel();
+
+    if (code)
+        return;
+
+    // Request again in case of relocation.
+    if (const auto it = res_.find(field::location); it != res_.end() && level_ < 32U) {
+        const string location(it->value().data(), it->value().size());
+        auto& io = timer_.get_io_service();
+        auto uri = uri::parse(location);
+
+        if (!uri)
+            return;
+
+        shared_ptr<requester>(new requester(io, server_, channel_, origin_, move(*uri), level_ + 1))->start();
+    } else if (res_.result() == status::ok)
+        parse();
+}
+
+void requester::read()
+{
+    const auto self = shared_from_this();
+    const auto wrap = [self] (auto code, auto) {
+        self->handle_read(code);
+    };
+
+    timer();
+
+    switch (socket_.index()) {
+    case 1:
+        async_read(get<1>(socket_), buffer_, res_, wrap);
+        break;
+#if defined(IRCCD_HAVE_SSL)
+    case 2:
+        async_read(get<2>(socket_), buffer_, res_, wrap);
+        break;
+#endif
+    default:
+        break;
+    }
+}
+
+void requester::handle_write(const error_code& code)
+{
+    timer_.cancel();
+
+    if (!code)
+        read();
+}
+
+void requester::write()
+{
+    req_.version(11);
+    req_.method(verb::get);
+    req_.target(uri_.path);
+    req_.set(field::host, uri_.host);
+    req_.set(field::user_agent, BOOST_BEAST_VERSION_STRING);
+
+    const auto self = shared_from_this();
+    const auto wrap = [self] (auto code, auto) {
+        self->handle_write(code);
+    };
+
+    timer();
+
+    switch (socket_.index()) {
+    case 1:
+        async_write(get<1>(socket_), req_, wrap);
+        break;
+#if defined(IRCCD_HAVE_SSL)
+    case 2:
+        async_write(get<2>(socket_), req_, wrap);
+        break;
+#endif
+    default:
+        break;
+    }
+}
+
+void requester::handle_handshake(const error_code& code)
+{
+    timer_.cancel();
+
+    if (!code)
+        write();
+}
+
+void requester::handshake()
+{
+    const auto self = shared_from_this();
+
+    timer();
+
+    switch (socket_.index()) {
+    case 1:
+        handle_handshake(error_code());
+        break;
+#if defined(IRCCD_HAVE_SSL)
+    case 2:
+        get<2>(socket_).async_handshake(stream_base::client, [self] (auto code) {
+            self->handle_handshake(code);
+        });
+        break;
+#endif
+    default:
+        break;
+    }
+}
+
+void requester::handle_connect(const error_code& code)
+{
+    timer_.cancel();
+
+    if (!code)
+        handshake();
+}
+
+void requester::connect(const tcp::resolver::results_type& eps)
+{
+    const auto self = shared_from_this();
+    const auto wrap = [self] (auto code, auto) {
+        self->handle_connect(code);
+    };
+
+    timer();
+
+    switch (socket_.index()) {
+    case 1:
+        async_connect(get<1>(socket_), eps, wrap);
+        break;
+#if defined(IRCCD_HAVE_SSL)
+    case 2:
+        async_connect(get<2>(socket_).lowest_layer(), eps, wrap);
+        break;
+#endif
+    default:
+        break;
+    }
+}
+
+void requester::handle_resolve(const error_code& code, const tcp::resolver::results_type& eps)
+{
+    timer_.cancel();
+
+    if (!code)
+        connect(eps);
+}
+
+void requester::resolve()
+{
+    auto self = shared_from_this();
+
+    timer();
+    resolver_.async_resolve(uri_.host, uri_.port, [self] (auto code, auto eps) {
+        self->handle_resolve(code, eps);
+    });
+}
+
+void requester::handle_timer(const error_code& code)
+{
+    // Force close sockets to cancel all pending operations.
+    if (code && code != std::errc::operation_canceled)
+        socket_.emplace<monostate>();
+}
+
+void requester::timer()
+{
+    const auto self = shared_from_this();
+
+    timer_.expires_from_now(seconds(links_plugin::conf_timeout));
+    timer_.async_wait([self] (auto code) {
+        self->handle_timer(code);
+    });
+}
+
+void requester::start()
+{
+    if (uri_.scheme == "http")
+        socket_.emplace<tcp::socket>(resolver_.get_io_service());
+#if defined(IRCCD_HAVE_SSL)
+    else if (uri_.scheme == "https")
+        socket_.emplace<stream<tcp::socket>>(resolver_.get_io_service(), ctx_);
+#endif
+
+    // Only do the resolve if scheme is correct.
+    if (socket_.index() != 0)
+        resolve();
+}
+
+requester::requester(io_context& io,
+                     shared_ptr<server> server,
+                     string channel,
+                     string origin,
+                     uri uri,
+                     size_t level)
+    : level_(level)
+    , server_(move(server))
+    , channel_(move(channel))
+    , origin_(move(origin))
+    , uri_(move(uri))
+    , timer_(io)
+    , resolver_(io)
+{
+}
+
+void requester::run(io_context& io, shared_ptr<server> server, string origin, string channel, string link)
+{
+    auto uri = uri::parse(link);
+
+    if (!uri)
+        return;
+
+    shared_ptr<requester>(new requester(io, server, channel, origin, move(*uri), 0))->start();
+}
+
+} // !irccd