diff --git a/src/app/htmlparser.h b/src/app/htmlparser.h
index c656542b9a8937f718373a9ad63ba026b43d5e8a..3b23b573d07dc7706273d287a8764718523b26aa 100644
--- a/src/app/htmlparser.h
+++ b/src/app/htmlparser.h
@@ -39,6 +39,7 @@ public:
         doc_ = tidyCreate();
         tidyOptSetBool(doc_, TidyQuiet, yes);
         tidyOptSetBool(doc_, TidyShowWarnings, no);
+        tidyOptSetInt(doc_, TidyUseCustomTags, TidyCustomEmpty);
@@ -51,46 +52,88 @@ public:
         return tidyParseString(doc_, html.toLocal8Bit().data()) >= 0;
-    using TagInfoList = QMap<TidyTagId, QList<QString>>;
+    using TagNodeList = QMap<TidyTagId, QList<TidyNode>>;
     // A function that traverses the DOM tree and fills a QVariantMap with a list
-    // of the tags and their values. The result is structured as follows:
-    // {tagId1: ["tagValue1", "tagValue2", ...],
-    //  tagId: ["tagValue1", "tagValue2", ...],
+    // of the tags and their nodes. The result is structured as follows:
+    // {tagId1: [tagNode1, tagNode2, ...],
+    //  tagId2: [tagNode3, tagNode4, ...],
     //  ... }
-    TagInfoList getTags(QList<TidyTagId> tags, int maxDepth = -1)
+    TagNodeList getTagsNodes(const QList<TidyTagId>& tags, int maxDepth = -1)
-        TagInfoList result;
+        TagNodeList result;
-            [&result](const QString& value, TidyTagId tag) { result[tag].append(value); },
+            [&result](TidyNode node, TidyTagId tag) { result[tag].append(node); },
         return result;
-    QString getFirstTagValue(TidyTagId tag, int maxDepth = -1)
+    // The same as the above function, only it returns the first node for a single tag.
+    TidyNode getFirstTagNode(TidyTagId tag, int maxDepth = -1)
-        QString result;
+        TidyNode result = nullptr;
-            [&result](const QString& value, TidyTagId) { result = value; },
+            [&result](TidyNode node, TidyTagId) { result = node; },
         return result;
+    // Serialize `node` with tidyNodeGetText and return the produced bytes decoded
+    // as UTF-8. A null node, or a failed serialization, yields a null QString.
+    QString getNodeText(TidyNode node)
+    {
+        if (!node) {
+            return QString();
+        }
+        TidyBuffer buffer = {};
+        if (tidyNodeGetText(doc_, node, &buffer) != yes) {
+            return QString();
+        }
+        // Copy the bytes into a QString before the tidy buffer is released.
+        QString text = QString::fromUtf8(reinterpret_cast<const char*>(buffer.bp), buffer.size);
+        tidyBufFree(&buffer);
+        return text;
+    }
+    // Return the value of the attribute `attrId` carried by `node`.
+    // Yields a null QString when `node` is null, when the node does not have the
+    // attribute, or when the attribute has no value.
+    QString getNodeAttr(TidyNode node, TidyAttrId attrId)
+    {
+        // getFirstTagNode() returns nullptr when nothing matches, so a null node
+        // can reach this helper; do not pass it on to tidyAttrGetById. This
+        // mirrors the guards in getNodeText() and getNodeInnerHtml().
+        if (!node) {
+            return QString();
+        }
+        TidyAttr attr = tidyAttrGetById(node, attrId);
+        if (!attr) {
+            return QString();
+        }
+        const auto* attrValue = tidyAttrValue(attr);
+        if (!attrValue) {
+            return QString();
+        }
+        // NOTE(review): getNodeText() decodes tidy output with fromUtf8 while this
+        // helper uses fromLocal8Bit - confirm which encoding tidy uses for
+        // attribute values.
+        return QString::fromLocal8Bit(attrValue);
+    }
+    // Return the serialized text of the first child of `node` (see getNodeText).
+    // A null node, or a node without children, yields a null QString.
+    QString getNodeInnerHtml(TidyNode node)
+    {
+        if (node) {
+            // getNodeText() already maps a null child to a null QString.
+            return getNodeText(tidyGetChild(node));
+        }
+        return QString();
+    }
+    // Convenience wrapper: look up the first node for `tag` (default search depth)
+    // and return its inner HTML as produced by getNodeInnerHtml().
+    QString getTagInnerHtml(TidyTagId tag)
+    {
+        TidyNode firstNode = getFirstTagNode(tag);
+        return getNodeInnerHtml(firstNode);
+    }
+    // NOLINTNEXTLINE(misc-no-recursion)
     void traverseNode(TidyNode node,
-                      QList<TidyTagId> tags,
-                      const std::function<void(const QString&, TidyTagId)>& cb,
+                      const QList<TidyTagId>& tags,
+                      const std::function<void(TidyNode, TidyTagId)>& cb,
                       int depth = -1)
-        TidyBuffer nodeValue = {};
         for (auto tag : tags) {
-            if (tidyNodeGetId(node) == tag && tidyNodeGetText(doc_, node, &nodeValue) == yes && cb) {
-                cb(QString::fromLocal8Bit(nodeValue.bp), tag);
+            if (tidyNodeGetId(node) == tag && cb) {
+                cb(node, tag);
                 if (depth != -1 && --depth == 0) {
diff --git a/src/app/messageparser.cpp b/src/app/messageparser.cpp
index 7941e12dbefc5ca424eef323abdd64bf1939c479..a501b0b4ee3fd3949676d9adb333109f168d1cff 100644
--- a/src/app/messageparser.cpp
+++ b/src/app/messageparser.cpp
@@ -25,6 +25,18 @@
 #include "md4c-html.h"
+namespace {
+// A callback function that will be called by the md4c library (`md_html`) to output the HTML.
+htmlChunkCb(const MD_CHAR* data, MD_SIZE data_size, void* userData)
+    QByteArray* array = static_cast<QByteArray*>(userData);
+    if (data_size > 0) {
+        array->append(data, int(data_size));
+    }
+} // namespace
 MessageParser::MessageParser(PreviewEngine* previewEngine, QObject* parent)
     : QObject(parent)
     , previewEngine_(previewEngine)
@@ -51,9 +63,9 @@ MessageParser::parseMessage(const QString& messageId,
             // Now that we have the HTML, we can parse it to get a list of tags and their values.
             // We are only interested in the <a> and <pre> tags.
-            auto tagsMap = htmlParser_->getTags({TidyTag_A, TidyTag_DEL, TidyTag_PRE});
+            auto tagsMap = htmlParser_->getTagsNodes({TidyTag_A, TidyTag_DEL, TidyTag_PRE});
-            static QString styleTag("<style>%1</style>");
+            static const QString styleTag("<style>%1</style>");
             QString style;
             // Check for any <pre> tags. If there are any, we need to:
@@ -89,11 +101,9 @@ MessageParser::parseMessage(const QString& messageId,
                 // If the user has enabled link previews, then we need to generate the link preview.
                 if (previewLinks) {
                     // Get the first link in the message.
-                    auto anchorTag = tagsMap[TidyTag_A].first();
-                    static QRegularExpression hrefRegex("href=\"(.*?)\"");
-                    auto match = hrefRegex.match(anchorTag);
-                    if (match.hasMatch()) {
-                        Q_EMIT previewEngine_->parseLink(messageId, match.captured(1));
+                    auto href = htmlParser_->getNodeAttr(tagsMap[TidyTag_A].first(), TidyAttr_HREF);
+                    if (!href.isEmpty()) {
+                        Q_EMIT previewEngine_->parseLink(messageId, href);
@@ -110,13 +120,13 @@ void
 MessageParser::preprocessMarkdown(QString& markdown)
     // Match all instances of the linefeed character.
-    static QRegularExpression newlineRegex("\n");
+    static const QRegularExpression newlineRegex("\\r?\\n");
     static const QString newline = "  \n";
     // Replace all instances of the linefeed character with 2 spaces + a linefeed character
     // in order to force a line break in the HTML.
     // Note: we should only do this for non-code fenced blocks.
-    static QRegularExpression codeFenceRe("`{1,3}([\\s\\S]*?)`{1,3}");
+    static const QRegularExpression codeFenceRe("`{1,3}([\\s\\S]*?)`{1,3}");
     auto match = codeFenceRe.globalMatch(markdown);
     // If there are no code blocks, then we can just replace all linefeeds with 2 spaces
@@ -132,7 +142,7 @@ MessageParser::preprocessMarkdown(QString& markdown)
     enum BlockType { Text, Code };
     QVector<QPair<BlockType, QString>> codeBlocks;
-    int start = 0;
+    qsizetype start = 0;
     while (match.hasNext()) {
         auto m = match.next();
         auto nonCodelength = m.capturedStart() - start;
@@ -158,27 +168,16 @@ MessageParser::preprocessMarkdown(QString& markdown)
-// A callback function that will be called by the md4c library (`md_html`) to output the HTML.
-static void
-htmlChunkCb(const MD_CHAR* data, MD_SIZE data_size, void* userData)
-    QByteArray* array = static_cast<QByteArray*>(userData);
-    if (data_size > 0) {
-        array->append(data, int(data_size));
-    }
 MessageParser::markdownToHtml(const char* markdown)
-    size_t data_len = strlen(markdown);
+    const size_t data_len = strlen(markdown);
     if (data_len <= 0) {
         return QString();
-    } else {
-        QByteArray array;
-        int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0);
-        return result == 0 ? QString::fromUtf8(array) : QString();
+    QByteArray array;
+    const int result = md_html(markdown, MD_SIZE(data_len), &htmlChunkCb, &array, md_flags, 0);
+    return result == 0 ? QString::fromUtf8(array) : QString();
diff --git a/src/app/previewengine.cpp b/src/app/previewengine.cpp
index 5f4490f1166027b754687f0fe77d7cc6f4e933ff..5e56fbb88bd2d734f6c18ab18ba42e348a0e642e 100644
--- a/src/app/previewengine.cpp
+++ b/src/app/previewengine.cpp
@@ -19,15 +19,6 @@
 #include <QRegularExpression>
-static QString
-getInnerHtml(const QString& tag)
-    static const QRegularExpression re(">([^<]+)<");
-    const auto match = re.match(tag);
-    return match.hasMatch() ? match.captured(1) : QString {};
-// Portable newline regex.
 const QRegularExpression PreviewEngine::newlineRe("\\r?\\n");
 PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent)
@@ -39,12 +30,11 @@ PreviewEngine::PreviewEngine(ConnectivityMonitor* cm, QObject* parent)
-PreviewEngine::getTagContent(QList<QString>& tags, const QString& value)
+PreviewEngine::getTagContent(const QList<QString>& tags, const QString& value)
     Q_FOREACH (auto tag, tags) {
         const QRegularExpression re("(property|name)=\"(og:|twitter:|)" + value
                                     + "\".*?content=\"([^\"]+)\"");
         const auto match = re.match(tag.remove(newlineRe));
         if (match.hasMatch()) {
             return match.captured(3);
@@ -54,45 +44,44 @@ PreviewEngine::getTagContent(QList<QString>& tags, const QString& value)
-PreviewEngine::getTitle(HtmlParser::TagInfoList& metaTags)
+PreviewEngine::getTitle(const QList<QString>& metaTags)
     // Try with opengraph/twitter props
-    QString title = getTagContent(metaTags[TidyTag_META], "title");
+    QString title = getTagContent(metaTags, "title");
     if (title.isEmpty()) { // Try with title tag
-        title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_TITLE));
+        title = htmlParser_->getTagInnerHtml(TidyTag_TITLE);
     if (title.isEmpty()) { // Try with h1 tag
-        title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_H1));
+        title = htmlParser_->getTagInnerHtml(TidyTag_H1);
     if (title.isEmpty()) { // Try with h2 tag
-        title = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_H2));
+        title = htmlParser_->getTagInnerHtml(TidyTag_H2);
     return title;
-PreviewEngine::getDescription(HtmlParser::TagInfoList& metaTags)
+PreviewEngine::getDescription(const QList<QString>& metaTags)
     // Try with og/twitter props
-    QString d = getTagContent(metaTags[TidyTag_META], "description");
-    if (d.isEmpty()) { // Try with first paragraph
-        d = getInnerHtml(htmlParser_->getFirstTagValue(TidyTag_P));
+    QString desc = getTagContent(metaTags, "description");
+    if (desc.isEmpty()) { // Try with first paragraph
+        desc = htmlParser_->getTagInnerHtml(TidyTag_P);
-    return d;
+    return desc;
-PreviewEngine::getImage(HtmlParser::TagInfoList& metaTags)
+PreviewEngine::getImage(const QList<QString>& metaTags)
     // Try with og/twitter props
-    QString image = getTagContent(metaTags[TidyTag_META], "image");
+    QString image = getTagContent(metaTags, "image");
     if (image.isEmpty()) { // Try with href of link tag (rel="image_src")
-        auto tags = htmlParser_->getTags({TidyTag_LINK});
-        Q_FOREACH (auto tag, tags[TidyTag_LINK]) {
-            static const QRegularExpression re("rel=\"image_src\".*?href=\"([^\"]+)\"");
-            const auto match = re.match(tag.remove(newlineRe));
-            if (match.hasMatch()) {
-                return match.captured(1);
+        auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_LINK});
+        Q_FOREACH (auto tag, tagsNodes[TidyTag_LINK]) {
+            // Only <link rel="image_src"> designates a preview image. Any other
+            // <link> (stylesheet, icon, canonical, ...) also has an href and must
+            // be skipped, otherwise its URL would be returned as the image.
+            // The comparison ignores case.
+            const QString rel = htmlParser_->getNodeAttr(tag, TidyAttr_REL);
+            if (rel.compare(QLatin1String("image_src"), Qt::CaseInsensitive) != 0) {
+                continue;
+            }
+            QString href = htmlParser_->getNodeAttr(tag, TidyAttr_HREF);
+            if (!href.isEmpty()) {
+                return href;
@@ -104,7 +93,12 @@ PreviewEngine::onParseLink(const QString& messageId, const QString& link)
     sendGetRequest(QUrl(link), [this, messageId, link](const QByteArray& html) {
-        auto metaTags = htmlParser_->getTags({TidyTag_META});
+        auto tagsNodes = htmlParser_->getTagsNodes({TidyTag_META});
+        auto metaTagNodes = tagsNodes[TidyTag_META];
+        QList<QString> metaTags;
+        Q_FOREACH (auto tag, metaTagNodes) {
+            metaTags.append(htmlParser_->getNodeText(tag));
+        }
         QString domain = QUrl(link).host();
         if (domain.isEmpty()) {
             domain = link;
diff --git a/src/app/previewengine.h b/src/app/previewengine.h
index db14a96886179febfea78db377d11ed95e996a03..2f0144ad603c2f2039c3d967cbb324bad13e4603 100644
--- a/src/app/previewengine.h
+++ b/src/app/previewengine.h
@@ -39,10 +39,10 @@ private:
     // An instance of HtmlParser used to parse HTML.
     HtmlParser* htmlParser_;
-    QString getTagContent(QList<QString>& tags, const QString& value);
-    QString getTitle(HtmlParser::TagInfoList& metaTags);
-    QString getDescription(HtmlParser::TagInfoList& metaTags);
-    QString getImage(HtmlParser::TagInfoList& metaTags);
+    QString getTagContent(const QList<QString>& tags, const QString& value);
+    QString getTitle(const QList<QString>& metaTags);
+    QString getDescription(const QList<QString>& metaTags);
+    QString getImage(const QList<QString>& metaTags);
     static const QRegularExpression newlineRe;
diff --git a/tests/unittests/messageparser_unittest.cpp b/tests/unittests/messageparser_unittest.cpp
index 4cbe16d9509c878ba88ec6573dd25709a5c39748..8f91834bf583be90953bf0575976c04df4d8cf55 100644
--- a/tests/unittests/messageparser_unittest.cpp
+++ b/tests/unittests/messageparser_unittest.cpp
@@ -117,7 +117,6 @@ TEST_F(MessageParserFixture, EndOfLineCharactersAreParsedCorrectly)
     auto backgroundColor = QColor::fromRgb(0, 0, 255);
     QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed);
-    QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady);
     // Parse a message with a link.
@@ -148,7 +147,6 @@ TEST_F(MessageParserFixture, FencedCodeIsParsedCorrectly)
     auto backgroundColor = QColor::fromRgb(0, 0, 255);
     QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed);
-    QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady);
     // Parse a message with a link.
@@ -169,3 +167,41 @@ TEST_F(MessageParserFixture, FencedCodeIsParsedCorrectly)
               "}</style><p>Text with</p>\n<pre><code>code\n</code></pre>\n");
+ * WHEN  We parse a text body with a youtube link.
+ * THEN  PreviewEngine::parseLink should be called with the correct arguments.
+ */
+TEST_F(MessageParserFixture, YoutubeLinkIsParsedCorrectly)
+    auto url = "https://www.youtube.com/watch?v=1234567890";
+    auto msg = "blah blah " + QString(url) + " blah blah";
+    QSignalSpy messageParsedSpy(globalEnv.messageParser.data(), &MessageParser::messageParsed);
+    QSignalSpy linkInfoReadySpy(globalEnv.messageParser.data(), &MessageParser::linkInfoReady);
+    // Parse a message with a link.
+    globalEnv.messageParser->parseMessage("msgId_05",
+                                          msg,
+                                          true,
+                                          QColor::fromRgb(0, 0, 255),
+                                          QColor::fromRgb(0, 0, 255));
+    // Wait for the messageParsed signal which should be emitted once.
+    messageParsedSpy.wait();
+    EXPECT_EQ(messageParsedSpy.count(), 1);
+    QList<QVariant> messageParserArguments = messageParsedSpy.takeFirst();
+    EXPECT_TRUE(messageParserArguments.at(0).typeId() == qMetaTypeId<QString>());
+    // Wait for the linkInfoReady signal which should be emitted once.
+    linkInfoReadySpy.wait();
+    EXPECT_EQ(linkInfoReadySpy.count(), 1);
+    QList<QVariant> linkInfoReadyArguments = linkInfoReadySpy.takeFirst();
+    EXPECT_TRUE(linkInfoReadyArguments.at(0).typeId() == qMetaTypeId<QString>());
+    EXPECT_EQ(linkInfoReadyArguments.at(0).toString(), "msgId_05");
+    EXPECT_TRUE(linkInfoReadyArguments.at(1).typeId() == qMetaTypeId<QVariantMap>());
+    QVariantMap linkInfo = linkInfoReadyArguments.at(1).toMap();
+    EXPECT_EQ(linkInfo["url"].toString(), url);