From 154ce53e2def2f340ebe05cbefc855fd81f0f9a3 Mon Sep 17 00:00:00 2001 From: Andreas Traczyk <andreas.traczyk@savoirfairelinux.com> Date: Mon, 29 Jan 2024 12:17:55 -0500 Subject: [PATCH] link-preview: fix UTF-8 decoding The HTML string was being converted to a local 8-bit string prior to parsing with tidy. According to Qt documentation, this works on Unix systems as toLocal8Bit() is equivalent to toUtf8(), however, it may not on Windows, and may result in character replacement. Gitlab: #1536 Change-Id: I7aec98cffbd9d72771d1c489f52b38dbb3a1ed9f --- src/app/htmlparser.h | 9 ++- tests/CMakeLists.txt | 2 + tests/unittests/previewengine_unittest.cpp | 93 ++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 tests/unittests/previewengine_unittest.cpp diff --git a/src/app/htmlparser.h b/src/app/htmlparser.h index 0b35385e1..76f636e0e 100644 --- a/src/app/htmlparser.h +++ b/src/app/htmlparser.h @@ -49,7 +49,7 @@ public: bool parseHtmlString(const QString& html) { - return tidyParseString(doc_, html.toLocal8Bit().data()) >= 0; + return tidyParseString(doc_, html.toUtf8().data()) >= 0; } using TagNodeList = QMap<TidyTagId, QList<TidyNode>>; @@ -86,11 +86,14 @@ public: // Extract the text value from a node. QString getNodeText(TidyNode node) { - TidyBuffer nodeValue = {}; + TidyBuffer nodeValue = {0}; if (!node || tidyNodeGetText(doc_, node, &nodeValue) != yes) { return QString(); } - QString result = QString::fromUtf8((char*) nodeValue.bp, nodeValue.size); + QString result; + if (nodeValue.bp && nodeValue.size > 0) { + result = QString::fromUtf8(reinterpret_cast<char*>(nodeValue.bp), nodeValue.size); + } tidyBufFree(&nodeValue); return result; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d50908cfa..bc68fcf03 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,7 @@ set(QT_TESTING_MODULES QuickTest Test Widgets + HttpServer ) find_package(Qt${QT_VERSION_MAJOR} CONFIG REQUIRED ${QT_TESTING_MODULES}) foreach(MODULE ${QT_TESTING_MODULES}) @@ -84,6 +85,7 @@ set(UNIT_TESTS_SOURCE_FILES ${CMAKE_SOURCE_DIR}/tests/unittests/account_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/contact_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/messageparser_unittest.cpp + ${CMAKE_SOURCE_DIR}/tests/unittests/previewengine_unittest.cpp ${CMAKE_SOURCE_DIR}/tests/unittests/globaltestenvironment.h ${COMMON_TESTS_SOURCES}) diff --git a/tests/unittests/previewengine_unittest.cpp b/tests/unittests/previewengine_unittest.cpp new file mode 100644 index 000000000..70808dfd2 --- /dev/null +++ b/tests/unittests/previewengine_unittest.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2024 Savoir-faire Linux Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "globaltestenvironment.h" + +#include <QtHttpServer/QHttpServer> + +class PreviewEngineFixture : public ::testing::Test +{ +public: + // Prepare unit test context. Called at + // prior each unit test execution + void SetUp() override { + server = new QHttpServer(); + // Setup a server that can return an HTML body. + server->listen(QHostAddress::LocalHost, 8000); + } + + // Close unit test context. Called + // after each unit test ending + void TearDown() override { + delete server; + } + + // An instance of QHttpServer used to create a server. + QHttpServer* server; +}; + +/*! + * WHEN We parse a link + * THEN The infoReady signal should be emitted once with the correct info + */ +TEST_F(PreviewEngineFixture, ParsingALinkEmitsInfoReadySignal) +{ + auto link = QString("http://localhost:8000/test"); + server->route("/test", [] () { + return QString("<meta property=\"og:title\" content=\"Test title\">"); + }); + + QSignalSpy infoReadySpy(globalEnv.previewEngine.data(), &PreviewEngine::infoReady); + + Q_EMIT globalEnv.previewEngine->parseLink("msgId_01", link); + + // Wait for the infoReady signal which should be emitted once with the correct ID. + infoReadySpy.wait(); + EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once"; + + QList<QVariant> infoReadyArguments = infoReadySpy.takeFirst(); + EXPECT_TRUE(infoReadyArguments.at(0).typeId() == qMetaTypeId<QString>()); + EXPECT_EQ(infoReadyArguments.at(0).toString(), "msgId_01"); +} + +/*! + * WHEN We parse a link that has a description containing characters encoded using UTF-8 + * THEN The description should be parsed and match the original string + */ +TEST_F(PreviewEngineFixture, UTF8CharactersAreParsedCorrectly) +{ + auto link = QString("http://localhost:8000/test"); + server->route("/test", [] () { + return QString("<meta property=\"og:description\" content=\"Test de caractères Utf-8"); + }); + + QSignalSpy infoReadySpy(globalEnv.previewEngine.data(), &PreviewEngine::infoReady); + + Q_EMIT globalEnv.previewEngine->parseLink("msgId_01", link); + + // Wait for the infoReady signal which should be emitted once. + infoReadySpy.wait(); + EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once"; + + QList<QVariant> infoReadyArguments = infoReadySpy.takeFirst(); + EXPECT_TRUE(infoReadyArguments.at(1).typeId() == qMetaTypeId<QVariantMap>()); + + // Check that the description is parsed correctly. + QVariantMap info = infoReadyArguments.at(1).toMap(); + EXPECT_TRUE(info.contains("description")); + EXPECT_EQ(info["description"].toString(), "Test de caractères Utf-8"); +} -- GitLab