Skip to content
Snippets Groups Projects
Commit 154ce53e authored by Andreas Traczyk's avatar Andreas Traczyk
Browse files

link-preview: fix UTF-8 decoding

The HTML string was being converted to a local 8-bit string prior to parsing with tidy.

According to Qt documentation, this works on Unix systems as toLocal8Bit() is equivalent to toUtf8(), however, it may not on Windows, and may result in character replacement.

Gitlab: #1536
Change-Id: I7aec98cffbd9d72771d1c489f52b38dbb3a1ed9f
parent 35f85028
No related branches found
No related tags found
No related merge requests found
......@@ -49,7 +49,7 @@ public:
bool parseHtmlString(const QString& html)
{
return tidyParseString(doc_, html.toLocal8Bit().data()) >= 0;
return tidyParseString(doc_, html.toUtf8().data()) >= 0;
}
using TagNodeList = QMap<TidyTagId, QList<TidyNode>>;
......@@ -86,11 +86,14 @@ public:
// Extract the text value from a node.
QString getNodeText(TidyNode node)
{
TidyBuffer nodeValue = {};
TidyBuffer nodeValue = {0};
if (!node || tidyNodeGetText(doc_, node, &nodeValue) != yes) {
return QString();
}
QString result = QString::fromUtf8((char*) nodeValue.bp, nodeValue.size);
QString result;
if (nodeValue.bp && nodeValue.size > 0) {
result = QString::fromUtf8(reinterpret_cast<char*>(nodeValue.bp), nodeValue.size);
}
tidyBufFree(&nodeValue);
return result;
}
......
......@@ -8,6 +8,7 @@ set(QT_TESTING_MODULES
QuickTest
Test
Widgets
HttpServer
)
find_package(Qt${QT_VERSION_MAJOR} CONFIG REQUIRED ${QT_TESTING_MODULES})
foreach(MODULE ${QT_TESTING_MODULES})
......@@ -84,6 +85,7 @@ set(UNIT_TESTS_SOURCE_FILES
${CMAKE_SOURCE_DIR}/tests/unittests/account_unittest.cpp
${CMAKE_SOURCE_DIR}/tests/unittests/contact_unittest.cpp
${CMAKE_SOURCE_DIR}/tests/unittests/messageparser_unittest.cpp
${CMAKE_SOURCE_DIR}/tests/unittests/previewengine_unittest.cpp
${CMAKE_SOURCE_DIR}/tests/unittests/globaltestenvironment.h
${COMMON_TESTS_SOURCES})
......
/*
* Copyright (C) 2024 Savoir-faire Linux Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "globaltestenvironment.h"
#include <QtHttpServer/QHttpServer>
class PreviewEngineFixture : public ::testing::Test
{
public:
// Prepare unit test context. Called at
// prior each unit test execution
void SetUp() override {
server = new QHttpServer();
// Setup a server that can return an HTML body.
server->listen(QHostAddress::LocalHost, 8000);
}
// Close unit test context. Called
// after each unit test ending
void TearDown() override {
delete server;
}
// An instance of QHttpServer used to create a server.
QHttpServer* server;
};
/*!
* WHEN We parse a link
* THEN The infoReady signal should be emitted once with the correct info
*/
TEST_F(PreviewEngineFixture, ParsingALinkEmitsInfoReadySignal)
{
auto link = QString("http://localhost:8000/test");
server->route("/test", [] () {
return QString("<meta property=\"og:title\" content=\"Test title\">");
});
QSignalSpy infoReadySpy(globalEnv.previewEngine.data(), &PreviewEngine::infoReady);
Q_EMIT globalEnv.previewEngine->parseLink("msgId_01", link);
// Wait for the infoReady signal which should be emitted once with the correct ID.
infoReadySpy.wait();
EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once";
QList<QVariant> infoReadyArguments = infoReadySpy.takeFirst();
EXPECT_TRUE(infoReadyArguments.at(0).typeId() == qMetaTypeId<QString>());
EXPECT_EQ(infoReadyArguments.at(0).toString(), "msgId_01");
}
/*!
* WHEN We parse a link that has a description containing characters encoded using UTF-8
* THEN The description should be parsed and match the original string
*/
TEST_F(PreviewEngineFixture, UTF8CharactersAreParsedCorrectly)
{
auto link = QString("http://localhost:8000/test");
server->route("/test", [] () {
return QString("<meta property=\"og:description\" content=\"Test de caractères Utf-8");
});
QSignalSpy infoReadySpy(globalEnv.previewEngine.data(), &PreviewEngine::infoReady);
Q_EMIT globalEnv.previewEngine->parseLink("msgId_01", link);
// Wait for the infoReady signal which should be emitted once.
infoReadySpy.wait();
EXPECT_EQ(infoReadySpy.count(), 1) << "infoReady signal should be emitted once";
QList<QVariant> infoReadyArguments = infoReadySpy.takeFirst();
EXPECT_TRUE(infoReadyArguments.at(1).typeId() == qMetaTypeId<QVariantMap>());
// Check that the description is parsed correctly.
QVariantMap info = infoReadyArguments.at(1).toMap();
EXPECT_TRUE(info.contains("description"));
EXPECT_EQ(info["description"].toString(), "Test de caractères Utf-8");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment