Skip to content

Commit

Permalink
fix whitespace normalization used by webmap md5 hash
Browse files Browse the repository at this point in the history
  • Loading branch information
nschimme committed Mar 10, 2024
1 parent 603b13f commit 559766f
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 15 deletions.
7 changes: 2 additions & 5 deletions src/expandoracommon/parseevent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
#include <cassert>
#include <cstdint>
#include <memory>
#include <regex>

#include "../global/TextUtils.h"
#include "../mapdata/ExitDirection.h"
#include "../parser/CommandId.h"
#include "../parser/ConnectedRoomFlags.h"
#include "../parser/ExitsFlags.h"
#include "../parser/PromptFlags.h"
#include "../parser/parserutils.h"
#include "property.h"

ParseEvent::ArrayOfProperties::ArrayOfProperties() = default;
Expand Down Expand Up @@ -109,12 +109,9 @@ SharedParseEvent ParseEvent::createEvent(const CommandEnum c,
auto result = std::make_shared<ParseEvent>(c);
ParseEvent *const event = result.get();

static const std::regex normalizeWhitespacePattern(R"(\s+)", std::regex::optimize);

// the moved strings are used by const ref here before they're moved.
event->setProperty(moved_roomName);
event->setProperty(RoomDesc{
std::regex_replace(moved_roomDesc.getStdString(), normalizeWhitespacePattern, " ")});
event->setProperty(RoomDesc{ParserUtils::normalizeWhitespace(moved_roomDesc.getStdString())});
event->setProperty(terrain);

// After this block, the moved values are gone.
Expand Down
14 changes: 7 additions & 7 deletions src/mapstorage/jsonmapstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,22 @@ class NODISCARD WebHasher final
: m_hash(QCryptographicHash::Md5)
{}

void add(QString str)
void add(const RoomName &roomName, const RoomDesc &roomDesc)
{
auto name = roomName.toQString();
// This is most likely unnecessary because the parser did it for us...
// We need plain ASCII so that accentuation changes do not affect the
// hashes and because MD5 is defined on bytes, not encoded chars.
ParserUtils::toAsciiInPlace(str);
ParserUtils::toAsciiInPlace(name);
// Roomdescs may see whitespacing fixes over the years (ex: removing double
// spaces after periods). MMapper ignores such changes when comparing rooms,
// but the web mapper may only look up rooms by hash. Normalizing the
// whitespaces makes the hash more resilient.
str.replace(QRegularExpression(" +"), " ");
str.replace(QRegularExpression(" *\r?\n"), "\n");
auto desc = ::toQStringLatin1(ParserUtils::normalizeWhitespace(roomDesc.getStdString()));
ParserUtils::toAsciiInPlace(desc);

// REVISIT: should this be latin1 or utf8?
m_hash.addData(str.toLatin1());
m_hash.addData(name.toLatin1() + "\n" + desc.toLatin1());
}

QByteArray result() const { return m_hash.result(); }
Expand All @@ -85,8 +86,7 @@ class NODISCARD RoomHashIndex final
public:
void addRoom(const Room &room)
{
m_hasher.add(room.getName().toQString() + "\n");
m_hasher.add(room.getDescription().toQString());
m_hasher.add(room.getName(), room.getDescription());
m_index.insert(m_hasher.result().toHex(), room.getPosition());
m_hasher.reset();
}
Expand Down
9 changes: 7 additions & 2 deletions src/parser/parserutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
#include "parserutils.h"

#include <array>
#include <cassert>
#include <iostream>
#include <stdexcept>
#include <regex>
#include <QRegularExpression>
#include <QtCore>

Expand Down Expand Up @@ -106,4 +105,10 @@ void latin1ToAscii(std::ostream &os, const std::string_view sv)
}
}

/// Collapses every run of ASCII whitespace into a single space character.
///
/// Whitespace here is exactly: space, '\t', '\n', '\v', '\f' and '\r'.
/// Leading and trailing runs are collapsed to one space as well (they are
/// not trimmed). All other bytes, including Latin-1 bytes >= 0x80, are
/// copied through unchanged.
///
/// @param str input text
/// @return a copy of @p str with each whitespace run replaced by one ' '
std::string normalizeWhitespace(const std::string &str)
{
    // Deliberately not std::regex / std::isspace: both consult a locale, and
    // the result of this function must not vary with locale settings.
    // A single pass is also far cheaper than running a regex over the text.
    const auto isAsciiSpace = [](const char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
    };

    std::string result;
    result.reserve(str.size()); // output is never longer than the input

    bool inWhitespaceRun = false;
    for (const char c : str) {
        if (isAsciiSpace(c)) {
            // Emit one space for the first character of a run; drop the rest.
            if (!inWhitespaceRun) {
                result.push_back(' ');
                inWhitespaceRun = true;
            }
        } else {
            result.push_back(c);
            inWhitespaceRun = false;
        }
    }
    return result;
}

} // namespace ParserUtils
2 changes: 2 additions & 0 deletions src/parser/parserutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ QString &toAsciiInPlace(QString &str);
std::string &latin1ToAsciiInPlace(std::string &str);
NODISCARD std::string latin1ToAscii(const std::string_view sv);
void latin1ToAscii(std::ostream &, const std::string_view sv);
NODISCARD std::string normalizeWhitespace(const std::string &str);

} // namespace ParserUtils
5 changes: 4 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ file(GLOB_RECURSE expandoracommon_SRCS
../src/mapdata/ExitFieldVariant.h
../src/parser/CommandId.cpp
../src/parser/CommandId.h
../src/parser/parserutils.cpp
../src/parser/parserutils.h
)
set(TestExpandoraCommon_SRCS testexpandoracommon.cpp)
add_executable(TestExpandoraCommon ${TestExpandoraCommon_SRCS} ${expandoracommon_SRCS})
Expand Down Expand Up @@ -87,6 +89,7 @@ set(parser_SRCS
../src/parser/CommandId.cpp
../src/parser/CommandId.h
../src/parser/parserutils.cpp
../src/parser/parserutils.h
)
set(TestParser_SRCS testparser.cpp)
add_executable(TestParser ${TestParser_SRCS} ${parser_SRCS})
Expand Down Expand Up @@ -292,4 +295,4 @@ set_target_properties(
COMPILE_FLAGS "${WARNING_FLAGS}"
UNITY_BUILD ${USE_UNITY_BUILD}
)
add_test(NAME TestRoomManager COMMAND TestRoomManager)
add_test(NAME TestRoomManager COMMAND TestRoomManager)

0 comments on commit 559766f

Please sign in to comment.