From 2b7be2f8fbf39391b11707b9baafc1fce6957c7a Mon Sep 17 00:00:00 2001 From: Ruslan Kabatsayev Date: Tue, 19 Mar 2024 13:32:11 +0400 Subject: [PATCH] Add a converter for sky cultures from old format to new It's still quite raw, especially regarding the conversion of descriptions and their translations, but at least it converts the other data. --- .../AsterismOldLoader.cpp | 296 +++++ .../AsterismOldLoader.hpp | 19 + util/skyculture-converter/CMakeLists.txt | 21 + .../ConstellationOldLoader.cpp | 490 ++++++++ .../ConstellationOldLoader.hpp | 51 + .../DescriptionOldLoader.cpp | 1075 +++++++++++++++++ .../DescriptionOldLoader.hpp | 38 + util/skyculture-converter/NamesOldLoader.cpp | 255 ++++ util/skyculture-converter/NamesOldLoader.hpp | 25 + util/skyculture-converter/main.cpp | 192 +++ 10 files changed, 2462 insertions(+) create mode 100644 util/skyculture-converter/AsterismOldLoader.cpp create mode 100644 util/skyculture-converter/AsterismOldLoader.hpp create mode 100644 util/skyculture-converter/CMakeLists.txt create mode 100644 util/skyculture-converter/ConstellationOldLoader.cpp create mode 100644 util/skyculture-converter/ConstellationOldLoader.hpp create mode 100644 util/skyculture-converter/DescriptionOldLoader.cpp create mode 100644 util/skyculture-converter/DescriptionOldLoader.hpp create mode 100644 util/skyculture-converter/NamesOldLoader.cpp create mode 100644 util/skyculture-converter/NamesOldLoader.hpp create mode 100644 util/skyculture-converter/main.cpp diff --git a/util/skyculture-converter/AsterismOldLoader.cpp b/util/skyculture-converter/AsterismOldLoader.cpp new file mode 100644 index 00000000000000..18d9c36d727916 --- /dev/null +++ b/util/skyculture-converter/AsterismOldLoader.cpp @@ -0,0 +1,296 @@ +#include +#include +#include +#include +#include +#include + +#include "AsterismOldLoader.hpp" + +class Asterism +{ +public: + struct Star + { + int HIP = -1; + double RA = NAN, DE = NAN; + bool operator==(const Star& rhs) const + { + if (HIP > 0) return HIP == rhs.HIP; + return RA == rhs.RA && DE == rhs.DE; + } + }; +private: + //! International name (translated using gettext) + QString nameI18; + //! Name in english (second column in asterism_names.eng.fab) + QString englishName; + //! Abbreviation + //! A skyculture designer must invent it. (usually 2-5 letters) + //! This MUST be filled and be unique within a sky culture. + QString abbreviation; + //! Context for name + QString context; + //! Number of segments in the lines + unsigned int numberOfSegments; + //! Type of asterism + int typeOfAsterism; + bool flagAsterism; + + std::vector asterism; + + friend class AsterismOldLoader; +public: + bool read(const QString& record); +}; + +std::ostream& operator<<(std::ostream& s, const Asterism::Star& star) +{ + if (star.HIP > 0) + s << star.HIP; + else + s << "[" << star.RA << ", " << star.DE << "]"; + return s; +} + +bool Asterism::read(const QString& record) +{ + abbreviation.clear(); + numberOfSegments = 0; + typeOfAsterism = 1; + flagAsterism = true; + + QString buf(record); + QTextStream istr(&buf, QIODevice::ReadOnly); + // We allow mixed-case abbreviations now that they can be displayed on screen. We then need toUpper() in comparisons. + istr >> abbreviation >> typeOfAsterism >> numberOfSegments; + if (istr.status()!=QTextStream::Ok) + return false; + + asterism.resize(numberOfSegments*2); + for (unsigned int i=0;i> HP; + if(HP == 0) + { + return false; + } + asterism[i] = Star{int(HP), NAN, NAN}; + break; + } + case 2: // A small asterism with lines by J2000.0 coordinates + { + double RA, DE; + istr >> RA >> DE; + asterism[i] = Star{-1, RA, DE}; + break; + } + } + } + + return true; +} + +Asterism* AsterismOldLoader::findFromAbbreviation(const QString& abbrev) const +{ + for (const auto asterism : asterisms) + if (asterism->abbreviation == abbrev) + return asterism; + return nullptr; +} + +void AsterismOldLoader::load(const QString& skyCultureDir, const QString& cultureId) +{ + this->cultureId = cultureId; + QString fic = skyCultureDir+"/asterism_lines.fab"; + if (fic.isEmpty()) + { + hasAsterism = false; + qWarning() << "No asterisms in " << skyCultureDir; + } + else + { + hasAsterism = true; + loadLines(fic); + } + + // load asterism names + fic = skyCultureDir + "/asterism_names.eng.fab"; + if (!fic.isEmpty()) + loadNames(fic); +} + +void AsterismOldLoader::loadLines(const QString &fileName) +{ + QFile in(fileName); + if (!in.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Can't open asterism data file" << QDir::toNativeSeparators(fileName); + return; + } + + int totalRecords=0; + QString record; + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + while (!in.atEnd()) + { + record = QString::fromUtf8(in.readLine()); + if (!commentRx.match(record).hasMatch()) + totalRecords++; + } + in.seek(0); + + // delete existing data, if any + for (auto* asterism : asterisms) + delete asterism; + + asterisms.clear(); + Asterism *aster = Q_NULLPTR; + + // read the file of line patterns, adding a record per non-comment line + int currentLineNumber = 0; // line in file + int readOk = 0; // count of records processed OK + while (!in.atEnd()) + { + record = QString::fromUtf8(in.readLine()); + currentLineNumber++; + if (commentRx.match(record).hasMatch()) + continue; + + aster = new Asterism; + if(aster->read(record)) + { + asterisms.push_back(aster); + ++readOk; + } + else + { + qWarning() << "ERROR reading asterism lines record at line " << currentLineNumber; + delete aster; + } + } + in.close(); + qDebug() << "Loaded" << readOk << "/" << totalRecords << "asterism records successfully"; +} + +void AsterismOldLoader::loadNames(const QString& namesFile) +{ + // Asterism not loaded yet + if (asterisms.empty()) return; + + // clear previous names + for (auto* asterism : asterisms) + { + asterism->englishName.clear(); + } + + // Open file + QFile commonNameFile(namesFile); + if (!commonNameFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qDebug() << "Cannot open file" << QDir::toNativeSeparators(namesFile); + return; + } + + // Now parse the file + // lines to ignore which start with a # or are empty + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + static const QRegularExpression recRx("^\\s*(\\S+)\\s+_[(]\"(.*)\"[)]\\s*([\\,\\d\\s]*)\\n"); + static const QRegularExpression ctxRx("(.*)\",\\s*\"(.*)"); + + // keep track of how many records we processed. + int totalRecords=0; + int readOk=0; + int lineNumber=0; + while (!commonNameFile.atEnd()) + { + QString record = QString::fromUtf8(commonNameFile.readLine()); + lineNumber++; + + // Skip comments + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + + QRegularExpressionMatch recMatch=recRx.match(record); + if (!recMatch.hasMatch()) + { + qWarning() << "ERROR - cannot parse record at line" << lineNumber << "in asterism names file" << QDir::toNativeSeparators(namesFile) << ":" << record; + } + else + { + QString shortName = recMatch.captured(1); + Asterism *aster = findFromAbbreviation(shortName); + // If the asterism exists, set the English name + if (aster != Q_NULLPTR) + { + QString ctxt = recMatch.captured(2); + QRegularExpressionMatch ctxMatch=ctxRx.match(ctxt); + if (ctxMatch.hasMatch()) + { + aster->englishName = ctxMatch.captured(1); + aster->context = ctxMatch.captured(2); + } + else + { + aster->englishName = ctxt; + aster->context = ""; + } + readOk++; + } + else + { + qWarning() << "WARNING - asterism abbreviation" << shortName << "not found when loading asterism names"; + } + } + } + commonNameFile.close(); + qDebug() << "Loaded" << readOk << "/" << totalRecords << "asterism names"; +} + +bool AsterismOldLoader::dumpJSON(std::ostream& s) const +{ + if (!hasAsterism) return false; + + s << " \"asterisms\": [\n"; + for (const Asterism*const ast : asterisms) + { + s << " {\n"; + s << " \"id\": \"AST " << cultureId.toStdString() << " " << ast->abbreviation.toStdString() << "\",\n"; + if (!ast->englishName.isEmpty()) + s << " \"common_name\": {\"english\": \"" << ast->englishName.toStdString() << "\"},\n"; + const bool isRayHelper = ast->typeOfAsterism == 0; + if(isRayHelper) + s << " \"is_ray_helper\": true,\n"; + + s.precision(std::numeric_limits::digits10); + s << " \"lines\": ["; + auto& points = ast->asterism; + for (unsigned n = 1; n < points.size(); n += 2) + { + s << (n > 1 ? ", [" : "[") << points[n - 1] << ", " << points[n]; + // Merge connected segments into polylines + while (n + 2 < points.size() && points[n + 1] == points[n]) + { + s << ", " << points[n + 2]; + n += 2; + } + s << "]"; + } + s << "]\n"; + if (ast == asterisms.back()) + s << " }\n"; + else + s << " },\n"; + } + s << " ],\n"; + + return true; +} diff --git a/util/skyculture-converter/AsterismOldLoader.hpp b/util/skyculture-converter/AsterismOldLoader.hpp new file mode 100644 index 00000000000000..b032deb2d95d45 --- /dev/null +++ b/util/skyculture-converter/AsterismOldLoader.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include +#include + +class Asterism; +class AsterismOldLoader +{ + QString cultureId; + bool hasAsterism = false; + std::vector asterisms; + + Asterism* findFromAbbreviation(const QString& abbrev) const; + void loadLines(const QString& fileName); + void loadNames(const QString& namesFile); +public: + void load(const QString& skyCultureDir, const QString& cultureId); + bool dumpJSON(std::ostream& s) const; +}; diff --git a/util/skyculture-converter/CMakeLists.txt b/util/skyculture-converter/CMakeLists.txt new file mode 100644 index 00000000000000..27aac292cca120 --- /dev/null +++ b/util/skyculture-converter/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.10.0) +project(skyculture-converter VERSION 0.0.1 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU|Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type -Wall -Wextra") +endif() + +set(CMAKE_AUTOMOC ON) +set(CMAKE_AUTOUIC ON) +find_package(Qt5 5.15 REQUIRED Core OpenGL) + +add_executable(skyculture-converter + NamesOldLoader.cpp + AsterismOldLoader.cpp + DescriptionOldLoader.cpp + ConstellationOldLoader.cpp + main.cpp + ) +target_link_libraries(skyculture-converter Qt5::Core Qt5::Gui gettextpo) diff --git a/util/skyculture-converter/ConstellationOldLoader.cpp b/util/skyculture-converter/ConstellationOldLoader.cpp new file mode 100644 index 00000000000000..3eb4dd7efbb1de --- /dev/null +++ b/util/skyculture-converter/ConstellationOldLoader.cpp @@ -0,0 +1,490 @@ +#include "ConstellationOldLoader.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +bool ConstellationOldLoader::Constellation::read(QString const& record) +{ + unsigned int HP; + + abbreviation.clear(); + unsigned numberOfSegments = 0; + + QString buf(record); + QTextStream istr(&buf, QIODevice::ReadOnly); + // allow mixed-case abbreviations now that they can be displayed on screen. We then need toUpper() in comparisons. + istr >> abbreviation >> numberOfSegments; + if (istr.status()!=QTextStream::Ok) + return false; + + points.clear(); + points.reserve(numberOfSegments*2); + for (unsigned i = 0; i < numberOfSegments*2; ++i) + { + HP = 0; + istr >> HP; + if(HP == 0) return false; + points.push_back(HP); + } + + return true; +} + +auto ConstellationOldLoader::findFromAbbreviation(const QString& abbrev) -> Constellation* +{ + for(auto& cons : constellations) + if(cons.abbreviation == abbrev) + return &cons; + return nullptr; +} + +void ConstellationOldLoader::loadLinesAndArt(const QString& skyCultureDir, const QString& outDir) +{ + const auto fileName = skyCultureDir+"/constellationship.fab"; + const auto artfileName = skyCultureDir+"/constellationsart.fab"; + QFile in(fileName); + if (!in.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Can't open constellation data file" << QDir::toNativeSeparators(fileName); + Q_ASSERT(0); + } + + int totalRecords=0; + QString record; + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); // pure comment lines or empty lines + while (!in.atEnd()) + { + record = QString::fromUtf8(in.readLine()); + if (!commentRx.match(record).hasMatch()) + totalRecords++; + } + in.seek(0); + + constellations.clear(); + Constellation* cons = nullptr; + + // read the file of line patterns, adding a record per non-comment line + int currentLineNumber = 0; // line in file + int readOk = 0; // count of records processed OK + while (!in.atEnd()) + { + record = QString::fromUtf8(in.readLine()); + currentLineNumber++; + if (commentRx.match(record).hasMatch()) + continue; + + constellations.push_back({}); + auto* cons = &constellations.back(); + if(cons->read(record)) + { + ++readOk; + } + else + { + qWarning() << "ERROR reading constellation lines record at line " << currentLineNumber; + constellations.pop_back(); + } + } + in.close(); + if(readOk != totalRecords) + qDebug() << "Loaded" << readOk << "/" << totalRecords << "constellation records successfully"; + + // It's possible to have no art - just constellations + if (artfileName.isNull() || artfileName.isEmpty()) + return; + QFile fic(artfileName); + if (!fic.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Can't open constellation art file" << QDir::toNativeSeparators(fileName); + return; + } + + totalRecords=0; + while (!fic.atEnd()) + { + record = QString::fromUtf8(fic.readLine()); + if (!commentRx.match(record).hasMatch()) + totalRecords++; + } + fic.seek(0); + + // Read the constellation art file with the following format : + // ShortName texture_file x1 y1 hp1 x2 y2 hp2 + // Where : + // shortname is the international short name (i.e "Lep" for Lepus) + // texture_file is the graphic file of the art texture + // x1 y1 are the x and y texture coordinates in pixels of the star of hipparcos number hp1 + // x2 y2 are the x and y texture coordinates in pixels of the star of hipparcos number hp2 + // The coordinate are taken with (0,0) at the top left corner of the image file + QString shortname; + QString texfile; + unsigned int x1, y1, x2, y2, x3, y3, hp1, hp2, hp3; + + currentLineNumber = 0; // line in file + readOk = 0; // count of records processed OK + + while (!fic.atEnd()) + { + ++currentLineNumber; + record = QString::fromUtf8(fic.readLine()); + if (commentRx.match(record).hasMatch()) + continue; + + // prevent leading zeros on numbers from being interpreted as octal numbers + record.replace(" 0", " "); + QTextStream rStr(&record); + rStr >> shortname >> texfile >> x1 >> y1 >> hp1 >> x2 >> y2 >> hp2 >> x3 >> y3 >> hp3; + if (rStr.status()!=QTextStream::Ok) + { + qWarning() << "ERROR parsing constellation art record at line" << currentLineNumber << "of art file"; + continue; + } + + cons = findFromAbbreviation(shortname); + if (!cons) + { + qWarning() << "ERROR in constellation art file at line" << currentLineNumber + << "constellation" << shortname << "unknown"; + } + else + { + cons->artTexture = "illustrations/" + texfile; + const auto texPath = skyCultureDir+"/"+texfile; + QImage tex(texPath); + if(tex.isNull()) + { + std::cerr << "Error: failed to open texture file \"" << texPath.toStdString() << "\"\n"; + } + else + { + cons->textureSize = tex.size(); + + const auto targetPath = outDir+"/"+cons->artTexture; + QDir().mkpath(QFileInfo(targetPath).absoluteDir().path()); + QFile file(texPath); + if(!file.copy(targetPath)) + { + std::cerr << "Error: failed to copy texture file \"" << texPath.toStdString() + << "\" to \"" << targetPath.toStdString() << "\"" + << ": " << file.errorString().toStdString() << "\n"; + } + } + + cons->artP1.x = x1; + cons->artP1.y = y1; + cons->artP1.hip = hp1; + + cons->artP2.x = x2; + cons->artP2.y = y2; + cons->artP2.hip = hp2; + + cons->artP3.x = x3; + cons->artP3.y = y3; + cons->artP3.hip = hp3; + + ++readOk; + } + } + + if(readOk != totalRecords) + qDebug() << "Loaded" << readOk << "/" << totalRecords << "constellation art records successfully"; + fic.close(); +} + +void ConstellationOldLoader::loadNames(const QString& skyCultureDir) +{ + const auto namesFile = skyCultureDir + "/constellation_names.eng.fab"; + + // Constellation not loaded yet + if (constellations.empty()) return; + + // clear previous names + for (auto& constellation : constellations) + { + constellation.englishName.clear(); + } + + // Open file + QFile commonNameFile(namesFile); + if (!commonNameFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qDebug() << "Cannot open file" << QDir::toNativeSeparators(namesFile); + return; + } + + // Now parse the file + // lines to ignore which start with a # or are empty + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + + // lines which look like records - we use the RE to extract the fields + // which will be available in recRx.capturedTexts() + // abbreviation is allowed to start with a dot to mark as "hidden". + static const QRegularExpression recRx("^\\s*(\\.?\\S+)\\s+\"(.*)\"\\s+_[(]\"(.*)\"[)]\\s*([\\,\\d\\s]*)\\n"); + static const QRegularExpression ctxRx("(.*)\",\\s*\"(.*)"); + + // keep track of how many records we processed. + int totalRecords=0; + int readOk=0; + int lineNumber=0; + while (!commonNameFile.atEnd()) + { + QString record = QString::fromUtf8(commonNameFile.readLine()); + lineNumber++; + + // Skip comments + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + + QRegularExpressionMatch recMatch=recRx.match(record); + if (!recMatch.hasMatch()) + { + qWarning() << "ERROR - cannot parse record at line" << lineNumber << "in constellation names file" << QDir::toNativeSeparators(namesFile) << ":" << record; + } + else + { + QString shortName = recMatch.captured(1); + Constellation *aster = findFromAbbreviation(shortName); + // If the constellation exists, set the English name + if (aster) + { + aster->nativeName = recMatch.captured(2); + QString ctxt = recMatch.captured(3); + QRegularExpressionMatch ctxMatch=ctxRx.match(ctxt); + if (ctxMatch.hasMatch()) + { + aster->englishName = ctxMatch.captured(1); + //aster->context = ctxMatch.captured(2); + } + else + { + aster->englishName = ctxt; + //aster->context = ""; + } + readOk++; + } + else + { + qWarning() << "WARNING - constellation abbreviation" << shortName << "not found when loading constellation names"; + } + } + } + commonNameFile.close(); + if(readOk != totalRecords) + qDebug() << "Loaded" << readOk << "/" << totalRecords << "constellation names"; +} + +void ConstellationOldLoader::loadBoundaries(const QString& skyCultureDir) +{ + if(QString(boundariesType.c_str()).toLower() == "none") + return; + + const bool ownB = QString(boundariesType.c_str()).toLower() == "own"; + const auto boundaryFile = ownB ? skyCultureDir + "/constellation_boundaries.dat" + : skyCultureDir + "/../../data/constellation_boundaries.dat"; + + // Modified boundary file by Torsten Bronger with permission + // http://pp3.sourceforge.net + QFile dataFile(boundaryFile); + if (!dataFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Boundary file" << QDir::toNativeSeparators(boundaryFile) << "not found"; + return; + } + + QString data = ""; + + // Added support of comments for constellation_boundaries.dat file + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + while (!dataFile.atEnd()) + { + // Read the line + QString record = QString::fromUtf8(dataFile.readLine()); + + // Skip comments + if (commentRx.match(record).hasMatch()) + continue; + + // Append the data + data.append(record); + } + + boundaries.clear(); + // Read and parse the data without comments + QTextStream istr(&data); + unsigned int i = 0; + while (!istr.atEnd()) + { + unsigned num = 0; + istr >> num; + if(num == 0) + continue; // empty line + + boundaries.push_back({}); + auto& line = boundaries.back(); + auto& points = line.points; + + for (unsigned int j=0;j> RA >> DE; + points.emplace_back(RaDec{RA,DE}); + } + + unsigned numc; + istr >> numc; + if(numc != 2) + { + std::cerr << "Error: expected 2 constellations per boundary, got " << numc << "\n"; + boundaries.clear(); + return; + } + + istr >> line.cons1; + istr >> line.cons2; + if(line.cons1 == "SER1" || line.cons1 == "SER2") line.cons1 = "SER"; + if(line.cons2 == "SER1" || line.cons2 == "SER2") line.cons2 = "SER"; + i++; + } + qDebug() << "Loaded" << i << "constellation boundary segments"; +} + +void ConstellationOldLoader::load(const QString& skyCultureDir, const QString& outDir) +{ + skyCultureName = QFileInfo(skyCultureDir).fileName(); + loadLinesAndArt(skyCultureDir, outDir); + loadNames(skyCultureDir); + + for(const auto& cons : constellations) + { + if(cons.artTexture.isEmpty()) + { + //std::cerr << "No texture found for constellation " << cons.englishName.toStdString() << " (" << cons.abbreviation.toStdString() << ")\n"; + continue; + } + if(cons.textureSize.width() <= 0 || cons.textureSize.height() <= 0) + { + std::cerr << "Failed to find texture size for constellation " << cons.englishName.toStdString() << " (" << cons.abbreviation.toStdString() << ")\n"; + continue; + } + } + + loadBoundaries(skyCultureDir); +} + +bool ConstellationOldLoader::dumpConstellationsJSON(std::ostream& s) const +{ + if(constellations.empty()) return false; + + s << " \"constellations\": [\n"; + for(const auto& c : constellations) + { + using std::to_string; + s << " {\n" + " \"id\": \"CON "+skyCultureName.toStdString()+" "+c.abbreviation.toStdString()+"\",\n"; + + s << " \"lines\": ["; + auto& points = c.points; + for (unsigned n = 1; n < points.size(); n += 2) + { + s << (n > 1 ? ", [" : "[") << points[n - 1] << ", " << points[n]; + // Merge connected segments into polylines + while (n + 2 < points.size() && points[n + 1] == points[n]) + { + s << ", " << points[n + 2]; + n += 2; + } + s << "]"; + } + s << "],\n"; + + if(!c.artTexture.isEmpty()) + { + s << " \"image\": {\n" + " \"file\": \"" << c.artTexture.toStdString() << "\",\n" + " \"size\": [" << to_string(c.textureSize.width()) << ", " << to_string(c.textureSize.height()) << "],\n" + " \"anchors\": [\n" + " {\"pos\": [" << to_string(c.artP1.x) << ", " << to_string(c.artP1.y) << "], \"hip\": " << to_string(c.artP1.hip) << "},\n" + " {\"pos\": [" << to_string(c.artP2.x) << ", " << to_string(c.artP2.y) << "], \"hip\": " << to_string(c.artP2.hip) << "},\n" + " {\"pos\": [" << to_string(c.artP3.x) << ", " << to_string(c.artP3.y) << "], \"hip\": " << to_string(c.artP3.hip) << "}\n" + " ]\n" + " },\n"; + } + s << " \"common_name\": {\"english\": \"" << c.englishName.toStdString() << + (c.nativeName.isEmpty() ? "\"" : "\", \"native\": \"" + c.nativeName.toStdString() + "\"") << "}\n" + " }"; + if(&c != &constellations.back()) + s << ",\n"; + else + s << "\n"; + } + s << " ],\n"; + + return true; +} + +bool ConstellationOldLoader::dumpBoundariesJSON(std::ostream& s) const +{ + if(boundaries.empty()) return false; + + s << " \"edges_type\": \"" << boundariesType << "\",\n"; + s << " \"edges\": [\n"; + s.fill('0'); +#define W2 std::setw(2) + for(const auto& line : boundaries) + { + for(unsigned n = 0; n < line.points.size() - 1; ++n) + { + const auto& p1 = line.points[n]; + const auto& p2 = line.points[n+1]; + + const int ra1ss = std::lround(3600*p1.ra); + const int ra1h = ra1ss / 3600; + const int ra1m = ra1ss / 60 % 60; + const int ra1s = ra1ss % 60; + + const int ra2ss = std::lround(3600*p2.ra); + const int ra2h = ra2ss / 3600; + const int ra2m = ra2ss / 60 % 60; + const int ra2s = ra2ss % 60; + + const int de1ss = std::lround(std::abs(3600*p1.dec)); + const int de1d = de1ss / 3600; + const int de1m = de1ss / 60 % 60; + const int de1s = de1ss % 60; + + const int de2ss = std::lround(std::abs(3600*p2.dec)); + const int de2d = de2ss / 3600; + const int de2m = de2ss / 60 % 60; + const int de2s = de2ss % 60; + + s << " \"___:___ __ " + << W2 << ra1h << ":" << W2 << ra1m << ":" << W2 << ra1s << " " + << (p1.dec>0 ? '+' : '-') << W2 << de1d << ":" << W2 << de1m << ":" << W2 << de1s << " " + << W2 << ra2h << ":" << W2 << ra2m << ":" << W2 << ra2s << " " + << (p2.dec>0 ? '+' : '-') << W2 << de2d << ":" << W2 << de2m << ":" << W2 << de2s << " " + << line.cons1.toStdString() << " " << line.cons2.toStdString() << "\""; + + if(n+2 == line.points.size() && &line == &boundaries.back()) + s << "\n"; + else + s << ",\n"; + } + } +#undef W + s << " ],\n"; + + return true; +} + +bool ConstellationOldLoader::dumpJSON(std::ostream& s) const +{ + return dumpConstellationsJSON(s) && + dumpBoundariesJSON(s); +} diff --git a/util/skyculture-converter/ConstellationOldLoader.hpp b/util/skyculture-converter/ConstellationOldLoader.hpp new file mode 100644 index 00000000000000..57a2f11a9f8612 --- /dev/null +++ b/util/skyculture-converter/ConstellationOldLoader.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include + +class ConstellationOldLoader +{ + QString skyCultureName; + struct Constellation + { + QString abbreviation; + QString englishName; + QString nativeName; + QString artTexture; + QSize textureSize; + std::vector points; + struct Point + { + int hip; + int x, y; + } artP1, artP2, artP3; + + bool read(QString const& record); + }; + std::vector constellations; + struct RaDec + { + double ra, dec; + }; + struct BoundaryLine + { + std::vector points; + QString cons1, cons2; + }; + std::vector boundaries; + std::string boundariesType; + + Constellation* findFromAbbreviation(const QString& abbrev); + void loadLinesAndArt(const QString &skyCultureDir, const QString& outDir); + void loadBoundaries(const QString& skyCultureDir); + void loadNames(const QString &skyCultureDir); + bool dumpBoundariesJSON(std::ostream& s) const; + bool dumpConstellationsJSON(std::ostream& s) const; +public: + void load(const QString &skyCultureDir, const QString& outDir); + bool dumpJSON(std::ostream& s) const; + bool hasBoundaries() const { return !boundaries.empty(); } + void setBoundariesType(std::string const& type) { boundariesType = type; } +}; diff --git a/util/skyculture-converter/DescriptionOldLoader.cpp b/util/skyculture-converter/DescriptionOldLoader.cpp new file mode 100644 index 00000000000000..b24d6766537b33 --- /dev/null +++ b/util/skyculture-converter/DescriptionOldLoader.cpp @@ -0,0 +1,1075 @@ +#include "DescriptionOldLoader.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +#if (QT_VERSION>=QT_VERSION_CHECK(5, 14, 0)) +constexpr auto SkipEmptyParts = Qt::SkipEmptyParts; +#else +constexpr auto SkipEmptyParts = QString::SkipEmptyParts; +#endif + +const QRegularExpression htmlSimpleImageRegex(R"reg(/]*(?:\s+alt="([^"]+)")?\s+src="([^"]+)"(?:\s+alt="([^"]+)")?\s*/?>)reg"); +const QRegularExpression htmlGeneralImageRegex(R"reg(/]*\s+src="([^"]+)"[^>/]*/?>)reg"); + +void htmlListsToMarkdown(QString& string, const bool convertOrderedLists) +{ + // This will only handle lists whose entries don't contain HTML tags, and the + // lists don't contain anything except
  • entries (in particular, no comments). + + static const QRegularExpression entryPattern(R"reg(]*>\s*([^<]+)\s*)reg"); + static const QRegularExpression ulistPattern(R"reg(\s*(?:]*>[^<]+\s*)+)reg"); + static const QRegularExpression outerUListTagPattern(R"reg()reg"); + for(auto matches = ulistPattern.globalMatch(string); matches.hasNext(); ) + { + const auto& match = matches.next(); + auto list = match.captured(0); + list.replace(outerUListTagPattern, "\n"); + list.replace(entryPattern, "\n - \\1\n"); + string.replace(match.captured(0), list); + } + + if(convertOrderedLists) + { + static const QRegularExpression olistPattern(R"reg(\s*(?:]*>[^<]+\s*)+)reg"); + static const QRegularExpression outerOListTagPattern(R"reg()reg"); + for(auto matches = olistPattern.globalMatch(string); matches.hasNext(); ) + { + const auto& match = matches.next(); + auto list = match.captured(0); + list.replace(outerOListTagPattern, "\n"); + list.replace(entryPattern, "\n 1. \\1\n"); + string.replace(match.captured(0), list); + } + } +} + +void htmlBlockQuoteToMarkdown(QString& string) +{ + static const QRegularExpression blockquotePattern(R"reg(\s*[^<]*)reg"); + static const QRegularExpression outerBQTagPattern(R"reg(\s*\s*)reg"); + static const QRegularExpression emptyFinalLines(R"reg((?:\n> *)+\n*$)reg"); + for(auto matches = blockquotePattern.globalMatch(string); matches.hasNext(); ) + { + const auto& match = matches.next(); + auto blockquote = match.captured(0).trimmed(); + blockquote.replace(outerBQTagPattern, "\n"); + blockquote.replace("\n", "\n> "); + blockquote.replace(emptyFinalLines, "\n"); + blockquote = "\n" + blockquote + "\n"; + string.replace(match.captured(0), blockquote); + } +} + +void formatDescriptionLists(QString& string) +{ + // We don't convert DLs into Markdown (since there's no such + // concept there), but we want them to be editor-friendly. + string.replace(QRegularExpression(" *()\\s*()"), "\\1\n \\2"); + string.replace(QRegularExpression("( *)"), " \\1"); +// string.replace(QRegularExpression("()"), "\n\t\\1"); + string.replace(QRegularExpression("()"), "\\1\n"); + string.replace(QRegularExpression("()\n "), "\\1\n"); +} + +void htmlTablesToMarkdown(QString& string) +{ + // Using a single regex to find all tables without merging them into + // one capture appears to be too hard. Let's go in a lower-level way, + // by finding all the beginnings and ends manually. + const QRegularExpression tableBorderPattern(R"reg((]*>)|())reg"); + bool foundStart = false; + int startPos = -1, tagStartPos = -1; + QStringList tables; + std::vector> tablesPositions; + bool isLayoutTable = false; + for(auto matches = tableBorderPattern.globalMatch(string); matches.hasNext(); ) + { + const auto& match = matches.next(); + const auto& startCap = match.captured(1); + const auto& endCap = match.captured(2); + if(!startCap.isEmpty() && endCap.isEmpty() && !foundStart) + { + foundStart = true; + tagStartPos = match.capturedStart(1); + startPos = match.capturedEnd(1); + isLayoutTable = startCap.contains("class=\"layout\""); + } + else if(startCap.isEmpty() && !endCap.isEmpty() && foundStart) + { + foundStart = false; + Q_ASSERT(startPos >= 0); + Q_ASSERT(tagStartPos >= 0); + const auto endPos = match.capturedStart(2); + const auto tagEndPos = match.capturedEnd(2); + if(!isLayoutTable) + { + tables += string.mid(startPos, endPos - startPos); + tablesPositions.emplace_back(tagStartPos, tagEndPos); + } + } + else + { + qWarning() << "Inconsistency between table start and end tags detected, can't process tables further"; + return; + } + } + + // Now do the actual conversion + for(int n = 0; n < tables.size(); ++n) + { + const auto& table = tables[n]; + if(table.contains(QRegularExpression("\\s(?:col|row)span="))) + { + qWarning() << "Row/column spans are not supported, leaving the table in HTML form"; + continue; + } + if(!table.contains(QRegularExpression("^\\s*"))) + { + qWarning().noquote() << "Unexpected table contents (expected it to start with ), keeping the table in HTML form. Table:\n" << table; + continue; + } + if(!table.contains(QRegularExpression("\\s*$"))) + { + qWarning().noquote() << "Unexpected table contents (expected it to end with ), keeping the table in HTML form. Table:\n" << table; + continue; + } + auto rows = table.split(QRegularExpression("\\s*\\s*"), SkipEmptyParts); + // The closing row tags have been removed by QString::split, now remove the opening tags + static const QRegularExpression trOpenTag("^\\s*\\s*"); + for(auto& row : rows) row.replace(trOpenTag, ""); + + QString markdownTable; + // Now convert the rows + for(const auto& row : rows) + { + if(row.simplified().isEmpty()) continue; + if(!row.contains(QRegularExpression("^\\s*"))) + { + qWarning() << "Unexpected row contents (expected it to start with or ), keeping the table in HTML form. Row:" << row; + goto nextTable; + } + if(!row.contains(QRegularExpression("\\s*$"))) + { + qWarning() << "Unexpected row contents (expected it to end with or ), keeping the table in HTML form. Row:" << row; + goto nextTable; + } + auto cols = row.split(QRegularExpression("\\s*\\s*"), SkipEmptyParts); + // The closing column tags have been removed by QString::split, now remove the opening tags + static const QRegularExpression tdOpenTag("^\\s*\\s*"); + for(auto& col : cols) col.replace(tdOpenTag, ""); + + // Finally, emit the rows + const bool firstRow = markdownTable.isEmpty(); + if(firstRow) markdownTable += "\n"; // make sure the table starts as a new paragraph + markdownTable += "|"; + for(const auto& col : cols) + { + if(col.isEmpty()) + markdownTable += " "; + else + markdownTable += col; + markdownTable += '|'; + } + markdownTable += '\n'; + if(firstRow) + { + markdownTable += '|'; + for(const auto& col : cols) + { + markdownTable += QString(std::max(3, col.size()), QChar('-')); + markdownTable += '|'; + } + markdownTable += "\n"; + } + } + + // Replace the HTML table with the newly-created Markdown one + { + const auto lengthToReplace = tablesPositions[n].second - tablesPositions[n].first; + string.replace(tablesPositions[n].first, lengthToReplace, markdownTable); + // Fixup the positions of the subsequent tables + const int delta = markdownTable.size() - lengthToReplace; + for(auto& positions : tablesPositions) + { + positions.first += delta; + positions.second += delta; + } + } + +nextTable: + continue; + } + + // Format the tables that we've failed to convert with each row + // on its line, and each column entry on an indented line. + string.replace(QRegularExpression("(]*)*>)"), "\n\\1"); + string.replace(QRegularExpression("()"), "\n\\1"); + string.replace(QRegularExpression("(]*)*>)"), "\n\t\\1"); + string.replace(QRegularExpression("()"), "\n\\1"); +} + +QString readReferencesFile(const QString& inDir) +{ + const auto path = inDir + "/reference.fab"; + if (path.isEmpty()) + { + qWarning() << "Reference file wasn't found"; + return ""; + } + QFile file(path); + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "WARNING - could not open" << QDir::toNativeSeparators(path); + return ""; + } + QString record; + // Allow empty and comment lines where first char (after optional blanks) is # + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + QString reference = "## References\n\n"; + int totalRecords=0; + int readOk=0; + int lineNumber=0; + while(!file.atEnd()) + { + record = QString::fromUtf8(file.readLine()).trimmed(); + lineNumber++; + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + static const QRegularExpression refRx("\\|"); + #if (QT_VERSION>=QT_VERSION_CHECK(5, 14, 0)) + QStringList ref = record.split(refRx, Qt::KeepEmptyParts); + #else + QStringList ref = record.split(refRx, QString::KeepEmptyParts); + #endif + // 1 - URID; 2 - Reference; 3 - URL (optional) + if (ref.count()<2) + qWarning() << "Error: cannot parse record at line" << lineNumber << "in references file" << QDir::toNativeSeparators(path); + else if (ref.count()<3) + { + qWarning() << "Warning: record at line" << lineNumber << "in references file" + << QDir::toNativeSeparators(path) << " has wrong format (RefID: " << ref.at(0) << ")! Let's use fallback mode..."; + reference.append(QString(" - [#%1]: %2\n").arg(ref[0], ref[1])); + readOk++; + } + else + { + if (ref.at(2).isEmpty()) + reference.append(QString(" - [#%1]: %2\n").arg(ref[0], ref[1])); + else + reference.append(QString(" - [#%1]: [%2](%3)\n").arg(ref[0], ref[1], ref[2])); + readOk++; + } + } + if(readOk != totalRecords) + qDebug() << "Loaded" << readOk << "/" << totalRecords << "references"; + + return reference; +} + +void cleanupWhitespace(QString& markdown) +{ + // Clean too long chains of newlines + markdown.replace(QRegularExpression("\n[ \t]*\n[ \t]*\n+"), "\n\n"); + // Same for such chains inside blockquotes + markdown.replace(QRegularExpression("\n>[ \t]*(?:\n>[ \t]*)+\n"), "\n>\n"); + + // Remove trailing spaces + markdown.replace(QRegularExpression("[ \t]+\n"), "\n"); + + // Make unordered lists a bit denser + const QRegularExpression ulistSpaceListPattern("(\n -[^\n]+)\n+(\n \\-)"); + // 1. Remove space between odd and even entries + markdown.replace(ulistSpaceListPattern, "\\1\\2"); + // 2. Remove space between even and odd entries (same replacement rule) + markdown.replace(ulistSpaceListPattern, "\\1\\2"); + + // Make ordered lists a bit denser + const QRegularExpression olistSpaceListPattern("(\n 1\\.[^\n]+)\n+(\n 1)"); + // 1. Remove space between odd and even entries + markdown.replace(olistSpaceListPattern, "\\1\\2"); + // 2. Remove space between even and odd entries (same replacement rule) + markdown.replace(olistSpaceListPattern, "\\1\\2"); + + const bool startsWithList = markdown.startsWith(" 1. ") || markdown.startsWith(" - "); + markdown = (startsWithList ? " " : "") + markdown.trimmed() + "\n"; +} + +[[nodiscard]] QString convertHTMLToMarkdown(const QString& html, const bool fullerConversionToMarkdown, + const bool footnotesToRefs, const bool convertOrderedLists) +{ + QString markdown = html; + + if(fullerConversionToMarkdown) + { + markdown.replace(QRegularExpression(R"reg(<\s*html\s+dir="[^"]+"\s*>|)reg"), ""); + markdown.replace(QRegularExpression("[\n\t ]+"), " "); + } + else + { + // Twice to handle even/odd cases + markdown.replace(QRegularExpression("\n[ \t]*\n"), "\n"); + markdown.replace(QRegularExpression("\n[ \t]*\n"), "\n"); + } + + // Replace and tags with placeholders that don't + // look like tags, so as not to confuse the replacements below. + const QString notrOpenPlaceholder = "{22c35d6a-5ec3-4405-aeff-e79998dc95f7}"; + const QString notrClosePlaceholder = "{2543be41-c785-4283-a4cf-ce5471d2c422}"; + markdown.replace(QRegularExpression(""), notrOpenPlaceholder); + markdown.replace(QRegularExpression(""), notrClosePlaceholder); + + // Same for and . + const QString supOpenPlaceholder = "{4edbb3ef-6a33-472a-8faf-1b006dda557c}"; + const QString supClosePlaceholder = "{e4e12021-9fdf-48de-80ef-e65f0c42738f}"; + const auto supOpenPlaceholderPattern = "\\" + supOpenPlaceholder; + const auto supClosePlaceholderPattern = "\\" + supClosePlaceholder; + markdown.replace(QRegularExpression(""), supOpenPlaceholder); + markdown.replace(QRegularExpression(""), supClosePlaceholder); + + if(fullerConversionToMarkdown) + { + // Replace HTML line breaks with the Markdown ones + markdown.replace(QRegularExpression(""), "\n\n"); + + const auto replaceEmpasis = [&markdown] { + // Replace simple HTML emphases with the Markdown ones + markdown.replace(QRegularExpression("(\\s*)([^<\\s]{1,2}|[^<\\s][^<]+[^<\\s])(\\s*)"), "\\1*\\2*\\3"); + markdown.replace(QRegularExpression("(\\s*)([^<\\s]{1,2}|[^<\\s][^<]+[^<\\s])(\\s*)"), "\\1*\\2*\\3"); + markdown.replace(QRegularExpression("(\\s*)([^<\\s]{1,2}|[^<\\s][^<]+[^<\\s])(\\s*)"), "\\1**\\2**\\3"); + markdown.replace(QRegularExpression("(\\s*)([^<\\s]{1,2}|[^<\\s][^<]+[^<\\s])(\\s*)"), "\\1**\\2**\\3"); + }; + replaceEmpasis(); + + // Replace simple HTML images with the Markdown ones + markdown.replace(htmlSimpleImageRegex, R"rep(![\1\3](\2))rep"); + + if(footnotesToRefs) + { + // Hyperlinks to footnotes + markdown.replace(QRegularExpression(supOpenPlaceholderPattern+ + R"regex(\s*\s*\[[^\]]+\]([,\s]*)\s*)regex"+ + supClosePlaceholderPattern, + QRegularExpression::DotMatchesEverythingOption), "[#\\1]\\2"); + } + + // Replace simple HTML hyperlinks with the Markdown ones + // older version (do we want it?): markdown.replace(QRegularExpression("([^>])]*)?>([^<]+)([^<])"), "\\1[\\3](\\2)\\4"); + markdown.replace(QRegularExpression("]*)?>([^<]+)"), "[\\2](\\1)"); + + if(footnotesToRefs) + { + // First footnote (to prepend
      before it) + markdown.replace(QRegularExpression( R"regex((?:\[[^\]]+\] *)?([^<]*))regex", + QRegularExpression::DotMatchesEverythingOption), "
        \n
      • [#\\1]: \\2
      • \n"); + // Last footnote (to append
      after it) + markdown.replace(QRegularExpression(R"regex((?:\[[^\]]+\] *)?([^<]*)\s*($|[#\\1]: \\2\n
    \n\\3"); + // Middle footnotes + markdown.replace(QRegularExpression(R"regex((?:\[[^\]]+\] *)?([^<]*))regex", + QRegularExpression::DotMatchesEverythingOption), "
  • [#\\1]: \\2
  • \n"); + } + + // Retry italics etc. This might now work after the above conversions if it hasn't worked before. + replaceEmpasis(); + + // Replace HTML paragraphs with the Markdown ones + markdown.replace(QRegularExpression("]*)*>([^<]+)

    "), "\n\\1\n"); + } + + // Replace simple HTML headings with corresponding Markdown ones + for(int n = 1; n <= 6; ++n) + markdown.replace(QRegularExpression(QString("]*)*>([^<]+) *").arg(n)), "\n" + QString(n, QChar('#'))+" \\1\n"); + + if(fullerConversionToMarkdown) + { + htmlTablesToMarkdown(markdown); + + formatDescriptionLists(markdown); + } + + htmlListsToMarkdown(markdown, convertOrderedLists); + htmlBlockQuoteToMarkdown(markdown); + + if(fullerConversionToMarkdown) + { + // Retry paragraphs. This might now work after the above conversions if it hasn't worked before. + markdown.replace(QRegularExpression("]*)*>([^<]+)

    "), "\n\\1\n"); + } + else + { + markdown.replace(QRegularExpression("([^<]+)

    "), "\n\\1\n"); + } + + cleanupWhitespace(markdown); + + // Restore the reserved tags + markdown.replace(notrOpenPlaceholder, ""); + markdown.replace(notrClosePlaceholder, ""); + markdown.replace(supOpenPlaceholder, ""); + markdown.replace(supClosePlaceholder, ""); + + return markdown; +} + +void addMissingTextToMarkdown(QString& markdown, const QString& inDir, const QString& author, const QString& credit, const QString& license) +{ + // Add missing "Introduction" heading if we have a headingless intro text + if(!markdown.contains(QRegularExpression("^\\s*# [^\n]+\n+\\s*##\\s*Introduction\n"))) + markdown.replace(QRegularExpression("^(\\s*# [^\n]+\n+)(\\s*[^#])"), "\\1## Introduction\n\n\\2"); + if(!markdown.contains("\n## Description\n")) + markdown.replace(QRegularExpression("(\n## Introduction\n[^#]+\n)(\\s*#)"), "\\1## Description\n\n\\2"); + + // Add some sections the info for which is contained in info.ini in the old format + if(markdown.contains(QRegularExpression("\n##\\s+(?:References|External\\s+links)\\s*\n"))) + markdown.replace(QRegularExpression("(\n##[ \t]+)External[ \t]+links([ \t]*\n)"), "\\1References\\2"); + auto referencesFromFile = readReferencesFile(inDir); + + if(markdown.contains(QRegularExpression("\n##\\s+Authors?\\s*\n"))) + { + qWarning() << "Authors section already exists, not adding the authors from info.ini"; + + // But do add references before this section + if(!referencesFromFile.isEmpty()) + markdown.replace(QRegularExpression("(\n##\\s+Authors?\\s*\n)"), "\n"+referencesFromFile + "\n\\1"); + } + else + { + // First add references + if(!referencesFromFile.isEmpty()) + markdown += referencesFromFile + "\n"; + + if(credit.isEmpty()) + markdown += QString("\n## Authors\n\n%1\n").arg(author); + else + markdown += "\n## Authors\n\nAuthor is " + author + ". Additional credit goes to " + credit + "\n"; + } + + if(markdown.contains(QRegularExpression("\n##\\s+License\\s*\n"))) + qWarning() << "License section already exists, not adding the license from info.ini"; + else + markdown += "\n## License\n\n" + license + "\n"; + + cleanupWhitespace(markdown); +} + +struct Section +{ + int level = -1; + int levelAddition = 0; + int headerLineStartPos = -1; + int headerStartPos = -1; // including #..# + int bodyStartPos = -1; + QString title; + QString body; + std::deque subsections; +}; + +std::vector
    splitToSections(const QString& markdown) +{ + const QRegularExpression sectionHeaderPattern("^[ \t]*((#+)\\s+(.*[^\\s])\\s*)$", QRegularExpression::MultilineOption); + std::vector
    sections; + for(auto matches = sectionHeaderPattern.globalMatch(markdown); matches.hasNext(); ) + { + sections.push_back({}); + auto& section = sections.back(); + const auto& match = matches.next(); + section.headerLineStartPos = match.capturedStart(0); + section.headerStartPos = match.capturedStart(1); + section.level = match.captured(2).length(); + section.title = match.captured(3); + section.bodyStartPos = match.capturedEnd(0) + 1/*\n*/; + + if(section.title.trimmed() == "Author") + section.title = "Authors"; + } + + for(unsigned n = 0; n < sections.size(); ++n) + { + if(n+1 < sections.size()) + sections[n].body = markdown.mid(sections[n].bodyStartPos, + std::max(0, sections[n+1].headerLineStartPos - sections[n].bodyStartPos)) + .replace(QRegularExpression("^\n*|\\s*$"), ""); + else + sections[n].body = markdown.mid(sections[n].bodyStartPos).replace(QRegularExpression("^\n*|\\s*$"), ""); + } + + return sections; +} + +bool isStandardTitle(const QString& title) +{ + return title == "Introduction" || + title == "Description" || + title == "Constellations" || + title == "References" || + title == "Authors" || + title == "License"; +} + +void gettextpo_xerror(int severity, po_message_t message, const char *filename, size_t lineno, size_t column, int multiline_p, const char *message_text) +{ + (void)message; + qWarning().nospace() << "libgettextpo: " << filename << ":" << lineno << ":" << column << ": " << (multiline_p ? "\n" : "") << message_text; + if(severity == PO_SEVERITY_FATAL_ERROR) + std::abort(); +} + +void gettextpo_xerror2(int severity, + po_message_t message1, const char *filename1, size_t lineno1, size_t column1, int multiline_p1, const char *message_text1, + po_message_t message2, const char *filename2, size_t lineno2, size_t column2, int multiline_p2, const char *message_text2) +{ + (void)message1; + (void)message2; + qWarning().nospace() << "libgettextpo: error with two messages:"; + qWarning().nospace() << "libgettextpo: message 1 error: " << filename1 << ":" << lineno1 << ":" << column1 << ": " << (multiline_p1 ? "\n" : "") << message_text1; + qWarning().nospace() << "libgettextpo: message 2 error: " << filename2 << ":" << lineno2 << ":" << column2 << ": " << (multiline_p2 ? "\n" : "") << message_text2; + if(severity == PO_SEVERITY_FATAL_ERROR) + std::abort(); +} +} + +QString DescriptionOldLoader::translateSection(const QString& markdown, const qsizetype bodyStartPos, + const qsizetype bodyEndPos, const QString& locale, const QString& sectionName) const +{ + auto text = markdown.mid(bodyStartPos, bodyEndPos - bodyStartPos); + text.replace(QRegularExpression("^\n*|\n*$"), ""); + for(const auto& entry : translations[locale]) + { + if(entry.english == text) + { + text = entry.translated; + break; + } + if(entry.comment == QString("Sky culture %1 section in markdown format").arg(sectionName.trimmed().toLower())) + qWarning() << " *** BAD TRANSLATION ENTRY for section" << sectionName; + } + return text; +} + +QString DescriptionOldLoader::translateDescription(const QString& markdown, const QString& locale) const +{ + const QRegularExpression headerPat("^# +(.+)$", QRegularExpression::MultilineOption); + const auto match = headerPat.match(markdown); + QString name; + if (match.isValid()) + { + name = match.captured(1); + } + else + { + qCritical().nospace() << "Failed to get sky culture name: got " << match.lastCapturedIndex() << " matches instead of 1"; + name = "Unknown"; + } + + QString text = "# " + name + "\n\n"; + const QRegularExpression sectionNamePat("^## +(.+)$", QRegularExpression::MultilineOption); + QString prevSectionName; + qsizetype prevBodyStartPos = -1; + for (auto it = sectionNamePat.globalMatch(markdown); it.hasNext(); ) + { + const auto match = it.next(); + const auto sectionName = match.captured(1); + const auto nameStartPos = match.capturedStart(0); + const auto bodyStartPos = match.capturedEnd(0); + if (!prevSectionName.isEmpty()) + { + const auto sectionText = translateSection(markdown, prevBodyStartPos, nameStartPos, locale, prevSectionName); + text += "## " + prevSectionName + "\n\n"; + if (!sectionText.isEmpty()) + text += sectionText + "\n\n"; + } + prevBodyStartPos = bodyStartPos; + prevSectionName = sectionName; + } + if (prevBodyStartPos >= 0) + { + const auto sectionText = translateSection(markdown, prevBodyStartPos, markdown.size(), locale, prevSectionName); + if (!sectionText.isEmpty()) + { + text += "## " + prevSectionName + "\n\n"; + text += sectionText; + } + } + + return text; +} +void DescriptionOldLoader::loadTranslationsOfNames(const QString& poBaseDir, const QString& cultureIdQS, const QString& englishName) +{ + po_xerror_handler handler = {gettextpo_xerror, gettextpo_xerror2}; + const auto cultureId = cultureIdQS.toStdString(); + + const auto poDir = poBaseDir+"/stellarium-skycultures"; + for(const auto& fileName : QDir(poDir).entryList({"*.po"})) + { + const QString locale = fileName.chopped(3); + const auto file = po_file_read((poDir+"/"+fileName).toStdString().c_str(), &handler); + if(!file) continue; + + const auto header = po_file_domain_header(file, nullptr); + if(header) poHeaders[locale] = header; + + qDebug().nospace() << "Processing translations of names for locale " << locale << "..."; + auto& dict = translations[locale]; + + // First try to find translation for the name of the sky culture + bool scNameTranslated = false; + if(const auto scNameFile = po_file_read((poBaseDir+"/stellarium/"+fileName).toStdString().c_str(), &handler)) + { + const auto domains = po_file_domains(scNameFile); + for(auto domainp = domains; *domainp && !scNameTranslated; domainp++) + { + const auto domain = *domainp; + po_message_iterator_t iterator = po_message_iterator(scNameFile, domain); + + for(auto message = po_next_message(iterator); message != nullptr; message = po_next_message(iterator)) + { + const auto msgid = po_message_msgid(message); + const auto msgstr = po_message_msgstr(message); + const auto ctxt = po_message_msgctxt(message); + if(ctxt && ctxt == std::string_view("sky culture") && msgid == englishName) + { + dict.insert(dict.begin(), {"Sky culture name", "", msgid, msgstr}); + scNameTranslated = true; + break; + } + } + po_message_iterator_free(iterator); + } + po_file_free(scNameFile); + } + + if(!scNameTranslated) + qWarning() << "Couldn't find a translation for the name of the sky culture"; + + const auto domains = po_file_domains(file); + for(auto domainp = domains; *domainp; domainp++) + { + const auto domain = *domainp; + po_message_iterator_t iterator = po_message_iterator(file, domain); + + for(auto message = po_next_message(iterator); message != nullptr; message = po_next_message(iterator)) + { + const auto msgid = po_message_msgid(message); + const auto msgstr = po_message_msgstr(message); + const auto comments = po_message_comments(message); + const auto xcomments = po_message_extracted_comments(message); + for(int n = 0; ; ++n) + { + const auto filepos = po_message_filepos(message, n); + if(!filepos) break; + const auto refFileName = po_filepos_file(filepos); + for(const auto ref : { + "skycultures/"+cultureId+"/star_names.fab", + "skycultures/"+cultureId+"/dso_names.fab", + "skycultures/"+cultureId+"/planet_names.fab", + "skycultures/"+cultureId+"/asterism_names.fab", + "skycultures/"+cultureId+"/constellation_names.eng.fab", + }) + { + if(refFileName == ref) + dict.push_back({comments, xcomments, msgid, msgstr}); + } + } + } + po_message_iterator_free(iterator); + } + po_file_free(file); + } +} + +void DescriptionOldLoader::locateAllInlineImages(const QString& html) +{ + for(auto matches = htmlGeneralImageRegex.globalMatch(html); matches.hasNext(); ) + { + const auto& match = matches.next(); + imageHRefs.emplace_back(match.captured(1)); + } +} + +void DescriptionOldLoader::load(const QString& inDir, const QString& poBaseDir, const QString& cultureId, const QString& englishName, + const QString& author, const QString& credit, const QString& license, + const bool fullerConversionToMarkdown, const bool footnotesToRefs, const bool convertOrderedLists, + const bool genTranslatedMD) +{ + inputDir = inDir; + const auto englishDescrPath = inDir+"/description.en.utf8"; + QFile englishDescrFile(englishDescrPath); + if(!englishDescrFile.open(QFile::ReadOnly)) + { + qCritical().noquote() << "Failed to open file" << englishDescrPath; + return; + } + const auto html = englishDescrFile.readAll(); + locateAllInlineImages(html); + qDebug() << "Processing English description..."; + markdown = convertHTMLToMarkdown(html, fullerConversionToMarkdown, footnotesToRefs, convertOrderedLists); + + auto englishSections = splitToSections(markdown); + const int level1sectionCount = std::count_if(englishSections.begin(), englishSections.end(), + [](auto& s){return s.level==1;}); + if(level1sectionCount != 1) + { + qCritical().nospace() << "Unexpected number of level-1 sections in file " << englishDescrPath + << " (expected 1, found " << level1sectionCount + << "), will not convert the description"; + return; + } + + // Mark all sections with level>2 to be subsections of the nearest preceding level<=2 sections + std::deque subsections; + for(int n = signed(englishSections.size()) - 1; n >= 0; --n) + { + const bool hasStandardTitle = isStandardTitle(englishSections[n].title); + if(hasStandardTitle && englishSections[n].level != 2) + { + qWarning() << "Warning: found a section titled" << englishSections[n].title + << "but having level" << englishSections[n].level << " instead of 2"; + } + + if(englishSections[n].level > 2 || (englishSections[n].level == 2 && !hasStandardTitle)) + { + subsections.push_front(n); + } + else + { + englishSections[n].subsections = std::move(subsections); + subsections.clear(); + } + } + + // Increase the level of all level-2 sections and their subsections unless they have one of the standard titles + for(auto& section : englishSections) + { + if(section.level != 2 || isStandardTitle(section.title)) continue; + if(section.level == 2) + { + for(const int n : section.subsections) + englishSections[n].levelAddition = 1; + } + section.levelAddition = 1; + } + + if(englishSections.empty()) + { + qCritical() << "No sections found in" << englishDescrPath; + return; + } + + if(englishSections[0].level != 1) + { + qCritical() << "Unexpected section structure: first section must have level 1, but instead has" << englishSections[0].level; + return; + } + + if(englishSections[0].title.trimmed().toLower() != englishName.toLower()) + { + qWarning().nospace() << "English description caption is not the same as the name of the sky cuture: " + << englishSections[0].title << " vs " << englishName << ". Will change the caption to match the name."; + englishSections[0].title = englishName; + } + + const QRegularExpression localePattern("description\\.([^.]+)\\.utf8"); + + // This will contain the final form of the English sections for use as a key + // in translations as well as to reconstruct the main description.md + std::vector> finalEnglishSections; + bool finalEnglishSectionsDone = false; + + bool descrSectionExists = false; + for(const auto& section : englishSections) + { + if(section.level + section.levelAddition != 2) + continue; + if(section.title.trimmed().toLower() == "description") + descrSectionExists = true; + } + + std::vector locales; + for(const auto& fileName : QDir(inDir).entryList({"description.*.utf8"})) + { + if(fileName == "description.en.utf8") continue; + + const auto localeMatch = localePattern.match(fileName); + if(!localeMatch.isValid()) + { + qCritical() << "Failed to extract locale from file name" << fileName; + continue; + } + const auto locale = localeMatch.captured(1); + locales.push_back(locale); + const auto path = inDir + "/" + fileName; + QFile file(path); + if(!file.open(QFile::ReadOnly)) + { + qCritical().noquote() << "Failed to open file" << path << "\n"; + continue; + } + qDebug().nospace() << "Processing description for locale " << locale << "..."; + auto trMD0 = convertHTMLToMarkdown(file.readAll(), fullerConversionToMarkdown, footnotesToRefs, convertOrderedLists); + const auto translationMD = trMD0.replace(QRegularExpression("([^<]+)"), "\\1"); + const auto translatedSections = splitToSections(translationMD); + if(translatedSections.size() != englishSections.size()) + { + qCritical().nospace().noquote() << "Number of sections (" << translatedSections.size() + << ") in description for locale " << locale + << " doesn't match that of the English description (" + << englishSections.size() << "). Skipping this translation."; + auto dbg = qDebug().nospace().noquote(); + dbg << " ** English section titles:\n"; + for(const auto& sec : englishSections) + dbg << sec.level << ": " << sec.title << "\n"; + dbg << " ** Translated section titles:\n"; + for(const auto& sec : translatedSections) + dbg << sec.level << ": " << sec.title << "\n"; + dbg << "\n____________________________________________\n"; + continue; + } + + bool sectionsOK = true; + for(unsigned n = 0; n < englishSections.size(); ++n) + { + if(translatedSections[n].level != englishSections[n].level) + { + qCritical() << "Section structure of English text and translation for" + << locale << "doesn't match, skipping this translation"; + auto dbg = qDebug().nospace(); + dbg << " ** English section titles:\n"; + for(const auto& sec : englishSections) + dbg << sec.level << ": " << sec.title << "\n"; + dbg << " ** Translated section titles:\n"; + for(const auto& sec : translatedSections) + dbg << sec.level << ": " << sec.title << "\n"; + dbg << "\n____________________________________________\n"; + sectionsOK = false; + break; + } + } + if(!sectionsOK) continue; + + TranslationDict dict; + for(unsigned n = 0; n < englishSections.size(); ++n) + { + const auto& engSec = englishSections[n]; + if(engSec.level + engSec.levelAddition > 2) continue; + + QString key = engSec.body; + QString value = translatedSections[n].body; + auto titleForComment = engSec.title.contains(' ') ? '"' + engSec.title.toLower() + '"' : engSec.title.toLower(); + auto sectionTitle = engSec.title; + bool insertDescriptionHeading = false; + if(engSec.level == 1 && !key.isEmpty()) + { + if(!finalEnglishSectionsDone) + finalEnglishSections.emplace_back("Introduction", key); + auto comment = QString("Sky culture introduction section in markdown format"); + dict.push_back({std::move(comment), "", std::move(key), std::move(value)}); + key = ""; + value = ""; + if(descrSectionExists) continue; + + titleForComment = "description"; + sectionTitle = "Description"; + insertDescriptionHeading = true; + } + + for(const auto subN : engSec.subsections) + { + const auto& keySubSection = englishSections[subN]; + key += "\n\n"; + key += QString(keySubSection.level + keySubSection.levelAddition, QChar('#')); + key += ' '; + key += keySubSection.title; + key += "\n\n"; + key += keySubSection.body; + key += "\n\n"; + cleanupWhitespace(key); + key.replace(QRegularExpression("^\n*|\\s*$"), ""); + + const auto& valueSubSection = translatedSections[subN]; + value += "\n\n"; + value += QString(keySubSection.level + keySubSection.levelAddition, QChar('#')); + value += ' '; + value += valueSubSection.title; + value += "\n\n"; + value += valueSubSection.body; + value += "\n\n"; + cleanupWhitespace(value); + value.replace(QRegularExpression("^\n*|\\s*$"), ""); + } + if(!finalEnglishSectionsDone) + { + if((!sectionTitle.isEmpty() && engSec.level + engSec.levelAddition == 2) || + insertDescriptionHeading) + finalEnglishSections.emplace_back(sectionTitle, key); + } + if(!key.isEmpty()) + { + auto comment = QString("Sky culture %1 section in markdown format").arg(titleForComment); + dict.push_back({std::move(comment), "", std::move(key), std::move(value)}); + } + } + if(!finalEnglishSections.empty()) + finalEnglishSectionsDone = true; + translations[locale] = std::move(dict); + } + + // Reconstruct markdown from the altered sections + if(finalEnglishSections.empty()) + { + // A case where no translation exists + markdown.clear(); + for(const auto& section : englishSections) + { + markdown += QString(section.level + section.levelAddition, QChar('#')); + markdown += ' '; + markdown += section.title.trimmed(); + markdown += "\n\n"; + markdown += section.body; + markdown += "\n\n"; + } + } + else + { + markdown = "# " + englishSections[0].title + "\n\n"; + for(const auto& section : finalEnglishSections) + { + markdown += "## "; + markdown += section.first; + markdown += "\n\n"; + markdown += section.second; + markdown += "\n\n"; + } + } + + addMissingTextToMarkdown(markdown, inDir, author, credit, license); + if(genTranslatedMD) + { + for(const auto& locale : locales) + translatedMDs[locale] = translateDescription(markdown, locale); + } + + loadTranslationsOfNames(poBaseDir, cultureId, englishName); +} + +bool DescriptionOldLoader::dumpMarkdown(const QString& outDir) const +{ + const auto path = outDir+"/description.md"; + QFile file(path); + if(!file.open(QFile::WriteOnly)) + { + qCritical().noquote() << "Failed to open file" << path << "\n"; + return false; + } + + if(markdown.isEmpty()) return true; + + if(file.write(markdown.toUtf8()) < 0 || !file.flush()) + { + qCritical().noquote() << "Failed to write " << path << ": " << file.errorString() << "\n"; + return false; + } + + for(const auto& img : imageHRefs) + { + const auto imgInPath = inputDir+"/"+img; + if(!QFileInfo(imgInPath).exists()) + { + qCritical() << "Failed to locate an image referenced in the description:" << img; + continue; + } + const auto imgOutPath = outDir + "/" + img; + const auto imgDir = QFileInfo(imgOutPath).absoluteDir().absolutePath(); + if(!QDir().mkpath(imgDir)) + { + qCritical() << "Failed to create output directory for image file" << img; + continue; + } + + if(!QFile(imgInPath).copy(imgOutPath)) + { + qCritical() << "Failed to copy an image file referenced in the description:" << img; + continue; + } + } + + if(!translatedMDs.isEmpty()) + { + for(const auto& key : translatedMDs.keys()) + { + const auto path = outDir+"/description."+key+".DO_NOT_COMMIT.md"; + QFile file(path); + if(!file.open(QFile::WriteOnly)) + { + qCritical().noquote() << "Failed to open file" << path << "\n"; + return false; + } + if(file.write(translatedMDs[key].toUtf8()) < 0 || !file.flush()) + { + qCritical().noquote() << "Failed to write " << path << ": " << file.errorString() << "\n"; + return false; + } + } + } + + return true; +} + +bool DescriptionOldLoader::dump(const QString& outDir) const +{ + if(!dumpMarkdown(outDir)) return false; + + const auto poDir = outDir + "/po"; + if(!QDir().mkpath(poDir)) + { + qCritical() << "Failed to create po directory\n"; + return false; + } + + for(auto dictIt = translations.begin(); dictIt != translations.end(); ++dictIt) + { + const auto& locale = dictIt.key(); + const auto path = poDir + "/" + locale + ".po"; + + const auto file = po_file_create(); + po_message_iterator_t iterator = po_message_iterator(file, nullptr); + + // I've found no API to *create* a header, so will try to emulate it with a message + const auto header = poHeaders[locale]; + const auto headerMsg = po_message_create(); + po_message_set_msgid(headerMsg, ""); + po_message_set_msgstr(headerMsg, header.toStdString().c_str()); + po_message_insert(iterator, headerMsg); + + std::set emittedEntries; + for(const auto& entry : dictIt.value()) + { + if(emittedEntries.find(entry) != emittedEntries.end()) continue; + const auto msg = po_message_create(); + if(!entry.comment.isEmpty()) + po_message_set_comments(msg, entry.comment.toStdString().c_str()); + if(!entry.extractedComment.isEmpty()) + po_message_set_extracted_comments(msg, entry.extractedComment.toStdString().c_str()); + po_message_set_msgid(msg, entry.english.toStdString().c_str()); + po_message_set_msgstr(msg, entry.translated.toStdString().c_str()); + po_message_insert(iterator, msg); + emittedEntries.insert(entry); + } + po_message_iterator_free(iterator); + po_xerror_handler handler = {gettextpo_xerror, gettextpo_xerror2}; + po_file_write(file, path.toStdString().c_str(), &handler); + po_file_free(file); + } + return true; +} diff --git a/util/skyculture-converter/DescriptionOldLoader.hpp b/util/skyculture-converter/DescriptionOldLoader.hpp new file mode 100644 index 00000000000000..98e6a013e9f54b --- /dev/null +++ b/util/skyculture-converter/DescriptionOldLoader.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include + +class DescriptionOldLoader +{ + QString markdown; + QHash translatedMDs; + QString inputDir; + std::vector imageHRefs; + struct DictEntry + { + QString comment; + QString extractedComment; + QString english; + QString translated; + bool operator<(const DictEntry& other) const + { + return std::tie(comment,english,translated) < std::tie(other.comment,other.english,other.translated); + } + }; + using TranslationDict = std::vector; + QHash translations; + QHash poHeaders; + bool dumpMarkdown(const QString& outDir) const; + void locateAllInlineImages(const QString& html); + void loadTranslationsOfNames(const QString& poBaseDir, const QString& cultureId, const QString& englishName); + QString translateSection(const QString& markdown, const qsizetype bodyStartPos, const qsizetype bodyEndPos, const QString& locale, const QString& sectionName) const; + QString translateDescription(const QString& markdown, const QString& locale) const; +public: + void load(const QString& inDir, const QString& poBaseDir, const QString& cultureId, const QString& englishName, + const QString& author, const QString& credit, const QString& license, + bool fullerConversionToMarkdown, bool footnotesToRefs, bool convertOrderedLists, bool genTranslatedMD); + bool dump(const QString& outDir) const; +}; diff --git a/util/skyculture-converter/NamesOldLoader.cpp b/util/skyculture-converter/NamesOldLoader.cpp new file mode 100644 index 00000000000000..c7f8247e05cef2 --- /dev/null +++ b/util/skyculture-converter/NamesOldLoader.cpp @@ -0,0 +1,255 @@ +#include "NamesOldLoader.hpp" + +#include +#include +#include +#include +#include + +void NamesOldLoader::loadStarNames(const QString& skyCultureDir) +{ + const auto nameFile = skyCultureDir + "/star_names.fab"; + QFile cnFile(nameFile); + if (!cnFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning().noquote() << "WARNING - could not open" << QDir::toNativeSeparators(nameFile); + return; + } + + int readOk=0; + int totalRecords=0; + int lineNumber=0; + QString record; + // Allow empty and comment lines where first char (after optional blanks) is # + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + // record structure is delimited with a | character. We will + // use a QRegularExpression to extract the fields. with white-space padding permitted + // (i.e. it will be stripped automatically) Example record strings: + // " 677|_("Alpheratz")" + // "113368|_("Fomalhaut")" + // Note: Stellarium doesn't support sky cultures made prior to version 0.10.6 now! + static const QRegularExpression recordRx("^\\s*(\\d+)\\s*\\|[_]*[(]\"(.*)\"[)]\\s*([\\,\\d\\s]*)"); + + while(!cnFile.atEnd()) + { + record = QString::fromUtf8(cnFile.readLine()).trimmed(); + lineNumber++; + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + QRegularExpressionMatch recMatch=recordRx.match(record); + if (!recMatch.hasMatch()) + { + qWarning().noquote() << "WARNING - parse error at line" << lineNumber << "in" << QDir::toNativeSeparators(nameFile) + << " - record does not match record pattern"; + qWarning().noquote() << "Problematic record:" << record; + continue; + } + else + { + // The record is the right format. Extract the fields + bool ok; + const int hip = recMatch.captured(1).toInt(&ok); + if (!ok) + { + qWarning().noquote() << "WARNING - parse error at line" << lineNumber << "in" << QDir::toNativeSeparators(nameFile) + << " - failed to convert " << recMatch.captured(1) << "to a number"; + continue; + } + QString englishCommonName = recMatch.captured(2).trimmed(); + if (englishCommonName.isEmpty()) + { + qWarning().noquote() << "WARNING - parse error at line" << lineNumber << "in" << QDir::toNativeSeparators(nameFile) + << " - empty name field"; + continue; + } + + starNames[hip].push_back(englishCommonName); + + readOk++; + } + } + cnFile.close(); + + if(readOk != totalRecords) + qDebug().noquote() << "Loaded" << readOk << "/" << totalRecords << "common star names"; +} + +void NamesOldLoader::loadDSONames(const QString& skyCultureDir) +{ + const auto namesFile = skyCultureDir + "/dso_names.fab"; + if (namesFile.isEmpty()) + { + qWarning() << "Failed to open file" << QDir::toNativeSeparators(namesFile); + return; + } + + // Open file + QFile dsoNamesFile(namesFile); + if (!dsoNamesFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Failed to open file" << QDir::toNativeSeparators(namesFile); + return; + } + + // Now parse the file + // lines to ignore which start with a # or are empty + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + + // lines which look like records - we use the RE to extract the fields + // which will be available in recMatch.capturedTexts() + static const QRegularExpression recRx("^\\s*([\\w\\s\\-\\+\\.]+)\\s*\\|[_]*[(]\"(.*)\"[)]\\s*([\\,\\d\\s]*)"); + + QString record, dsoId, nativeName; + int totalRecords=0; + int readOk=0; + int lineNumber=0; + while (!dsoNamesFile.atEnd()) + { + record = QString::fromUtf8(dsoNamesFile.readLine()).trimmed(); + lineNumber++; + + // Skip comments + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + + QRegularExpressionMatch recMatch=recRx.match(record); + if (!recMatch.hasMatch()) + { + qWarning().noquote() << "ERROR - cannot parse record at line" << lineNumber << "in native deep-sky object names file" << QDir::toNativeSeparators(namesFile); + } + else + { + dsoId = recMatch.captured(1).trimmed(); + nativeName = recMatch.captured(2).trimmed(); // Use translatable text + dsoNames[dsoId].push_back(nativeName); + + readOk++; + } + } + dsoNamesFile.close(); + if(readOk != totalRecords) + qDebug().noquote() << "Loaded" << readOk << "/" << totalRecords << "common names of deep-sky objects"; +} + +void NamesOldLoader::loadPlanetNames(const QString& skyCultureDir) +{ + const auto namesFile = skyCultureDir + "/planet_names.fab"; + // Open file + QFile planetNamesFile(namesFile); + if (!planetNamesFile.open(QIODevice::ReadOnly | QIODevice::Text)) + { + qWarning() << "Failed to open file" << QDir::toNativeSeparators(namesFile); + return; + } + + // Now parse the file + // lines to ignore which start with a # or are empty + static const QRegularExpression commentRx("^(\\s*#.*|\\s*)$"); + + // lines which look like records - we use the RE to extract the fields + // which will be available in recRx.capturedTexts() + static const QRegularExpression recRx("^\\s*(\\w+)\\s+\"(.+)\"\\s+_[(]\"(.+)\"[)]\\n"); + + // keep track of how many records we processed. + int totalRecords=0; + int readOk=0; + int lineNumber=0; + while (!planetNamesFile.atEnd()) + { + const auto record = QString::fromUtf8(planetNamesFile.readLine()); + lineNumber++; + + // Skip comments + if (commentRx.match(record).hasMatch()) + continue; + + totalRecords++; + + QRegularExpressionMatch match=recRx.match(record); + if (!match.hasMatch()) + { + qWarning() << "ERROR - cannot parse record at line" << lineNumber << "in planet names file" << QDir::toNativeSeparators(namesFile); + } + else + { + const auto planetId = match.captured(1).trimmed(); + const auto nativeName = match.captured(2).trimmed(); + const auto nativeNameMeaning = match.captured(3).trimmed(); + planetNames[planetId].push_back({nativeNameMeaning,nativeName}); + readOk++; + } + } +} + +void NamesOldLoader::load(const QString& skyCultureDir) +{ + loadStarNames(skyCultureDir); + loadDSONames(skyCultureDir); + loadPlanetNames(skyCultureDir); +} + +bool NamesOldLoader::dumpJSON(std::ostream& s) const +{ + if (starNames.isEmpty() && dsoNames.isEmpty() && planetNames.isEmpty()) + return false; + s << " \"common_names\": {\n"; + const auto starKeys = starNames.keys(); + for(int k = 0; k < starKeys.size(); ++k) + { + const auto& key = starKeys[k]; + s << " \"HIP " + std::to_string(key) + "\": ["; + const auto& values = starNames[key]; + for(unsigned v = 0; v < values.size(); ++v) + { + s << ("{\"english\": \"" + values[v] + "\"}").toStdString(); + if(v+1 != values.size()) s << ", "; + } + if(k+1 != starKeys.size() || !dsoNames.isEmpty() || !planetNames.isEmpty()) + s << "],\n"; + else + s << "]\n"; + } + + const auto dsoKeys = dsoNames.keys(); + for(int k = 0; k < dsoKeys.size(); ++k) + { + const auto& key = dsoKeys[k]; + s << (" \"" + key + "\": [").toStdString(); + const auto& values = dsoNames[key]; + for(unsigned v = 0; v < values.size(); ++v) + { + s << ("{\"english\": \"" + values[v] + "\"}").toStdString(); + if(v+1 != values.size()) s << ", "; + } + if(k+1 != dsoKeys.size() || !planetNames.isEmpty()) + s << "],\n"; + else + s << "]\n"; + } + + const auto planetKeys = planetNames.keys(); + for(int k = 0; k < planetKeys.size(); ++k) + { + const auto& key = planetKeys[k]; + s << (" \"NAME " + key + "\": [").toStdString(); + const auto& values = planetNames[key]; + for(unsigned v = 0; v < values.size(); ++v) + { + s << ("{\"english\": \"" + values[v].english + + "\", \"native\": \"" + values[v].native + + "\"}").toStdString(); + if(v+1 != values.size()) s << ", "; + } + if(k+1 != planetKeys.size()) + s << "],\n"; + else + s << "]\n"; + } + + s << " },\n"; + return true; +} diff --git a/util/skyculture-converter/NamesOldLoader.hpp b/util/skyculture-converter/NamesOldLoader.hpp new file mode 100644 index 00000000000000..75ad266b184445 --- /dev/null +++ b/util/skyculture-converter/NamesOldLoader.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include +#include + +class NamesOldLoader +{ + QMap> starNames; + QMap> dsoNames; + struct PlanetName + { + QString english; + QString native; + }; + QMap> planetNames; + + void loadStarNames(const QString& skyCultureDir); + void loadDSONames(const QString& skyCultureDir); + void loadPlanetNames(const QString& skyCultureDir); +public: + void load(const QString& skyCultureDir); + bool dumpJSON(std::ostream& s) const; +}; diff --git a/util/skyculture-converter/main.cpp b/util/skyculture-converter/main.cpp new file mode 100644 index 00000000000000..855a488e8c826a --- /dev/null +++ b/util/skyculture-converter/main.cpp @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "NamesOldLoader.hpp" +#include "AsterismOldLoader.hpp" +#include "DescriptionOldLoader.hpp" +#include "ConstellationOldLoader.hpp" + +QString convertLicense(const QString& license) +{ + auto parts = license.split("+"); + for(auto& p : parts) + p = p.simplified(); + for(auto& lic : parts) + { + if(lic.startsWith("Free Art ")) continue; + + lic.replace(QRegularExpression("(?: International)?(?: Publice?)? License"), ""); + } + + if(parts.size() == 1) return parts[0]; + if(parts.size() == 2) + { + if(parts[1].startsWith("Free Art ") && !parts[0].startsWith("Free Art ")) + return "Text and data: " + parts[0] + "\n\nIllustrations: " + parts[1]; + else if(parts[0].startsWith("Free Art ") && !parts[1].startsWith("Free Art ")) + return "Text and data: " + parts[1] + "\n\nIllustrations: " + parts[0]; + } + std::cerr << "Unexpected combination of licenses, leaving them unformatted.\n"; + return license; +} + +void convertInfoIni(const QString& dir, std::ostream& s, QString& boundariesType, QString& author, QString& credit, QString& license, QString& cultureId, QString& englishName) +{ + QSettings pd(dir + "/info.ini", QSettings::IniFormat); // FIXME: do we really need StelIniFormat here instead? + englishName = pd.value("info/name").toString(); + author = pd.value("info/author").toString(); + credit = pd.value("info/credit").toString(); + license = pd.value("info/license", "").toString(); + const auto region = pd.value("info/region", "???").toString(); + const auto classification = pd.value("info/classification").toString(); + boundariesType = pd.value("info/boundaries", "none").toString(); + + const std::string highlight = "???"; + cultureId = QFileInfo(dir).fileName(); + + // Now emit the JSON snippet + s << "{\n" + " \"id\": \"" + cultureId.toStdString() + "\",\n" + " \"region\": \"" + region.toStdString() + "\",\n" + " \"classification\": [\"" + classification.toStdString() + "\"],\n" + " \"fallback_to_international_names\": false,\n" + " \"thumbnail\": \"???\",\n" + " \"thumbnail_bscale\": 2,\n" + " \"highlight\": \"" + highlight + "\",\n"; +} + +void writeEnding(std::string& s) +{ + if(s.size() > 2 && s.substr(s.size() - 2) == ",\n") + s.resize(s.size() - 2); + s += "\n}\n"; +} + +int usage(const char* argv0, const int ret) +{ + auto& out = ret ? std::cerr : std::cout; + out << "Usage: " << argv0 << " [options...] skyCultureDir outputDir skyCulturePoDir\n" + << "Options:\n" + << " --footnotes-to-references Try to convert footnotes to references\n" + << " --full-markdown Try to convert most of HTML constructs to their Markdown counterparts\n" + << " --convert-olists Try to convert ordered lists to Markdown (only when --full-markdown is on)\n" + << " --translated-md Generate localized Markdown files (for checking translations)\n"; + return ret; +} + +int main(int argc, char** argv) +{ + + QString inDir; + QString outDir; + QString poDir; + bool fullerConversion = false, footnotesToRefs = false, convertOrderedLists = false, genTranslatedMD = false; + for(int n = 1; n < argc; ++n) + { + const std::string arg = argv[n]; + if(arg.size() >= 1 && arg[0] != '-') + { + if(inDir.isEmpty()) inDir = arg.c_str(); + else if(outDir.isEmpty()) outDir = arg.c_str(); + else if(poDir.isEmpty()) poDir = arg.c_str(); + else + { + std::cerr << "Unknown command-line parameter: \"" << arg << "\"\n"; + return usage(argv[0], 1); + } + } + else if(arg == "--full-markdown") + fullerConversion = true; + else if(arg == "--translated-md") + genTranslatedMD = true; + else if(arg == "--footnotes-to-references") + footnotesToRefs = true; + else if(arg == "--convert-olists") + convertOrderedLists = true; + else if(arg == "--help" || arg == "-h") + return usage(argv[0], 0); + } + if(inDir.isEmpty()) + { + std::cerr << "Input sky culture directory not specified.\n"; + return usage(argv[0], 1); + } + if(outDir.isEmpty()) + { + std::cerr << "Output directory not specified.\n"; + return usage(argv[0], 1); + } + if(poDir.isEmpty()) + { + std::cerr << "Translations (po) directory not specified.\n"; + return usage(argv[0], 1); + } + + if(QFile(outDir).exists()) + { + std::cerr << "Output directory already exists, won't touch it.\n"; + return 1; + } + if (!QFile(inDir+"/info.ini").exists()) + { + std::cerr << "Error: info.ini file wasn't found\n"; + return 1; + } + std::stringstream out; + QString boundariesType, author, credit, license, cultureId, englishName; + convertInfoIni(inDir, out, boundariesType, author, credit, license, cultureId, englishName); + + AsterismOldLoader aLoader; + aLoader.load(inDir, cultureId); + + ConstellationOldLoader cLoader; + cLoader.setBoundariesType(boundariesType.toStdString()); + cLoader.load(inDir, outDir); + + NamesOldLoader nLoader; + nLoader.load(inDir); + + std::cerr << "Starting emission of JSON...\n\n"; + aLoader.dumpJSON(out); + cLoader.dumpJSON(out); + nLoader.dumpJSON(out); + + auto str = std::move(out).str(); + writeEnding(str); + + if(!QDir().mkpath(outDir)) + { + std::cerr << "Failed to create output directory\n"; + return 1; + } + { + std::ofstream outFile((outDir+"/index.json").toStdString()); + outFile << str; + outFile.flush(); + if(!outFile) + { + std::cerr << "Failed to write index.json\n"; + return 1; + } + } + + DescriptionOldLoader dLoader; + license = convertLicense(license); + dLoader.load(inDir, poDir, cultureId, englishName, author, credit, license, + fullerConversion, footnotesToRefs, convertOrderedLists, genTranslatedMD); + dLoader.dump(outDir); + + std::cerr << "--- NOTE ---\n"; + std::cerr << "* Some JSON values can't be deduced from the old-format data. They have been" + " marked by \"???\". Please replace them with something sensible.\n"; + std::cerr << "* Also, langs_use_native_names key is omitted since it has no counterpart" + " in the old format. If this sky culture needs it, please add it manually.\n"; + std::cerr << "* The transformation of the description text is very basic, please check that" + " it looks as it should. Pay special attention at References, Authors, and" + " License sections, which may have been formulated in a suboptimal way.\n"; +}