Skip to content

Commit

Permalink
Drop usage of codecvt. Fixes compilation warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
lpugin committed Sep 30, 2024
1 parent 769a0bd commit 2e7290c
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 5 deletions.
5 changes: 5 additions & 0 deletions include/vrv/vrv.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ std::string UTF32to8(const std::u32string &in);
*/
std::u32string UTF8to32(const std::string &in);

/**
 * Utility for converting UTF-16 (std::u16string) to UTF-8 (std::string).
 * A high surrogate followed by a low surrogate is combined into one code point before encoding.
 * Throws std::runtime_error when a high surrogate is not followed by a low surrogate.
 */
std::string UTF16to8(const std::u16string &in);

/**
* Format a string using vsnprintf.
* The maximum length is given by STRING_FORMAT_MAX_LEN
Expand Down
1 change: 0 additions & 1 deletion src/editortoolkit_cmn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

//--------------------------------------------------------------------------------

#include <codecvt>
#include <locale>
#include <set>

Expand Down
21 changes: 18 additions & 3 deletions src/toolkit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
//----------------------------------------------------------------------------

#include <cassert>
#include <codecvt>
#include <locale>
#include <regex>

Expand Down Expand Up @@ -400,12 +399,28 @@ bool Toolkit::LoadUTF16File(const std::string &filename)
u16data.erase(0, 1);
}

std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert;
std::string utf8line = convert.to_bytes(u16data);
// std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert;
std::string utf8line = vrv::UTF16to8(u16data); // convert.to_bytes(u16data);

return this->LoadData(utf8line, false);
}

// Convert a UTF-16 string to UTF-8.
//
// Surrogate pairs are combined into a single supplementary code point and
// encoded as four bytes. A surrogate that is not part of a well-formed pair
// cannot be represented in UTF-8; it is replaced by U+FFFD (REPLACEMENT
// CHARACTER) so that the function never throws and never drops input silently.
std::string UTF16toUTF8(const std::u16string &input)
{
    constexpr char32_t replacement = 0xFFFD;

    std::string output;
    // Every UTF-16 unit yields at least one byte; this avoids most reallocations
    output.reserve(input.size());

    const std::size_t size = input.size();
    for (std::size_t i = 0; i < size; ++i) {
        char32_t cp = input[i];

        if (cp >= 0xD800 && cp <= 0xDBFF) {
            // High surrogate: only valid when directly followed by a low surrogate
            const bool hasTrail = (i + 1 < size) && (input[i + 1] >= 0xDC00) && (input[i + 1] <= 0xDFFF);
            if (hasTrail) {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (input[i + 1] - 0xDC00);
                ++i; // the trailing unit has been consumed
            }
            else {
                // Leave the following unit untouched so it is decoded on its own
                cp = replacement;
            }
        }
        else if (cp >= 0xDC00 && cp <= 0xDFFF) {
            // Low surrogate without a preceding high surrogate
            cp = replacement;
        }

        if (cp < 0x80) { // One byte
            output.push_back(static_cast<char>(cp));
        }
        else if (cp < 0x800) { // Two bytes
            output.push_back(static_cast<char>((cp >> 6) | 0xC0));
            output.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
        else if (cp < 0x10000) { // Three bytes
            output.push_back(static_cast<char>((cp >> 12) | 0xE0));
            output.push_back(static_cast<char>(((cp >> 6) & 0x3F) | 0x80));
            output.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
        else { // Four bytes
            output.push_back(static_cast<char>((cp >> 18) | 0xF0));
            output.push_back(static_cast<char>(((cp >> 12) & 0x3F) | 0x80));
            output.push_back(static_cast<char>(((cp >> 6) & 0x3F) | 0x80));
            output.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
    }
    return output;
}

bool Toolkit::IsZip(const std::string &filename)
{
std::ifstream fin(filename.c_str(), std::ios::in | std::ios::binary);
Expand Down
112 changes: 111 additions & 1 deletion src/vrv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include <cassert>
#include <cmath>
#include <codecvt>
#include <cstdlib>
#include <iostream>
#include <locale>
Expand Down Expand Up @@ -264,14 +263,125 @@ std::string ExtractIDFragment(std::string refID)

// Encode a UTF-32 string as UTF-8.
// Each code point is written as one lead byte followed by zero to three
// continuation bytes carrying six payload bits each.
// (Hand-written replacement for the deprecated std::wstring_convert / std::codecvt_utf8.)
std::string UTF32to8(const std::u32string &in)
{
    std::string utf8;
    for (const char32_t codePoint : in) {
        // ASCII maps to itself
        if (codePoint < 0x80) {
            utf8 += static_cast<char>(codePoint);
            continue;
        }

        // Pick the number of continuation bytes and the matching lead-byte marker
        int trailing = 3;
        char32_t leadMark = 0xF0;
        if (codePoint < 0x800) {
            trailing = 1;
            leadMark = 0xC0;
        }
        else if (codePoint < 0x10000) {
            trailing = 2;
            leadMark = 0xE0;
        }

        // Lead byte holds the most significant bits
        utf8 += static_cast<char>((codePoint >> (6 * trailing)) | leadMark);
        // Continuation bytes, most significant group first
        for (int shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
            utf8 += static_cast<char>(((codePoint >> shift) & 0x3F) | 0x80);
        }
    }
    return utf8;
}

// Decode a UTF-8 string into UTF-32.
//
// The lead byte selects the sequence length (1 to 4 bytes); every following
// byte contributes its six low bits. Decoding is lenient: continuation bytes
// are not validated. If the input ends in the middle of a multi-byte sequence,
// U+FFFD (REPLACEMENT CHARACTER) is emitted for the truncated sequence and
// decoding stops, instead of reading past the end of the string.
// (Hand-written replacement for the deprecated std::wstring_convert / std::codecvt_utf8.)
std::u32string UTF8to32(const std::string &in)
{
    constexpr char32_t replacement = 0xFFFD;

    std::u32string out;
    // One code point per byte is the upper bound (pure ASCII input)
    out.reserve(in.size());

    const size_t size = in.size();
    for (size_t i = 0; i < size;) {
        const char32_t lead = static_cast<unsigned char>(in[i]);

        // Sequence length and payload bits carried by the lead byte
        size_t length = 4;
        char32_t cp = lead & 0x07;
        if (lead <= 0x7F) { // One byte
            length = 1;
            cp = lead;
        }
        else if (lead <= 0xDF) { // Two bytes
            length = 2;
            cp = lead & 0x1F;
        }
        else if (lead <= 0xEF) { // Three bytes
            length = 3;
            cp = lead & 0x0F;
        }

        // Truncated sequence at the end of the input: do not index out of bounds
        if (size - i < length) {
            out.push_back(replacement);
            break;
        }

        for (size_t k = 1; k < length; ++k) {
            cp = (cp << 6) | (static_cast<unsigned char>(in[i + k]) & 0x3F);
        }
        out.push_back(cp);
        i += length;
    }
    return out;
}

// Convert a UTF-16 string to UTF-8.
//
// A high surrogate followed by a low surrogate is combined into a single
// supplementary code point and encoded as four bytes.
//
// Throws std::runtime_error when the input is not well-formed UTF-16:
// - "Incomplete UTF-16 sequence": the input ends right after a high surrogate
// - "Invalid UTF-16 sequence": a high surrogate is followed by something other
//   than a low surrogate, or a low surrogate appears without a preceding high
//   surrogate (encoding it would produce ill-formed UTF-8)
std::string UTF16to8(const std::u16string &in)
{
    std::string out;
    // Every UTF-16 unit yields at least one byte; this avoids most reallocations
    out.reserve(in.size());

    for (auto it = in.begin(); it != in.end(); ++it) {
        uint32_t cp = *it; // Code point

        if (cp >= 0xD800 && cp <= 0xDBFF) {
            // High surrogate: the next unit must be the matching low surrogate
            const auto next = it + 1;
            if (next == in.end()) {
                throw std::runtime_error("Incomplete UTF-16 sequence");
            }
            const uint32_t trail = *next;
            if (trail < 0xDC00 || trail > 0xDFFF) {
                throw std::runtime_error("Invalid UTF-16 sequence");
            }
            // Combine lead and trail to form a full code point
            cp = ((cp - 0xD800) << 10) + (trail - 0xDC00) + 0x10000;
            it = next; // the trailing unit has been consumed
        }
        else if (cp >= 0xDC00 && cp <= 0xDFFF) {
            // Low surrogate with no preceding high surrogate
            throw std::runtime_error("Invalid UTF-16 sequence");
        }

        // Convert code point to UTF-8
        if (cp < 0x80) { // One byte
            out.push_back(static_cast<char>(cp));
        }
        else if (cp < 0x800) { // Two bytes
            out.push_back(static_cast<char>((cp >> 6) | 0xC0));
            out.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
        else if (cp < 0x10000) { // Three bytes
            out.push_back(static_cast<char>((cp >> 12) | 0xE0));
            out.push_back(static_cast<char>(((cp >> 6) & 0x3F) | 0x80));
            out.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
        else { // Four bytes
            out.push_back(static_cast<char>((cp >> 18) | 0xF0));
            out.push_back(static_cast<char>(((cp >> 12) & 0x3F) | 0x80));
            out.push_back(static_cast<char>(((cp >> 6) & 0x3F) | 0x80));
            out.push_back(static_cast<char>((cp & 0x3F) | 0x80));
        }
    }

    return out;
}

std::string GetFileVersion(int vmaj, int vmin, int vrev)
Expand Down

0 comments on commit 2e7290c

Please sign in to comment.