From 60b786e04541e3e851d132fdd38aee2aa821c882 Mon Sep 17 00:00:00 2001 From: YiFang Xiao Date: Sat, 4 Jan 2025 13:48:51 +0800 Subject: [PATCH 1/5] opt: remove qtextcodec --- src/common/iconv.cc | 33 ++++++++++++++ src/common/iconv.hh | 1 + src/dict/epwing_book.cc | 92 +++++++++++++++----------------------- src/dict/epwing_book.hh | 18 +------- src/dict/mdictparser.cc | 6 +-- src/dict/website.cc | 8 +--- src/iframeschemehandler.cc | 8 ++-- 7 files changed, 79 insertions(+), 87 deletions(-) diff --git a/src/common/iconv.cc b/src/common/iconv.cc index 365f75bdb..26f113120 100644 --- a/src/common/iconv.cc +++ b/src/common/iconv.cc @@ -19,6 +19,39 @@ Iconv::~Iconv() iconv_close( state ); } +QByteArray Iconv::fromUnicode( const QString & input, const char * toEncoding ) +{ + // Convert QString to UTF-8 + QByteArray utf8Data = input.toUtf8(); + const char * inBuf = utf8Data.constData(); + size_t inBytesLeft = utf8Data.size(); + + // Initialize iconv + iconv_t cd = iconv_open( toEncoding, "UTF-8" ); + if ( cd == (iconv_t)-1 ) { + throw std::runtime_error( "iconv_open failed" ); + } + + // Prepare output buffer + size_t outBytesLeft = inBytesLeft * 4; // Allocate enough space + std::vector< char > outBuf( outBytesLeft ); + char * outBufPtr = outBuf.data(); + + // Perform conversion + size_t result = iconv( cd, const_cast< char ** >( &inBuf ), &inBytesLeft, &outBufPtr, &outBytesLeft ); + if ( result == (size_t)-1 ) { + iconv_close( cd ); + throw std::runtime_error( "iconv conversion failed" ); + } + + // Clean up + iconv_close( cd ); + + // Resize output buffer to actual size + outBuf.resize( outBuf.size() - outBytesLeft ); + return QByteArray( outBuf.data(), outBuf.size() ); +} + QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft ) { size_t dsz = inBytesLeft; diff --git a/src/common/iconv.hh b/src/common/iconv.hh index 872d69079..be3eab533 100644 --- a/src/common/iconv.hh +++ b/src/common/iconv.hh @@ -23,6 +23,7 @@ public: explicit Iconv( char const * from ); ~Iconv(); + static QByteArray fromUnicode( const QString & input, const char * toEncoding ); QString convert( void const *& inBuf, size_t & inBytesLeft ); diff --git a/src/dict/epwing_book.cc b/src/dict/epwing_book.cc index 87a10ffcd..bec302463 100644 --- a/src/dict/epwing_book.cc +++ b/src/dict/epwing_book.cc @@ -14,6 +14,7 @@ #include "folding.hh" #include "epwing_charmap.hh" #include "htmlescape.hh" + #include "iconv.hh" #if defined( Q_OS_WIN32 ) || defined( Q_OS_MAC ) #define _FILE_OFFSET_BITS 64 #endif @@ -146,10 +147,8 @@ EB_Error_Code hook_iso8859_1( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code, int, const unsigned int * argv ) { EpwingBook * ebook = static_cast< EpwingBook * >( container ); - if ( ebook->codecISO() ) { - QByteArray b = ebook->codecISO()->toUnicode( (const char *)argv, 1 ).toUtf8(); - eb_write_text( book, b.data(), b.size() ); - } + QByteArray b = Iconv::toQString( ebook->codec_ISO_name, (const char *)argv, 1 ).toUtf8(); + eb_write_text( book, b.data(), b.size() ); return EB_SUCCESS; } @@ -175,8 +174,8 @@ hook_narrow_jisx0208( EB_Book * book, EB_Appendix *, void * container, EB_Hook_C if ( out_code == 0 ) { EContainer * cont = static_cast< EContainer * >( container ); - if ( cont->book->codecEuc() ) { - QByteArray str = cont->book->codecEuc()->toUnicode( (const char *)buf, 2 ).toUtf8(); + if ( cont->book->codec_Euc_name ) { + QByteArray str = Iconv::toQString( cont->book->codec_Euc_name, (const char *)buf, 2 ).toUtf8(); eb_write_text( book, str.data(), str.size() ); } else @@ -198,12 +197,8 @@ hook_wide_jisx0208( EB_Book * book, EB_Appendix *, void * ptr, EB_Hook_Code, int buf[ 1 ] = *argv & 0xFF; buf[ 0 ] = ( *argv & 0xFF00 ) >> 8; - if ( ebook->codecEuc() ) { - QByteArray b = ebook->codecEuc()->toUnicode( buf, 2 ).toUtf8(); - eb_write_text( book, b.data(), b.size() ); - } - else - eb_write_text_byte2( book, buf[ 0 ], buf[ 1 ] ); + QByteArray b = Iconv::toQString( ebook->codec_Euc_name, buf, 2 ).toUtf8(); + eb_write_text( book, b.data(), b.size() ); return EB_SUCCESS; } @@ -217,12 +212,8 @@ hook_gb2312( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code, int, buf[ 1 ] = *argv & 0xFF; buf[ 0 ] = ( *argv & 0xFF00 ) >> 8; - if ( ebook->codecGB() ) { - QByteArray b = ebook->codecGB()->toUnicode( buf, 2 ).toUtf8(); - eb_write_text( book, b.data(), b.size() ); - } - else - eb_write_text_byte2( book, buf[ 0 ], buf[ 1 ] ); + QByteArray b = Iconv::toQString( ebook->codec_GB_name, buf, 2 ).toUtf8(); + eb_write_text( book, b.data(), b.size() ); return EB_SUCCESS; } @@ -397,9 +388,9 @@ hook_candidate( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code co EpwingBook::EpwingBook(): currentSubBook( -1 ) { - codec_ISO = QTextCodec::codecForName( "ISO8859-1" ); - codec_GB = QTextCodec::codecForName( "GB2312" ); - codec_Euc = QTextCodec::codecForName( "EUC-JP" ); + codec_ISO_name = "ISO8859-1"; + codec_GB_name = "GB2312"; + codec_Euc_name = "EUC-JP"; eb_initialize_book( &book ); eb_initialize_appendix( &appendix ); @@ -422,8 +413,8 @@ void EpwingBook::setErrorString( QString const & func, EB_Error_Code code ) { error_string = QString( "EB \"%1\" function error: %2 (%3)" ) .arg( func ) - .arg( QTextCodec::codecForLocale()->toUnicode( eb_error_string( code ) ) ) - .arg( QTextCodec::codecForLocale()->toUnicode( eb_error_message( code ) ) ); + .arg( QString::fromLocal8Bit( eb_error_string( code ) ) ) + .arg( QString::fromLocal8Bit( eb_error_message( code ) ) ); if ( currentPosition.page != 0 ) error_string += QString( " on page %1, offset %2" ) @@ -488,9 +479,9 @@ int EpwingBook::setBook( string const & directory ) setErrorString( "eb_appendix_subbook_list", ret ); } - if ( !codec_Euc || ( book.character_code == EB_CHARCODE_ISO8859_1 && !codec_ISO ) - || ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && !codec_GB ) ) - throw exEpwing( "No required codec to decode dictionary" ); + // if ( !codec_Euc || ( book.character_code == EB_CHARCODE_ISO8859_1 && !codec_ISO ) + // || ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && !codec_GB ) ) + // throw exEpwing( "No required codec to decode dictionary" ); rootDir = QString::fromStdString( directory ); @@ -657,10 +648,7 @@ QString EpwingBook::title() } buf[ EB_MAX_TITLE_LENGTH ] = 0; - if ( codec_Euc ) - return codec_Euc->toUnicode( buf ); - - return {}; + return Iconv::toQString( codec_Euc_name, buf, strlen( buf ) ); } QString EpwingBook::copyright() @@ -1086,14 +1074,12 @@ bool EpwingBook::isHeadwordCorrect( QString const & headword ) if ( headword.isEmpty() ) return false; - if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO ) - buf = codec_ISO->fromUnicode( headword ); - else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) - && codec_Euc ) - buf = codec_Euc->fromUnicode( headword ); - - if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB ) - buf2 = codec_GB->fromUnicode( headword ); + if ( book.character_code == EB_CHARCODE_ISO8859_1 ) + buf = Iconv::fromUnicode( headword, codec_ISO_name ); + else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) ) + buf = Iconv::fromUnicode( headword, codec_Euc_name ); + if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 ) + buf2 = Iconv::fromUnicode( headword, codec_GB_name ); if ( !buf.isEmpty() && eb_search_exactword( &book, buf.data() ) == EB_SUCCESS ) { ret = eb_hit_list( &book, 2, hits, &hit_count ); @@ -1846,9 +1832,7 @@ QString EpwingBook::currentCandidate() const char * s = eb_current_candidate( &book ); if ( book.character_code == EB_CHARCODE_ISO8859_1 ) return QString::fromLatin1( s ); - if ( codec_Euc ) - return codec_Euc->toUnicode( s ); - return QString{}; + return Iconv::toQString( codec_Euc_name, s, strlen( s ) ); } bool EpwingBook::getMatches( QString word, QList< QString > & matches ) @@ -1857,14 +1841,13 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches ) EB_Hit hits[ HitsBufferSize ]; int hitCount = 0; - if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO ) - bword = codec_ISO->fromUnicode( word ); - else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) - && codec_Euc ) - bword = codec_Euc->fromUnicode( word ); + if ( book.character_code == EB_CHARCODE_ISO8859_1 ) + bword = Iconv::fromUnicode( word, codec_ISO_name ); + else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) ) + bword = Iconv::fromUnicode( word, codec_Euc_name ); - if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB ) - bword2 = codec_GB->fromUnicode( word ); + if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 ) + bword2 = Iconv::fromUnicode( word, codec_GB_name ); if ( !bword.isEmpty() ) { EB_Error_Code ret = eb_search_word( &book, bword.data() ); @@ -1928,14 +1911,13 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int > EB_Hit hits[ HitsBufferSize ]; int hitCount = 0; - if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO ) - bword = codec_ISO->fromUnicode( word ); - else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) - && codec_Euc ) - bword = codec_Euc->fromUnicode( word ); + if ( book.character_code == EB_CHARCODE_ISO8859_1 ) + bword = Iconv::fromUnicode( word, codec_ISO_name ); + else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) ) + bword = Iconv::fromUnicode( word, codec_Euc_name ); - if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB ) - bword2 = codec_GB->fromUnicode( word ); + if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 ) + bword2 = Iconv::fromUnicode( word, codec_GB_name ); if ( !bword.isEmpty() ) { EB_Error_Code ret = eb_search_exactword( &book, bword.data() ); diff --git a/src/dict/epwing_book.hh b/src/dict/epwing_book.hh index 9a02413d4..c5606225f 100644 --- a/src/dict/epwing_book.hh +++ b/src/dict/epwing_book.hh @@ -16,7 +16,6 @@ #endif #include -#include // POSIX symbol unavailable on Windows needed for eb headers #ifdef Q_OS_WIN @@ -72,7 +71,6 @@ class EpwingBook QString mainCacheDir, rootDir; QString cacheImagesDir, cacheSoundsDir, cacheMoviesDir, cacheFontsDir; QString dictID; - QTextCodec *codec_ISO, *codec_GB, *codec_Euc; QStack< unsigned int > decorationStack; int monoWidth, monoHeight; QStringList imageCacheList, soundsCacheList, moviesCacheList, fontsCacheList; @@ -110,6 +108,7 @@ class EpwingBook QByteArray codeToUnicode( QString const & code ); public: + const char *codec_ISO_name, *codec_GB_name, *codec_Euc_name; enum DecorationCodes { UNKNOWN = 0, @@ -133,21 +132,6 @@ public: return error_string; } - QTextCodec * codecISO() - { - return codec_ISO; - } - - QTextCodec * codecGB() - { - return codec_GB; - } - - QTextCodec * codecEuc() - { - return codec_Euc; - } - int getSubBookCount() { return subBookCount; diff --git a/src/dict/mdictparser.cc b/src/dict/mdictparser.cc index 83b48c0ac..8442b8241 100644 --- a/src/dict/mdictparser.cc +++ b/src/dict/mdictparser.cc @@ -32,12 +32,11 @@ #include #include #include -#include - #include "decompress.hh" #include "ripemd.hh" #include "utils.hh" #include "htmlescape.hh" +#include "iconv.hh" namespace Mdict { @@ -187,8 +186,7 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f return QString(); } - QTextCodec * codec = QTextCodec::codecForName( fromCode ); - return codec->toUnicode( from, fromSize ); + return Iconv::toQString( fromCode, from, fromSize ); } bool MdictParser::decryptHeadWordIndex( char * buffer, qint64 len ) diff --git a/src/dict/website.cc b/src/dict/website.cc index f4d40947c..349bf25b0 100644 --- a/src/dict/website.cc +++ b/src/dict/website.cc @@ -122,7 +122,6 @@ class WebSiteArticleRequest: public WebSiteDataRequestSlots private: void requestFinished( QNetworkReply * ) override; - static QTextCodec * codecForHtml( QByteArray const & ba ); }; void WebSiteArticleRequest::cancel() @@ -152,11 +151,6 @@ WebSiteArticleRequest::WebSiteArticleRequest( QString const & url_, QNetworkAcce #endif } -QTextCodec * WebSiteArticleRequest::codecForHtml( QByteArray const & ba ) -{ - return QTextCodec::codecForHtml( ba, 0 ); -} - void WebSiteArticleRequest::requestFinished( QNetworkReply * r ) { if ( isFinished() ) { // Was cancelled @@ -188,7 +182,7 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r ) QByteArray replyData = netReply->readAll(); QString articleString; - QTextCodec * codec = WebSiteArticleRequest::codecForHtml( replyData ); + QTextCodec * codec = QTextCodec::codecForHtml( replyData, 0 ); if ( codec ) { articleString = codec->toUnicode( replyData ); } diff --git a/src/iframeschemehandler.cc b/src/iframeschemehandler.cc index 9bf6d6770..3731a8d0f 100644 --- a/src/iframeschemehandler.cc +++ b/src/iframeschemehandler.cc @@ -1,6 +1,6 @@ #include "iframeschemehandler.hh" -#include +#include "iconv.hh" IframeSchemeHandler::IframeSchemeHandler( QObject * parent ): QWebEngineUrlSchemeHandler( parent ) @@ -36,9 +36,9 @@ void IframeSchemeHandler::requestStarted( QWebEngineUrlRequestJob * requestJob ) QByteArray replyData = reply->readAll(); QString articleString; - QTextCodec * codec = QTextCodec::codecForUtfText( replyData, QTextCodec::codecForName( codecName.toUtf8() ) ); - if ( codec ) { - articleString = codec->toUnicode( replyData ); + auto encoding = Iconv::findValidEncoding( { codecName } ); + if ( !encoding.isEmpty() ) { + articleString = Iconv::toQString( encoding.toUtf8().constData(), replyData.data(), replyData.size() ); } else { articleString = QString::fromUtf8( replyData ); From b2ed9d3d62a88239f89ee2162539f79750907e7b Mon Sep 17 00:00:00 2001 From: YiFang Xiao Date: Sat, 4 Jan 2025 13:57:55 +0800 Subject: [PATCH 2/5] opt: remove qtextcodec --- src/common/iconv.cc | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/common/iconv.cc b/src/common/iconv.cc index 26f113120..f8c49a606 100644 --- a/src/common/iconv.cc +++ b/src/common/iconv.cc @@ -10,7 +10,9 @@ Iconv::Iconv( char const * from ): state( iconv_open( Text::utf8, from ) ) { if ( state == (iconv_t)-1 ) { - throw exCantInit( strerror( errno ) ); + char buffer[ 256 ]; + strerror_s( buffer, sizeof( buffer ), errno ); + throw exCantInit( buffer ); } } @@ -38,10 +40,21 @@ QByteArray Iconv::fromUnicode( const QString & input, const char * toEncoding ) char * outBufPtr = outBuf.data(); // Perform conversion - size_t result = iconv( cd, const_cast< char ** >( &inBuf ), &inBytesLeft, &outBufPtr, &outBytesLeft ); - if ( result == (size_t)-1 ) { - iconv_close( cd ); - throw std::runtime_error( "iconv conversion failed" ); + while ( inBytesLeft > 0 ) { + size_t result = iconv( cd, const_cast< char ** >( &inBuf ), &inBytesLeft, &outBufPtr, &outBytesLeft ); + if ( result == (size_t)-1 ) { + if ( errno == E2BIG ) { + // Grow the buffer and retry + size_t offset = outBufPtr - outBuf.data(); + outBuf.resize( outBuf.size() + inBytesLeft * 4 ); + outBufPtr = outBuf.data() + offset; + outBytesLeft += inBytesLeft * 4; + } + else { + iconv_close( cd ); + throw std::runtime_error( "iconv conversion failed" ); + } + } } // Clean up From 3490ca5a81c1b5ce34315c00b6940908f6572ba2 Mon Sep 17 00:00:00 2001 From: YiFang Xiao Date: Sat, 4 Jan 2025 14:02:36 +0800 Subject: [PATCH 3/5] opt: remove qtextcodec --- src/dict/epwing_book.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/dict/epwing_book.cc b/src/dict/epwing_book.cc index bec302463..99cba97db 100644 --- a/src/dict/epwing_book.cc +++ b/src/dict/epwing_book.cc @@ -174,15 +174,12 @@ hook_narrow_jisx0208( EB_Book * book, EB_Appendix *, void * container, EB_Hook_C if ( out_code == 0 ) { EContainer * cont = static_cast< EContainer * >( container ); - if ( cont->book->codec_Euc_name ) { - QByteArray str = Iconv::toQString( cont->book->codec_Euc_name, (const char *)buf, 2 ).toUtf8(); - eb_write_text( book, str.data(), str.size() ); - } - else - eb_write_text( book, (const char *)buf, 2 ); + QByteArray str = Iconv::toQString( cont->book->codec_Euc_name, (const char *)buf, 2 ).toUtf8(); + eb_write_text( book, str.data(), str.size() ); } - else + else { eb_write_text_byte1( book, out_code ); + } } return EB_SUCCESS; From 896766fe2e864351a12cb2c7b620add6db97bf86 Mon Sep 17 00:00:00 2001 From: xiaoyifang <105986+xiaoyifang@users.noreply.github.com> Date: Sat, 4 Jan 2025 15:46:10 +0800 Subject: [PATCH 4/5] Update src/dict/epwing_book.cc --- src/dict/epwing_book.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dict/epwing_book.cc b/src/dict/epwing_book.cc index 99cba97db..70d9ff442 100644 --- a/src/dict/epwing_book.cc +++ b/src/dict/epwing_book.cc @@ -476,9 +476,6 @@ int EpwingBook::setBook( string const & directory ) setErrorString( "eb_appendix_subbook_list", ret ); } - // if ( !codec_Euc || ( book.character_code == EB_CHARCODE_ISO8859_1 && !codec_ISO ) - // || ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && !codec_GB ) ) - // throw exEpwing( "No required codec to decode dictionary" ); rootDir = QString::fromStdString( directory ); From 43a782cecec2dcfb9765fedf1f8a5849cb2784f8 Mon Sep 17 00:00:00 2001 From: YiFang Xiao Date: Sun, 5 Jan 2025 18:29:09 +0800 Subject: [PATCH 5/5] opt: 1 --- src/common/iconv.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/common/iconv.cc b/src/common/iconv.cc index f8c49a606..30cb7afea 100644 --- a/src/common/iconv.cc +++ b/src/common/iconv.cc @@ -10,9 +10,7 @@ Iconv::Iconv( char const * from ): state( iconv_open( Text::utf8, from ) ) { if ( state == (iconv_t)-1 ) { - char buffer[ 256 ]; - strerror_s( buffer, sizeof( buffer ), errno ); - throw exCantInit( buffer ); + throw exCantInit( strerror( errno ) ); } }