Skip to content

Commit

Permalink
Updated patch for github issue xslate#88 "Latin 1 text could end up as malformed UTF-8 on output" (nc)
Browse files Browse the repository at this point in the history

Now using U8 casts
-            *(d++) = UTF8_EIGHT_BIT_HI(c);
-            *(d++) = UTF8_EIGHT_BIT_LO(c);
+            *(d++) = UTF8_EIGHT_BIT_HI((U8) c);
+            *(d++) = UTF8_EIGHT_BIT_LO((U8) c);

Minor POD improvement
  • Loading branch information
Michael Kröll committed Apr 20, 2016
1 parent a9bcd95 commit 78d34cb
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 7 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ t/900_bugs/042_perl59_issue.t
t/900_bugs/043_issue107.t
t/900_bugs/044_empty_result.t
t/900_bugs/045_issue130.t
t/900_bugs/046_issue88.t
t/900_bugs/issue79/tmpl/contentA.tt
t/900_bugs/issue79/tmpl/contentB.tt
t/900_bugs/issue79/tmpl/wrapperA.tt
Expand Down
4 changes: 2 additions & 2 deletions lib/Text/Xslate/PP/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ sub print {
if(defined ${$sv}) {
$st->{output} .=
(utf8::is_utf8($st->{output}) && !utf8::is_utf8(${$sv}))
? $st->encoding->decode(${$sv})
? eval {$st->encoding->decode(${$sv}, Encode::FB_CROAK())} || ${$sv}
: ${$sv};
}
else {
Expand All @@ -188,7 +188,7 @@ sub print {
$sv =~ s/($Text::Xslate::PP::html_metachars)/$Text::Xslate::PP::html_escape{$1}/xmsgeo;
$st->{output} .=
(utf8::is_utf8($st->{output}) && !utf8::is_utf8($sv))
? $st->encoding->decode($sv)
? eval {$st->encoding->decode($sv, Encode::FB_CROAK())} || $sv
: $sv;
}
else {
Expand Down
4 changes: 2 additions & 2 deletions script/xslate
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ xslate - Process Xslate Templates

=head1 DESCRIPTION

The xslate script is used to process entire directory trees containing
The C<xslate> script is used to process entire directory trees containing
template files, or to process one liners.

=head1 ARGUMENTS
Expand All @@ -51,7 +51,7 @@ template files, or to process one liners.

Specifies the target to be processed by Xslate.

If the target is a file, the file is processed, and xslate will exit immediately. If the target is a directory, then the directory is traversed and each file found is processed via xslate.
If the target is a file, the file is processed, and C<xslate> will exit immediately. If the target is a directory, then the directory is traversed and each file found is processed via C<xslate>.

=head1 AUTHOR

Expand Down
32 changes: 29 additions & 3 deletions src/Text-Xslate.xs
Original file line number Diff line number Diff line change
Expand Up @@ -541,19 +541,39 @@ tx_unmark_raw(pTHX_ SV* const str) {
/* does sv_catsv_nomg(dest, src), but significantly faster */
/*
 * Appends the string value of src to the end of dest.
 *
 * dest: scalar that is grown and appended to in place.
 * src:  scalar whose bytes (as returned by SvPV_const) are appended.
 *
 * If dest is not flagged UTF-8 but src is, dest is upgraded first. When the
 * UTF-8 flags then agree, or the bytes of src already form a valid UTF-8
 * string, the bytes are copied verbatim; otherwise each byte of src is
 * re-encoded while it is copied.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost,
 * so removed and added lines appear side by side -- confirm against the real
 * file before relying on the exact statement sequence.
 */
STATIC_INLINE void
tx_sv_cat(pTHX_ SV* const dest, SV* const src) {
STRLEN len;
const char* pv = SvPV_const(src, len);

/* Bring dest up to UTF-8 before UTF-8 flagged src bytes are appended to it. */
if(!SvUTF8(dest) && SvUTF8(src)) {
sv_utf8_upgrade(dest);
}

/* NOTE(review): the next three lines repeat the len/pv declarations made at
 * the top of the function and look like the pre-patch side of this hunk
 * (this pv is const-qualified, yet the loop below advances pv) -- TODO confirm. */
{
STRLEN len;
const char* const pv = SvPV_const(src, len);
/* Verbatim path: both scalars carry the same UTF-8 flag, or the bytes of src
 * are already well-formed UTF-8, so they can be appended unchanged. */
if(SvUTF8(dest) == SvUTF8(src)
|| is_utf8_string((const U8 *)pv, len)) {
STRLEN const dest_cur = SvCUR(dest);
char* const d = SvGROW(dest, dest_cur + len + 1 /* count '\0' */);

/* Record the new length, then copy len + 1 bytes so the terminator comes along. */
SvCUR_set(dest, dest_cur + len);
Copy(pv, d + dest_cur, len + 1 /* copy '\0' */, char);
}
/* Re-encoding path: the flags differ and the bytes of src are not valid UTF-8. */
else {
STRLEN const dest_cur = SvCUR(dest);
/* Longest UTF-8 representation of each char is 2 octets. */
/* Hence the worst-case reservation of 2 * len bytes plus the terminator. */
char* const d_start = SvGROW(dest, dest_cur + 2 * len + 1 /* count '\0' */);
/* Write cursor: starts right after the existing content of dest. */
char* d = d_start + dest_cur;

/* Walk src byte by byte: bytes accepted by UTF8_IS_INVARIANT are copied
 * as-is, every other byte is emitted as the two octets produced by
 * UTF8_EIGHT_BIT_HI and UTF8_EIGHT_BIT_LO. */
while(len--) {
const U8 c = *pv++;
if (UTF8_IS_INVARIANT(c)) {
*(d++) = c;
} else {
*(d++) = UTF8_EIGHT_BIT_HI(c);
*(d++) = UTF8_EIGHT_BIT_LO(c);
}
}
/* Terminate the buffer; the new length is the distance from the buffer
 * start to the write cursor (old content plus everything just written). */
*d = '\0';
SvCUR_set(dest, d - d_start);
}
}

static void /* doesn't care about raw-ness */
Expand All @@ -563,6 +583,8 @@ tx_sv_cat_with_html_escape_force(pTHX_ SV* const dest, SV* const src) {
const char* const end = cur + len;
STRLEN const dest_cur = SvCUR(dest);
char* d;
const U32 upgrade_on_copy = SvUTF8(dest) && !SvUTF8(src)
&& !is_utf8_string((const U8 *)cur, len);

(void)SvGROW(dest, dest_cur + ( len * ( sizeof("&quot;") - 1) ) + 1);
if(!SvUTF8(dest) && SvUTF8(src)) {
Expand Down Expand Up @@ -595,6 +617,10 @@ tx_sv_cat_with_html_escape_force(pTHX_ SV* const dest, SV* const src) {
// CopyToken("&apos;", d);
CopyToken("&#39;", d);
}
else if (upgrade_on_copy && !UTF8_IS_INVARIANT(c)) {
*(d++) = UTF8_EIGHT_BIT_HI((U8) c);
*(d++) = UTF8_EIGHT_BIT_LO((U8) c);
}
else {
*(d++) = c;
}
Expand Down
23 changes: 23 additions & 0 deletions t/900_bugs/046_issue88.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!perl
# Regression test for https://github.com/xslate/p5-Text-Xslate/issues/88
#
# Renders the character U+00C4 through every combination of
#   * an escaped value vs. a mark_raw()'d value,
#   * a template without and with a leading wide character,
#   * the value written as a source literal vs. as a "\x{c4}" escape,
# and checks that the character arrives intact in the rendered output.
use strict;
use warnings;
use utf8;
use Test::More;

use Text::Xslate 'mark_raw';

# Diagnostics of a failing case contain non-ASCII text. Give the Test::More
# output handles an encoding layer so that text is printed readably instead
# of triggering "Wide character" warnings.
my $builder = Test::More->builder;
binmode $builder->output,         ':encoding(UTF-8)';
binmode $builder->failure_output, ':encoding(UTF-8)';
binmode $builder->todo_output,    ':encoding(UTF-8)';

my $xslate = Text::Xslate->new();

# Each table row is [ label used in the test description, payload ].
my @wrappers = (
    [ 'escaped value' => sub { return $_[0] } ],
    [ 'raw value'     => sub { return mark_raw( $_[0] ) } ],
);
my @prefixes = (
    [ 'ASCII-only template'          => q{} ],
    [ 'template with wide character' => 'あ' ],
);
my @values = (
    [ 'source literal'  => "Ä" ],
    [ '"\x{c4}" escape' => "\x{c4}" ],
);

# The nesting order keeps the cases in the same sequence as the original
# hand-written assertions: all escaped cases first, then all raw cases.
for my $wrapper (@wrappers) {
    my ( $wrapper_label, $wrap ) = @{$wrapper};

    for my $prefix_case (@prefixes) {
        my ( $prefix_label, $prefix ) = @{$prefix_case};

        for my $value_case (@values) {
            my ( $value_label, $value ) = @{$value_case};

            my $got = $xslate->render_string(
                $prefix . '<: $string :>',
                { string => $wrap->($value) },
            );

            is $got, $prefix . 'Ä',
                "$wrapper_label, $prefix_label, $value_label";
        }
    }
}

done_testing();

0 comments on commit 78d34cb

Please sign in to comment.