From 016be4275d03f0c46d126f6c6f2a45840e72edb2 Mon Sep 17 00:00:00 2001 From: Dorian Taylor Date: Tue, 11 Dec 2018 14:31:48 -0800 Subject: [PATCH] closes #57 (canonical always clones) --- lib/URI.pm | 31 ++++++++++++++++++------------- t/generic.t | 8 +++++++- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/URI.pm b/lib/URI.pm index c1e593a6..bd030e14 100644 --- a/lib/URI.pm +++ b/lib/URI.pm @@ -302,21 +302,22 @@ sub canonical # Make sure scheme is lowercased, that we don't escape unreserved chars, # and that we use upcase escape sequences. - my $self = shift; - my $scheme = $self->_scheme || ""; + # We now clone unconditionally; see + # https://github.com/libwww-perl/URI/issues/57 + + my $other = $_[0]->clone; + my $scheme = $other->_scheme || ""; my $uc_scheme = $scheme =~ /[A-Z]/; - my $esc = $$self =~ /%[a-fA-F0-9]{2}/; - return $self unless $uc_scheme || $esc; + my $esc = $$other =~ /%[a-fA-F0-9]{2}/; + return $other unless $uc_scheme || $esc; + + $other->_scheme(lc $scheme) if $uc_scheme; - my $other = $self->clone; - if ($uc_scheme) { - $other->_scheme(lc $scheme); - } if ($esc) { - $$other =~ s{%([0-9a-fA-F]{2})} - { my $a = chr(hex($1)); + $$other =~ s{%([0-9a-fA-F]{2})} + { my $a = chr(hex($1)); $a =~ /^[$unreserved]\z/o ? $a : "%\U$1" - }ge; + }ge; } return $other; } @@ -571,8 +572,12 @@ removing the explicit port specification if it matches the default port, uppercasing all escape sequences, and unescaping octets that can be better represented as plain characters. -For efficiency reasons, if the $uri is already in normalized form, -then a reference to it is returned instead of a copy. +Before version 1.75, this method would return the original unchanged +C<$uri> object if it detected nothing to change. To make the return +value consistent (and since the efficiency gains from this behaviour +were marginal), this method now unconditionally returns a clone. This +means idioms like C<< $uri->clone->canonical >> are no longer +necessary. =item $uri->eq( $other_uri ) diff --git a/t/generic.t b/t/generic.t index e2f7b974..0d8f5291 100644 --- a/t/generic.t +++ b/t/generic.t @@ -1,9 +1,10 @@ use strict; use warnings; -print "1..48\n"; +print "1..49\n"; use URI; +use Scalar::Util qw(refaddr); my $foo = URI->new("Foo:opaque#frag"); @@ -217,3 +218,8 @@ $old = $foo->query("q"); print "not " unless !defined($old) && $foo eq "?q"; print "ok 48\n"; +# canonical must always be a clone +my $c1 = $foo->canonical; # canonicalize first +my $c2 = $c1->canonical; # canonicalize again +print 'not ' if refaddr($c1) == refaddr($c2) or $$c1 ne $$c2; +print "ok 49\n";