Skip to content

Commit

Permalink
Add valid_identifier_{pve,pvn,sv} API functions
Browse files Browse the repository at this point in the history
These functions test whether a given string would be considered by the
Perl parser to be a valid identifier. Three variants are provided: one
taking a string start/end pair, one a string start/length pair, and one
looking at the string contained in an SV.
  • Loading branch information
leonerd committed Nov 23, 2024
1 parent 63feb21 commit 049897a
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 1 deletion.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -5207,6 +5207,7 @@ ext/XS-APItest/t/utf8_warn07.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn08.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn09.t Tests for code in utf8.c
ext/XS-APItest/t/utf8_warn_base.pl Tests for code in utf8.c
ext/XS-APItest/t/valid_identifier.t XS::APItest: tests for valid_identifier_sv()
ext/XS-APItest/t/weaken.t XS::APItest: tests for sv_rvweaken() and sv_get_backrefs()
ext/XS-APItest/t/whichsig.t XS::APItest: tests for whichsig() and variants
ext/XS-APItest/t/win32.t Test Win32 specific APIs
Expand Down
10 changes: 10 additions & 0 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3766,6 +3766,16 @@ EXdpx |bool |validate_proto |NN SV *name \
|NULLOK SV *proto \
|bool warn \
|bool curstash
Adp |bool |valid_identifier_pve \
|NN const char *s \
|NN const char *end \
|U32 flags
Adp |bool |valid_identifier_pvn \
|NN const char *s \
|STRLEN len \
|U32 flags
Adp |bool |valid_identifier_sv \
|NULLOK SV *sv
CRTdip |UV |valid_utf8_to_uvchr \
|NN const U8 *s \
|NULLOK STRLEN *retlen
Expand Down
3 changes: 3 additions & 0 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,9 @@
# define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX,a,b,c)
# define uvchr_to_utf8_flags_msgs(a,b,c,d) Perl_uvchr_to_utf8_flags_msgs(aTHX,a,b,c,d)
# define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
# define valid_identifier_pve(a,b,c) Perl_valid_identifier_pve(aTHX_ a,b,c)
# define valid_identifier_pvn(a,b,c) Perl_valid_identifier_pvn(aTHX_ a,b,c)
# define valid_identifier_sv(a) Perl_valid_identifier_sv(aTHX_ a)
# define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr
# define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
# define vcroak(a,b) Perl_vcroak(aTHX_ a,b)
Expand Down
2 changes: 1 addition & 1 deletion ext/XS-APItest/APItest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use strict;
use warnings;
use Carp;

our $VERSION = '1.39';
our $VERSION = '1.40';

require XSLoader;

Expand Down
6 changes: 6 additions & 0 deletions ext/XS-APItest/APItest.xs
Original file line number Diff line number Diff line change
Expand Up @@ -7408,6 +7408,12 @@ gimme()
OUTPUT:
RETVAL

bool
valid_identifier(SV *s)
CODE:
RETVAL = valid_identifier_sv(s);
OUTPUT:
RETVAL

MODULE = XS::APItest PACKAGE = XS::APItest::Backrefs

Expand Down
43 changes: 43 additions & 0 deletions ext/XS-APItest/t/valid_identifier.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!perl

use strict;
use warnings;

use open ':std', ':encoding(UTF-8)';
use Test::More;

use_ok('XS::APItest');

# These should all be valid
foreach my $id (qw( abc ab_cd _abc x123 )) {
ok(valid_identifier($id), "'$id' is valid identifier");
}

# These should all not be
foreach my $id (qw( ab-cd 123 abc() ), "ab cd") {
ok(!valid_identifier($id), "'$id' is not valid identifier");
}

# Now for some UTF-8 tests
{
use utf8;

foreach my $id (qw( café sandviĉon )) {
ok(valid_identifier($id), "'$id' is valid UTF-8 identifier");
}

# en-dash
ok(!valid_identifier("ab–cd"), "'ab–cd' is not valid UTF-8 identifier");
}

# objects with "" overloading still work
{
package WithStringify {
use overload '""' => sub { return "an_identifier"; };
sub new { bless [], shift; }
}

ok(valid_identifier(WithStringify->new), 'Object with stringify overload can be valid identifier');
}

done_testing;
7 changes: 7 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,13 @@ well.

XXX

=item *

New API functions L<C<valid_identifier_pve()>|perlapi/valid_identifier_pve>,
L<C<valid_identifier_pvn()>|perlapi/valid_identifier_pvn> and
L<C<valid_identifier_sv()>|perlapi/valid_identifier_sv> have been added, which
test if a string would be considered by Perl to be a valid identifier name.

=back

=head1 Selected Bug Fixes
Expand Down
14 changes: 14 additions & 0 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 99 additions & 0 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -13932,6 +13932,105 @@ Perl_parse_subsignature(pTHX_ U32 flags)
return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
}

/*
=for apidoc valid_identifier_pve
Returns true if the string given by C<s> until C<end> would be considered
valid as a Perl identifier. That is, it must begin with a character matching
C<isIDFIRST>, followed by characters all matching C<isIDCONT>. An empty
string (i.e. when C<end> is C<s>) will return false.
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
encoded in UTF-8, and suitable Unicode character test functions will be used.
=cut
*/

bool
Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE;

if(end <= s)
return false;

if(flags & SVf_UTF8) {
if(!isIDFIRST_utf8_safe((U8 *)s, (U8 *)end))
return false;

while(s < end) {
s += UTF8SKIP((U8 *)s);
if(s == end)
break;
if(!isIDCONT_utf8_safe((U8 *)s, (U8 *)end))
return false;
}
return true;
}
else {
if(!isIDFIRST(s[0]))
return false;

while(s < end) {
s += 1;
if(s == end)
break;
if(!isIDCONT(s[0]))
return false;
}
return true;
}

return false;
}

/*
=for apidoc valid_identifier_pvn
Returns true if the string given by C<s> whose length is C<len> would be
considered valid as a Perl identifier. That is, it must begin with a
character matching C<isIDFIRST>, followed by characters all matching
C<isIDCONT>. An empty string (i.e. when C<len> is zero) will return false.
If C<flags> contains the C<SVf_UTF8> bit, then the string is presumed to be
encoded in UTF-8, and suitable Unicode character test functions will be used.
=cut
*/

bool
Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVN;

return valid_identifier_pve(s, s + len, flags);
}

/*
=for apidoc valid_identifier_sv
Returns true if the given SV contains a non-empty string whose characters
match accoding to C<valid_identifier_pvn>. Returns false if given NULL, an
undefined SV, or a SV that does not contain a non-empty string.
Does not invoke C<get> magic on the SV beforehand.
=cut
*/

bool
Perl_valid_identifier_sv(pTHX_ SV *sv)
{
PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV;

if(!sv || !SvOK(sv))
return false;

STRLEN len;
const char *pv = SvPV_const(sv, len);
return valid_identifier_pve(pv, pv + len, SvUTF8(sv));
}

/*
* ex: set ts=8 sts=4 sw=4 et:
*/

0 comments on commit 049897a

Please sign in to comment.