Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perl: skip string literals when collecting heredoc markers #3592

Merged
merged 9 commits into from
Dec 27, 2022
2 changes: 2 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--sort=no
--kinds-Perl=+{heredoc}
19 changes: 19 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
f0tag input.pl /^sub f0tag() {}$/;" s
f1tag input.pl /^sub f1tag() {}$/;" s
f2tag input.pl /^sub f2tag() {}$/;" s
f3tag input.pl /^sub f3tag() {}$/;" s
hereodc0tag input.pl /^print 'cat <<<heredoct0notag' . <<hereodc0tag;$/;" h
f4tag input.pl /^sub f4tag() {}$/;" s
hereodc1tag input.pl /^print "cat <<<heredoct1notag" . <<hereodc1tag;$/;" h
f5tag input.pl /^sub f5tag() {}$/;" s
hereodc2tag input.pl /^print `cat <<<heredoct1notag` . <<hereodc2tag;$/;" h
f6tag input.pl /^sub f6tag() {}$/;" s
heredoc3tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
heredoc4tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
f7tag input.pl /^sub f7tag() {}$/;" s
f8tag input.pl /^sub f8tag() {}$/;" s
f9tag input.pl /^sub f9tag() {}$/;" s
five_sub input-0.pl /^sub five_sub() {$/;" s
five_mark0 input-0.pl /^print 3 + 2 . <<five_mark0;$/;" h
five_mark1 input-0.pl /^print 3 + 2 . <<~five_mark1;$/;" h
five_mark2 input-0.pl /^print 3 + 2 . << "five_mark2";$/;" h
31 changes: 31 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/input-0.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
print 3 + 2 <<5;
5
;

sub five_sub() {
return 5
}
print 3 + 2 << five_sub;
five
;
print 3 + 2 . <<five_mark0;
a
five_mark0
print 3 + 2 . <<~five_mark1;
a
five_mark1
print 3 + 2 << ~five_sub;
five_sub;
print 3 + 2 . << "five_mark2";
ox
five_mark2
47 changes: 47 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/input.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Derrived from #3588 submitted by @petdance

sub f0tag() {}

my $x = '<<NOT_A_HEREDOC0';

sub f1tag() {}

print "<<NOT_A_HEREDOC0\n";

sub f2tag() {}

print `cat <<<BASH_HERE_STRING`;

sub f3tag() {}

print 'cat <<<heredoct0notag' . <<hereodc0tag;
sub f0notag() {}
hereodc0tag

sub f4tag() {}

print "cat <<<heredoct1notag" . <<hereodc1tag;
sub f1notag() {}
hereodc1tag

sub f5tag() {}

print `cat <<<heredoct1notag` . <<hereodc2tag;
sub f2notag() {}
hereodc2tag

sub f6tag() {}

print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';
sub f3notag() {}
heredoc3tag
sub f4notag() {}
heredoc4tag
sub f7tag() {}

sub f8tag() {}

my $i = 1;
print "a" . 3 << $i;

sub f9tag() {}
2 changes: 2 additions & 0 deletions Units/parser-perl.r/perl-module.d/expected.tags
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ sort input.pl /^use sort qw(stable _quicksort _mergesort);$/;" M line:10 rol
integer input.pl /^no integer;$/;" M line:12 roles:unused extras:reference
strict input.pl /^no strict 'refs';$/;" M line:13 roles:unused extras:reference
warnings input.pl /^no warnings;$/;" M line:14 roles:unused extras:reference
5.006_001 input.pl /^use 5.006_001;$/;" M line:16 roles:used extras:reference
5.006_001 input.pl /^no 5.006_001;$/;" M line:17 roles:unused extras:reference
3 changes: 3 additions & 0 deletions Units/parser-perl.r/perl-module.d/input.pl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@
no integer;
no strict 'refs';
no warnings;

use 5.006_001;
no 5.006_001;
148 changes: 121 additions & 27 deletions parsers/perl.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@
#include "routines.h"
#include "selectors.h"
#include "subparser.h"
#include "trace.h"
#include "vstring.h"
#include "xtag.h"

#define TRACE_PERL_C 0
#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf

/*
* DATA DEFINITIONS
*/
Expand Down Expand Up @@ -443,41 +441,123 @@ static unsigned char *readHereDocMarker (unsigned char *line,
return cp;
}

static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
const unsigned char *line)
enum stringType {
STRING_TYPE_NONE = '\0',
STRING_TYPE_SINGLEQ = '\'',
STRING_TYPE_DOUBLEQ = '"',
STRING_TYPE_BACKQ = '`',
};


static const unsigned char *escapeFromString (const unsigned char *line,
const unsigned char *end,
enum stringType stype)
{
bool in_escape = false;
const unsigned char *cp = line;

switch (stype)
{
case STRING_TYPE_NONE:
return line;
case STRING_TYPE_SINGLEQ:
case STRING_TYPE_DOUBLEQ:
case STRING_TYPE_BACKQ:
while ((end && cp < end) || (end == NULL && *cp != '\0'))
{
if (in_escape)
{
cp++;
in_escape = false;
}
else if (*cp == '\\')
{
cp++;
in_escape = true;
}
else if (*cp == (unsigned char)stype)
{
cp++;
return cp;
}
else
cp++;
}
return NULL;
default:
AssertNotReached ();
return NULL;
}
}

static enum stringType isInString (const unsigned char *line,
const unsigned char *end)
{
const unsigned char *cp = line;
enum stringType t = STRING_TYPE_NONE;

while (cp && cp < end)
{
switch (*cp)
{
case '\'':
case '\"':
case '`':
t = *cp;
break;
default:
t = STRING_TYPE_NONE;
break;
}

cp++;
if (t != STRING_TYPE_NONE)
cp = escapeFromString (cp, end, t);
}

return (cp == NULL)? t: STRING_TYPE_NONE;
}


static const unsigned char *collectHereDocMarker (struct hereDocMarkerManager *mgr,
const unsigned char *line)
{
unsigned char *starter = (unsigned char*)strstr((char *)line, "<<");
unsigned char *cp = NULL;
bool indented = false;
unsigned char quote_char = 0;
bool space_seen = false;

if (starter == NULL)
return;
return NULL;

enum stringType stype;
if ((stype = isInString(line, starter)) != STRING_TYPE_NONE)
return escapeFromString (starter + 2, NULL, stype);

cp = starter + 2;
while (isspace (*cp))
{
/* To avoid confusing with a shift operator, we track
* spaces after the starter (<<). */
space_seen = true;
cp++;
}

if (*cp == '\0')
return;

/* Is shift operator? */
if (isdigit (*cp))
{
/* Scan the rest of the string. */
collectHereDocMarkers (mgr, ++cp);
return;
}
return NULL;

if (*cp == '~') {
if (space_seen)
return cp + 1;
indented = true;
cp++;
if (*cp == '\0')
return;
return NULL;
while (isspace (*cp))
cp++;
if (*cp == '\0')
return;
return NULL;
}

switch (*cp)
Expand All @@ -490,9 +570,13 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
case '\\':
cp++;
if (*cp == '\0')
return;
return NULL;
break;
default:
if (!isIdentifier1(*cp))
return cp;
if (space_seen)
return cp;
break;
}

Expand All @@ -509,7 +593,17 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
hereDocMarkerDelete (marker);

if (*cp != '\0' && cp != last_cp)
collectHereDocMarkers (mgr, cp);
return cp;
return NULL;
}

static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
const unsigned char *line)
{
const unsigned char *cp = line;
const unsigned char *last = cp;
while ((cp = collectHereDocMarker(mgr, cp)) != NULL)
Assert(last < cp);
}

static bool isInHereDoc (struct hereDocMarkerManager *mgr,
Expand Down Expand Up @@ -661,11 +755,9 @@ static void findPerlTags (void)
while (isspace (*cp))
cp++;

collectHereDocMarkers (&hdoc_mgr, cp);

if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
{
TRACE("this looks like a sub\n");
TRACE_PRINT("this looks like a sub");
cp += 3;
kind = KIND_PERL_SUBROUTINE;
spaceRequired = true;
Expand All @@ -690,7 +782,7 @@ static void findPerlTags (void)
}

vString *module = NULL;
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
if (!module)
module = vStringNew();
vStringPut(module, *cp);
Expand Down Expand Up @@ -750,7 +842,7 @@ static void findPerlTags (void)
while (isspace (*cp))
cp++;
vString *module = NULL;
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
if (!module)
module = vStringNew();
vStringPut(module, *cp);
Expand Down Expand Up @@ -811,14 +903,16 @@ static void findPerlTags (void)
if ((int) *p == ':' && (int) *(p + 1) != ':')
kind = KIND_PERL_LABEL;
}
if (kind != KIND_PERL_LABEL)
collectHereDocMarkers (&hdoc_mgr, cp);
}
if (kind != KIND_PERL_NONE)
{
TRACE("cp0: %s\n", (const char *) cp);
TRACE_PRINT("cp0: %s", (const char *) cp);
if (spaceRequired && *cp && !isspace (*cp))
continue;

TRACE("cp1: %s\n", (const char *) cp);
TRACE_PRINT("cp1: %s", (const char *) cp);
while (isspace (*cp))
cp++;

Expand Down Expand Up @@ -846,7 +940,7 @@ static void findPerlTags (void)
vStringCatS (name, "STDOUT");
}

TRACE("name: %s\n", vStringValue (name));
TRACE_PRINT("name: %s", vStringValue (name));

if (0 == vStringLength(name)) {
vStringClear(name);
Expand Down