Skip to content

Commit

Permalink
Merge pull request #3592 from masatake/perl--revise-heredoc
Browse files Browse the repository at this point in the history
Perl: skip string literals when collecting heredoc markers
  • Loading branch information
masatake committed Dec 27, 2022
2 parents 2fc641e + 6f5b873 commit 50e7a74
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 27 deletions.
2 changes: 2 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--sort=no
--kinds-Perl=+{heredoc}
19 changes: 19 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
f0tag input.pl /^sub f0tag() {}$/;" s
f1tag input.pl /^sub f1tag() {}$/;" s
f2tag input.pl /^sub f2tag() {}$/;" s
f3tag input.pl /^sub f3tag() {}$/;" s
hereodc0tag input.pl /^print 'cat <<<heredoct0notag' . <<hereodc0tag;$/;" h
f4tag input.pl /^sub f4tag() {}$/;" s
hereodc1tag input.pl /^print "cat <<<heredoct1notag" . <<hereodc1tag;$/;" h
f5tag input.pl /^sub f5tag() {}$/;" s
hereodc2tag input.pl /^print `cat <<<heredoct1notag` . <<hereodc2tag;$/;" h
f6tag input.pl /^sub f6tag() {}$/;" s
heredoc3tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
heredoc4tag input.pl /^print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';$/;" h
f7tag input.pl /^sub f7tag() {}$/;" s
f8tag input.pl /^sub f8tag() {}$/;" s
f9tag input.pl /^sub f9tag() {}$/;" s
five_sub input-0.pl /^sub five_sub() {$/;" s
five_mark0 input-0.pl /^print 3 + 2 . <<five_mark0;$/;" h
five_mark1 input-0.pl /^print 3 + 2 . <<~five_mark1;$/;" h
five_mark2 input-0.pl /^print 3 + 2 . << "five_mark2";$/;" h
31 changes: 31 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/input-0.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
print 3 + 2 <<5;
5
;

sub five_sub() {
return 5
}
print 3 + 2 << five_sub;
five
;
print 3 + 2 . <<five_mark0;
a
five_mark0
print 3 + 2 . <<~five_mark1;
a
five_mark1
print 3 + 2 << ~five_sub;
five_sub;
print 3 + 2 . << "five_mark2";
ox
five_mark2
47 changes: 47 additions & 0 deletions Units/parser-perl.r/no-heredoc.d/input.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Derrived from #3588 submitted by @petdance

sub f0tag() {}

my $x = '<<NOT_A_HEREDOC0';

sub f1tag() {}

print "<<NOT_A_HEREDOC0\n";

sub f2tag() {}

print `cat <<<BASH_HERE_STRING`;

sub f3tag() {}

print 'cat <<<heredoct0notag' . <<hereodc0tag;
sub f0notag() {}
hereodc0tag

sub f4tag() {}

print "cat <<<heredoct1notag" . <<hereodc1tag;
sub f1notag() {}
hereodc1tag

sub f5tag() {}

print `cat <<<heredoct1notag` . <<hereodc2tag;
sub f2notag() {}
hereodc2tag

sub f6tag() {}

print "abc" . <<heredoc3tag . 'efg' . << "heredoc4tag" . `ls` . '<<hereodc5notag';
sub f3notag() {}
heredoc3tag
sub f4notag() {}
heredoc4tag
sub f7tag() {}

sub f8tag() {}

my $i = 1;
print "a" . 3 << $i;

sub f9tag() {}
2 changes: 2 additions & 0 deletions Units/parser-perl.r/perl-module.d/expected.tags
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ sort input.pl /^use sort qw(stable _quicksort _mergesort);$/;" M line:10 rol
integer input.pl /^no integer;$/;" M line:12 roles:unused extras:reference
strict input.pl /^no strict 'refs';$/;" M line:13 roles:unused extras:reference
warnings input.pl /^no warnings;$/;" M line:14 roles:unused extras:reference
5.006_001 input.pl /^use 5.006_001;$/;" M line:16 roles:used extras:reference
5.006_001 input.pl /^no 5.006_001;$/;" M line:17 roles:unused extras:reference
3 changes: 3 additions & 0 deletions Units/parser-perl.r/perl-module.d/input.pl
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@
no integer;
no strict 'refs';
no warnings;

use 5.006_001;
no 5.006_001;
148 changes: 121 additions & 27 deletions parsers/perl.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@
#include "routines.h"
#include "selectors.h"
#include "subparser.h"
#include "trace.h"
#include "vstring.h"
#include "xtag.h"

#define TRACE_PERL_C 0
#define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf

/*
* DATA DEFINITIONS
*/
Expand Down Expand Up @@ -443,41 +441,123 @@ static unsigned char *readHereDocMarker (unsigned char *line,
return cp;
}

static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
const unsigned char *line)
enum stringType {
STRING_TYPE_NONE = '\0',
STRING_TYPE_SINGLEQ = '\'',
STRING_TYPE_DOUBLEQ = '"',
STRING_TYPE_BACKQ = '`',
};


static const unsigned char *escapeFromString (const unsigned char *line,
const unsigned char *end,
enum stringType stype)
{
bool in_escape = false;
const unsigned char *cp = line;

switch (stype)
{
case STRING_TYPE_NONE:
return line;
case STRING_TYPE_SINGLEQ:
case STRING_TYPE_DOUBLEQ:
case STRING_TYPE_BACKQ:
while ((end && cp < end) || (end == NULL && *cp != '\0'))
{
if (in_escape)
{
cp++;
in_escape = false;
}
else if (*cp == '\\')
{
cp++;
in_escape = true;
}
else if (*cp == (unsigned char)stype)
{
cp++;
return cp;
}
else
cp++;
}
return NULL;
default:
AssertNotReached ();
return NULL;
}
}

static enum stringType isInString (const unsigned char *line,
const unsigned char *end)
{
const unsigned char *cp = line;
enum stringType t = STRING_TYPE_NONE;

while (cp && cp < end)
{
switch (*cp)
{
case '\'':
case '\"':
case '`':
t = *cp;
break;
default:
t = STRING_TYPE_NONE;
break;
}

cp++;
if (t != STRING_TYPE_NONE)
cp = escapeFromString (cp, end, t);
}

return (cp == NULL)? t: STRING_TYPE_NONE;
}


static const unsigned char *collectHereDocMarker (struct hereDocMarkerManager *mgr,
const unsigned char *line)
{
unsigned char *starter = (unsigned char*)strstr((char *)line, "<<");
unsigned char *cp = NULL;
bool indented = false;
unsigned char quote_char = 0;
bool space_seen = false;

if (starter == NULL)
return;
return NULL;

enum stringType stype;
if ((stype = isInString(line, starter)) != STRING_TYPE_NONE)
return escapeFromString (starter + 2, NULL, stype);

cp = starter + 2;
while (isspace (*cp))
{
/* To avoid confusing with a shift operator, we track
* spaces after the starter (<<). */
space_seen = true;
cp++;
}

if (*cp == '\0')
return;

/* Is shift operator? */
if (isdigit (*cp))
{
/* Scan the rest of the string. */
collectHereDocMarkers (mgr, ++cp);
return;
}
return NULL;

if (*cp == '~') {
if (space_seen)
return cp + 1;
indented = true;
cp++;
if (*cp == '\0')
return;
return NULL;
while (isspace (*cp))
cp++;
if (*cp == '\0')
return;
return NULL;
}

switch (*cp)
Expand All @@ -490,9 +570,13 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
case '\\':
cp++;
if (*cp == '\0')
return;
return NULL;
break;
default:
if (!isIdentifier1(*cp))
return cp;
if (space_seen)
return cp;
break;
}

Expand All @@ -509,7 +593,17 @@ static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
hereDocMarkerDelete (marker);

if (*cp != '\0' && cp != last_cp)
collectHereDocMarkers (mgr, cp);
return cp;
return NULL;
}

static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
const unsigned char *line)
{
const unsigned char *cp = line;
const unsigned char *last = cp;
while ((cp = collectHereDocMarker(mgr, cp)) != NULL)
Assert(last < cp);
}

static bool isInHereDoc (struct hereDocMarkerManager *mgr,
Expand Down Expand Up @@ -661,11 +755,9 @@ static void findPerlTags (void)
while (isspace (*cp))
cp++;

collectHereDocMarkers (&hdoc_mgr, cp);

if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
{
TRACE("this looks like a sub\n");
TRACE_PRINT("this looks like a sub");
cp += 3;
kind = KIND_PERL_SUBROUTINE;
spaceRequired = true;
Expand All @@ -690,7 +782,7 @@ static void findPerlTags (void)
}

vString *module = NULL;
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
if (!module)
module = vStringNew();
vStringPut(module, *cp);
Expand Down Expand Up @@ -750,7 +842,7 @@ static void findPerlTags (void)
while (isspace (*cp))
cp++;
vString *module = NULL;
while (isalnum(*cp) || *cp == ':' || *cp == '.') {
while (isalnum(*cp) || *cp == ':' || *cp == '.' || *cp == '_') {
if (!module)
module = vStringNew();
vStringPut(module, *cp);
Expand Down Expand Up @@ -811,14 +903,16 @@ static void findPerlTags (void)
if ((int) *p == ':' && (int) *(p + 1) != ':')
kind = KIND_PERL_LABEL;
}
if (kind != KIND_PERL_LABEL)
collectHereDocMarkers (&hdoc_mgr, cp);
}
if (kind != KIND_PERL_NONE)
{
TRACE("cp0: %s\n", (const char *) cp);
TRACE_PRINT("cp0: %s", (const char *) cp);
if (spaceRequired && *cp && !isspace (*cp))
continue;

TRACE("cp1: %s\n", (const char *) cp);
TRACE_PRINT("cp1: %s", (const char *) cp);
while (isspace (*cp))
cp++;

Expand Down Expand Up @@ -846,7 +940,7 @@ static void findPerlTags (void)
vStringCatS (name, "STDOUT");
}

TRACE("name: %s\n", vStringValue (name));
TRACE_PRINT("name: %s", vStringValue (name));

if (0 == vStringLength(name)) {
vStringClear(name);
Expand Down

0 comments on commit 50e7a74

Please sign in to comment.