Skip to content

Commit

Permalink
Merge pull request #1848 from b4n/parser/tex
Browse files Browse the repository at this point in the history
tex: Fix reporting non-alphanumeric tag contents plus a few cleanups.
  • Loading branch information
b4n committed Sep 4, 2018
2 parents 0fdbfdc + 5d5afe6 commit 0d87063
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 171 deletions.
3 changes: 3 additions & 0 deletions Units/parser-tex.r/unicode-sections.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ABC input.tex /^\\section{ABC}$/;" s
DEF input.tex /^\\section{DEF}$/;" s
ZÖZÜZÄZßZ input.tex /^\\section{ZÖZÜZÄZßZ}$/;" s
7 changes: 7 additions & 0 deletions Units/parser-tex.r/unicode-sections.d/input.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
% https://github.com/universal-ctags/ctags/issues/1846

\begin{document}
\section{ABC}
\section{ZÖZÜZÄZßZ}
\section{DEF}
\end{document}
246 changes: 123 additions & 123 deletions Units/tex-review-needed.r/3526726.tex.t/expected.tags

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Units/tex-review-needed.r/bug2886870.tex.t/expected.tags
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ Special Symbols input.tex /^\\section{Special Symbols}$/;" s
Special symbols input.tex /^\\subsection{Special symbols}$/;" u section:Special Symbols
Tables input.tex /^\\section{Tables}$/;" s
Test for ctags input.tex /^\\subsubsection{Test for ctags}$/;" b subsection:Special Symbols""Common Greek letters
color red input.tex /^\\section{\\color{red}Use of Color}$/;" s
\\color{red}Use of Color input.tex /^\\section{\\color{red}Use of Color}$/;" s
\\label{morefig}Subfigures input.tex /^\\section{\\label{morefig}Subfigures}$/;" s
eq:fine input.tex /^I = \\! \\int_{-\\infty}^\\infty f(x)\\,dx \\label{eq:fine}.$/;" l
eq:ising input.tex /^\\label{eq:ising}$/;" l
eq:mdiv input.tex /^\\label{eq:mdiv}$/;" l
fig:lj input.tex /^\\caption{\\label{fig:lj}Plot of the$/;" l
fig:qm complexfunctions input.tex /^\\caption{\\label{fig:qm\/complexfunctions} Two representations of complex$/;" l
fig:qm/complexfunctions input.tex /^\\caption{\\label{fig:qm\/complexfunctions} Two representations of complex$/;" l
fig:typical input.tex /^\\caption{\\label{fig:typical}Show me a sine.}$/;" l
label morefig input.tex /^\\section{\\label{morefig}Subfigures}$/;" s
tab:5 tc input.tex /^\\caption{\\label{tab:5\/tc}Comparison of the mean-field predictions$/;" l
tab:5/tc input.tex /^\\caption{\\label{tab:5\/tc}Comparison of the mean-field predictions$/;" l
8 changes: 4 additions & 4 deletions Units/tex-review-needed.r/intro_orig.tex.t/expected.tags
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ Literal text input.tex /^\\section{Literal text}$/;" s
Special Symbols input.tex /^\\section{Special Symbols}$/;" s
Special symbols input.tex /^\\subsection{Special symbols}$/;" u section:Special Symbols
Tables input.tex /^\\section{Tables}$/;" s
color red input.tex /^\\section{\\color{red}Use of Color}$/;" s
\\color{red}Use of Color input.tex /^\\section{\\color{red}Use of Color}$/;" s
\\label{morefig}Subfigures input.tex /^\\section{\\label{morefig}Subfigures}$/;" s
eq:fine input.tex /^I = \\! \\int_{-\\infty}^\\infty f(x)\\,dx \\label{eq:fine}.$/;" l
eq:ising input.tex /^\\label{eq:ising}$/;" l
eq:mdiv input.tex /^\\label{eq:mdiv}$/;" l
fig:lj input.tex /^\\caption{\\label{fig:lj}Plot of the$/;" l
fig:qm complexfunctions input.tex /^\\caption{\\label{fig:qm\/complexfunctions} Two representations of complex$/;" l
fig:qm/complexfunctions input.tex /^\\caption{\\label{fig:qm\/complexfunctions} Two representations of complex$/;" l
fig:typical input.tex /^\\caption{\\label{fig:typical}Show me a sine.}$/;" l
label morefig input.tex /^\\section{\\label{morefig}Subfigures}$/;" s
tab:5 tc input.tex /^\\caption{\\label{tab:5\/tc}Comparison of the mean-field predictions$/;" l
tab:5/tc input.tex /^\\caption{\\label{tab:5\/tc}Comparison of the mean-field predictions$/;" l
91 changes: 51 additions & 40 deletions parsers/tex.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#define isType(token,t) (bool) ((token)->type == (t))
#define isKeyword(token,k) (bool) ((token)->keyword == (k))
/*
 * Evaluates to non-zero when C may be part of an identifier.
 *
 * C must be an int holding either a byte value (0..255) or EOF, i.e. the
 * kind of value returned by getcFromInputFile ().  Accepted characters are
 * ASCII letters and digits, any byte >= 0x80 (so non-ASCII text is not
 * split in the middle), and the punctuation '$', '_', '#', '-', '.', ':'.
 *
 * The high-byte test is done on the int value itself, not on a cast to
 * unsigned char: such a cast would turn EOF (a negative value) into 0xFF
 * and make EOF look like an identifier character, so loops of the form
 * "while (isIdentChar (c))" would never terminate at end of input.
 *
 * Note: C is evaluated several times; do not pass expressions with side
 * effects.
 */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || ((c) >= 0x80) || (c) == '$' || \
	 (c) == '_' || (c) == '#' || (c) == '-' || (c) == '.' || (c) == ':')

/*
Expand All @@ -58,22 +58,22 @@ enum eKeywordId {
};
typedef int keywordId; /* to allow KEYWORD_NONE */

typedef enum eTokenType {
TOKEN_UNDEFINED,
TOKEN_CHARACTER,
TOKEN_CLOSE_PAREN,
TOKEN_COMMA,
enum eTokenType {
/* 0..255 are the byte's value. Some are named for convenience */
TOKEN_OPEN_PAREN = '(',
TOKEN_CLOSE_PAREN = ')',
TOKEN_OPEN_CURLY = '{',
TOKEN_CLOSE_CURLY = '}',
TOKEN_OPEN_SQUARE = '[',
TOKEN_CLOSE_SQUARE = ']',
TOKEN_STAR = '*',
/* above is special types */
TOKEN_UNDEFINED = 256,
TOKEN_KEYWORD,
TOKEN_OPEN_PAREN,
TOKEN_IDENTIFIER,
TOKEN_STRING,
TOKEN_OPEN_CURLY,
TOKEN_CLOSE_CURLY,
TOKEN_OPEN_SQUARE,
TOKEN_CLOSE_SQUARE,
TOKEN_QUESTION_MARK,
TOKEN_STAR
} tokenType;
};
typedef int tokenType;

typedef struct sTokenInfo {
tokenType type;
Expand Down Expand Up @@ -266,38 +266,41 @@ static void parseIdentifier (vString *const string, const int firstChar)
c = getcFromInputFile ();
} while (isIdentChar (c));

if (!isspace (c))
if (c != EOF)
ungetcToInputFile (c); /* unget non-identifier character */
}

static bool readToken (tokenInfo *const token)
static bool readTokenFull (tokenInfo *const token, const bool includeWhitespaces)
{
int c;
int whitespaces = -1;

token->type = TOKEN_UNDEFINED;
token->keyword = KEYWORD_NONE;
vStringClear (token->string);

getNextChar:

do
{
c = getcFromInputFile ();
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
whitespaces++;
}
while (c == '\t' || c == ' ' || c == '\n');

token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();

if (includeWhitespaces && whitespaces > 0 && c != '%' && c != EOF)
{
ungetcToInputFile (c);
c = ' ';
}

token->type = (unsigned char) c;
switch (c)
{
case EOF: return false;
case '(': token->type = TOKEN_OPEN_PAREN; break;
case ')': token->type = TOKEN_CLOSE_PAREN; break;
case ',': token->type = TOKEN_COMMA; break;
case '{': token->type = TOKEN_OPEN_CURLY; break;
case '}': token->type = TOKEN_CLOSE_CURLY; break;
case '[': token->type = TOKEN_OPEN_SQUARE; break;
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
case '*': token->type = TOKEN_STAR; break;

case '\\':
/*
Expand All @@ -310,10 +313,9 @@ static bool readToken (tokenInfo *const token)
ungetcToInputFile (c);
else
{
vStringPut (token->string, '\\');
parseIdentifier (token->string, c);
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
token->keyword = lookupKeyword (vStringValue (token->string), Lang_tex);
token->keyword = lookupKeyword (vStringValue (token->string) + 1, Lang_tex);
if (isKeyword (token, KEYWORD_NONE))
token->type = TOKEN_IDENTIFIER;
else
Expand All @@ -327,20 +329,21 @@ static bool readToken (tokenInfo *const token)
break;

default:
if (! isIdentChar (c))
token->type = TOKEN_UNDEFINED;
else
if (isIdentChar (c))
{
parseIdentifier (token->string, c);
token->lineNumber = getInputLineNumber ();
token->filePosition = getInputFilePosition ();
token->type = TOKEN_IDENTIFIER;
}
break;
}
return true;
}

/*
 * Convenience front end for readTokenFull () with whitespace reporting
 * turned off.  TOKEN is filled in by readTokenFull (); the return value
 * is passed through unchanged.
 */
static bool readToken (tokenInfo *const token)
{
	const bool includeWhitespaces = false;

	return readTokenFull (token, includeWhitespaces);
}

static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
dest->lineNumber = src->lineNumber;
Expand Down Expand Up @@ -405,7 +408,7 @@ static bool parseTag (tokenInfo *const token, texKind kind)
{
if (vStringLength (fullname) > 0)
vStringPut (fullname, ' ');
vStringCatS (fullname, vStringValue (token->string));
vStringCat (fullname, token->string);
}
if (!readToken (token))
{
Expand Down Expand Up @@ -436,28 +439,36 @@ static bool parseTag (tokenInfo *const token, texKind kind)

if (isType (token, TOKEN_OPEN_CURLY))
{
int depth = 1;

if (!readToken (token))
{
eof = true;
goto out;
}
while (! isType (token, TOKEN_CLOSE_CURLY) )
while (depth > 0)
{
/* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */
if (useLongName)
{
if (vStringLength (fullname) > 0)
vStringPut (fullname, ' ');
vStringCatS (fullname, vStringValue (token->string));
if (isType (token, TOKEN_IDENTIFIER) || isType (token, TOKEN_KEYWORD))
vStringCat (fullname, token->string);
else
vStringPut (fullname, token->type);
}
if (!readToken (token))
if (!readTokenFull (token, useLongName))
{
eof = true;
goto out;
}
else if (isType (token, TOKEN_OPEN_CURLY))
depth++;
else if (isType (token, TOKEN_CLOSE_CURLY))
depth--;
}
if (useLongName)
{
vStringStripTrailing (fullname);
if (vStringLength (fullname) > 0)
{
vStringCopy (name->string, fullname);
Expand Down

0 comments on commit 0d87063

Please sign in to comment.