diff --git a/Makefile.am b/Makefile.am index 98a3adfd36..6b85f70251 100644 --- a/Makefile.am +++ b/Makefile.am @@ -17,6 +17,7 @@ bin_PROGRAMS = ctags if USE_READCMD bin_PROGRAMS+= readtags readtags_CPPFLAGS = -I. -I$(srcdir) -I$(srcdir)/main -I$(srcdir)/read +readtags_CFLAGS = $(COVERAGE_CFLAGS) dist_readtags_SOURCES = $(READTAGS_SRCS) $(READTAGS_HEADS) readtags_CPPFLAGS += -DQUALIFIER -I$(srcdir)/dsl dist_readtags_SOURCES += $(QUALIFIER_SRCS) $(QUALIFIER_HEADS) diff --git a/Units/roundtrip-escapes.d/expected.tags b/Units/roundtrip-escapes.d/expected.tags new file mode 100644 index 0000000000..927ba14c21 --- /dev/null +++ b/Units/roundtrip-escapes.d/expected.tags @@ -0,0 +1,43 @@ +_\\_ input // +_\a_ input // +_\b_ input // +_\f_ input // +_\n_ input // +\n input // +_\r_ input // +\r input // +_\t_ input // +_\v_ input // +_\x01_ input // +_\x02_ input // +_\x03_ input // +_\x04_ input // +_\x05_ input // +_\x06_ input // +_\x07_ input // +_\x08_ input // +_\x09_ input // +_\x0A_ input // +_\x0b_ input // +_\x0c_ input // +_\x0d_ input // +_\x0e_ input // +_\x0f_ input // +_\x10_ input // +_\x11_ input // +_\x12_ input // +_\x13_ input // +_\x14_ input // +_\x15_ input // +_\x16_ input // +_\x17_ input // +_\x18_ input // +_\x19_ input // +_\x1a_ input // +_\x1b_ input // +_\x1c_ input // +_\x1d_ input // +_\x1e_ input // +_\x1F_ input // +_\x20_ input // +_\x21_ input // diff --git a/circle.yml b/circle.yml index 45ce833134..ef2771a9f3 100644 --- a/circle.yml +++ b/circle.yml @@ -13,7 +13,7 @@ jobs: - run: name: Install build tools command: | - dnf -y install gcc automake autoconf pkgconfig bmake aspell-devel aspell-en libxml2-devel jansson-devel libyaml-devel || : + dnf -y install gcc automake autoconf pkgconfig bmake aspell-devel aspell-en libxml2-devel jansson-devel libyaml-devel findutils || : - run: name: Build command: | diff --git a/makefiles/testing.mak b/makefiles/testing.mak index f99e185076..5ceae60b89 100644 --- a/makefiles/testing.mak +++ 
b/makefiles/testing.mak @@ -1,7 +1,7 @@ # -*- makefile -*- .PHONY: check units fuzz noise tmain tinst clean-units clean-tmain clean-gcov run-gcov codecheck cppcheck dicts cspell -check: tmain units +check: tmain units roundtrip clean-local: clean-units clean-tmain @@ -18,13 +18,21 @@ LANGUAGES= CATEGORIES= UNITS= +SILENT = $(SILENT_@AM_V@) +SILENT_ = $(SILENT_@AM_DEFAULT_V@) +SILENT_0 = @ + +V_RUN = $(V_RUN_@AM_V@) +V_RUN_ = $(V_RUN_@AM_DEFAULT_V@) +V_RUN_0 = @echo " RUN $@"; + # # FUZZ Target # # SHELL must be dash or bash. # fuzz: $(CTAGS_TEST) - @ \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -40,7 +48,7 @@ fuzz: $(CTAGS_TEST) # NOISE Target # noise: $(CTAGS_TEST) - @ \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -56,7 +64,7 @@ noise: $(CTAGS_TEST) # CHOP Target # chop: $(CTAGS_TEST) - @ \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -68,7 +76,7 @@ chop: $(CTAGS_TEST) --with-timeout=$(TIMEOUT)"; \ $(SHELL) $${c} $(srcdir)/Units slap: $(CTAGS_TEST) - @ \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -84,7 +92,7 @@ slap: $(CTAGS_TEST) # UNITS Target # units: $(CTAGS_TEST) - @ \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -115,8 +123,7 @@ clean-units: # Test main part, not parsers # tmain: $(CTAGS_TEST) - @ \ - \ + $(V_RUN) \ if test -n "$${ZSH_VERSION+set}"; then set -o SH_WORD_SPLIT; fi; \ if test x$(VG) = x1; then \ VALGRIND=--with-valgrind; \ @@ -144,22 +151,25 @@ clean-tmain: # Test installation # tinst: - @ \ - \ + $(V_RUN) \ builddir=$$(pwd); \ rm -rf $$builddir/$(TINST_ROOT); \ - \ $(SHELL) 
$(srcdir)/misc/tinst $(srcdir) $$builddir/$(TINST_ROOT) # # Test readtags # +if USE_READCMD roundtrip: $(READ_TEST) - @ \ - \ + $(V_RUN) \ builddir=$$(pwd); \ - \ - $(SHELL) $(srcdir)/misc/roundtrip $(READ_TEST) $${builddir}/Units + APPVEYOR=$(APPVEYOR) \ + $(SHELL) $(srcdir)/misc/roundtrip $(READ_TEST) $${builddir}/Units +else +# apparently Automake doesn't like adding an extra dependency in a conditional, +# so add it unconditionally and simply have it not do anything if it's disabled. +roundtrip: +endif # # Checking code in ctags own rules diff --git a/misc/roundtrip b/misc/roundtrip index ae356aeda3..8ce5685caf 100644 --- a/misc/roundtrip +++ b/misc/roundtrip @@ -15,15 +15,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +set -e + +# Avoid trouble with weird bytes on non-C locales +export LC_ALL=C + READTAGS=${1:-./readtags} UNITS=${2:-./Units} s=0 -sink() -{ - cat > /dev/null -} - if ! [ -f "${READTAGS}" ]; then echo "No such file: ${READTAGS}" 1>&2 exit 1 @@ -40,16 +40,116 @@ if ! [ -x "${UNITS}" ]; then fi -for tags in $(find "$UNITS" -name expected.tags); do - for t in $(sed -e 's/^\([^\t]*\)\t.*/\1/' "$tags"); do - if r=$( "${READTAGS}" -t "$tags" - "$t" 2>/dev/null ); then - : - else - printf "FAILED[%3d]: " $? - echo "${READTAGS}" -t "$tags" - "$t" +# Expand CTags escape sequences (and keep the original, too) +# See docs/format.rst +expandEscapeSequences() +{ + sed -e ' +# early out if there is no escape sequence (common case) +/\\/!b + +# loop until we handled all occurrences of \. +# as it is not possible to do all replacements at once and we must never +# use the result of a replacement to perform another one, use two +# reserved placeholders. 
+:again + s/\\/__BACKSLASH__/ + s/__BACKSLASH__\\/__LITBACKSLASH__/; t again + s/__BACKSLASH__t/ /; t again + s/__BACKSLASH__r/'"$(printf '\r')"'/; t again + s/__BACKSLASH__n/\ +/; t again + s/__BACKSLASH__a//; t again + s/__BACKSLASH__b/'"$(printf '\b')"'/; t again + s/__BACKSLASH__v/ /; t again + s/__BACKSLASH__f/ /; t again + s/__BACKSLASH__x01//; t again + s/__BACKSLASH__x02//; t again + s/__BACKSLASH__x03//; t again + s/__BACKSLASH__x04//; t again + s/__BACKSLASH__x05//; t again + s/__BACKSLASH__x06//; t again + s/__BACKSLASH__x07//; t again + s/__BACKSLASH__x08//; t again + s/__BACKSLASH__x09/ /; t again + s/__BACKSLASH__x0[aA]/\ +/; t again + s/__BACKSLASH__x0[bB]/ /; t again + s/__BACKSLASH__x0[cC]/ /; t again + s/__BACKSLASH__x0[dD]/'"$(printf '\r')"'/; t again + s/__BACKSLASH__x0[eE]//; t again + s/__BACKSLASH__x0[fF]//; t again + s/__BACKSLASH__x10//; t again + s/__BACKSLASH__x11//; t again + s/__BACKSLASH__x12//; t again + s/__BACKSLASH__x13//; t again + s/__BACKSLASH__x14//; t again + s/__BACKSLASH__x15//; t again + s/__BACKSLASH__x16//; t again + s/__BACKSLASH__x17//; t again + s/__BACKSLASH__x18//; t again + s/__BACKSLASH__x19//; t again + s/__BACKSLASH__x1[aA]//; t again + s/__BACKSLASH__x1[bB]//; t again + s/__BACKSLASH__x1[cC]//; t again + s/__BACKSLASH__x1[dD]//; t again + s/__BACKSLASH__x1[eE]//; t again + s/__BACKSLASH__x1[fF]//; t again + s/__BACKSLASH__x21/!/; t again + s/__BACKSLASH__x20/ /; t again + s/__BACKSLASH__x7[fF]//; t again + /\\/b again + +:out + # replace lonely "\"es + s/__BACKSLASH__/\\/g + # replace literal "\"es ("\\" from the input) + s/__LITBACKSLASH__/\\/g +' +} + +# Escape characters MSYS2 would convert when calling readtags +if [ -n "$APPVEYOR" ]; then + MSYS2_READTAGS="${READTAGS}" + msys2_readtags() + { + MSYS2_ARG_CONV_EXCL="$4" \ + "${MSYS2_READTAGS}" "$@" + } + READTAGS=msys2_readtags +fi + +# disable path expansion, because we don't need it and it's applied on the +# result of the commands used to loop on, and we 
don't want that. +set -f +# remove space from IFS as it's valid in tag names +OLD_IFS="$IFS" +IFS=' +' + +tagfiles="$(find "$UNITS" -name expected.tags)" +for tags in $tagfiles; do + tagnames="$(sed -e 's/^\([^ ]*\) .*/\1/' "$tags")" + for name in $tagnames; do + # Yes, there is a reason for this craziness. We need to properly + # handle embedded newlines (expanded from "\n"), including trailing + # ones the shell would strip automatically. To work around this, we + # add a dummy character at the end to inhibit stripping, and then + # remove it, plus the extra newline, using variable substitutions. + # Note: we use "printf '%s\n'" instead of "echo" because Dash's "echo" + # unconditionally expands some sequences, like "\t" and alike. + t="$(printf '%s\n' "$name" | expandEscapeSequences; printf _)" + t="${t% +_}" + + if [ 1 -gt $("${READTAGS}" -t "$tags" - "$t" | wc -l) ]; then + printf 'FAILED: "%s" -t "%s" - "%s"\n' "${READTAGS}" "$tags" "$t" + printf ' The raw tag name was "%s"\n' "$name" s=1 fi - done + done done +IFS="$OLD_IFS" + exit $s diff --git a/read/readtags-cmd.c b/read/readtags-cmd.c index 4722995557..873237df56 100644 --- a/read/readtags-cmd.c +++ b/read/readtags-cmd.c @@ -95,6 +95,11 @@ static void findTag (const char *const name, const int options) printTag (&entry); } while (tagsFindNext (file, &entry) == TagSuccess); } + else + { + fprintf (stderr, "%s: no match found for \"%s\" in \"%s\"\n", + ProgramName, name, TagFileName); + } tagsClose (file); } } diff --git a/read/readtags.c b/read/readtags.c index 7031087ecc..5ec0483a10 100644 --- a/read/readtags.c +++ b/read/readtags.c @@ -93,6 +93,59 @@ static const char *const PseudoTagPrefix = "!_"; * FUNCTION DEFINITIONS */ +/* Converts a hexadecimal digit to its value */ +static int xdigitValue (char digit) +{ + if (digit >= '0' && digit <= '9') + return digit - '0'; + else if (digit >= 'a' && digit <= 'f') + return 10 + digit - 'a'; + else if (digit >= 'A' && digit <= 'F') + return 10 + digit - 'A'; + 
else
+		return 0;
+}
+
+/*
+ * Reads one character from *s, decoding a backslash escape if present, and
+ * advances *s past it; an unrecognized escape yields the backslash itself.
+ */
+static int readTagCharacter (const char **s)
+{
+	int c = **s;
+
+	(*s)++;
+
+	if (c == '\\')
+	{
+		switch (**s)
+		{
+			case 't': c = '\t'; (*s)++; break;
+			case 'r': c = '\r'; (*s)++; break;
+			case 'n': c = '\n'; (*s)++; break;
+			case '\\': c = '\\'; (*s)++; break;
+			/* Universal-CTags extensions */
+			case 'a': c = '\a'; (*s)++; break;
+			case 'b': c = '\b'; (*s)++; break;
+			case 'v': c = '\v'; (*s)++; break;
+			case 'f': c = '\f'; (*s)++; break;
+			case 'x': /* \xHH, HH < 0x80; <ctype.h> needs unsigned char values */
+				if (isxdigit ((unsigned char) (*s)[1]) && isxdigit ((unsigned char) (*s)[2]))
+				{
+					int val = (xdigitValue ((*s)[1]) << 4) | xdigitValue ((*s)[2]);
+					if (val < 0x80)
+					{
+						(*s) += 3;
+						c = val;
+					}
+				}
+				break;
+		}
+	}
+
+	return c;
+}
+
 /*
  * Compare two strings, ignoring case.
  * Return 0 for match, < 0 for smaller, > 0 for bigger
@@ -100,23 +153,59 @@ static const char *const PseudoTagPrefix = "!_";
  * This makes a difference when one of the chars lies between upper and lower
  * ie. one of the chars [ \ ] ^ _ ` for ascii. (The '_' in particular !)
*/
-static int struppercmp (const char *s1, const char *s2)
+static int taguppercmp (const char *s1, const char *s2)
+{
+	int result;
+	int c1, c2;
+	do
+	{
+		c1 = *s1++;
+		c2 = readTagCharacter (&s2); /* s2 is the escaped tag-file name */
+
+		result = toupper (c1) - toupper (c2);
+	} while (result == 0 && c1 != '\0' && c2 != '\0');
+	return result;
+}
+
+static int tagnuppercmp (const char *s1, const char *s2, size_t n)
+{
+	int result = 0; /* n == 0 compares nothing and matches */
+	int c1 = 1, c2 = 1; /* non-NUL so that the first test only depends on n */
+	while (result == 0 && n-- > 0 && c1 != '\0' && c2 != '\0')
+	{
+		c1 = *s1++;
+		c2 = readTagCharacter (&s2);
+
+		result = toupper (c1) - toupper (c2);
+	}
+	return result;
+}
+
+static int tagcmp (const char *s1, const char *s2)
 {
 	int result;
+	int c1, c2;
 	do
 	{
-		result = toupper ((int) *s1) - toupper ((int) *s2);
-	} while (result == 0 && *s1++ != '\0' && *s2++ != '\0');
+		c1 = (unsigned char) *s1++; /* compare as unsigned char, like strcmp() */
+		c2 = (unsigned char) readTagCharacter (&s2);
+
+		result = c1 - c2;
+	} while (result == 0 && c1 != '\0' && c2 != '\0');
 	return result;
 }
 
-static int strnuppercmp (const char *s1, const char *s2, size_t n)
+static int tagncmp (const char *s1, const char *s2, size_t n)
 {
-	int result;
-	do
+	int result = 0; /* n == 0 compares nothing and matches, like strncmp() */
+	int c1 = 1, c2 = 1; /* non-NUL so that the first test only depends on n */
+	while (result == 0 && n-- > 0 && c1 != '\0' && c2 != '\0')
 	{
-		result = toupper ((int) *s1) - toupper ((int) *s2);
-	} while (result == 0 && --n > 0 && *s1++ != '\0' && *s2++ != '\0');
+		c1 = (unsigned char) *s1++; /* compare as unsigned char, like strncmp() */
+		c2 = (unsigned char) readTagCharacter (&s2);
+
+		result = c1 - c2;
+	}
 	return result;
 }
 
@@ -311,6 +400,7 @@ static void parseTagLine (tagFile *file, tagEntry *const entry)
 {
 	int i;
 	char *p = file->line.buffer;
+	size_t p_len = strlen (p); /* bytes left in the whole line, NUL excluded */
 	char *tab = strchr (p, TAB);
 
 	entry->fields.list = NULL;
@@ -320,8 +410,25 @@ static void parseTagLine (tagFile *file, tagEntry *const entry)
 
 	entry->name = p;
 	if (tab != NULL)
-	{
 		*tab = '\0';
+	while (*p != '\0') /* un-escape the tag name in place */
+	{
+		const char *next = p;
+		int ch = readTagCharacter (&next);
+		size_t skip = next - p;
+		*p = (char) ch;
+		p++;
+		p_len -= skip;
+		if (skip > 1)
+		{
+			memmove (p, next, p_len + 1); /* + 1 moves the final NUL too */
+			if (tab != NULL) /* a line without TAB must keep tab == NULL */
+				tab -= skip - 1;
+		}
+	}
+
+	if (tab != NULL)
+	{
 		p = tab + 1;
 		entry->file = p;
tab = strchr (p, TAB); @@ -561,18 +668,18 @@ static int nameComparison (tagFile *const file) if (file->search.ignorecase) { if (file->search.partial) - result = strnuppercmp (file->search.name, file->name.buffer, + result = tagnuppercmp (file->search.name, file->name.buffer, file->search.nameLength); else - result = struppercmp (file->search.name, file->name.buffer); + result = taguppercmp (file->search.name, file->name.buffer); } else { if (file->search.partial) - result = strncmp (file->search.name, file->name.buffer, + result = tagncmp (file->search.name, file->name.buffer, file->search.nameLength); else - result = strcmp (file->search.name, file->name.buffer); + result = tagcmp (file->search.name, file->name.buffer); } return result; }