Skip to content

Commit

Permalink
awk: split record into runes for empty FS (#292)
Browse files Browse the repository at this point in the history
When FS was empty, awk split each record into bytes instead of runes.
For example, the following command printed only the first byte of the
UTF-8 encoding of é:

	echo é | awk 'BEGIN{FS=""}{print $1}'

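The change follows the approach the `split` function uses for runes: each field is built by decoding one rune with `chartorune` and copying all of that rune's bytes, rather than a single byte.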

Originally reported by kris on Twitter:
https://twitter.com/p9luv/status/1180436083433201665
  • Loading branch information
fhs authored and dancrossnyc committed Oct 29, 2019
1 parent 715807d commit 1309450
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/cmd/awk/lib.c
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ THIS SOFTWARE.
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <utf.h>
#include "awk.h"
#include "y.tab.h"

@@ -293,15 +294,19 @@ void fldbld(void) /* create fields from current record */
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != 0; r++) {
char buf[2];
int nb;
for (i = 0; *r != 0; r += nb) {
Rune rr;
char buf[UTFmax+1];

i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
buf[0] = *r;
buf[1] = 0;
nb = chartorune(&rr, r);
memmove(buf, r, nb);
buf[nb] = '\0';
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}
Expand Down

0 comments on commit 1309450

Please sign in to comment.