Skip to content

Commit

Permalink
Extend GCproto debug with declared function name (declname)
Browse files Browse the repository at this point in the history
Lua functions can be declared in many different ways:

    function () end
    function foo () end
    function foo.bar () end
    function foo.bar:baz() end
    local function foo () end

This change captures the text between 'function' and '(' as the
"declared name" (declname) and records this in the GCproto object's
debug information. This makes it possible to recover the declared name
of a function, such as "foo.bar:baz", from its prototype.

The Lua approach is usually to resolve names at runtime using
introspection to see what name a function is bound to. However, we need
a mechanism that works on the "cold" debug information in the
audit.log.

One consequence is that semantically equivalent function declarations
will have different levels of debug information in practice:

    fun = function () end    -- Has no declname
    function fun ()   end    -- Has declname "fun"

which might penalize certain legitimate programming styles. That is
lamentable. The situation is similar to the Scheme world:

    (define fun (lambda ()))   ; Function object has no debug name
    (define (fun))             ; Function object has debug name "fun"
  • Loading branch information
lukego committed May 8, 2018
1 parent f58daf9 commit 8d9d959
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 17 deletions.
12 changes: 8 additions & 4 deletions src/lj_bcread.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ static uint32_t bcread_uleb128_33(LexState *ls)
static void bcread_dbg(LexState *ls, GCproto *pt, MSize sizedbg)
{
uint32_t *lineinfo = (uint32_t*)proto_lineinfo(pt);
bcread_block(ls, lineinfo, sizedbg);
bcread_block(ls, (void*)proto_declname(pt), sizedbg);
/* Swap lineinfo if the endianness differs. */
if (bcread_swap(ls)) {
int i;
Expand Down Expand Up @@ -302,7 +302,7 @@ GCproto *lj_bcread_proto(LexState *ls)
MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
MSize ofsk, ofsuv, ofsdbg;
MSize sizedbg = 0;
BCLine firstline = 0, numline = 0;
BCLine firstline = 0, numline = 0, ndeclname = 0;

/* Read prototype header. */
flags = bcread_byte(ls);
Expand All @@ -315,6 +315,7 @@ GCproto *lj_bcread_proto(LexState *ls)
if (!(bcread_flags(ls) & BCDUMP_F_STRIP)) {
sizedbg = bcread_uleb128(ls);
if (sizedbg) {
ndeclname = bcread_uleb128(ls);
firstline = bcread_uleb128(ls);
numline = bcread_uleb128(ls);
}
Expand Down Expand Up @@ -362,11 +363,14 @@ GCproto *lj_bcread_proto(LexState *ls)
pt->numline = numline;
if (sizedbg) {
MSize sizeli = (sizebc-1) * sizeof(BCLine);
setmref(pt->lineinfo, (char *)pt + ofsdbg);
setmref(pt->uvinfo, (char *)pt + ofsdbg + sizeli);
setmref(pt->declname, (char *)pt + ofsdbg);
setmref(pt->lineinfo, (char *)pt + ofsdbg + ndeclname);
setmref(pt->uvinfo, (char *)pt + ofsdbg + ndeclname + sizeli);
bcread_dbg(ls, pt, sizedbg);
setmref(pt->varinfo, bcread_varinfo(pt));
lua_assert(strlen(pt->declname)+1 == ndeclname);
} else {
setmref(pt->declname, NULL);
setmref(pt->lineinfo, NULL);
setmref(pt->uvinfo, NULL);
setmref(pt->varinfo, NULL);
Expand Down
6 changes: 4 additions & 2 deletions src/lj_bcwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
{
MSize sizedbg = 0;
char *p;
const char *declname = pt->declname ? proto_declname(pt) : "";

/* Recursively write children of prototype. */
if ((pt->flags & PROTO_CHILD)) {
Expand Down Expand Up @@ -239,9 +240,10 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!ctx->strip) {
if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
sizedbg = pt->sizept - (MSize)((char *)proto_declname(pt) - (char *)pt);
p = lj_strfmt_wuleb128(p, sizedbg);
if (sizedbg) {
p = lj_strfmt_wuleb128(p, strlen(declname)+1);
p = lj_strfmt_wuleb128(p, pt->firstline);
p = lj_strfmt_wuleb128(p, pt->numline);
}
Expand All @@ -259,7 +261,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
/* Write debug info, if not stripped. */
if (sizedbg) {
p = lj_buf_more(&ctx->sb, sizedbg);
p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
p = lj_buf_wmem(p, declname, sizedbg);
setsbufP(&ctx->sb, p);
}

Expand Down
20 changes: 19 additions & 1 deletion src/lj_lex.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ static LJ_NOINLINE LexChar lex_more(LexState *ls)
/*
** Get next character.
**
** Takes the next byte from the buffered input [ls->p, ls->pe) or, when the
** buffer is exhausted, whatever lex_more() returns. The result is stored in
** ls->c and returned.
**
** While input logging is active (ls->log non-NULL) and the log position is
** still below ls->logend, the character is also appended to the log buffer.
** Every value returned from here is logged, including whatever lex_more()
** yields; the value is narrowed to char when stored.
*/
static LJ_AINLINE LexChar lex_next(LexState *ls)
{
  LexChar c;
  if (ls->p < ls->pe)
    c = (LexChar)(uint8_t)*ls->p++;
  else
    c = lex_more(ls);
  ls->c = c;
  /* The logging must run before returning; an early return would skip it. */
  if (ls->log && ls->log < ls->logend)
    *ls->log++ = (char)c;
  return c;
}

/* Save character. */
Expand Down Expand Up @@ -399,6 +401,8 @@ int lj_lex_setup(lua_State *L, LexState *ls)
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
ls->log = NULL;
ls->logend = NULL;
lex_next(ls); /* Read-ahead first char. */
if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
(uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
Expand Down Expand Up @@ -489,6 +493,20 @@ void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
va_end(argp);
}

/*
** Log the next input characters to a bounded buffer.
**
** ls    Lexer state whose log cursor is (re)armed.
** log   Destination buffer; lex_next() appends one char per input character.
** size  Capacity the caller grants. The end marker is placed at
**       log + size - 1 and characters are stored only while the cursor is
**       below it, so at most size-1 characters are written.
**
** No terminator is written here or by lex_next(); callers that want a C
** string must pre-zero the buffer (the callers in lj_parse.c memset it).
** A NULL buffer or a size below 1 disables logging instead of computing an
** end pointer that lies before the start of the buffer.
*/
void lj_lex_log(LexState *ls, char *log, int size)
{
  if (log == NULL || size < 1) {
    ls->log = NULL;
    ls->logend = NULL;
    return;
  }
  ls->log = log;
  ls->logend = log + (size - 1);
}

/*
** Stop logging input characters.
** Clears both the log cursor and its end marker; lex_next() only logs
** while ls->log is non-NULL.
*/
void lj_lex_endlog(LexState *ls)
{
  ls->logend = NULL;
  ls->log = NULL;
}

/* Initialize strings for reserved words. */
void lj_lex_init(lua_State *L)
{
Expand Down
4 changes: 4 additions & 0 deletions src/lj_lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,17 @@ typedef struct LexState {
BCInsLine *bcstack; /* Stack for bytecode instructions/line numbers. */
MSize sizebcstack; /* Size of bytecode stack. */
uint32_t level; /* Syntactical nesting level. */
char *log; /* Current position where input should be logged. */
char *logend; /* Last position where input can be logged. */
} LexState;

LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_next(LexState *ls);
LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
LJ_FUNC void lj_lex_log(LexState *ls, char *log, int size);
LJ_FUNC void lj_lex_endlog(LexState *ls);
LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
LJ_FUNC void lj_lex_init(lua_State *L);

Expand Down
2 changes: 2 additions & 0 deletions src/lj_obj.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ typedef struct GCproto {
GCRef chunkname; /* Name of the chunk this function was defined in. */
BCLine firstline; /* First line of the function definition. */
BCLine numline; /* Number of lines for the function definition. */
MRef declname; /* Declared name of function (null-terminated). */
MRef lineinfo; /* Map from bytecode ins. to source line. */
MRef uvinfo; /* Upvalue names. */
MRef varinfo; /* Names and compressed extents of local variables. */
Expand Down Expand Up @@ -344,6 +345,7 @@ typedef struct GCproto {

#define proto_chunkname(pt) (strref((pt)->chunkname))
#define proto_chunknamestr(pt) (strdata(proto_chunkname((pt))))
#define proto_declname(pt) (mref((pt)->declname, const char))
#define proto_lineinfo(pt) (mref((pt)->lineinfo, const uint32_t))
#define proto_uvinfo(pt) (mref((pt)->uvinfo, const uint8_t))
#define proto_varinfo(pt) (mref((pt)->varinfo, const uint8_t))
Expand Down
53 changes: 43 additions & 10 deletions src/lj_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -1366,12 +1366,20 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
}

/* Prepare variable info for prototype. */
static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar, const char *declname)
{
VarInfo *vs =ls->vstack, *ve;
MSize i, n;
BCPos lastpc;
lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
/* Store function declaration name. */
{
char *p;
int len = strlen(declname) + 1;
p = lj_buf_more(&ls->sb, len);
p = lj_buf_wmem(p, declname, len);
setsbufP(&ls->sb, p);
}
/* Store upvalue names. */
for (i = 0, n = fs->nuv; i < n; i++) {
GCstr *s = strref(vs[fs->uvmap[i]].name);
Expand Down Expand Up @@ -1410,7 +1418,9 @@ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
/*
** Fixup variable info for prototype.
**
** ls      Lexer state; ls->sb holds the debug data assembled by
**         fs_prep_var(): the NUL-terminated declared name first, then the
**         upvalue names, then the variable info.
** pt      Prototype whose declname/uvinfo/varinfo references are set.
** p       Destination inside the prototype for the debug data.
** ofsvar  Offset of the variable info from p.
*/
static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
{
  size_t ndeclname;
  /* Copy from temp. buffer first: the declared name must already be in
  ** place at p before its length can be measured there.
  */
  memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb));
  ndeclname = strlen((const char *)p) + 1;  /* Name plus its terminator. */
  setmref(pt->declname, p);
  setmref(pt->uvinfo, p + ndeclname);  /* Upvalue names follow the name. */
  setmref(pt->varinfo, (char *)p + ofsvar);
}
Expand Down Expand Up @@ -1466,7 +1476,7 @@ static void fs_fixup_ret(FuncState *fs)
}

/* Finish a FuncState and return the new prototype. */
static GCproto *fs_finish(LexState *ls, BCLine line)
static GCproto *fs_finish(LexState *ls, BCLine line, char *declname)
{
lua_State *L = ls->L;
FuncState *fs = ls->fs;
Expand All @@ -1483,7 +1493,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
ofsk = sizept; sizept += fs->nkn*sizeof(TValue);
ofsuv = sizept; sizept += ((fs->nuv+1)&~1)*2;
ofsli = sizept; sizept += fs_prep_line(fs, numline);
ofsdbg = sizept; sizept += fs_prep_var(ls, fs, &ofsvar);
ofsdbg = sizept; sizept += fs_prep_var(ls, fs, &ofsvar, declname);

/* Allocate prototype and initialize its fields. */
pt = (GCproto *)lj_mem_newgco(L, (MSize)sizept);
Expand Down Expand Up @@ -1742,7 +1752,8 @@ static BCReg parse_params(LexState *ls, int needself)
static void parse_chunk(LexState *ls);

/* Parse body of a function. */
static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line,
char *declname)
{
FuncState fs, *pfs = ls->fs;
FuncScope bl;
Expand All @@ -1757,7 +1768,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
parse_chunk(ls);
if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
pt = fs_finish(ls, (ls->lastline = ls->linenumber));
pt = fs_finish(ls, (ls->lastline = ls->linenumber), declname);
pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
/* Store new prototype in the constant array of the parent. */
Expand Down Expand Up @@ -1908,7 +1919,7 @@ static void expr_simple(LexState *ls, ExpDesc *v)
return;
case TK_function:
lj_lex_next(ls);
parse_body(ls, v, 0, ls->linenumber);
parse_body(ls, v, 0, ls->linenumber, "");
return;
default:
expr_primary(ls, v);
Expand Down Expand Up @@ -2137,9 +2148,24 @@ static void parse_call_assign(LexState *ls)
}
}

/*
** Convert the logged input into a canonical function declaration name.
**
** log must be a writable, NUL-terminated string. It is truncated in place
** just before the last '(' and any whitespace preceding that '(' is
** removed as well, e.g. "myfunc (X" becomes "myfunc". A string without
** any '(' (including the empty string) is returned unchanged.
**
** Returns log itself.
*/
static char *log_declname(char *log)
{
  size_t cut = strlen(log);
  /* Walk back to the last '('; indices are used so that an empty or
  ** paren-less string never yields a pointer before the buffer.
  */
  while (cut > 0 && log[cut-1] != '(') cut--;
  if (cut == 0) return log;  /* No '(' present: nothing to strip. */
  cut--;  /* Index of the '(' itself. */
  /* Drop blanks, tabs and line breaks between the name and the '('. */
  while (cut > 0) {
    char ch = log[cut-1];
    if (ch != ' ' && ch != '\t' && ch != '\n' &&
        ch != '\r' && ch != '\v' && ch != '\f')
      break;
    cut--;
  }
  log[cut] = '\0';  /* Truncate. */
  return log;
}

/* Parse 'local' statement. */
static void parse_local(LexState *ls)
{
char log[128];
memset(&log[0], 0, sizeof(log));
lj_lex_log(ls, &log[0], sizeof(log)-1);
if (lex_opt(ls, TK_function)) { /* Local function declaration. */
ExpDesc v, b;
FuncState *fs = ls->fs;
Expand All @@ -2148,7 +2174,8 @@ static void parse_local(LexState *ls)
v.u.s.aux = fs->varmap[fs->freereg];
bcreg_reserve(fs, 1);
var_add(ls, 1);
parse_body(ls, &b, 0, ls->linenumber);
lj_lex_endlog(ls); /* Captured declared function name. */
parse_body(ls, &b, 0, ls->linenumber, log_declname(log));
/* bcemit_store(fs, &v, &b) without setting VSTACK_VAR_RW. */
expr_free(fs, &b);
expr_toreg(fs, &b, v.u.s.info);
Expand All @@ -2157,6 +2184,7 @@ static void parse_local(LexState *ls)
} else { /* Local variable declaration. */
ExpDesc e;
BCReg nexps, nvars = 0;
lj_lex_endlog(ls); /* Not used for variables. */
do { /* Collect LHS. */
var_new(ls, nvars++, lex_str(ls));
} while (lex_opt(ls, ','));
Expand All @@ -2177,6 +2205,10 @@ static void parse_func(LexState *ls, BCLine line)
FuncState *fs;
ExpDesc v, b;
int needself = 0;
char log[128];
log[0] = 'c';
memset(log, 0, sizeof(log));
lj_lex_log(ls, log, sizeof(log)-1);
lj_lex_next(ls); /* Skip 'function'. */
/* Parse function name. */
var_lookup(ls, &v);
Expand All @@ -2186,7 +2218,8 @@ static void parse_func(LexState *ls, BCLine line)
needself = 1;
expr_field(ls, &v);
}
parse_body(ls, &b, needself, line);
lj_lex_endlog(ls);
parse_body(ls, &b, needself, line, log_declname(log));
fs = ls->fs;
bcemit_store(fs, &v, &b);
fs->bcbase[fs->pc - 1].line = line; /* Set line for the store. */
Expand Down Expand Up @@ -2616,7 +2649,7 @@ GCproto *lj_parse(LexState *ls)
parse_chunk(ls);
if (ls->tok != TK_eof)
err_token(ls, TK_eof);
pt = fs_finish(ls, ls->linenumber);
pt = fs_finish(ls, ls->linenumber, "");
L->top--; /* Drop chunkname. */
lua_assert(fs.prev == NULL);
lua_assert(ls->fs == NULL);
Expand Down

0 comments on commit 8d9d959

Please sign in to comment.