-
-
Notifications
You must be signed in to change notification settings - Fork 30.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-81283: compiler: remove indent from docstring #106411
Changes from all commits
469b3f7
6fc4456
78ef10c
34d0cbd
f61fe41
a16e5d8
64d0d2f
976a013
028b5df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -881,29 +881,28 @@ def cleandoc(doc): | |
|
||
Any whitespace that can be uniformly removed from the second line | ||
onwards is removed.""" | ||
try: | ||
lines = doc.expandtabs().split('\n') | ||
except UnicodeError: | ||
return None | ||
else: | ||
# Find minimum indentation of any non-blank lines after first line. | ||
margin = sys.maxsize | ||
for line in lines[1:]: | ||
content = len(line.lstrip()) | ||
if content: | ||
indent = len(line) - content | ||
margin = min(margin, indent) | ||
# Remove indentation. | ||
if lines: | ||
lines[0] = lines[0].lstrip() | ||
if margin < sys.maxsize: | ||
for i in range(1, len(lines)): lines[i] = lines[i][margin:] | ||
# Remove any trailing or leading blank lines. | ||
while lines and not lines[-1]: | ||
lines.pop() | ||
while lines and not lines[0]: | ||
lines.pop(0) | ||
return '\n'.join(lines) | ||
lines = doc.expandtabs().split('\n') | ||
|
||
# Find minimum indentation of any non-blank lines after first line. | ||
margin = sys.maxsize | ||
for line in lines[1:]: | ||
content = len(line.lstrip(' ')) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed from |
||
if content: | ||
indent = len(line) - content | ||
margin = min(margin, indent) | ||
# Remove indentation. | ||
if lines: | ||
lines[0] = lines[0].lstrip(' ') | ||
if margin < sys.maxsize: | ||
for i in range(1, len(lines)): | ||
lines[i] = lines[i][margin:] | ||
# Remove any trailing or leading blank lines. | ||
while lines and not lines[-1]: | ||
lines.pop() | ||
while lines and not lines[0]: | ||
lines.pop(0) | ||
return '\n'.join(lines) | ||
|
||
|
||
def getfile(object): | ||
"""Work out which source or compiled file an object was defined in.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1287,14 +1287,14 @@ def optionflags(): r""" | |
treated as equal: | ||
|
||
>>> def f(x): | ||
... '>>> print(1, 2, 3)\n 1 2\n 3' | ||
... '\n>>> print(1, 2, 3)\n 1 2\n 3' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is needed to avoid dedenting output examples. |
||
|
||
>>> # Without the flag: | ||
>>> test = doctest.DocTestFinder().find(f)[0] | ||
>>> doctest.DocTestRunner(verbose=False).run(test) | ||
... # doctest: +ELLIPSIS | ||
********************************************************************** | ||
File ..., line 2, in f | ||
File ..., line 3, in f | ||
Failed example: | ||
print(1, 2, 3) | ||
Expected: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
The compiler now strips common leading indentation from docstrings. This
reduces ``pyc`` file size by about 5% when a module is heavily documented. This
change affects ``__doc__``, so tools like doctest will be affected. |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1704,10 +1704,16 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) | |
if (c->c_optimize < 2) { | ||
docstring = _PyAST_GetDocString(stmts); | ||
if (docstring) { | ||
PyObject *cleandoc = _PyCompile_CleanDoc(docstring); | ||
if (cleandoc == NULL) { | ||
return ERROR; | ||
} | ||
i = 1; | ||
st = (stmt_ty)asdl_seq_GET(stmts, 0); | ||
assert(st->kind == Expr_kind); | ||
VISIT(c, expr, st->v.Expr.value); | ||
location loc = LOC(st->v.Expr.value); | ||
ADDOP_LOAD_CONST(c, loc, cleandoc); | ||
Py_DECREF(cleandoc); | ||
RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); | ||
} | ||
} | ||
|
@@ -2252,11 +2258,19 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f | |
/* if not -OO mode, add docstring */ | ||
if (c->c_optimize < 2) { | ||
docstring = _PyAST_GetDocString(body); | ||
if (docstring) { | ||
docstring = _PyCompile_CleanDoc(docstring); | ||
if (docstring == NULL) { | ||
compiler_exit_scope(c); | ||
return ERROR; | ||
} | ||
} | ||
} | ||
if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) { | ||
compiler_exit_scope(c); | ||
return ERROR; | ||
} | ||
Py_XDECREF(docstring); | ||
|
||
c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args); | ||
c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs); | ||
|
@@ -7967,6 +7981,89 @@ cfg_to_instructions(cfg_builder *g) | |
return NULL; | ||
} | ||
|
||
// C implementation of inspect.cleandoc()
//
// Difference from inspect.cleandoc():
// - Do not remove leading and trailing blank lines to keep lineno.
//
// Tabs are expanded first, then leading spaces are stripped from the first
// line and the common leading-space margin of all following non-blank lines
// is removed from every following line.
//
// Returns a new reference to the cleaned docstring, or NULL with an
// exception set on failure.  The reference to the `doc` argument itself is
// not consumed.
PyObject *
_PyCompile_CleanDoc(PyObject *doc)
{
    // From here on `doc` is the new string returned by expandtabs(), which
    // this function owns and must release on every exit path.
    doc = PyObject_CallMethod(doc, "expandtabs", NULL);
    if (doc == NULL) {
        return NULL;
    }

    Py_ssize_t doc_size;
    const char *doc_utf8 = PyUnicode_AsUTF8AndSize(doc, &doc_size);
    if (doc_utf8 == NULL) {
        Py_DECREF(doc);
        return NULL;
    }
    const char *p = doc_utf8;
    const char *pend = p + doc_size;

    // First pass: find minimum indentation of any non-blank lines
    // after first line.
    while (p < pend && *p++ != '\n') {
    }

    Py_ssize_t margin = PY_SSIZE_T_MAX;
    while (p < pend) {
        const char *s = p;
        while (p < pend && *p == ' ') {
            p++;
        }
        if (p < pend && *p != '\n') {
            margin = Py_MIN(margin, p - s);
        }
        while (p < pend && *p++ != '\n') {
        }
    }
    if (margin == PY_SSIZE_T_MAX) {
        // No non-blank line after the first one: nothing to dedent.
        margin = 0;
    }

    // Second pass: write cleandoc into buff.

    // copy first line without leading spaces.
    p = doc_utf8;
    while (p < pend && *p == ' ') {
        p++;
    }
    if (p == doc_utf8 && margin == 0) {
        // doc is already clean.
        return doc;
    }

    // The output is never longer than the input, so doc_size bytes suffice.
    // doc_size is non-zero here: an empty string takes the early return above.
    char *buff = PyMem_Malloc(doc_size);
    if (buff == NULL) {
        Py_DECREF(doc);
        PyErr_NoMemory();
        return NULL;
    }

    char *w = buff;

    while (p < pend) {
        int ch = *w++ = *p++;
        if (ch == '\n') {
            break;
        }
    }

    // copy subsequent lines without margin.
    while (p < pend) {
        for (Py_ssize_t i = 0; i < margin && p < pend; i++, p++) {
            if (*p != ' ') {
                // Only a blank line can be shorter than the margin.
                assert(*p == '\n' || *p == '\0');
                break;
            }
        }
        while (p < pend) {
            int ch = *w++ = *p++;
            if (ch == '\n') {
                break;
            }
        }
    }

    Py_DECREF(doc);
    PyObject *res = PyUnicode_FromStringAndSize(buff, w - buff);
    // The unicode object owns its own copy; release the scratch buffer
    // whether or not the conversion succeeded.
    PyMem_Free(buff);
    return res;
}
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure the dedent logic belongs in compile.c. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because this dedent logic is very specific for docstring. If we reuse this logic in doc = compiler.cleandoc(doc).strip('\n') |
||
|
||
|
||
PyObject * | ||
_PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags, | ||
int optimize, int compile_mode) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed this try-except-else block because Python 3 doesn't autodecode from bytes.
If doc is bytes,
doc.split('\n')
raises TypeError, not UnicodeError.