diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
index 511f0689c93822..6a1e02c0b895d7 100644
--- a/Include/internal/pycore_compile.h
+++ b/Include/internal/pycore_compile.h
@@ -33,6 +33,16 @@ extern int _PyAST_Optimize(
     struct _arena *arena,
     _PyASTOptimizeState *state);
 
+/* Utility for a number of growing arrays used in the compiler */
+int _PyCompile_EnsureArrayLargeEnough(
+    int idx,
+    void **array,
+    int *alloc,
+    int default_alloc,
+    size_t item_size);
+
+int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
+
 /* Access compiler internals for unit testing */
 
 PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(
diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h
new file mode 100644
index 00000000000000..7c0b8fe980c3c7
--- /dev/null
+++ b/Include/internal/pycore_flowgraph.h
@@ -0,0 +1,117 @@
+#ifndef Py_INTERNAL_CFG_H
+#define Py_INTERNAL_CFG_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_opcode_utils.h"
+
+static const _PyCompilerSrcLocation NO_LOCATION = {-1, -1, -1, -1};
+
+typedef struct {
+    int i_opcode;
+    int i_oparg;
+    _PyCompilerSrcLocation i_loc;
+    struct _PyCfgBasicblock_ *i_target; /* target block (if jump instruction) */
+    struct _PyCfgBasicblock_ *i_except; /* target block when exception is raised */
+} _PyCfgInstruction;
+
+typedef struct {
+    int id;
+} _PyCfgJumpTargetLabel;
+
+
+typedef struct {
+    struct _PyCfgBasicblock_ *handlers[CO_MAXBLOCKS+1];
+    int depth;
+} _PyCfgExceptStack;
+
+typedef struct _PyCfgBasicblock_ {
+    /* Each basicblock in a compilation unit is linked via b_list in the
+       reverse order that the blocks are allocated.  b_list points to the next
+       block, not to be confused with b_next, which is next by control flow. */
+    struct _PyCfgBasicblock_ *b_list;
+    /* The label of this block if it is a jump target, -1 otherwise */
+    _PyCfgJumpTargetLabel b_label;
+    /* Exception stack at start of block, used by assembler to create the exception handling table */
+    _PyCfgExceptStack *b_exceptstack;
+    /* pointer to an array of instructions, initially NULL */
+    _PyCfgInstruction *b_instr;
+    /* If b_next is non-NULL, it is a pointer to the next
+       block reached by normal control flow. */
+    struct _PyCfgBasicblock_ *b_next;
+    /* number of instructions used */
+    int b_iused;
+    /* length of instruction array (b_instr) */
+    int b_ialloc;
+    /* Used by add_checks_for_loads_of_unknown_variables */
+    uint64_t b_unsafe_locals_mask;
+    /* Number of predecessors that a block has. */
+    int b_predecessors;
+    /* depth of stack upon entry of block, computed by stackdepth() */
+    int b_startdepth;
+    /* instruction offset for block, computed by assemble_jump_offsets() */
+    int b_offset;
+    /* Basic block is an exception handler that preserves lasti */
+    unsigned b_preserve_lasti : 1;
+    /* Used by compiler passes to mark whether they have visited a basic block. */
+    unsigned b_visited : 1;
+    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
+    unsigned b_except_handler : 1;
+    /* b_cold is true if this block is not perf critical (like an exception handler) */
+    unsigned b_cold : 1;
+    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
+    unsigned b_warm : 1;
+} _PyCfgBasicblock;
+
+int _PyBasicblock_InsertInstruction(_PyCfgBasicblock *block, int pos, _PyCfgInstruction *instr);
+
+typedef struct cfg_builder_ {
+    /* The entryblock, at which control flow begins. All blocks of the
+       CFG are reachable through the b_next links */
+    _PyCfgBasicblock *g_entryblock;
+    /* Pointer to the most recently allocated block.  By following
+       b_list links, you can reach all allocated blocks. */
+    _PyCfgBasicblock *g_block_list;
+    /* pointer to the block currently being constructed */
+    _PyCfgBasicblock *g_curblock;
+    /* label for the next instruction to be placed */
+    _PyCfgJumpTargetLabel g_current_label;
+} _PyCfgBuilder;
+
+int _PyCfgBuilder_UseLabel(_PyCfgBuilder *g, _PyCfgJumpTargetLabel lbl);
+int _PyCfgBuilder_Addop(_PyCfgBuilder *g, int opcode, int oparg, _PyCompilerSrcLocation loc);
+
+int _PyCfgBuilder_Init(_PyCfgBuilder *g);
+void _PyCfgBuilder_Fini(_PyCfgBuilder *g);
+
+_PyCfgInstruction* _PyCfg_BasicblockLastInstr(const _PyCfgBasicblock *b);
+int _PyCfg_OptimizeCodeUnit(_PyCfgBuilder *g, PyObject *consts, PyObject *const_cache,
+                            int code_flags, int nlocals, int nparams);
+int _PyCfg_Stackdepth(_PyCfgBasicblock *entryblock, int code_flags);
+void _PyCfg_ConvertExceptionHandlersToNops(_PyCfgBasicblock *entryblock);
+int _PyCfg_ResolveLineNumbers(_PyCfgBuilder *g, int firstlineno);
+int _PyCfg_ResolveJumps(_PyCfgBuilder *g);
+int _PyCfg_InstrSize(_PyCfgInstruction *instruction);
+
+
+static inline int
+basicblock_nofallthrough(const _PyCfgBasicblock *b) {
+    _PyCfgInstruction *last = _PyCfg_BasicblockLastInstr(b);
+    return (last &&
+            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
+             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
+}
+
+#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
+#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CFG_H */
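For orientation: every basic block in this header sits on two distinct chains, b_list in (reverse) allocation order, used for bookkeeping and freeing, and b_next in control-flow order. A minimal sketch of the two traversals a pass over the new _PyCfg* types might use; this is illustrative only, not part of the patch, and visit_cfg is a hypothetical name:

/* Sketch: walk a CFG both ways (hypothetical helper, not in the patch). */
static void
visit_cfg(_PyCfgBuilder *g)
{
    /* Control-flow order: only blocks reachable through the b_next links. */
    for (_PyCfgBasicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        /* ... inspect b->b_instr[0 .. b->b_iused-1] ... */
    }
    /* Allocation order: every block ever created, reachable or not;
       this is the chain _PyCfgBuilder_Fini() walks to free memory. */
    for (_PyCfgBasicblock *b = g->g_block_list; b != NULL; b = b->b_list) {
        b->b_visited = 0;   /* e.g. reset a pass-local flag */
    }
}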
diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h
index 22914da07994f8..58f7da51aebd5e 100644
--- a/Include/internal/pycore_opcode.h
+++ b/Include/internal/pycore_opcode.h
@@ -12,12 +12,16 @@ extern "C" {
 
 #include "opcode.h"
 
+extern const uint32_t _PyOpcode_RelativeJump[9];
+
+extern const uint32_t _PyOpcode_Jump[9];
+
 extern const uint8_t _PyOpcode_Caches[256];
 
 extern const uint8_t _PyOpcode_Deopt[256];
 
 #ifdef NEED_OPCODE_TABLES
-static const uint32_t _PyOpcode_RelativeJump[9] = {
+const uint32_t _PyOpcode_RelativeJump[9] = {
     0U,
     0U,
     536870912U,
@@ -28,7 +32,7 @@ static const uint32_t _PyOpcode_RelativeJump[9] = {
     0U,
     48U,
 };
-static const uint32_t _PyOpcode_Jump[9] = {
+const uint32_t _PyOpcode_Jump[9] = {
     0U,
     0U,
     536870912U,
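The hunk above changes the jump tables from static const (a private copy in every translation unit that includes the header) to a single shared definition: the header now always declares the arrays extern, and exactly one .c file defines NEED_OPCODE_TABLES before including it to emit the definition, which is exactly what the compile.c hunk later in this diff does. A generic sketch of the pattern, with hypothetical names (mytables.h, NEED_MY_TABLES, my_table):

/* mytables.h: always declares; defines only when asked. */
extern const uint32_t my_table[9];
#ifdef NEED_MY_TABLES
const uint32_t my_table[9] = { 0U, 0U, 536870912U /* , ... */ };
#endif

/* one_file.c: the single translation unit that owns the definition. */
#define NEED_MY_TABLES
#include "mytables.h"
#undef NEED_MY_TABLES
/* Every other .c simply includes mytables.h and links against this copy. */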
diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h
new file mode 100644
index 00000000000000..96bb4d743bf29f
--- /dev/null
+++ b/Include/internal/pycore_opcode_utils.h
@@ -0,0 +1,95 @@
+#ifndef Py_INTERNAL_OPCODE_UTILS_H
+#define Py_INTERNAL_OPCODE_UTILS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_opcode.h"        // _PyOpcode_RelativeJump
+
+
+#define MAX_REAL_OPCODE 254
+
+#define IS_WITHIN_OPCODE_RANGE(opcode) \
+        (((opcode) >= 0 && (opcode) <= MAX_REAL_OPCODE) || \
+         IS_PSEUDO_OPCODE(opcode))
+
+#define IS_JUMP_OPCODE(opcode) \
+         is_bit_set_in_table(_PyOpcode_Jump, opcode)
+
+#define IS_BLOCK_PUSH_OPCODE(opcode) \
+        ((opcode) == SETUP_FINALLY || \
+         (opcode) == SETUP_WITH || \
+         (opcode) == SETUP_CLEANUP)
+
+#define HAS_TARGET(opcode) \
+        (IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode))
+
+/* opcodes that must be last in the basicblock */
+#define IS_TERMINATOR_OPCODE(opcode) \
+        (IS_JUMP_OPCODE(opcode) || IS_SCOPE_EXIT_OPCODE(opcode))
+
+/* opcodes which are not emitted in codegen stage, only by the assembler */
+#define IS_ASSEMBLER_OPCODE(opcode) \
+        ((opcode) == JUMP_FORWARD || \
+         (opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_BACKWARDS_JUMP_OPCODE(opcode) \
+        ((opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_UNCONDITIONAL_JUMP_OPCODE(opcode) \
+        ((opcode) == JUMP || \
+         (opcode) == JUMP_NO_INTERRUPT || \
+         (opcode) == JUMP_FORWARD || \
+         (opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_SCOPE_EXIT_OPCODE(opcode) \
+        ((opcode) == RETURN_VALUE || \
+         (opcode) == RETURN_CONST || \
+         (opcode) == RAISE_VARARGS || \
+         (opcode) == RERAISE)
+
+#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
+        ((opcode) == LOAD_FAST__LOAD_FAST || \
+         (opcode) == LOAD_FAST__LOAD_CONST || \
+         (opcode) == LOAD_CONST__LOAD_FAST || \
+         (opcode) == STORE_FAST__LOAD_FAST || \
+         (opcode) == STORE_FAST__STORE_FAST)
+
+
+#define LOG_BITS_PER_INT 5
+#define MASK_LOW_LOG_BITS 31
+
+static inline int
+is_bit_set_in_table(const uint32_t *table, int bitindex) {
+    /* Is the relevant bit set in the relevant word? */
+    /* Nine 32-bit words hold 288 bits, enough for all real
+     * and pseudo opcodes.
+     * Word is indexed by (bitindex>>log2(size of int in bits)).
+     * Bit within word is the low bits of bitindex.
+     */
+    if (bitindex >= 0 && bitindex < 288) {
+        uint32_t word = table[bitindex >> LOG_BITS_PER_INT];
+        return (word >> (bitindex & MASK_LOW_LOG_BITS)) & 1;
+    }
+    else {
+        return 0;
+    }
+}
+
+#undef LOG_BITS_PER_INT
+#undef MASK_LOW_LOG_BITS
+
+#define IS_RELATIVE_JUMP(opcode) (is_bit_set_in_table(_PyOpcode_RelativeJump, opcode))
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OPCODE_UTILS_H */
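is_bit_set_in_table() packs one flag per opcode into nine 32-bit words: the table word is selected by the high bits of the index and the bit within the word by the low five bits. A standalone, runnable sketch of the same technique; the demo table, names, and sample indices are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define LOG_BITS_PER_INT 5      /* 32 == 1 << 5 */
#define MASK_LOW_LOG_BITS 31

static uint32_t table[9];       /* 9 words * 32 bits = 288 usable bits */

static void
set_bit(int i)
{
    table[i >> LOG_BITS_PER_INT] |= (uint32_t)1 << (i & MASK_LOW_LOG_BITS);
}

static int
test_bit(int i)
{
    if (i < 0 || i >= 288) {    /* same guard shape as is_bit_set_in_table() */
        return 0;
    }
    return (table[i >> LOG_BITS_PER_INT] >> (i & MASK_LOW_LOG_BITS)) & 1;
}

int
main(void)
{
    set_bit(3);
    set_bit(260);               /* a pseudo-opcode-sized index still fits */
    printf("%d %d %d\n", test_bit(3), test_bit(4), test_bit(260)); /* 1 0 1 */
    return 0;
}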
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 74e4171b010d0f..b97daaf6f445cc 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -374,6 +374,7 @@ PYTHON_OBJS= \
 		Python/ast_unparse.o \
 		Python/bltinmodule.o \
 		Python/ceval.o \
+		Python/flowgraph.o \
 		Python/codecs.o \
 		Python/compile.o \
 		Python/context.o \
@@ -1702,6 +1703,8 @@ PYTHON_HEADERS= \
 		$(srcdir)/Include/internal/pycore_object_state.h \
 		$(srcdir)/Include/internal/pycore_obmalloc.h \
 		$(srcdir)/Include/internal/pycore_obmalloc_init.h \
+		$(srcdir)/Include/internal/pycore_opcode.h \
+		$(srcdir)/Include/internal/pycore_opcode_utils.h \
 		$(srcdir)/Include/internal/pycore_pathconfig.h \
 		$(srcdir)/Include/internal/pycore_pyarena.h \
 		$(srcdir)/Include/internal/pycore_pyerrors.h \
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index 4f39756019e692..28eced6d4b2862 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -183,6 +183,7 @@
+    <ClCompile Include="..\Python\flowgraph.c" />
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 7d7c4587b9a3f3..e4faa89bb831d9 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -76,6 +76,9 @@
+    <ClCompile Include="..\Python\flowgraph.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index c754b2165745ff..8fab600334160d 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -205,6 +205,7 @@
+    <ClInclude Include="..\Include\internal\pycore_flowgraph.h" />
@@ -504,6 +505,7 @@
+    <ClCompile Include="..\Python\flowgraph.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 90ed0602821bff..6c5d8dd89f5bc7 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1106,6 +1106,9 @@
+    <ClCompile Include="..\Python\flowgraph.c">
+      <Filter>Python</Filter>
+    </ClCompile>
diff --git a/Python/compile.c b/Python/compile.c
index 192deaa4b35f4d..fed9ae7066e4f0 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -23,23 +23,21 @@
 #include <stdbool.h>
 
-// Need _PyOpcode_RelativeJump of pycore_opcode.h
-#define NEED_OPCODE_TABLES
-
 #include "Python.h"
 #include "pycore_ast.h"           // _PyAST_GetDocString()
+#define NEED_OPCODE_TABLES
+#include "pycore_opcode_utils.h"
+#undef NEED_OPCODE_TABLES
+#include "pycore_flowgraph.h"
 #include "pycore_code.h"          // _PyCode_New()
 #include "pycore_compile.h"
 #include "pycore_intrinsics.h"
 #include "pycore_long.h"          // _PyLong_GetZero()
-#include "pycore_opcode.h"        // _PyOpcode_Caches
 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
 #include "pycore_symtable.h"      // PySTEntryObject, _PyFuture_FromAST()
 
 #include "opcode_metadata.h"      // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed
-
-#define DEFAULT_BLOCK_SIZE 16
 #define DEFAULT_CODE_SIZE 128
 #define DEFAULT_LNOTAB_SIZE 16
 #define DEFAULT_CNOTAB_SIZE 32
@@ -83,68 +81,17 @@
  */
 #define MAX_ALLOWED_STACK_USE (STACK_USE_GUIDELINE * 100)
 
-
-#define MAX_REAL_OPCODE 254
-
-#define IS_WITHIN_OPCODE_RANGE(opcode) \
-        (((opcode) >= 0 && (opcode) <= MAX_REAL_OPCODE) || \
-         IS_PSEUDO_OPCODE(opcode))
-
-#define IS_JUMP_OPCODE(opcode) \
-         is_bit_set_in_table(_PyOpcode_Jump, opcode)
-
-#define IS_BLOCK_PUSH_OPCODE(opcode) \
-        ((opcode) == SETUP_FINALLY || \
-         (opcode) == SETUP_WITH || \
-         (opcode) == SETUP_CLEANUP)
-
-#define HAS_TARGET(opcode) \
-        (IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode))
-
-/* opcodes that must be last in the basicblock */
-#define IS_TERMINATOR_OPCODE(opcode) \
-        (IS_JUMP_OPCODE(opcode) || IS_SCOPE_EXIT_OPCODE(opcode))
-
-/* opcodes which are not emitted in codegen stage, only by the assembler */
-#define IS_ASSEMBLER_OPCODE(opcode) \
-        ((opcode) == JUMP_FORWARD || \
-         (opcode) == JUMP_BACKWARD || \
-         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
-
-#define IS_BACKWARDS_JUMP_OPCODE(opcode) \
-        ((opcode) == JUMP_BACKWARD || \
-         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
-
-#define IS_UNCONDITIONAL_JUMP_OPCODE(opcode) \
-        ((opcode) == JUMP || \
-         (opcode) == JUMP_NO_INTERRUPT || \
-         (opcode) == JUMP_FORWARD || \
-         (opcode) == JUMP_BACKWARD || \
-         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
-
-#define IS_SCOPE_EXIT_OPCODE(opcode) \
-        ((opcode) == RETURN_VALUE || \
-         (opcode) == RETURN_CONST || \
-         (opcode) == RAISE_VARARGS || \
-         (opcode) == RERAISE)
-
-#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
-        ((opcode) == LOAD_FAST__LOAD_FAST || \
-         (opcode) == LOAD_FAST__LOAD_CONST || \
-         (opcode) == LOAD_CONST__LOAD_FAST || \
-         (opcode) == STORE_FAST__LOAD_FAST || \
-         (opcode) == STORE_FAST__STORE_FAST)
-
 #define IS_TOP_LEVEL_AWAIT(C) ( \
         ((C)->c_flags.cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
         && ((C)->u->u_ste->ste_type == ModuleBlock))
 
 typedef _PyCompilerSrcLocation location;
+typedef _PyCfgInstruction cfg_instr;
+typedef _PyCfgBasicblock basicblock;
+typedef _PyCfgBuilder cfg_builder;
 
 #define LOCATION(LNO, END_LNO, COL, END_COL) \
-    ((const location){(LNO), (END_LNO), (COL), (END_COL)})
-
-static location NO_LOCATION = {-1, -1, -1, -1};
+    ((const _PyCompilerSrcLocation){(LNO), (END_LNO), (COL), (END_COL)})
 
 /* Return true if loc1 starts after loc2 ends. */
 static inline bool
@@ -165,11 +112,9 @@ same_location(location a, location b)
 
 #define LOC(x) SRC_LOCATION_FROM_AST(x)
 
-typedef struct jump_target_label_ {
-    int id;
-} jump_target_label;
+typedef _PyCfgJumpTargetLabel jump_target_label;
 
-static struct jump_target_label_ NO_LABEL = {-1};
+static jump_target_label NO_LABEL = {-1};
 
 #define SAME_LABEL(L1, L2) ((L1).id == (L2).id)
 #define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL)))
@@ -183,88 +128,9 @@ static struct jump_target_label_ NO_LABEL = {-1};
 #define USE_LABEL(C, LBL) \
     RETURN_IF_ERROR(instr_sequence_use_label(INSTR_SEQUENCE(C), (LBL).id))
 
-struct cfg_instr {
-    int i_opcode;
-    int i_oparg;
-    location i_loc;
-    struct basicblock_ *i_target; /* target block (if jump instruction) */
-    struct basicblock_ *i_except; /* target block when exception is raised */
-};
-
-/* One arg*/
-#define INSTR_SET_OP1(I, OP, ARG) \
-    do { \
-        assert(HAS_ARG(OP)); \
-        struct cfg_instr *_instr__ptr_ = (I); \
-        _instr__ptr_->i_opcode = (OP); \
-        _instr__ptr_->i_oparg = (ARG); \
-    } while (0);
-
-/* No args*/
-#define INSTR_SET_OP0(I, OP) \
-    do { \
-        assert(!HAS_ARG(OP)); \
-        struct cfg_instr *_instr__ptr_ = (I); \
-        _instr__ptr_->i_opcode = (OP); \
-        _instr__ptr_->i_oparg = 0; \
-    } while (0);
-
-typedef struct exceptstack {
-    struct basicblock_ *handlers[CO_MAXBLOCKS+1];
-    int depth;
-} ExceptStack;
-
-#define LOG_BITS_PER_INT 5
-#define MASK_LOW_LOG_BITS 31
-
-static inline int
-is_bit_set_in_table(const uint32_t *table, int bitindex) {
-    /* Is the relevant bit set in the relevant word? */
-    /* 512 bits fit into 9 32-bits words.
-     * Word is indexed by (bitindex>>ln(size of int in bits)).
-     * Bit within word is the low bits of bitindex.
-     */
-    if (bitindex >= 0 && bitindex < 512) {
-        uint32_t word = table[bitindex >> LOG_BITS_PER_INT];
-        return (word >> (bitindex & MASK_LOW_LOG_BITS)) & 1;
-    }
-    else {
-        return 0;
-    }
-}
-
-static inline int
-is_relative_jump(struct cfg_instr *i)
-{
-    return is_bit_set_in_table(_PyOpcode_RelativeJump, i->i_opcode);
-}
-
-static inline int
-is_block_push(struct cfg_instr *i)
-{
-    return IS_BLOCK_PUSH_OPCODE(i->i_opcode);
-}
-
-static inline int
-is_jump(struct cfg_instr *i)
-{
-    return IS_JUMP_OPCODE(i->i_opcode);
-}
-
-static int
-instr_size(struct cfg_instr *instruction)
-{
-    int opcode = instruction->i_opcode;
-    assert(!IS_PSEUDO_OPCODE(opcode));
-    int oparg = instruction->i_oparg;
-    assert(HAS_ARG(opcode) || oparg == 0);
-    int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
-    int caches = _PyOpcode_Caches[opcode];
-    return extended_args + 1 + caches;
-}
 
 static void
-write_instr(_Py_CODEUNIT *codestr, struct cfg_instr *instruction, int ilen)
+write_instr(_Py_CODEUNIT *codestr, cfg_instr *instruction, int ilen)
 {
     int opcode = instruction->i_opcode;
     assert(!IS_PSEUDO_OPCODE(opcode));
@@ -302,71 +168,6 @@ write_instr(_Py_CODEUNIT *codestr, cfg_instr *instruction, int ilen)
     }
 }
 
-typedef struct basicblock_ {
-    /* Each basicblock in a compilation unit is linked via b_list in the
-       reverse order that the block are allocated.  b_list points to the next
-       block, not to be confused with b_next, which is next by control flow. */
-    struct basicblock_ *b_list;
-    /* The label of this block if it is a jump target, -1 otherwise */
-    jump_target_label b_label;
-    /* Exception stack at start of block, used by assembler to create the exception handling table */
-    ExceptStack *b_exceptstack;
-    /* pointer to an array of instructions, initially NULL */
-    struct cfg_instr *b_instr;
-    /* If b_next is non-NULL, it is a pointer to the next
-       block reached by normal control flow. */
-    struct basicblock_ *b_next;
-    /* number of instructions used */
-    int b_iused;
-    /* length of instruction array (b_instr) */
-    int b_ialloc;
-    /* Used by add_checks_for_loads_of_unknown_variables */
-    uint64_t b_unsafe_locals_mask;
-    /* Number of predecessors that a block has. */
-    int b_predecessors;
-    /* depth of stack upon entry of block, computed by stackdepth() */
-    int b_startdepth;
-    /* instruction offset for block, computed by assemble_jump_offsets() */
-    int b_offset;
-    /* Basic block is an exception handler that preserves lasti */
-    unsigned b_preserve_lasti : 1;
-    /* Used by compiler passes to mark whether they have visited a basic block. */
-    unsigned b_visited : 1;
-    /* b_except_handler is used by the cold-detection algorithm to mark exception targets */
-    unsigned b_except_handler : 1;
-    /* b_cold is true if this block is not perf critical (like an exception handler) */
-    unsigned b_cold : 1;
-    /* b_warm is used by the cold-detection algorithm to mark blocks which are definitely not cold */
-    unsigned b_warm : 1;
-} basicblock;
-
-
-static struct cfg_instr *
-basicblock_last_instr(const basicblock *b) {
-    assert(b->b_iused >= 0);
-    if (b->b_iused > 0) {
-        assert(b->b_instr != NULL);
-        return &b->b_instr[b->b_iused - 1];
-    }
-    return NULL;
-}
-
-static inline int
-basicblock_exits_scope(const basicblock *b) {
-    struct cfg_instr *last = basicblock_last_instr(b);
-    return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode);
-}
-
-static inline int
-basicblock_nofallthrough(const basicblock *b) {
-    struct cfg_instr *last = basicblock_last_instr(b);
-    return (last &&
-            (IS_SCOPE_EXIT_OPCODE(last->i_opcode) ||
-             IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)));
-}
-
-#define BB_NO_FALLTHROUGH(B) (basicblock_nofallthrough(B))
-#define BB_HAS_FALLTHROUGH(B) (!basicblock_nofallthrough(B))
 
 
 /* fblockinfo tracks the current frame block.
@@ -397,26 +198,12 @@ enum {
     COMPILER_SCOPE_COMPREHENSION,
 };
 
-typedef struct cfg_builder_ {
-    /* The entryblock, at which control flow begins. All blocks of the
-       CFG are reachable through the b_next links */
-    basicblock *g_entryblock;
-    /* Pointer to the most recently allocated block.  By following
-       b_list links, you can reach all allocated blocks. */
-    basicblock *g_block_list;
-    /* pointer to the block currently being constructed */
-    basicblock *g_curblock;
-    /* label for the next instruction to be placed */
-    jump_target_label g_current_label;
-} cfg_builder;
-
 typedef struct {
     int i_opcode;
    int i_oparg;
     location i_loc;
 } instruction;
 
-
 typedef struct instr_sequence_ {
     instruction *s_instrs;
     int s_allocated;
@@ -440,10 +227,11 @@ typedef struct instr_sequence_ {
  *   item_size: size of each item
  *
  */
-static int
-ensure_array_large_enough(int idx, void **arr_, int *alloc, int default_alloc, size_t item_size)
+int
+_PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc,
+                                  int default_alloc, size_t item_size)
 {
-    void *arr = *arr_;
+    void *arr = *array;
     if (arr == NULL) {
         int new_alloc = default_alloc;
         if (idx >= new_alloc) {
@@ -480,7 +268,7 @@ ensure_array_large_enough(int idx, void **arr_, int *alloc, int default_alloc, s
         memset((char *)arr + oldsize, 0, newsize - oldsize);
     }
 
-    *arr_ = arr;
+    *array = arr;
     return SUCCESS;
 }
 
@@ -489,11 +277,11 @@ instr_sequence_next_inst(instr_sequence *seq) {
     assert(seq->s_instrs != NULL || seq->s_used == 0);
     RETURN_IF_ERROR(
-        ensure_array_large_enough(seq->s_used + 1,
-                                  (void**)&seq->s_instrs,
-                                  &seq->s_allocated,
-                                  INITIAL_INSTR_SEQUENCE_SIZE,
-                                  sizeof(instruction)));
+        _PyCompile_EnsureArrayLargeEnough(seq->s_used + 1,
+                                          (void**)&seq->s_instrs,
+                                          &seq->s_allocated,
+                                          INITIAL_INSTR_SEQUENCE_SIZE,
+                                          sizeof(instruction)));
     assert(seq->s_used < seq->s_allocated);
     return seq->s_used++;
 }
@@ -509,11 +297,11 @@ static int
 instr_sequence_use_label(instr_sequence *seq, int lbl) {
     int old_size = seq->s_labelmap_size;
     RETURN_IF_ERROR(
-        ensure_array_large_enough(lbl,
-                                  (void**)&seq->s_labelmap,
-                                  &seq->s_labelmap_size,
-                                  INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE,
-                                  sizeof(int)));
+        _PyCompile_EnsureArrayLargeEnough(lbl,
+                                          (void**)&seq->s_labelmap,
+                                          &seq->s_labelmap_size,
+                                          INITIAL_INSTR_SEQUENCE_LABELS_MAP_SIZE,
+                                          sizeof(int)));
     for(int i = old_size; i < seq->s_labelmap_size; i++) {
         seq->s_labelmap[i] = -111;  /* something weird, for debugging */
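Both hunks above funnel array growth through the newly exported _PyCompile_EnsureArrayLargeEnough(): callers pass the index they are about to write, the array and its capacity by reference, a default initial size, and the element size, and on success the array is guaranteed to have room, with any new space zero-filled. A hedged usage sketch; demo_item and demo_append are hypothetical, and 0/-1 follow the patch's SUCCESS/ERROR convention:

typedef struct { int opcode; int oparg; } demo_item;   /* hypothetical element */

static demo_item *items = NULL;
static int capacity = 0;        /* updated in place by the helper */
static int used = 0;

static int
demo_append(int opcode, int oparg)
{
    /* Grow (and zero-fill) the backing array so that index 'used' is valid. */
    if (_PyCompile_EnsureArrayLargeEnough(used + 1, (void**)&items,
                                          &capacity, 16,
                                          sizeof(demo_item)) < 0) {
        return -1;              /* allocation failure */
    }
    items[used].opcode = opcode;
    items[used].oparg = oparg;
    used++;
    return 0;
}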
@@ -572,29 +360,11 @@ instr_sequence_fini(instr_sequence *seq) {
     seq->s_instrs = NULL;
 }
 
-static int basicblock_addop(basicblock *b, int opcode, int oparg, location loc);
-static int cfg_builder_maybe_start_new_block(cfg_builder *g);
-
-static int
-cfg_builder_use_label(cfg_builder *g, jump_target_label lbl)
-{
-    g->g_current_label = lbl;
-    return cfg_builder_maybe_start_new_block(g);
-}
-
-static int
-cfg_builder_addop(cfg_builder *g, int opcode, int oparg, location loc)
-{
-    RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g));
-    return basicblock_addop(g->g_curblock, opcode, oparg, loc);
-}
-
-static int cfg_builder_init(cfg_builder *g);
 static int
 instr_sequence_to_cfg(instr_sequence *seq, cfg_builder *g) {
     memset(g, 0, sizeof(cfg_builder));
-    RETURN_IF_ERROR(cfg_builder_init(g));
+    RETURN_IF_ERROR(_PyCfgBuilder_Init(g));
 
     /* There can be more than one label for the same offset. The
      * offset2lbl maping selects one of them which we use consistently.
@@ -621,7 +391,7 @@ instr_sequence_to_cfg(instr_sequence *seq, cfg_builder *g) {
         if (lbl >= 0) {
             assert (lbl < seq->s_labelmap_size);
             jump_target_label lbl_ = {lbl};
-            if (cfg_builder_use_label(g, lbl_) < 0) {
+            if (_PyCfgBuilder_UseLabel(g, lbl_) < 0) {
                 goto error;
             }
         }
@@ -635,7 +405,7 @@ instr_sequence_to_cfg(instr_sequence *seq, cfg_builder *g) {
             assert(lbl >= 0 && lbl < seq->s_labelmap_size);
             oparg = lbl;
         }
-        if (cfg_builder_addop(g, opcode, oparg, instr->i_loc) < 0) {
+        if (_PyCfgBuilder_Addop(g, opcode, oparg, instr->i_loc) < 0) {
             goto error;
         }
     }
@@ -746,8 +516,6 @@ typedef struct {
     Py_ssize_t on_top;
 } pattern_context;
 
-static int basicblock_next_instr(basicblock *);
-
 static int codegen_addop_i(instr_sequence *seq, int opcode, Py_ssize_t oparg, location loc);
 
 static void compiler_free(struct compiler *);
@@ -798,8 +566,6 @@ static int compiler_match(struct compiler *, stmt_ty);
 static int compiler_pattern_subpattern(struct compiler *, pattern_ty, pattern_context *);
 
-static int remove_redundant_nops(basicblock *bb);
-
 static PyCodeObject *assemble(struct compiler *, int addNone);
 
 #define CAPSULE_NAME "compile.c compiler unit"
@@ -979,57 +745,6 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset)
     return dest;
 }
 
-#ifndef NDEBUG
-static bool
-cfg_builder_check(cfg_builder *g)
-{
-    assert(g->g_entryblock->b_iused > 0);
-    for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) {
-        assert(!_PyMem_IsPtrFreed(block));
-        if (block->b_instr != NULL) {
-            assert(block->b_ialloc > 0);
-            assert(block->b_iused >= 0);
-            assert(block->b_ialloc >= block->b_iused);
-        }
-        else {
-            assert (block->b_iused == 0);
-            assert (block->b_ialloc == 0);
-        }
-    }
-    return true;
-}
-#endif
-
-static basicblock *cfg_builder_new_block(cfg_builder *g);
-
-static int
-cfg_builder_init(cfg_builder *g)
-{
-    g->g_block_list = NULL;
-    basicblock *block = cfg_builder_new_block(g);
-    if (block == NULL) {
-        return ERROR;
-    }
-    g->g_curblock = g->g_entryblock = block;
-    g->g_current_label = NO_LABEL;
-    return SUCCESS;
-}
-
-static void
-cfg_builder_fini(cfg_builder* g)
-{
-    assert(cfg_builder_check(g));
-    basicblock *b = g->g_block_list;
-    while (b != NULL) {
-        if (b->b_instr) {
-            PyObject_Free((void *)b->b_instr);
-        }
-        basicblock *next = b->b_list;
-        PyObject_Free((void *)b);
-        b = next;
-    }
-}
-
 static void
 compiler_unit_free(struct compiler_unit *u)
 {
@@ -1119,85 +834,6 @@ compiler_set_qualname(struct compiler *c)
     return SUCCESS;
 }
 
-/* Allocate a new block and return a pointer to it.
-   Returns NULL on error.
-*/
-static basicblock *
-cfg_builder_new_block(cfg_builder *g)
-{
-    basicblock *b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock));
-    if (b == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-    /* Extend the singly linked list of blocks with new block. */
-    b->b_list = g->g_block_list;
-    g->g_block_list = b;
-    b->b_label = NO_LABEL;
-    return b;
-}
-
-static basicblock *
-cfg_builder_use_next_block(cfg_builder *g, basicblock *block)
-{
-    assert(block != NULL);
-    g->g_curblock->b_next = block;
-    g->g_curblock = block;
-    return block;
-}
-
-static inline int
-basicblock_append_instructions(basicblock *target, basicblock *source)
-{
-    for (int i = 0; i < source->b_iused; i++) {
-        int n = basicblock_next_instr(target);
-        if (n < 0) {
-            return ERROR;
-        }
-        target->b_instr[n] = source->b_instr[i];
-    }
-    return SUCCESS;
-}
-
-static basicblock *
-copy_basicblock(cfg_builder *g, basicblock *block)
-{
-    /* Cannot copy a block if it has a fallthrough, since
-     * a block can only have one fallthrough predecessor.
-     */
-    assert(BB_NO_FALLTHROUGH(block));
-    basicblock *result = cfg_builder_new_block(g);
-    if (result == NULL) {
-        return NULL;
-    }
-    if (basicblock_append_instructions(result, block) < 0) {
-        return NULL;
-    }
-    return result;
-}
-
-/* Returns the offset of the next instruction in the current block's
-   b_instr array. Resizes the b_instr as necessary.
-   Returns -1 on failure.
-*/
-
-static int
-basicblock_next_instr(basicblock *b)
-{
-    assert(b != NULL);
-
-    RETURN_IF_ERROR(
-        ensure_array_large_enough(
-            b->b_iused + 1,
-            (void**)&b->b_instr,
-            &b->b_ialloc,
-            DEFAULT_BLOCK_SIZE,
-            sizeof(struct cfg_instr)));
-
-    return b->b_iused++;
-}
-
-
 /* Return the stack effect of opcode with argument oparg.
 
    Some opcodes have different stack effect when jump to the target and
@@ -1290,62 +926,6 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg)
     return stack_effect(opcode, oparg, -1);
 }
 
-static int
-basicblock_addop(basicblock *b, int opcode, int oparg, location loc)
-{
-    assert(IS_WITHIN_OPCODE_RANGE(opcode));
-    assert(!IS_ASSEMBLER_OPCODE(opcode));
-    assert(HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0);
-    assert(0 <= oparg && oparg < (1 << 30));
-
-    int off = basicblock_next_instr(b);
-    if (off < 0) {
-        return ERROR;
-    }
-    struct cfg_instr *i = &b->b_instr[off];
-    i->i_opcode = opcode;
-    i->i_oparg = oparg;
-    i->i_target = NULL;
-    i->i_loc = loc;
-
-    return SUCCESS;
-}
-
-static bool
-cfg_builder_current_block_is_terminated(cfg_builder *g)
-{
-    struct cfg_instr *last = basicblock_last_instr(g->g_curblock);
-    if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) {
-        return true;
-    }
-    if (IS_LABEL(g->g_current_label)) {
-        if (last || IS_LABEL(g->g_curblock->b_label)) {
-            return true;
-        }
-        else {
-            /* current block is empty, label it */
-            g->g_curblock->b_label = g->g_current_label;
-            g->g_current_label = NO_LABEL;
-        }
-    }
-    return false;
-}
-
-static int
-cfg_builder_maybe_start_new_block(cfg_builder *g)
-{
-    if (cfg_builder_current_block_is_terminated(g)) {
-        basicblock *b = cfg_builder_new_block(g);
-        if (b == NULL) {
-            return ERROR;
-        }
-        b->b_label = g->g_current_label;
-        g->g_current_label = NO_LABEL;
-        cfg_builder_use_next_block(g, b);
-    }
-    return SUCCESS;
-}
-
 static int
 codegen_addop_noarg(instr_sequence *seq, int opcode, location loc)
 {
@@ -2520,16 +2100,6 @@ compiler_check_debug_args(struct compiler *c, arguments_ty args)
     return SUCCESS;
 }
 
-static inline int
-insert_instruction(basicblock *block, int pos, struct cfg_instr *instr) {
-    RETURN_IF_ERROR(basicblock_next_instr(block));
-    for (int i = block->b_iused - 1; i > pos; i--) {
-        block->b_instr[i] = block->b_instr[i-1];
-    }
-    block->b_instr[pos] = *instr;
-    return SUCCESS;
-}
-
 static int
 wrap_in_stopiteration_handler(struct compiler *c)
 {
@@ -7037,101 +6607,6 @@ struct assembler {
     int a_location_off;    /* offset of last written location info frame */
 };
 
-static basicblock**
-make_cfg_traversal_stack(basicblock *entryblock) {
-    int nblocks = 0;
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        b->b_visited = 0;
-        nblocks++;
-    }
-    basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks);
-    if (!stack) {
-        PyErr_NoMemory();
-    }
-    return stack;
-}
-
-Py_LOCAL_INLINE(void)
-stackdepth_push(basicblock ***sp, basicblock *b, int depth)
-{
-    assert(b->b_startdepth < 0 || b->b_startdepth == depth);
-    if (b->b_startdepth < depth && b->b_startdepth < 100) {
-        assert(b->b_startdepth < 0);
-        b->b_startdepth = depth;
-        *(*sp)++ = b;
-    }
-}
-
-/* Find the flow path that needs the largest stack.  We assume that
- * cycles in the flow graph have no net effect on the stack depth.
- */
-static int
-stackdepth(basicblock *entryblock, int code_flags)
-{
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        b->b_startdepth = INT_MIN;
-    }
-    basicblock **stack = make_cfg_traversal_stack(entryblock);
-    if (!stack) {
-        return ERROR;
-    }
-
-    int maxdepth = 0;
-    basicblock **sp = stack;
-    if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
-        stackdepth_push(&sp, entryblock, 1);
-    } else {
-        stackdepth_push(&sp, entryblock, 0);
-    }
-
-    while (sp != stack) {
-        basicblock *b = *--sp;
-        int depth = b->b_startdepth;
-        assert(depth >= 0);
-        basicblock *next = b->b_next;
-        for (int i = 0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0);
-            if (effect == PY_INVALID_STACK_EFFECT) {
-                PyErr_Format(PyExc_SystemError,
-                             "compiler stack_effect(opcode=%d, arg=%i) failed",
-                             instr->i_opcode, instr->i_oparg);
-                return ERROR;
-            }
-            int new_depth = depth + effect;
-            assert(new_depth >= 0); /* invalid code or bug in stackdepth() */
-            if (new_depth > maxdepth) {
-                maxdepth = new_depth;
-            }
-            if (HAS_TARGET(instr->i_opcode)) {
-                effect = stack_effect(instr->i_opcode, instr->i_oparg, 1);
-                assert(effect != PY_INVALID_STACK_EFFECT);
-                int target_depth = depth + effect;
-                assert(target_depth >= 0); /* invalid code or bug in stackdepth() */
-                if (target_depth > maxdepth) {
-                    maxdepth = target_depth;
-                }
-                stackdepth_push(&sp, instr->i_target, target_depth);
-            }
-            depth = new_depth;
-            assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode));
-            if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) ||
-                IS_SCOPE_EXIT_OPCODE(instr->i_opcode))
-            {
-                /* remaining code is dead */
-                next = NULL;
-                break;
-            }
-        }
-        if (next != NULL) {
-            assert(BB_HAS_FALLTHROUGH(b));
-            stackdepth_push(&sp, next, depth);
-        }
-    }
-    PyMem_Free(stack);
-    return maxdepth;
-}
-
 static int
 assemble_init(struct assembler *a, int firstlineno)
 {
@@ -7168,389 +6643,47 @@ assemble_free(struct assembler *a)
     Py_XDECREF(a->a_except_table);
 }
 
-static int
-blocksize(basicblock *b)
-{
-    int size = 0;
-    for (int i = 0; i < b->b_iused; i++) {
-        size += instr_size(&b->b_instr[i]);
-    }
-    return size;
-}
-
-static basicblock *
-push_except_block(ExceptStack *stack, struct cfg_instr *setup) {
-    assert(is_block_push(setup));
-    int opcode = setup->i_opcode;
-    basicblock * target = setup->i_target;
-    if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) {
-        target->b_preserve_lasti = 1;
-    }
-    stack->handlers[++stack->depth] = target;
-    return target;
-}
-
-static basicblock *
-pop_except_block(ExceptStack *stack) {
-    assert(stack->depth > 0);
-    return stack->handlers[--stack->depth];
+static inline void
+write_except_byte(struct assembler *a, int byte) {
+    unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table);
+    p[a->a_except_table_off++] = byte;
 }
 
-static basicblock *
-except_stack_top(ExceptStack *stack) {
-    return stack->handlers[stack->depth];
-}
+#define CONTINUATION_BIT 64
 
-static ExceptStack *
-make_except_stack(void) {
-    ExceptStack *new = PyMem_Malloc(sizeof(ExceptStack));
-    if (new == NULL) {
-        PyErr_NoMemory();
-        return NULL;
+static void
+assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
+{
+    assert ((msb | 128) == 128);
+    assert(value >= 0 && value < (1 << 30));
+    if (value >= 1 << 24) {
+        write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb);
+        msb = 0;
     }
-    new->depth = 0;
-    new->handlers[0] = NULL;
-    return new;
-}
-
-static ExceptStack *
-copy_except_stack(ExceptStack *stack) {
-    ExceptStack *copy = PyMem_Malloc(sizeof(ExceptStack));
-    if (copy == NULL) {
-        PyErr_NoMemory();
-        return NULL;
+    if (value >= 1 << 18) {
+        write_except_byte(a, ((value >> 18)&0x3f) | CONTINUATION_BIT | msb);
+        msb = 0;
+    }
+    if (value >= 1 << 12) {
+        write_except_byte(a, ((value >> 12)&0x3f) | CONTINUATION_BIT | msb);
+        msb = 0;
     }
-    memcpy(copy, stack, sizeof(ExceptStack));
-    return copy;
+    if (value >= 1 << 6) {
+        write_except_byte(a, ((value >> 6)&0x3f) | CONTINUATION_BIT | msb);
+        msb = 0;
+    }
+    write_except_byte(a, (value&0x3f) | msb);
 }
 
+/* See Objects/exception_handling_notes.txt for details of layout */
+#define MAX_SIZE_OF_ENTRY 20
+
 static int
-label_exception_targets(basicblock *entryblock) {
-    basicblock **todo_stack = make_cfg_traversal_stack(entryblock);
-    if (todo_stack == NULL) {
-        return ERROR;
-    }
-    ExceptStack *except_stack = make_except_stack();
-    if (except_stack == NULL) {
-        PyMem_Free(todo_stack);
-        PyErr_NoMemory();
-        return ERROR;
-    }
-    except_stack->depth = 0;
-    todo_stack[0] = entryblock;
-    entryblock->b_visited = 1;
-    entryblock->b_exceptstack = except_stack;
-    basicblock **todo = &todo_stack[1];
-    basicblock *handler = NULL;
-    while (todo > todo_stack) {
-        todo--;
-        basicblock *b = todo[0];
-        assert(b->b_visited == 1);
-        except_stack = b->b_exceptstack;
-        assert(except_stack != NULL);
-        b->b_exceptstack = NULL;
-        handler = except_stack_top(except_stack);
-        for (int i = 0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            if (is_block_push(instr)) {
-                if (!instr->i_target->b_visited) {
-                    ExceptStack *copy = copy_except_stack(except_stack);
-                    if (copy == NULL) {
-                        goto error;
-                    }
-                    instr->i_target->b_exceptstack = copy;
-                    todo[0] = instr->i_target;
-                    instr->i_target->b_visited = 1;
-                    todo++;
-                }
-                handler = push_except_block(except_stack, instr);
-            }
-            else if (instr->i_opcode == POP_BLOCK) {
-                handler = pop_except_block(except_stack);
-            }
-            else if (is_jump(instr)) {
-                instr->i_except = handler;
-                assert(i == b->b_iused -1);
-                if (!instr->i_target->b_visited) {
-                    if (BB_HAS_FALLTHROUGH(b)) {
-                        ExceptStack *copy = copy_except_stack(except_stack);
-                        if (copy == NULL) {
-                            goto error;
-                        }
-                        instr->i_target->b_exceptstack = copy;
-                    }
-                    else {
-                        instr->i_target->b_exceptstack = except_stack;
-                        except_stack = NULL;
-                    }
-                    todo[0] = instr->i_target;
-                    instr->i_target->b_visited = 1;
-                    todo++;
-                }
-            }
-            else {
-                if (instr->i_opcode == YIELD_VALUE) {
-                    instr->i_oparg = except_stack->depth;
-                }
-                instr->i_except = handler;
-            }
-        }
-        if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) {
-            assert(except_stack != NULL);
-            b->b_next->b_exceptstack = except_stack;
-            todo[0] = b->b_next;
-            b->b_next->b_visited = 1;
-            todo++;
-        }
-        else if (except_stack != NULL) {
-            PyMem_Free(except_stack);
-        }
-    }
-#ifdef Py_DEBUG
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        assert(b->b_exceptstack == NULL);
-    }
-#endif
-    PyMem_Free(todo_stack);
-    return SUCCESS;
-error:
-    PyMem_Free(todo_stack);
-    PyMem_Free(except_stack);
-    return ERROR;
-}
-
-
-static int
-mark_except_handlers(basicblock *entryblock) {
-#ifndef NDEBUG
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        assert(!b->b_except_handler);
-    }
-#endif
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        for (int i=0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            if (is_block_push(instr)) {
-                instr->i_target->b_except_handler = 1;
-            }
-        }
-    }
-    return SUCCESS;
-}
-
-static int
-mark_warm(basicblock *entryblock) {
-    basicblock **stack = make_cfg_traversal_stack(entryblock);
-    if (stack == NULL) {
-        return ERROR;
-    }
-    basicblock **sp = stack;
-
-    *sp++ = entryblock;
-    entryblock->b_visited = 1;
-    while (sp > stack) {
-        basicblock *b = *(--sp);
-        assert(!b->b_except_handler);
-        b->b_warm = 1;
-        basicblock *next = b->b_next;
-        if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) {
-            *sp++ = next;
-            next->b_visited = 1;
-        }
-        for (int i=0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            if (is_jump(instr) && !instr->i_target->b_visited) {
-                *sp++ = instr->i_target;
-                instr->i_target->b_visited = 1;
-            }
-        }
-    }
-    PyMem_Free(stack);
-    return SUCCESS;
-}
-
-static int
-mark_cold(basicblock *entryblock) {
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        assert(!b->b_cold && !b->b_warm);
-    }
-    if (mark_warm(entryblock) < 0) {
-        return ERROR;
-    }
-
-    basicblock **stack = make_cfg_traversal_stack(entryblock);
-    if (stack == NULL) {
-        return ERROR;
-    }
-
-    basicblock **sp = stack;
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        if (b->b_except_handler) {
-            assert(!b->b_warm);
-            *sp++ = b;
-            b->b_visited = 1;
-        }
-    }
-
-    while (sp > stack) {
-        basicblock *b = *(--sp);
-        b->b_cold = 1;
-        basicblock *next = b->b_next;
-        if (next && BB_HAS_FALLTHROUGH(b)) {
-            if (!next->b_warm && !next->b_visited) {
-                *sp++ = next;
-                next->b_visited = 1;
-            }
-        }
-        for (int i = 0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            if (is_jump(instr)) {
-                assert(i == b->b_iused - 1);
-                basicblock *target = b->b_instr[i].i_target;
-                if (!target->b_warm && !target->b_visited) {
-                    *sp++ = target;
-                    target->b_visited = 1;
-                }
-            }
-        }
-    }
-    PyMem_Free(stack);
-    return SUCCESS;
-}
-
-static int
-remove_redundant_jumps(cfg_builder *g);
-
-static int
-push_cold_blocks_to_end(cfg_builder *g, int code_flags) {
-    basicblock *entryblock = g->g_entryblock;
-    if (entryblock->b_next == NULL) {
-        /* single basicblock, no need to reorder */
-        return SUCCESS;
-    }
-    RETURN_IF_ERROR(mark_cold(entryblock));
-
-    /* If we have a cold block with fallthrough to a warm block, add */
-    /* an explicit jump instead of fallthrough */
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) {
-            basicblock *explicit_jump = cfg_builder_new_block(g);
-            if (explicit_jump == NULL) {
-                return ERROR;
-            }
-            basicblock_addop(explicit_jump, JUMP, b->b_next->b_label.id, NO_LOCATION);
-            explicit_jump->b_cold = 1;
-            explicit_jump->b_next = b->b_next;
-            b->b_next = explicit_jump;
-
-            /* set target */
-            struct cfg_instr *last = basicblock_last_instr(explicit_jump);
-            last->i_target = explicit_jump->b_next;
-        }
-    }
-
-    assert(!entryblock->b_cold);  /* First block can't be cold */
-    basicblock *cold_blocks = NULL;
-    basicblock *cold_blocks_tail = NULL;
-
-    basicblock *b = entryblock;
-    while(b->b_next) {
-        assert(!b->b_cold);
-        while (b->b_next && !b->b_next->b_cold) {
-            b = b->b_next;
-        }
-        if (b->b_next == NULL) {
-            /* no more cold blocks */
-            break;
-        }
-
-        /* b->b_next is the beginning of a cold streak */
-        assert(!b->b_cold && b->b_next->b_cold);
-
-        basicblock *b_end = b->b_next;
-        while (b_end->b_next && b_end->b_next->b_cold) {
-            b_end = b_end->b_next;
-        }
-
-        /* b_end is the end of the cold streak */
-        assert(b_end && b_end->b_cold);
-        assert(b_end->b_next == NULL || !b_end->b_next->b_cold);
-
-        if (cold_blocks == NULL) {
-            cold_blocks = b->b_next;
-        }
-        else {
-            cold_blocks_tail->b_next = b->b_next;
-        }
-        cold_blocks_tail = b_end;
-        b->b_next = b_end->b_next;
-        b_end->b_next = NULL;
-    }
-    assert(b != NULL && b->b_next == NULL);
-    b->b_next = cold_blocks;
-
-    if (cold_blocks != NULL) {
-        RETURN_IF_ERROR(remove_redundant_jumps(g));
-    }
-    return SUCCESS;
-}
-
-static void
-convert_exception_handlers_to_nops(basicblock *entryblock) {
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        for (int i = 0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
-            if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) {
-                INSTR_SET_OP0(instr, NOP);
-            }
-        }
-    }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        remove_redundant_nops(b);
-    }
-}
-
-static inline void
-write_except_byte(struct assembler *a, int byte) {
-    unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table);
-    p[a->a_except_table_off++] = byte;
-}
-
-#define CONTINUATION_BIT 64
-
-static void
-assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
-{
-    assert ((msb | 128) == 128);
-    assert(value >= 0 && value < (1 << 30));
-    if (value >= 1 << 24) {
-        write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb);
-        msb = 0;
-    }
-    if (value >= 1 << 18) {
-        write_except_byte(a, ((value >> 18)&0x3f) | CONTINUATION_BIT | msb);
-        msb = 0;
-    }
-    if (value >= 1 << 12) {
-        write_except_byte(a, ((value >> 12)&0x3f) | CONTINUATION_BIT | msb);
-        msb = 0;
-    }
-    if (value >= 1 << 6) {
-        write_except_byte(a, ((value >> 6)&0x3f) | CONTINUATION_BIT | msb);
-        msb = 0;
-    }
-    write_except_byte(a, (value&0x3f) | msb);
-}
-
-/* See Objects/exception_handling_notes.txt for details of layout */
-#define MAX_SIZE_OF_ENTRY 20
-
-static int
-assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler)
-{
-    Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
-    if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
-        RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2));
+assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler)
+{
+    Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
+    if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
+        RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2));
     }
     int size = end-start;
     assert(end > start);
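The exception-table emitter above writes each value as a big-endian sequence of 6-bit digits, with bit 6 (CONTINUATION_BIT) set on every digit except the last; bit 7 (the msb argument) marks only the first byte of a table entry. A standalone sketch of the same varint scheme, which drops the msb marker for simplicity; encode_varint and decode_varint are hypothetical names, and Objects/exception_handling_notes.txt remains the authoritative description of the layout:

#include <stdio.h>

#define CONTINUATION_BIT 64

/* Encode value as 6-bit digits, high digits first; returns byte count. */
static int
encode_varint(unsigned char *buf, unsigned int value)
{
    int n = 0;
    for (int shift = 24; shift > 0; shift -= 6) {
        if (value >= (1u << shift)) {
            buf[n++] = ((value >> shift) & 0x3f) | CONTINUATION_BIT;
        }
    }
    buf[n++] = value & 0x3f;    /* final digit: continuation bit clear */
    return n;
}

static unsigned int
decode_varint(const unsigned char *buf, int *pos)
{
    unsigned int value = 0;
    unsigned char b;
    do {
        b = buf[(*pos)++];
        value = (value << 6) | (b & 0x3f);
    } while (b & CONTINUATION_BIT);
    return value;
}

int
main(void)
{
    unsigned char buf[5];
    int n = encode_varint(buf, 1234);
    int pos = 0;
    printf("%d bytes, round-trip %u\n", n, decode_varint(buf, &pos));
    return 0;   /* prints: 2 bytes, round-trip 1234 */
}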
@@ -7578,7 +6711,7 @@ assemble_exception_table(struct assembler *a, basicblock *entryblock)
     for (b = entryblock; b != NULL; b = b->b_next) {
         ioffset = b->b_offset;
         for (int i = 0; i < b->b_iused; i++) {
-            struct cfg_instr *instr = &b->b_instr[i];
+            cfg_instr *instr = &b->b_instr[i];
             if (instr->i_except != handler) {
                 if (handler != NULL) {
                     RETURN_IF_ERROR(
@@ -7587,7 +6720,7 @@ assemble_exception_table(struct assembler *a, basicblock *entryblock)
                 start = ioffset;
                 handler = instr->i_except;
             }
-            ioffset += instr_size(instr);
+            ioffset += _PyCfg_InstrSize(instr);
         }
     }
     if (handler != NULL) {
@@ -7755,7 +6888,7 @@ assemble_location_info(struct assembler *a, basicblock *entryblock, int firstlin
                 loc = b->b_instr[j].i_loc;
                 size = 0;
             }
-            size += instr_size(&b->b_instr[j]);
+            size += _PyCfg_InstrSize(&b->b_instr[j]);
         }
     }
     RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
@@ -7768,12 +6901,12 @@ assemble_location_info(struct assembler *a, basicblock *entryblock, int firstlin
  */
 
 static int
-assemble_emit_instr(struct assembler *a, struct cfg_instr *i)
+assemble_emit_instr(struct assembler *a, cfg_instr *i)
 {
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
     _Py_CODEUNIT *code;
 
-    int size = instr_size(i);
+    int size = _PyCfg_InstrSize(i);
     if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
         if (len > PY_SSIZE_T_MAX / 2) {
             return ERROR;
@@ -7786,8 +6919,6 @@ assemble_emit_instr(struct assembler *a, cfg_instr *i)
     return SUCCESS;
 }
 
-static int merge_const_one(PyObject *const_cache, PyObject **obj);
-
 static int
 assemble_emit(struct assembler *a, basicblock *entryblock, int first_lineno,
               PyObject *const_cache)
@@ -7805,391 +6936,95 @@ assemble_emit(struct assembler *a, basicblock *entryblock, int first_lineno,
 
     RETURN_IF_ERROR(assemble_exception_table(a, entryblock));
     RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off));
-    RETURN_IF_ERROR(merge_const_one(const_cache, &a->a_except_table));
+    RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table));
 
     RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, a->a_location_off));
-    RETURN_IF_ERROR(merge_const_one(const_cache, &a->a_linetable));
+    RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_linetable));
 
     RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, a->a_offset * sizeof(_Py_CODEUNIT)));
-    RETURN_IF_ERROR(merge_const_one(const_cache, &a->a_bytecode));
+    RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_bytecode));
     return SUCCESS;
 }
 
-static int
-normalize_jumps_in_block(cfg_builder *g, basicblock *b) {
-    struct cfg_instr *last = basicblock_last_instr(b);
-    if (last == NULL || !is_jump(last)) {
-        return SUCCESS;
-    }
-    assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
-    bool is_forward = last->i_target->b_visited == 0;
-    switch(last->i_opcode) {
-        case JUMP:
-            last->i_opcode = is_forward ? JUMP_FORWARD : JUMP_BACKWARD;
-            return SUCCESS;
-        case JUMP_NO_INTERRUPT:
-            last->i_opcode = is_forward ?
-                JUMP_FORWARD : JUMP_BACKWARD_NO_INTERRUPT;
-            return SUCCESS;
-    }
-    int reversed_opcode = 0;
-    switch(last->i_opcode) {
-        case POP_JUMP_IF_NOT_NONE:
-            reversed_opcode = POP_JUMP_IF_NONE;
-            break;
-        case POP_JUMP_IF_NONE:
-            reversed_opcode = POP_JUMP_IF_NOT_NONE;
-            break;
-        case POP_JUMP_IF_FALSE:
-            reversed_opcode = POP_JUMP_IF_TRUE;
-            break;
-        case POP_JUMP_IF_TRUE:
-            reversed_opcode = POP_JUMP_IF_FALSE;
-            break;
-    }
-    if (is_forward) {
-        return SUCCESS;
+static PyObject *
+dict_keys_inorder(PyObject *dict, Py_ssize_t offset)
+{
+    PyObject *tuple, *k, *v;
+    Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
+
+    tuple = PyTuple_New(size);
+    if (tuple == NULL)
+        return NULL;
+    while (PyDict_Next(dict, &pos, &k, &v)) {
+        i = PyLong_AS_LONG(v);
+        assert((i - offset) < size);
+        assert((i - offset) >= 0);
+        PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k));
     }
+    return tuple;
+}
 
-    /* transform 'conditional jump T' to
-     * 'reversed_jump b_next' followed by 'jump_backwards T'
-     */
+static PyObject *
+consts_dict_keys_inorder(PyObject *dict)
+{
+    PyObject *consts, *k, *v;
+    Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict);
 
-    basicblock *target = last->i_target;
-    basicblock *backwards_jump = cfg_builder_new_block(g);
-    if (backwards_jump == NULL) {
-        return ERROR;
+    consts = PyList_New(size);   /* PyCode_Optimize() requires a list */
+    if (consts == NULL)
+        return NULL;
+    while (PyDict_Next(dict, &pos, &k, &v)) {
+        i = PyLong_AS_LONG(v);
+        /* The keys of the dictionary can be tuples wrapping a constant.
+         * (see dict_add_o and _PyCode_ConstantKey). In that case
+         * the object we want is always second. */
+        if (PyTuple_CheckExact(k)) {
+            k = PyTuple_GET_ITEM(k, 1);
+        }
+        assert(i < size);
+        assert(i >= 0);
+        PyList_SET_ITEM(consts, i, Py_NewRef(k));
     }
-    basicblock_addop(backwards_jump, JUMP, target->b_label.id, NO_LOCATION);
-    backwards_jump->b_instr[0].i_target = target;
-    last->i_opcode = reversed_opcode;
-    last->i_target = b->b_next;
-
-    backwards_jump->b_cold = b->b_cold;
-    backwards_jump->b_next = b->b_next;
-    b->b_next = backwards_jump;
-    return SUCCESS;
+    return consts;
 }
 
 static int
-normalize_jumps(cfg_builder *g)
+compute_code_flags(struct compiler *c)
 {
-    basicblock *entryblock = g->g_entryblock;
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        b->b_visited = 0;
-    }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        b->b_visited = 1;
-        RETURN_IF_ERROR(normalize_jumps_in_block(g, b));
+    PySTEntryObject *ste = c->u->u_ste;
+    int flags = 0;
+    if (ste->ste_type == FunctionBlock) {
+        flags |= CO_NEWLOCALS | CO_OPTIMIZED;
+        if (ste->ste_nested)
+            flags |= CO_NESTED;
+        if (ste->ste_generator && !ste->ste_coroutine)
+            flags |= CO_GENERATOR;
+        if (!ste->ste_generator && ste->ste_coroutine)
+            flags |= CO_COROUTINE;
+        if (ste->ste_generator && ste->ste_coroutine)
+            flags |= CO_ASYNC_GENERATOR;
+        if (ste->ste_varargs)
+            flags |= CO_VARARGS;
+        if (ste->ste_varkeywords)
+            flags |= CO_VARKEYWORDS;
     }
-    return SUCCESS;
-}
-
-static void
-assemble_jump_offsets(basicblock *entryblock)
-{
-    int bsize, totsize, extended_arg_recompile;
-
-    /* Compute the size of each block and fixup jump args.
-       Replace block pointer with position in bytecode. */
-    do {
-        totsize = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = blocksize(b);
-            b->b_offset = totsize;
-            totsize += bsize;
-        }
-        extended_arg_recompile = 0;
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            bsize = b->b_offset;
-            for (int i = 0; i < b->b_iused; i++) {
-                struct cfg_instr *instr = &b->b_instr[i];
-                int isize = instr_size(instr);
-                /* Relative jumps are computed relative to
-                   the instruction pointer after fetching
-                   the jump instruction.
-                */
-                bsize += isize;
-                if (is_jump(instr)) {
-                    instr->i_oparg = instr->i_target->b_offset;
-                    if (is_relative_jump(instr)) {
-                        if (instr->i_oparg < bsize) {
-                            assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                            instr->i_oparg = bsize - instr->i_oparg;
-                        }
-                        else {
-                            assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                            instr->i_oparg -= bsize;
-                        }
-                    }
-                    else {
-                        assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
-                    }
-                    if (instr_size(instr) != isize) {
-                        extended_arg_recompile = 1;
-                    }
-                }
-            }
-        }
 
-        /* XXX: This is an awful hack that could hurt performance, but
-            on the bright side it should work until we come up
-            with a better solution.
+    /* (Only) inherit compilerflags in PyCF_MASK */
+    flags |= (c->c_flags.cf_flags & PyCF_MASK);
 
-            The issue is that in the first loop blocksize() is called
-            which calls instr_size() which requires i_oparg be set
-            appropriately. There is a bootstrap problem because
-            i_oparg is calculated in the second loop above.
+    if ((IS_TOP_LEVEL_AWAIT(c)) &&
+        ste->ste_coroutine &&
+        !ste->ste_generator) {
+        flags |= CO_COROUTINE;
+    }
 
-            So we loop until we stop seeing new EXTENDED_ARGs.
-            The only EXTENDED_ARGs that could be popping up are
-            ones in jump instructions. So this should converge
-            fairly quickly.
-        */
-    } while (extended_arg_recompile);
+    return flags;
 }
- if (fast_scan_many_locals(entryblock, nlocals) < 0) { - return ERROR; - } - nlocals = 64; - } - basicblock **stack = make_cfg_traversal_stack(entryblock); - if (stack == NULL) { - return ERROR; - } - basicblock **sp = stack; - - // First origin of being uninitialized: - // The non-parameter locals in the entry block. - uint64_t start_mask = 0; - for (int i = nparams; i < nlocals; i++) { - start_mask |= (uint64_t)1 << i; - } - maybe_push(entryblock, start_mask, &sp); - - // Second origin of being uninitialized: - // There could be DELETE_FAST somewhere, so - // be sure to scan each basicblock at least once. - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - scan_block_for_locals(b, &sp); - } - - // Now propagate the uncertainty from the origins we found: Use - // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined. - while (sp > stack) { - basicblock *b = *--sp; - // mark as no longer on stack - b->b_visited = 0; - scan_block_for_locals(b, &sp); - } - PyMem_Free(stack); - return SUCCESS; -} - -static PyObject * -dict_keys_inorder(PyObject *dict, Py_ssize_t offset) -{ - PyObject *tuple, *k, *v; - Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict); - - tuple = PyTuple_New(size); - if (tuple == NULL) - return NULL; - while (PyDict_Next(dict, &pos, &k, &v)) { - i = PyLong_AS_LONG(v); - assert((i - offset) < size); - assert((i - offset) >= 0); - PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k)); - } - return tuple; -} - -static PyObject * -consts_dict_keys_inorder(PyObject *dict) -{ - PyObject *consts, *k, *v; - Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict); - - consts = PyList_New(size); /* PyCode_Optimize() requires a list */ - if (consts == NULL) - return NULL; - while (PyDict_Next(dict, &pos, &k, &v)) { - i = PyLong_AS_LONG(v); - /* The keys of the dictionary can be tuples wrapping a constant. - * (see dict_add_o and _PyCode_ConstantKey). In that case - * the object we want is always second. */ - if (PyTuple_CheckExact(k)) { - k = PyTuple_GET_ITEM(k, 1); - } - assert(i < size); - assert(i >= 0); - PyList_SET_ITEM(consts, i, Py_NewRef(k)); - } - return consts; -} - -static int -compute_code_flags(struct compiler *c) -{ - PySTEntryObject *ste = c->u->u_ste; - int flags = 0; - if (ste->ste_type == FunctionBlock) { - flags |= CO_NEWLOCALS | CO_OPTIMIZED; - if (ste->ste_nested) - flags |= CO_NESTED; - if (ste->ste_generator && !ste->ste_coroutine) - flags |= CO_GENERATOR; - if (!ste->ste_generator && ste->ste_coroutine) - flags |= CO_COROUTINE; - if (ste->ste_generator && ste->ste_coroutine) - flags |= CO_ASYNC_GENERATOR; - if (ste->ste_varargs) - flags |= CO_VARARGS; - if (ste->ste_varkeywords) - flags |= CO_VARKEYWORDS; - } - - /* (Only) inherit compilerflags in PyCF_MASK */ - flags |= (c->c_flags.cf_flags & PyCF_MASK); - - if ((IS_TOP_LEVEL_AWAIT(c)) && - ste->ste_coroutine && - !ste->ste_generator) { - flags |= CO_COROUTINE; - } - - return flags; -} - -// Merge *obj* with constant cache. -// Unlike merge_consts_recursive(), this function doesn't work recursively. -static int -merge_const_one(PyObject *const_cache, PyObject **obj) +// Merge *obj* with constant cache. +// Unlike merge_consts_recursive(), this function doesn't work recursively. 
+int +_PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj) { assert(PyDict_CheckExact(const_cache)); PyObject *key = _PyCode_ConstantKey(*obj); @@ -8279,7 +7114,7 @@ makecode(struct compiler_unit *u, struct assembler *a, PyObject *const_cache, if (!names) { goto error; } - if (merge_const_one(const_cache, &names) < 0) { + if (_PyCompile_ConstCacheMergeOne(const_cache, &names) < 0) { goto error; } @@ -8287,7 +7122,7 @@ makecode(struct compiler_unit *u, struct assembler *a, PyObject *const_cache, if (consts == NULL) { goto error; } - if (merge_const_one(const_cache, &consts) < 0) { + if (_PyCompile_ConstCacheMergeOne(const_cache, &consts) < 0) { goto error; } @@ -8338,7 +7173,7 @@ makecode(struct compiler_unit *u, struct assembler *a, PyObject *const_cache, goto error; } - if (merge_const_one(const_cache, &localsplusnames) < 0) { + if (_PyCompile_ConstCacheMergeOne(const_cache, &localsplusnames) < 0) { goto error; } con.localsplusnames = localsplusnames; @@ -8357,64 +7192,6 @@ makecode(struct compiler_unit *u, struct assembler *a, PyObject *const_cache, } -/* For debugging purposes only */ -#if 0 -static void -dump_instr(struct cfg_instr *i) -{ - const char *jrel = (is_relative_jump(i)) ? "jrel " : ""; - const char *jabs = (is_jump(i) && !is_relative_jump(i))? "jabs " : ""; - - char arg[128]; - - *arg = '\0'; - if (HAS_ARG(i->i_opcode)) { - sprintf(arg, "arg: %d ", i->i_oparg); - } - if (HAS_TARGET(i->i_opcode)) { - sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg); - } - fprintf(stderr, "line: %d, opcode: %d %s%s%s\n", - i->i_loc.lineno, i->i_opcode, arg, jabs, jrel); -} - -static inline int -basicblock_returns(const basicblock *b) { - struct cfg_instr *last = basicblock_last_instr(b); - return last && (last->i_opcode == RETURN_VALUE || last->i_opcode == RETURN_CONST); -} - -static void -dump_basicblock(const basicblock *b) -{ - const char *b_return = basicblock_returns(b) ? "return " : ""; - fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n", - b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused, - b->b_startdepth, b->b_offset, b_return); - if (b->b_instr) { - int i; - for (i = 0; i < b->b_iused; i++) { - fprintf(stderr, " [%02d] ", i); - dump_instr(b->b_instr + i); - } - } -} -#endif - - -static int -translate_jump_labels_to_targets(basicblock *entryblock); - -static int -optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache); - -static int -remove_unused_consts(basicblock *entryblock, PyObject *consts); - -/* Duplicates exit BBs, so that line numbers can be propagated to them */ -static int -duplicate_exits_without_lineno(cfg_builder *g); - static int * build_cellfixedoffsets(struct compiler_unit *u) { @@ -8456,20 +7233,20 @@ insert_prefix_instructions(struct compiler_unit *u, basicblock *entryblock, /* Add the generator prefix instructions. 
*/ if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { - struct cfg_instr make_gen = { + cfg_instr make_gen = { .i_opcode = RETURN_GENERATOR, .i_oparg = 0, .i_loc = LOCATION(u->u_firstlineno, u->u_firstlineno, -1, -1), .i_target = NULL, }; - RETURN_IF_ERROR(insert_instruction(entryblock, 0, &make_gen)); - struct cfg_instr pop_top = { + RETURN_IF_ERROR(_PyBasicblock_InsertInstruction(entryblock, 0, &make_gen)); + cfg_instr pop_top = { .i_opcode = POP_TOP, .i_oparg = 0, .i_loc = NO_LOCATION, .i_target = NULL, }; - RETURN_IF_ERROR(insert_instruction(entryblock, 1, &pop_top)); + RETURN_IF_ERROR(_PyBasicblock_InsertInstruction(entryblock, 1, &pop_top)); } /* Set up cells for any variable that escapes, to be put in a closure. */ @@ -8492,60 +7269,32 @@ insert_prefix_instructions(struct compiler_unit *u, basicblock *entryblock, if (oldindex == -1) { continue; } - struct cfg_instr make_cell = { + cfg_instr make_cell = { .i_opcode = MAKE_CELL, // This will get fixed in offset_derefs(). .i_oparg = oldindex, .i_loc = NO_LOCATION, .i_target = NULL, }; - RETURN_IF_ERROR(insert_instruction(entryblock, ncellsused, &make_cell)); + RETURN_IF_ERROR(_PyBasicblock_InsertInstruction(entryblock, ncellsused, &make_cell)); ncellsused += 1; } PyMem_RawFree(sorted); } if (nfreevars) { - struct cfg_instr copy_frees = { + cfg_instr copy_frees = { .i_opcode = COPY_FREE_VARS, .i_oparg = nfreevars, .i_loc = NO_LOCATION, .i_target = NULL, }; - RETURN_IF_ERROR(insert_instruction(entryblock, 0, &copy_frees)); + RETURN_IF_ERROR(_PyBasicblock_InsertInstruction(entryblock, 0, &copy_frees)); } return SUCCESS; } -/* Make sure that all returns have a line number, even if early passes - * have failed to propagate a correct line number. - * The resulting line number may not be correct according to PEP 626, - * but should be "good enough", and no worse than in older versions. */ -static void -guarantee_lineno_for_exits(basicblock *entryblock, int firstlineno) { - int lineno = firstlineno; - assert(lineno > 0); - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - struct cfg_instr *last = basicblock_last_instr(b); - if (last == NULL) { - continue; - } - if (last->i_loc.lineno < 0) { - if (last->i_opcode == RETURN_VALUE) { - for (int i = 0; i < b->b_iused; i++) { - assert(b->b_instr[i].i_loc.lineno < 0); - - b->b_instr[i].i_loc.lineno = lineno; - } - } - } - else { - lineno = last->i_loc.lineno; - } - } -} - static int fix_cell_offsets(struct compiler_unit *u, basicblock *entryblock, int *fixedmap) { @@ -8569,7 +7318,7 @@ fix_cell_offsets(struct compiler_unit *u, basicblock *entryblock, int *fixedmap) // Then update offsets, either relative to locals or by cell2arg. for (basicblock *b = entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { - struct cfg_instr *inst = &b->b_instr[i]; + cfg_instr *inst = &b->b_instr[i]; // This is called before extended args are generated. 
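// (At this point each instruction still carries its full oparg in i_oparg,
// which is what allows rewriting the offset in place below.)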
assert(inst->i_opcode != EXTENDED_ARG); int oldoffset = inst->i_oparg; @@ -8592,100 +7341,6 @@ fix_cell_offsets(struct compiler_unit *u, basicblock *entryblock, int *fixedmap) } -#ifndef NDEBUG - -static bool -no_redundant_nops(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (remove_redundant_nops(b) != 0) { - return false; - } - } - return true; -} - -static bool -no_redundant_jumps(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - struct cfg_instr *last = basicblock_last_instr(b); - if (last != NULL) { - if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { - assert(last->i_target != b->b_next); - if (last->i_target == b->b_next) { - return false; - } - } - } - } - return true; -} - -static bool -opcode_metadata_is_sane(cfg_builder *g) { - bool result = true; - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - for (int i = 0; i < b->b_iused; i++) { - struct cfg_instr *instr = &b->b_instr[i]; - int opcode = instr->i_opcode; - int oparg = instr->i_oparg; - assert(opcode <= MAX_REAL_OPCODE); - for (int jump = 0; jump <= 1; jump++) { - int popped = _PyOpcode_num_popped(opcode, oparg, jump ? true : false); - int pushed = _PyOpcode_num_pushed(opcode, oparg, jump ? true : false); - assert((pushed < 0) == (popped < 0)); - if (pushed >= 0) { - assert(_PyOpcode_opcode_metadata[opcode].valid_entry); - int effect = stack_effect(opcode, instr->i_oparg, jump); - if (effect != pushed - popped) { - fprintf(stderr, - "op=%d arg=%d jump=%d: stack_effect (%d) != pushed (%d) - popped (%d)\n", - opcode, oparg, jump, effect, pushed, popped); - result = false; - } - } - } - } - } - return result; -} - -static bool -no_empty_basic_blocks(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (b->b_iused == 0) { - return false; - } - } - return true; -} -#endif - -static int -remove_redundant_jumps(cfg_builder *g) { - /* If a non-empty block ends with a jump instruction, check if the next - * non-empty block reached through normal flow control is the target - * of that jump. If it is, then the jump instruction is redundant and - * can be deleted. - */ - assert(no_empty_basic_blocks(g)); - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - struct cfg_instr *last = basicblock_last_instr(b); - assert(last != NULL); - assert(!IS_ASSEMBLER_OPCODE(last->i_opcode)); - if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { - if (last->i_target == NULL) { - PyErr_SetString(PyExc_SystemError, "jump with NULL target"); - return ERROR; - } - if (last->i_target == b->b_next) { - assert(b->b_next->b_iused); - INSTR_SET_OP0(last, NOP); - } - } - } - return SUCCESS; -} - static int prepare_localsplus(struct compiler_unit* u, cfg_builder *g, int code_flags) { @@ -8734,47 +7389,6 @@ add_return_at_end(struct compiler *c, int addNone) return SUCCESS; } -static void propagate_line_numbers(basicblock *entryblock); - -static int -resolve_line_numbers(struct compiler_unit *u, cfg_builder *g) -{ - /* Set firstlineno if it wasn't explicitly set. 
*/ - if (!u->u_firstlineno) { - if (g->g_entryblock->b_instr && g->g_entryblock->b_instr->i_loc.lineno) { - u->u_firstlineno = g->g_entryblock->b_instr->i_loc.lineno; - } - else { - u->u_firstlineno = 1; - } - } - RETURN_IF_ERROR(duplicate_exits_without_lineno(g)); - propagate_line_numbers(g->g_entryblock); - guarantee_lineno_for_exits(g->g_entryblock, u->u_firstlineno); - return SUCCESS; -} - -static int -optimize_code_unit(cfg_builder *g, PyObject *consts, PyObject *const_cache, - int code_flags, int nlocals, int nparams) -{ - assert(cfg_builder_check(g)); - /** Preprocessing **/ - /* Map labels to targets and mark exception handlers */ - RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock)); - RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock)); - RETURN_IF_ERROR(label_exception_targets(g->g_entryblock)); - - /** Optimization **/ - RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache)); - RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts)); - RETURN_IF_ERROR( - add_checks_for_loads_of_uninitialized_variables( - g->g_entryblock, nlocals, nparams)); - - RETURN_IF_ERROR(push_cold_blocks_to_end(g, code_flags)); - return SUCCESS; -} static PyCodeObject * assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, @@ -8791,13 +7405,21 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, } int nparams = (int)PyList_GET_SIZE(u->u_ste->ste_varnames); int nlocals = (int)PyDict_GET_SIZE(u->u_varnames); - if (optimize_code_unit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) { + if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) { goto error; } /** Assembly **/ - - if (resolve_line_numbers(u, &g) < 0) { + /* Set firstlineno if it wasn't explicitly set. */ + if (!u->u_firstlineno) { + if (g.g_entryblock->b_instr && g.g_entryblock->b_instr->i_loc.lineno) { + u->u_firstlineno = g.g_entryblock->b_instr->i_loc.lineno; + } + else { + u->u_firstlineno = 1; + } + } + if (_PyCfg_ResolveLineNumbers(&g, u->u_firstlineno) < 0) { goto error; } @@ -8806,23 +7428,21 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, goto error; } - int maxdepth = stackdepth(g.g_entryblock, code_flags); + int maxdepth = _PyCfg_Stackdepth(g.g_entryblock, code_flags); if (maxdepth < 0) { goto error; } /* TO DO -- For 3.12, make sure that `maxdepth <= MAX_ALLOWED_STACK_USE` */ - convert_exception_handlers_to_nops(g.g_entryblock); + _PyCfg_ConvertExceptionHandlersToNops(g.g_entryblock); /* Order of basic blocks must have been determined by now */ - if (normalize_jumps(&g) < 0) { + + if (_PyCfg_ResolveJumps(&g) < 0) { goto error; } - assert(no_redundant_jumps(&g)); - assert(opcode_metadata_is_sane(&g)); /* Can't modify the bytecode after computing jump offsets. 
*/ - assemble_jump_offsets(g.g_entryblock); struct assembler a; int res = assemble_emit(&a, g.g_entryblock, u->u_firstlineno, const_cache); @@ -8834,7 +7454,7 @@ assemble_code_unit(struct compiler_unit *u, PyObject *const_cache, error: Py_XDECREF(consts); - cfg_builder_fini(&g); + _PyCfgBuilder_Fini(&g); return co; } @@ -8857,941 +7477,6 @@ assemble(struct compiler *c, int addNone) return assemble_code_unit(u, const_cache, code_flags, filename); } -static PyObject* -get_const_value(int opcode, int oparg, PyObject *co_consts) -{ - PyObject *constant = NULL; - assert(HAS_CONST(opcode)); - if (opcode == LOAD_CONST) { - constant = PyList_GET_ITEM(co_consts, oparg); - } - - if (constant == NULL) { - PyErr_SetString(PyExc_SystemError, - "Internal error: failed to get value of a constant"); - return NULL; - } - return Py_NewRef(constant); -} - -/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n - with LOAD_CONST (c1, c2, ... cn). - The consts table must still be in list form so that the - new constant (c1, c2, ... cn) can be appended. - Called with codestr pointing to the first LOAD_CONST. -*/ -static int -fold_tuple_on_constants(PyObject *const_cache, - struct cfg_instr *inst, - int n, PyObject *consts) -{ - /* Pre-conditions */ - assert(PyDict_CheckExact(const_cache)); - assert(PyList_CheckExact(consts)); - assert(inst[n].i_opcode == BUILD_TUPLE); - assert(inst[n].i_oparg == n); - - for (int i = 0; i < n; i++) { - if (!HAS_CONST(inst[i].i_opcode)) { - return SUCCESS; - } - } - - /* Buildup new tuple of constants */ - PyObject *newconst = PyTuple_New(n); - if (newconst == NULL) { - return ERROR; - } - for (int i = 0; i < n; i++) { - int op = inst[i].i_opcode; - int arg = inst[i].i_oparg; - PyObject *constant = get_const_value(op, arg, consts); - if (constant == NULL) { - return ERROR; - } - PyTuple_SET_ITEM(newconst, i, constant); - } - if (merge_const_one(const_cache, &newconst) < 0) { - Py_DECREF(newconst); - return ERROR; - } - - Py_ssize_t index; - for (index = 0; index < PyList_GET_SIZE(consts); index++) { - if (PyList_GET_ITEM(consts, index) == newconst) { - break; - } - } - if (index == PyList_GET_SIZE(consts)) { - if ((size_t)index >= (size_t)INT_MAX - 1) { - Py_DECREF(newconst); - PyErr_SetString(PyExc_OverflowError, "too many constants"); - return ERROR; - } - if (PyList_Append(consts, newconst)) { - Py_DECREF(newconst); - return ERROR; - } - } - Py_DECREF(newconst); - for (int i = 0; i < n; i++) { - INSTR_SET_OP0(&inst[i], NOP); - } - INSTR_SET_OP1(&inst[n], LOAD_CONST, (int)index); - return SUCCESS; -} - -#define VISITED (-1) - -// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the -// same effect. -static int -swaptimize(basicblock *block, int *ix) -{ - // NOTE: "./python -m test test_patma" serves as a good, quick stress test - // for this function. Make sure to blow away cached *.pyc files first! 
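/* A minimal sketch of the simulation step used below, with a bare array of
 * SWAP opargs standing in for the instruction run: the swaps are applied to
 * an identity permutation of the stack slots (slot 0 is the top), and if the
 * identity comes back, as for the pair SWAP(2) SWAP(2), the whole run can be
 * reduced to NOPs. */
#include <stdbool.h>

static bool
demo_swaps_cancel(const int *swap_args, int nswaps, int *stack, int depth)
{
    for (int i = 0; i < depth; i++) {
        stack[i] = i;
    }
    for (int i = 0; i < nswaps; i++) {
        int top = stack[0];
        stack[0] = stack[swap_args[i] - 1];    /* SWAPs are 1-indexed */
        stack[swap_args[i] - 1] = top;
    }
    for (int i = 0; i < depth; i++) {
        if (stack[i] != i) {
            return false;                      /* some net reordering remains */
        }
    }
    return true;
}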
- assert(*ix < block->b_iused); - struct cfg_instr *instructions = &block->b_instr[*ix]; - // Find the length of the current sequence of SWAPs and NOPs, and record the - // maximum depth of the stack manipulations: - assert(instructions[0].i_opcode == SWAP); - int depth = instructions[0].i_oparg; - int len = 0; - int more = false; - int limit = block->b_iused - *ix; - while (++len < limit) { - int opcode = instructions[len].i_opcode; - if (opcode == SWAP) { - depth = Py_MAX(depth, instructions[len].i_oparg); - more = true; - } - else if (opcode != NOP) { - break; - } - } - // It's already optimal if there's only one SWAP: - if (!more) { - return SUCCESS; - } - // Create an array with elements {0, 1, 2, ..., depth - 1}: - int *stack = PyMem_Malloc(depth * sizeof(int)); - if (stack == NULL) { - PyErr_NoMemory(); - return ERROR; - } - for (int i = 0; i < depth; i++) { - stack[i] = i; - } - // Simulate the combined effect of these instructions by "running" them on - // our "stack": - for (int i = 0; i < len; i++) { - if (instructions[i].i_opcode == SWAP) { - int oparg = instructions[i].i_oparg; - int top = stack[0]; - // SWAPs are 1-indexed: - stack[0] = stack[oparg - 1]; - stack[oparg - 1] = top; - } - } - // Now we can begin! Our approach here is based on a solution to a closely - // related problem (https://cs.stackexchange.com/a/13938). It's easiest to - // think of this algorithm as determining the steps needed to efficiently - // "un-shuffle" our stack. By performing the moves in *reverse* order, - // though, we can efficiently *shuffle* it! For this reason, we will be - // replacing instructions starting from the *end* of the run. Since the - // solution is optimal, we don't need to worry about running out of space: - int current = len - 1; - for (int i = 0; i < depth; i++) { - // Skip items that have already been visited, or just happen to be in - // the correct location: - if (stack[i] == VISITED || stack[i] == i) { - continue; - } - // Okay, we've found an item that hasn't been visited. It forms a cycle - // with other items; traversing the cycle and swapping each item with - // the next will put them all in the correct place. 
The weird - // loop-and-a-half is necessary to insert 0 into every cycle, since we - // can only swap from that position: - int j = i; - while (true) { - // Skip the actual swap if our item is zero, since swapping the top - // item with itself is pointless: - if (j) { - assert(0 <= current); - // SWAPs are 1-indexed: - instructions[current].i_opcode = SWAP; - instructions[current--].i_oparg = j + 1; - } - if (stack[j] == VISITED) { - // Completed the cycle: - assert(j == i); - break; - } - int next_j = stack[j]; - stack[j] = VISITED; - j = next_j; - } - } - // NOP out any unused instructions: - while (0 <= current) { - INSTR_SET_OP0(&instructions[current--], NOP); - } - PyMem_Free(stack); - *ix += len - 1; - return SUCCESS; -} - -// This list is pretty small, since it's only okay to reorder opcodes that: -// - can't affect control flow (like jumping or raising exceptions) -// - can't invoke arbitrary code (besides finalizers) -// - only touch the TOS (and pop it when finished) -#define SWAPPABLE(opcode) \ - ((opcode) == STORE_FAST || (opcode) == POP_TOP) - -static int -next_swappable_instruction(basicblock *block, int i, int lineno) -{ - while (++i < block->b_iused) { - struct cfg_instr *instruction = &block->b_instr[i]; - if (0 <= lineno && instruction->i_loc.lineno != lineno) { - // Optimizing across this instruction could cause user-visible - // changes in the names bound between line tracing events! - return -1; - } - if (instruction->i_opcode == NOP) { - continue; - } - if (SWAPPABLE(instruction->i_opcode)) { - return i; - } - return -1; - } - return -1; -} - -// Attempt to apply SWAPs statically by swapping *instructions* rather than -// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42) -// with the more efficient NOP, STORE_FAST(42), POP_TOP. -static void -apply_static_swaps(basicblock *block, int i) -{ - // SWAPs are to our left, and potential swaperands are to our right: - for (; 0 <= i; i--) { - assert(i < block->b_iused); - struct cfg_instr *swap = &block->b_instr[i]; - if (swap->i_opcode != SWAP) { - if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) { - // Nope, but we know how to handle these. Keep looking: - continue; - } - // We can't reason about what this instruction does. Bail: - return; - } - int j = next_swappable_instruction(block, i, -1); - if (j < 0) { - return; - } - int k = j; - int lineno = block->b_instr[j].i_loc.lineno; - for (int count = swap->i_oparg - 1; 0 < count; count--) { - k = next_swappable_instruction(block, k, lineno); - if (k < 0) { - return; - } - } - // Success! - INSTR_SET_OP0(swap, NOP); - struct cfg_instr temp = block->b_instr[j]; - block->b_instr[j] = block->b_instr[k]; - block->b_instr[k] = temp; - } -} - -// Attempt to eliminate jumps to jumps by updating inst to jump to -// target->i_target using the provided opcode. Return whether or not the -// optimization was successful. 
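/* Concretely, threading rewrites a jump whose target block is nothing but
 * another jump (labels here are illustrative):
 *
 *   Before:                          After:
 *       POP_JUMP_IF_TRUE L2              POP_JUMP_IF_TRUE L3
 *       ...                              ...
 *   L2: JUMP L3                      L2: JUMP L3   (left for dead-code removal)
 *
 * The rewrite is skipped when the line numbers differ, since that could
 * change line tracing events, and when nothing would change (bpo-45773,
 * see below). */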
-static bool -jump_thread(struct cfg_instr *inst, struct cfg_instr *target, int opcode) -{ - assert(is_jump(inst)); - assert(is_jump(target)); - // bpo-45773: If inst->i_target == target->i_target, then nothing actually - // changes (and we fall into an infinite loop): - if ((inst->i_loc.lineno == target->i_loc.lineno || target->i_loc.lineno == -1) && - inst->i_target != target->i_target) - { - inst->i_target = target->i_target; - inst->i_opcode = opcode; - return true; - } - return false; -} - -/* Maximum size of basic block that should be copied in optimizer */ -#define MAX_COPY_SIZE 4 - -/* Optimization */ -static int -optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) -{ - assert(PyDict_CheckExact(const_cache)); - assert(PyList_CheckExact(consts)); - struct cfg_instr nop; - INSTR_SET_OP0(&nop, NOP); - struct cfg_instr *target = &nop; - int opcode = 0; - int oparg = 0; - int nextop = 0; - for (int i = 0; i < bb->b_iused; i++) { - struct cfg_instr *inst = &bb->b_instr[i]; - bool is_copy_of_load_const = (opcode == LOAD_CONST && - inst->i_opcode == COPY && - inst->i_oparg == 1); - if (! is_copy_of_load_const) { - opcode = inst->i_opcode; - oparg = inst->i_oparg; - if (HAS_TARGET(opcode)) { - assert(inst->i_target->b_iused > 0); - target = &inst->i_target->b_instr[0]; - assert(!IS_ASSEMBLER_OPCODE(target->i_opcode)); - } - else { - target = &nop; - } - } - nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0; - assert(!IS_ASSEMBLER_OPCODE(opcode)); - switch (opcode) { - /* Remove LOAD_CONST const; conditional jump */ - case LOAD_CONST: - { - PyObject* cnt; - int is_true; - int jump_if_true; - switch(nextop) { - case POP_JUMP_IF_FALSE: - case POP_JUMP_IF_TRUE: - cnt = get_const_value(opcode, oparg, consts); - if (cnt == NULL) { - goto error; - } - is_true = PyObject_IsTrue(cnt); - Py_DECREF(cnt); - if (is_true == -1) { - goto error; - } - INSTR_SET_OP0(inst, NOP); - jump_if_true = nextop == POP_JUMP_IF_TRUE; - if (is_true == jump_if_true) { - bb->b_instr[i+1].i_opcode = JUMP; - } - else { - INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); - } - break; - case IS_OP: - cnt = get_const_value(opcode, oparg, consts); - if (cnt == NULL) { - goto error; - } - int jump_op = i+2 < bb->b_iused ? bb->b_instr[i+2].i_opcode : 0; - if (Py_IsNone(cnt) && (jump_op == POP_JUMP_IF_FALSE || jump_op == POP_JUMP_IF_TRUE)) { - unsigned char nextarg = bb->b_instr[i+1].i_oparg; - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); - bb->b_instr[i+2].i_opcode = nextarg ^ (jump_op == POP_JUMP_IF_FALSE) ? - POP_JUMP_IF_NOT_NONE : POP_JUMP_IF_NONE; - } - Py_DECREF(cnt); - break; - case RETURN_VALUE: - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg); - break; - } - break; - } - - /* Try to fold tuples of constants. - Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1). - Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2). - Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). 
*/ - case BUILD_TUPLE: - if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) { - switch(oparg) { - case 1: - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); - continue; - case 2: - case 3: - INSTR_SET_OP0(inst, NOP); - bb->b_instr[i+1].i_opcode = SWAP; - continue; - } - } - if (i >= oparg) { - if (fold_tuple_on_constants(const_cache, inst-oparg, oparg, consts)) { - goto error; - } - } - break; - case POP_JUMP_IF_NOT_NONE: - case POP_JUMP_IF_NONE: - switch (target->i_opcode) { - case JUMP: - i -= jump_thread(inst, target, inst->i_opcode); - } - break; - case POP_JUMP_IF_FALSE: - switch (target->i_opcode) { - case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_FALSE); - } - break; - case POP_JUMP_IF_TRUE: - switch (target->i_opcode) { - case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_TRUE); - } - break; - case JUMP: - switch (target->i_opcode) { - case JUMP: - i -= jump_thread(inst, target, JUMP); - } - break; - case FOR_ITER: - if (target->i_opcode == JUMP) { - /* This will not work now because the jump (at target) could - * be forward or backward and FOR_ITER only jumps forward. We - * can re-enable this if ever we implement a backward version - * of FOR_ITER. - */ - /* - i -= jump_thread(inst, target, FOR_ITER); - */ - } - break; - case SWAP: - if (oparg == 1) { - INSTR_SET_OP0(inst, NOP); - break; - } - if (swaptimize(bb, &i) < 0) { - goto error; - } - apply_static_swaps(bb, i); - break; - case KW_NAMES: - break; - case PUSH_NULL: - if (nextop == LOAD_GLOBAL && (inst[1].i_opcode & 1) == 0) { - INSTR_SET_OP0(inst, NOP); - inst[1].i_oparg |= 1; - } - break; - default: - /* All HAS_CONST opcodes should be handled with LOAD_CONST */ - assert (!HAS_CONST(inst->i_opcode)); - } - } - return SUCCESS; -error: - return ERROR; -} - -/* If this block ends with an unconditional jump to a small exit block, then - * remove the jump and extend this block with the target. - * Returns 1 if extended, 0 if no change, and -1 on error. - */ -static int -inline_small_exit_blocks(basicblock *bb) { - struct cfg_instr *last = basicblock_last_instr(bb); - if (last == NULL) { - return 0; - } - if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { - return 0; - } - basicblock *target = last->i_target; - if (basicblock_exits_scope(target) && target->b_iused <= MAX_COPY_SIZE) { - INSTR_SET_OP0(last, NOP); - RETURN_IF_ERROR(basicblock_append_instructions(bb, target)); - return 1; - } - return 0; -} - - -static int -remove_redundant_nops_and_pairs(basicblock *entryblock) -{ - bool done = false; - - while (! done) { - done = true; - struct cfg_instr *prev_instr = NULL; - struct cfg_instr *instr = NULL; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); - if (IS_LABEL(b->b_label)) { - /* this block is a jump target, forget instr */ - instr = NULL; - } - for (int i = 0; i < b->b_iused; i++) { - prev_instr = instr; - instr = &b->b_instr[i]; - int prev_opcode = prev_instr ? prev_instr->i_opcode : 0; - int prev_oparg = prev_instr ? 
prev_instr->i_oparg : 0; - int opcode = instr->i_opcode; - bool is_redundant_pair = false; - if (opcode == POP_TOP) { - if (prev_opcode == LOAD_CONST) { - is_redundant_pair = true; - } - else if (prev_opcode == COPY && prev_oparg == 1) { - is_redundant_pair = true; - } - } - if (is_redundant_pair) { - INSTR_SET_OP0(prev_instr, NOP); - INSTR_SET_OP0(instr, NOP); - done = false; - } - } - if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) { - instr = NULL; - } - } - } - return SUCCESS; -} - - -static int -remove_redundant_nops(basicblock *bb) { - /* Remove NOPs when legal to do so. */ - int dest = 0; - int prev_lineno = -1; - for (int src = 0; src < bb->b_iused; src++) { - int lineno = bb->b_instr[src].i_loc.lineno; - if (bb->b_instr[src].i_opcode == NOP) { - /* Eliminate no-op if it doesn't have a line number */ - if (lineno < 0) { - continue; - } - /* or, if the previous instruction had the same line number. */ - if (prev_lineno == lineno) { - continue; - } - /* or, if the next instruction has same line number or no line number */ - if (src < bb->b_iused - 1) { - int next_lineno = bb->b_instr[src+1].i_loc.lineno; - if (next_lineno == lineno) { - continue; - } - if (next_lineno < 0) { - bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc; - continue; - } - } - else { - basicblock* next = bb->b_next; - while (next && next->b_iused == 0) { - next = next->b_next; - } - /* or if last instruction in BB and next BB has same line number */ - if (next) { - if (lineno == next->b_instr[0].i_loc.lineno) { - continue; - } - } - } - - } - if (dest != src) { - bb->b_instr[dest] = bb->b_instr[src]; - } - dest++; - prev_lineno = lineno; - } - assert(dest <= bb->b_iused); - int num_removed = bb->b_iused - dest; - bb->b_iused = dest; - return num_removed; -} - -static int -check_cfg(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - /* Raise SystemError if jump or exit is not last instruction in the block. 
*/ - for (int i = 0; i < b->b_iused; i++) { - int opcode = b->b_instr[i].i_opcode; - assert(!IS_ASSEMBLER_OPCODE(opcode)); - if (IS_TERMINATOR_OPCODE(opcode)) { - if (i != b->b_iused - 1) { - PyErr_SetString(PyExc_SystemError, "malformed control flow graph."); - return ERROR; - } - } - } - } - return SUCCESS; -} - -static int -mark_reachable(basicblock *entryblock) { - basicblock **stack = make_cfg_traversal_stack(entryblock); - if (stack == NULL) { - return ERROR; - } - basicblock **sp = stack; - entryblock->b_predecessors = 1; - *sp++ = entryblock; - while (sp > stack) { - basicblock *b = *(--sp); - b->b_visited = 1; - if (b->b_next && BB_HAS_FALLTHROUGH(b)) { - if (!b->b_next->b_visited) { - assert(b->b_next->b_predecessors == 0); - *sp++ = b->b_next; - } - b->b_next->b_predecessors++; - } - for (int i = 0; i < b->b_iused; i++) { - basicblock *target; - struct cfg_instr *instr = &b->b_instr[i]; - if (is_jump(instr) || is_block_push(instr)) { - target = instr->i_target; - if (!target->b_visited) { - assert(target->b_predecessors == 0 || target == b->b_next); - *sp++ = target; - } - target->b_predecessors++; - } - } - } - PyMem_Free(stack); - return SUCCESS; -} - -static void -eliminate_empty_basic_blocks(cfg_builder *g) { - /* Eliminate empty blocks */ - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - basicblock *next = b->b_next; - while (next && next->b_iused == 0) { - next = next->b_next; - } - b->b_next = next; - } - while(g->g_entryblock && g->g_entryblock->b_iused == 0) { - g->g_entryblock = g->g_entryblock->b_next; - } - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - assert(b->b_iused > 0); - for (int i = 0; i < b->b_iused; i++) { - struct cfg_instr *instr = &b->b_instr[i]; - if (HAS_TARGET(instr->i_opcode)) { - basicblock *target = instr->i_target; - while (target->b_iused == 0) { - target = target->b_next; - } - instr->i_target = target; - assert(instr->i_target && instr->i_target->b_iused > 0); - } - } - } -} - - -/* If an instruction has no line number, but its predecessor in the BB does, - * then copy the line number. If a successor block has no line number, and only - * one predecessor, then inherit the line number. - * This ensures that all exit blocks (with one predecessor) receive a line number. - * Also reduces the size of the line number table, - * but has no impact on the generated line number events. 
- */ -static void -propagate_line_numbers(basicblock *entryblock) { - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - struct cfg_instr *last = basicblock_last_instr(b); - if (last == NULL) { - continue; - } - - location prev_location = NO_LOCATION; - for (int i = 0; i < b->b_iused; i++) { - if (b->b_instr[i].i_loc.lineno < 0) { - b->b_instr[i].i_loc = prev_location; - } - else { - prev_location = b->b_instr[i].i_loc; - } - } - if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) { - assert(b->b_next->b_iused); - if (b->b_next->b_instr[0].i_loc.lineno < 0) { - b->b_next->b_instr[0].i_loc = prev_location; - } - } - if (is_jump(last)) { - basicblock *target = last->i_target; - if (target->b_predecessors == 1) { - if (target->b_instr[0].i_loc.lineno < 0) { - target->b_instr[0].i_loc = prev_location; - } - } - } - } -} - - -/* Calculate the actual jump target from the target_label */ -static int -translate_jump_labels_to_targets(basicblock *entryblock) -{ - int max_label = -1; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - if (b->b_label.id > max_label) { - max_label = b->b_label.id; - } - } - size_t mapsize = sizeof(basicblock *) * (max_label + 1); - basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize); - if (!label2block) { - PyErr_NoMemory(); - return ERROR; - } - memset(label2block, 0, mapsize); - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - if (b->b_label.id >= 0) { - label2block[b->b_label.id] = b; - } - } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - for (int i = 0; i < b->b_iused; i++) { - struct cfg_instr *instr = &b->b_instr[i]; - assert(instr->i_target == NULL); - if (HAS_TARGET(instr->i_opcode)) { - int lbl = instr->i_oparg; - assert(lbl >= 0 && lbl <= max_label); - instr->i_target = label2block[lbl]; - assert(instr->i_target != NULL); - assert(instr->i_target->b_label.id == lbl); - } - } - } - PyMem_Free(label2block); - return SUCCESS; -} - -/* Perform optimizations on a control flow graph. - The consts object should still be in list form to allow new constants - to be appended. - - Code transformations that reduce code size initially fill the gaps with - NOPs. Later those NOPs are removed. 
-*/ - -static int -optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) -{ - assert(PyDict_CheckExact(const_cache)); - RETURN_IF_ERROR(check_cfg(g)); - eliminate_empty_basic_blocks(g); - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - RETURN_IF_ERROR(inline_small_exit_blocks(b)); - } - assert(no_empty_basic_blocks(g)); - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts)); - assert(b->b_predecessors == 0); - } - RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock)); - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - RETURN_IF_ERROR(inline_small_exit_blocks(b)); - } - RETURN_IF_ERROR(mark_reachable(g->g_entryblock)); - - /* Delete unreachable instructions */ - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (b->b_predecessors == 0) { - b->b_iused = 0; - } - } - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); - } - eliminate_empty_basic_blocks(g); - assert(no_redundant_nops(g)); - RETURN_IF_ERROR(remove_redundant_jumps(g)); - return SUCCESS; -} - - -static int -remove_unused_consts(basicblock *entryblock, PyObject *consts) -{ - assert(PyList_CheckExact(consts)); - Py_ssize_t nconsts = PyList_GET_SIZE(consts); - if (nconsts == 0) { - return SUCCESS; /* nothing to do */ - } - - Py_ssize_t *index_map = NULL; - Py_ssize_t *reverse_index_map = NULL; - int err = ERROR; - - index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t)); - if (index_map == NULL) { - goto end; - } - for (Py_ssize_t i = 1; i < nconsts; i++) { - index_map[i] = -1; - } - // The first constant may be docstring; keep it always. - index_map[0] = 0; - - /* mark used consts */ - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - for (int i = 0; i < b->b_iused; i++) { - if (HAS_CONST(b->b_instr[i].i_opcode)) { - int index = b->b_instr[i].i_oparg; - index_map[index] = index; - } - } - } - /* now index_map[i] == i if consts[i] is used, -1 otherwise */ - - /* condense consts */ - Py_ssize_t n_used_consts = 0; - for (int i = 0; i < nconsts; i++) { - if (index_map[i] != -1) { - assert(index_map[i] == i); - index_map[n_used_consts++] = index_map[i]; - } - } - if (n_used_consts == nconsts) { - /* nothing to do */ - err = SUCCESS; - goto end; - } - - /* move all used consts to the beginning of the consts list */ - assert(n_used_consts < nconsts); - for (Py_ssize_t i = 0; i < n_used_consts; i++) { - Py_ssize_t old_index = index_map[i]; - assert(i <= old_index && old_index < nconsts); - if (i != old_index) { - PyObject *value = PyList_GET_ITEM(consts, index_map[i]); - assert(value != NULL); - PyList_SetItem(consts, i, Py_NewRef(value)); - } - } - - /* truncate the consts list at its new size */ - if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) { - goto end; - } - - /* adjust const indices in the bytecode */ - reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t)); - if (reverse_index_map == NULL) { - goto end; - } - for (Py_ssize_t i = 0; i < nconsts; i++) { - reverse_index_map[i] = -1; - } - for (Py_ssize_t i = 0; i < n_used_consts; i++) { - assert(index_map[i] != -1); - assert(reverse_index_map[index_map[i]] == -1); - reverse_index_map[index_map[i]] = i; - } - - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - for (int i = 0; i < b->b_iused; i++) { - if (HAS_CONST(b->b_instr[i].i_opcode)) { - int index = b->b_instr[i].i_oparg; - assert(reverse_index_map[index] >= 0); - 
assert(reverse_index_map[index] < n_used_consts); - b->b_instr[i].i_oparg = (int)reverse_index_map[index]; - } - } - } - - err = SUCCESS; -end: - PyMem_Free(index_map); - PyMem_Free(reverse_index_map); - return err; -} - -static inline bool -is_exit_without_lineno(basicblock *b) { - if (!basicblock_exits_scope(b)) { - return false; - } - for (int i = 0; i < b->b_iused; i++) { - if (b->b_instr[i].i_loc.lineno >= 0) { - return false; - } - } - return true; -} - -/* PEP 626 mandates that the f_lineno of a frame is correct - * after a frame terminates. It would be prohibitively expensive - * to continuously update the f_lineno field at runtime, - * so we make sure that all exiting instructions (raises and returns) - * have a valid line number, allowing us to compute f_lineno lazily. - * We can do this by duplicating the exit blocks without line number - * so that none have more than one predecessor. We can then safely - * copy the line number from the sole predecessor block. - */ -static int -duplicate_exits_without_lineno(cfg_builder *g) -{ - assert(no_empty_basic_blocks(g)); - /* Copy all exit blocks without line number that are targets of a jump. - */ - basicblock *entryblock = g->g_entryblock; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - struct cfg_instr *last = basicblock_last_instr(b); - assert(last != NULL); - if (is_jump(last)) { - basicblock *target = last->i_target; - if (is_exit_without_lineno(target) && target->b_predecessors > 1) { - basicblock *new_target = copy_basicblock(g, target); - if (new_target == NULL) { - return ERROR; - } - new_target->b_instr[0].i_loc = last->i_loc; - last->i_target = new_target; - target->b_predecessors--; - new_target->b_predecessors = 1; - new_target->b_next = target->b_next; - target->b_next = new_target; - } - } - } - - /* Any remaining reachable exit blocks without line number can only be reached by - * fall through, and thus can only have a single predecessor */ - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) { - if (is_exit_without_lineno(b->b_next)) { - struct cfg_instr *last = basicblock_last_instr(b); - assert(last != NULL); - b->b_next->b_instr[0].i_loc = last->i_loc; - } - } - } - return SUCCESS; -} - - /* Access to compiler optimizations for unit tests. 
* * _PyCompile_CodeGen takes an AST, applies code-gen and @@ -9842,7 +7527,7 @@ instructions_to_cfg(PyObject *instructions, cfg_builder *g) for (int i = 0; i < num_insts; i++) { if (is_target[i]) { jump_target_label lbl = {i}; - RETURN_IF_ERROR(cfg_builder_use_label(g, lbl)); + RETURN_IF_ERROR(_PyCfgBuilder_UseLabel(g, lbl)); } PyObject *item = PyList_GET_ITEM(instructions, i); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 6) { @@ -9880,11 +7565,11 @@ instructions_to_cfg(PyObject *instructions, cfg_builder *g) if (PyErr_Occurred()) { goto error; } - RETURN_IF_ERROR(cfg_builder_addop(g, opcode, oparg, loc)); + RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, opcode, oparg, loc)); } - struct cfg_instr *last = basicblock_last_instr(g->g_curblock); + cfg_instr *last = _PyCfg_BasicblockLastInstr(g->g_curblock); if (last && !IS_TERMINATOR_OPCODE(last->i_opcode)) { - RETURN_IF_ERROR(cfg_builder_addop(g, RETURN_VALUE, 0, NO_LOCATION)); + RETURN_IF_ERROR(_PyCfgBuilder_Addop(g, RETURN_VALUE, 0, NO_LOCATION)); } PyMem_Free(is_target); return SUCCESS; @@ -9939,7 +7624,7 @@ cfg_to_instructions(cfg_builder *g) } for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { - struct cfg_instr *instr = &b->b_instr[i]; + cfg_instr *instr = &b->b_instr[i]; location loc = instr->i_loc; int arg = HAS_TARGET(instr->i_opcode) ? instr->i_target->b_label.id : instr->i_oparg; @@ -10018,26 +7703,26 @@ _PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts) cfg_builder g; memset(&g, 0, sizeof(cfg_builder)); - if (cfg_builder_init(&g) < 0) { + if (_PyCfgBuilder_Init(&g) < 0) { goto error; } if (instructions_to_cfg(instructions, &g) < 0) { goto error; } int code_flags = 0, nlocals = 0, nparams = 0; - if (optimize_code_unit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) { + if (_PyCfg_OptimizeCodeUnit(&g, consts, const_cache, code_flags, nlocals, nparams) < 0) { goto error; } res = cfg_to_instructions(&g); error: Py_DECREF(const_cache); - cfg_builder_fini(&g); + _PyCfgBuilder_Fini(&g); return res; } /* Retained for API compatibility. 
- * Optimization is now done in optimize_cfg */ + * Optimization is now done in _PyCfg_OptimizeCodeUnit */ PyObject * PyCode_Optimize(PyObject *code, PyObject* Py_UNUSED(consts), diff --git a/Python/flowgraph.c b/Python/flowgraph.c new file mode 100644 index 00000000000000..cecddbd39c94b3 --- /dev/null +++ b/Python/flowgraph.c @@ -0,0 +1,2160 @@ + +#include <stdbool.h> + +#include "Python.h" +#include "pycore_flowgraph.h" +#include "pycore_compile.h" +#include "pycore_pymem.h" // _PyMem_IsPtrFreed() + +#include "pycore_opcode_utils.h" +#define NEED_OPCODE_METADATA +#include "opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed +#undef NEED_OPCODE_METADATA + + +#undef SUCCESS +#undef ERROR +#define SUCCESS 0 +#define ERROR -1 + +#define RETURN_IF_ERROR(X) \ + if ((X) == -1) { \ + return ERROR; \ + } + +#define DEFAULT_BLOCK_SIZE 16 + +typedef _PyCompilerSrcLocation location; +typedef _PyCfgJumpTargetLabel jump_target_label; +typedef _PyCfgBasicblock basicblock; +typedef _PyCfgBuilder cfg_builder; +typedef _PyCfgInstruction cfg_instr; + +static const jump_target_label NO_LABEL = {-1}; + +#define SAME_LABEL(L1, L2) ((L1).id == (L2).id) +#define IS_LABEL(L) (!SAME_LABEL((L), (NO_LABEL))) + + +static inline int +is_block_push(cfg_instr *i) +{ + return IS_BLOCK_PUSH_OPCODE(i->i_opcode); +} + +static inline int +is_relative_jump(cfg_instr *i) +{ + return IS_RELATIVE_JUMP(i->i_opcode); +} + +static inline int +is_jump(cfg_instr *i) +{ + return IS_JUMP_OPCODE(i->i_opcode); +} + +/* One arg*/ +#define INSTR_SET_OP1(I, OP, ARG) \ + do { \ + assert(HAS_ARG(OP)); \ + _PyCfgInstruction *_instr__ptr_ = (I); \ + _instr__ptr_->i_opcode = (OP); \ + _instr__ptr_->i_oparg = (ARG); \ + } while (0); + +/* No args*/ +#define INSTR_SET_OP0(I, OP) \ + do { \ + assert(!HAS_ARG(OP)); \ + _PyCfgInstruction *_instr__ptr_ = (I); \ + _instr__ptr_->i_opcode = (OP); \ + _instr__ptr_->i_oparg = 0; \ + } while (0); + +/***** Blocks *****/ + +/* Returns the offset of the next instruction in the current block's + b_instr array. Resizes the b_instr as necessary. + Returns -1 on failure. +*/ +static int +basicblock_next_instr(basicblock *b) +{ + assert(b != NULL); + RETURN_IF_ERROR( + _PyCompile_EnsureArrayLargeEnough( + b->b_iused + 1, + (void**)&b->b_instr, + &b->b_ialloc, + DEFAULT_BLOCK_SIZE, + sizeof(cfg_instr))); + return b->b_iused++; +} + +/* Allocate a new block and return a pointer to it. + Returns NULL on error. +*/ + +static basicblock * +cfg_builder_new_block(cfg_builder *g) +{ + basicblock *b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock)); + if (b == NULL) { + PyErr_NoMemory(); + return NULL; + } + /* Extend the singly linked list of blocks with new block. 
*/ + b->b_list = g->g_block_list; + g->g_block_list = b; + b->b_label = NO_LABEL; + return b; +} + +static int +basicblock_addop(basicblock *b, int opcode, int oparg, location loc) +{ + assert(IS_WITHIN_OPCODE_RANGE(opcode)); + assert(!IS_ASSEMBLER_OPCODE(opcode)); + assert(HAS_ARG(opcode) || HAS_TARGET(opcode) || oparg == 0); + assert(0 <= oparg && oparg < (1 << 30)); + + int off = basicblock_next_instr(b); + if (off < 0) { + return ERROR; + } + cfg_instr *i = &b->b_instr[off]; + i->i_opcode = opcode; + i->i_oparg = oparg; + i->i_target = NULL; + i->i_loc = loc; + + return SUCCESS; +} + +static inline int +basicblock_append_instructions(basicblock *target, basicblock *source) +{ + for (int i = 0; i < source->b_iused; i++) { + int n = basicblock_next_instr(target); + if (n < 0) { + return ERROR; + } + target->b_instr[n] = source->b_instr[i]; + } + return SUCCESS; +} + +static basicblock * +copy_basicblock(cfg_builder *g, basicblock *block) +{ + /* Cannot copy a block if it has a fallthrough, since + * a block can only have one fallthrough predecessor. + */ + assert(BB_NO_FALLTHROUGH(block)); + basicblock *result = cfg_builder_new_block(g); + if (result == NULL) { + return NULL; + } + if (basicblock_append_instructions(result, block) < 0) { + return NULL; + } + return result; +} + +int +_PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) { + RETURN_IF_ERROR(basicblock_next_instr(block)); + for (int i = block->b_iused - 1; i > pos; i--) { + block->b_instr[i] = block->b_instr[i-1]; + } + block->b_instr[pos] = *instr; + return SUCCESS; +} + +int +_PyCfg_InstrSize(cfg_instr *instruction) +{ + int opcode = instruction->i_opcode; + assert(!IS_PSEUDO_OPCODE(opcode)); + int oparg = instruction->i_oparg; + assert(HAS_ARG(opcode) || oparg == 0); + int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg); + int caches = _PyOpcode_Caches[opcode]; + return extended_args + 1 + caches; +} + +static int +blocksize(basicblock *b) +{ + int size = 0; + for (int i = 0; i < b->b_iused; i++) { + size += _PyCfg_InstrSize(&b->b_instr[i]); + } + return size; +} + +/* For debugging purposes only */ +#if 0 +static void +dump_instr(cfg_instr *i) +{ + const char *jrel = (is_relative_jump(i)) ? "jrel " : ""; + const char *jabs = (is_jump(i) && !is_relative_jump(i))? "jabs " : ""; + + char arg[128]; + + *arg = '\0'; + if (HAS_ARG(i->i_opcode)) { + sprintf(arg, "arg: %d ", i->i_oparg); + } + if (HAS_TARGET(i->i_opcode)) { + sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg); + } + fprintf(stderr, "line: %d, opcode: %d %s%s%s\n", + i->i_loc.lineno, i->i_opcode, arg, jabs, jrel); +} + +static inline int +basicblock_returns(const basicblock *b) { + cfg_instr *last = _PyCfg_BasicblockLastInstr(b); + return last && (last->i_opcode == RETURN_VALUE || last->i_opcode == RETURN_CONST); +} + +static void +dump_basicblock(const basicblock *b) +{ + const char *b_return = basicblock_returns(b) ? 
"return " : ""; + fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n", + b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused, + b->b_startdepth, b->b_offset, b_return); + if (b->b_instr) { + int i; + for (i = 0; i < b->b_iused; i++) { + fprintf(stderr, " [%02d] ", i); + dump_instr(b->b_instr + i); + } + } +} +#endif + + +/***** CFG construction and modification *****/ + +static basicblock * +cfg_builder_use_next_block(cfg_builder *g, basicblock *block) +{ + assert(block != NULL); + g->g_curblock->b_next = block; + g->g_curblock = block; + return block; +} + +cfg_instr * +_PyCfg_BasicblockLastInstr(const basicblock *b) { + assert(b->b_iused >= 0); + if (b->b_iused > 0) { + assert(b->b_instr != NULL); + return &b->b_instr[b->b_iused - 1]; + } + return NULL; +} + +static inline int +basicblock_exits_scope(const basicblock *b) { + cfg_instr *last = _PyCfg_BasicblockLastInstr(b); + return last && IS_SCOPE_EXIT_OPCODE(last->i_opcode); +} + +static bool +cfg_builder_current_block_is_terminated(cfg_builder *g) +{ + cfg_instr *last = _PyCfg_BasicblockLastInstr(g->g_curblock); + if (last && IS_TERMINATOR_OPCODE(last->i_opcode)) { + return true; + } + if (IS_LABEL(g->g_current_label)) { + if (last || IS_LABEL(g->g_curblock->b_label)) { + return true; + } + else { + /* current block is empty, label it */ + g->g_curblock->b_label = g->g_current_label; + g->g_current_label = NO_LABEL; + } + } + return false; +} + +static int +cfg_builder_maybe_start_new_block(cfg_builder *g) +{ + if (cfg_builder_current_block_is_terminated(g)) { + basicblock *b = cfg_builder_new_block(g); + if (b == NULL) { + return ERROR; + } + b->b_label = g->g_current_label; + g->g_current_label = NO_LABEL; + cfg_builder_use_next_block(g, b); + } + return SUCCESS; +} + +#ifndef NDEBUG +static bool +cfg_builder_check(cfg_builder *g) +{ + assert(g->g_entryblock->b_iused > 0); + for (basicblock *block = g->g_block_list; block != NULL; block = block->b_list) { + assert(!_PyMem_IsPtrFreed(block)); + if (block->b_instr != NULL) { + assert(block->b_ialloc > 0); + assert(block->b_iused >= 0); + assert(block->b_ialloc >= block->b_iused); + } + else { + assert (block->b_iused == 0); + assert (block->b_ialloc == 0); + } + } + return true; +} +#endif + +int +_PyCfgBuilder_Init(cfg_builder *g) +{ + g->g_block_list = NULL; + basicblock *block = cfg_builder_new_block(g); + if (block == NULL) { + return ERROR; + } + g->g_curblock = g->g_entryblock = block; + g->g_current_label = NO_LABEL; + return SUCCESS; +} + +void +_PyCfgBuilder_Fini(cfg_builder* g) +{ + assert(cfg_builder_check(g)); + basicblock *b = g->g_block_list; + while (b != NULL) { + if (b->b_instr) { + PyObject_Free((void *)b->b_instr); + } + basicblock *next = b->b_list; + PyObject_Free((void *)b); + b = next; + } +} + +int +_PyCfgBuilder_UseLabel(cfg_builder *g, jump_target_label lbl) +{ + g->g_current_label = lbl; + return cfg_builder_maybe_start_new_block(g); +} + +int +_PyCfgBuilder_Addop(cfg_builder *g, int opcode, int oparg, location loc) +{ + RETURN_IF_ERROR(cfg_builder_maybe_start_new_block(g)); + return basicblock_addop(g->g_curblock, opcode, oparg, loc); +} + + +/***** debugging helpers *****/ + +#ifndef NDEBUG +static int remove_redundant_nops(basicblock *bb); + +static bool +no_redundant_nops(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + if (remove_redundant_nops(b) != 0) { + return false; + } + } + return true; +} + +static bool 
+no_empty_basic_blocks(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + if (b->b_iused == 0) { + return false; + } + } + return true; +} + +static bool +no_redundant_jumps(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + cfg_instr *last = _PyCfg_BasicblockLastInstr(b); + if (last != NULL) { + if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) { + assert(last->i_target != b->b_next); + if (last->i_target == b->b_next) { + return false; + } + } + } + } + return true; +} + +#endif + +/***** CFG preprocessing (jump targets and exceptions) *****/ + +static int +normalize_jumps_in_block(cfg_builder *g, basicblock *b) { + cfg_instr *last = _PyCfg_BasicblockLastInstr(b); + if (last == NULL || !is_jump(last)) { + return SUCCESS; + } + assert(!IS_ASSEMBLER_OPCODE(last->i_opcode)); + bool is_forward = last->i_target->b_visited == 0; + switch(last->i_opcode) { + case JUMP: + last->i_opcode = is_forward ? JUMP_FORWARD : JUMP_BACKWARD; + return SUCCESS; + case JUMP_NO_INTERRUPT: + last->i_opcode = is_forward ? + JUMP_FORWARD : JUMP_BACKWARD_NO_INTERRUPT; + return SUCCESS; + } + int reversed_opcode = 0; + switch(last->i_opcode) { + case POP_JUMP_IF_NOT_NONE: + reversed_opcode = POP_JUMP_IF_NONE; + break; + case POP_JUMP_IF_NONE: + reversed_opcode = POP_JUMP_IF_NOT_NONE; + break; + case POP_JUMP_IF_FALSE: + reversed_opcode = POP_JUMP_IF_TRUE; + break; + case POP_JUMP_IF_TRUE: + reversed_opcode = POP_JUMP_IF_FALSE; + break; + } + if (is_forward) { + return SUCCESS; + } + /* transform 'conditional jump T' to + * 'reversed_jump b_next' followed by 'jump_backwards T' + */ + + basicblock *target = last->i_target; + basicblock *backwards_jump = cfg_builder_new_block(g); + if (backwards_jump == NULL) { + return ERROR; + } + basicblock_addop(backwards_jump, JUMP, target->b_label.id, NO_LOCATION); + backwards_jump->b_instr[0].i_target = target; + last->i_opcode = reversed_opcode; + last->i_target = b->b_next; + + backwards_jump->b_cold = b->b_cold; + backwards_jump->b_next = b->b_next; + b->b_next = backwards_jump; + return SUCCESS; +} + + +static int +normalize_jumps(_PyCfgBuilder *g) +{ + basicblock *entryblock = g->g_entryblock; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_visited = 0; + } + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_visited = 1; + RETURN_IF_ERROR(normalize_jumps_in_block(g, b)); + } + return SUCCESS; +} + +static void +resolve_jump_offsets(basicblock *entryblock) +{ + int bsize, totsize, extended_arg_recompile; + + /* Compute the size of each block and fixup jump args. + Replace block pointer with position in bytecode. */ + do { + totsize = 0; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + bsize = blocksize(b); + b->b_offset = totsize; + totsize += bsize; + } + extended_arg_recompile = 0; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + bsize = b->b_offset; + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + int isize = _PyCfg_InstrSize(instr); + /* Relative jumps are computed relative to + the instruction pointer after fetching + the jump instruction. 
+ */ + bsize += isize; + if (is_jump(instr)) { + instr->i_oparg = instr->i_target->b_offset; + if (is_relative_jump(instr)) { + if (instr->i_oparg < bsize) { + assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode)); + instr->i_oparg = bsize - instr->i_oparg; + } + else { + assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode)); + instr->i_oparg -= bsize; + } + } + else { + assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode)); + } + if (_PyCfg_InstrSize(instr) != isize) { + extended_arg_recompile = 1; + } + } + } + } + + /* XXX: This is an awful hack that could hurt performance, but + on the bright side it should work until we come up + with a better solution. + + The issue is that in the first loop blocksize() is called + which calls _PyCfg_InstrSize() which requires i_oparg be set + appropriately. There is a bootstrap problem because + i_oparg is calculated in the second loop above. + + So we loop until we stop seeing new EXTENDED_ARGs. + The only EXTENDED_ARGs that could be popping up are + ones in jump instructions. So this should converge + fairly quickly. + */ + } while (extended_arg_recompile); +} + +int +_PyCfg_ResolveJumps(_PyCfgBuilder *g) +{ + RETURN_IF_ERROR(normalize_jumps(g)); + assert(no_redundant_jumps(g)); + resolve_jump_offsets(g->g_entryblock); + return SUCCESS; +} + +static int +check_cfg(cfg_builder *g) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + /* Raise SystemError if jump or exit is not last instruction in the block. */ + for (int i = 0; i < b->b_iused; i++) { + int opcode = b->b_instr[i].i_opcode; + assert(!IS_ASSEMBLER_OPCODE(opcode)); + if (IS_TERMINATOR_OPCODE(opcode)) { + if (i != b->b_iused - 1) { + PyErr_SetString(PyExc_SystemError, "malformed control flow graph."); + return ERROR; + } + } + } + } + return SUCCESS; +} + +/* Calculate the actual jump target from the target_label */ +static int +translate_jump_labels_to_targets(basicblock *entryblock) +{ + int max_label = -1; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + if (b->b_label.id > max_label) { + max_label = b->b_label.id; + } + } + size_t mapsize = sizeof(basicblock *) * (max_label + 1); + basicblock **label2block = (basicblock **)PyMem_Malloc(mapsize); + if (!label2block) { + PyErr_NoMemory(); + return ERROR; + } + memset(label2block, 0, mapsize); + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + if (b->b_label.id >= 0) { + label2block[b->b_label.id] = b; + } + } + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + assert(instr->i_target == NULL); + if (HAS_TARGET(instr->i_opcode)) { + int lbl = instr->i_oparg; + assert(lbl >= 0 && lbl <= max_label); + instr->i_target = label2block[lbl]; + assert(instr->i_target != NULL); + assert(instr->i_target->b_label.id == lbl); + } + } + } + PyMem_Free(label2block); + return SUCCESS; +} + + +static int +mark_except_handlers(basicblock *entryblock) { +#ifndef NDEBUG + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + assert(!b->b_except_handler); + } +#endif + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i=0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (is_block_push(instr)) { + instr->i_target->b_except_handler = 1; + } + } + } + return SUCCESS; +} + + +typedef _PyCfgExceptStack ExceptStack; + +static basicblock * +push_except_block(ExceptStack *stack, cfg_instr *setup) { + assert(is_block_push(setup)); + int opcode = setup->i_opcode; + basicblock * target = 
setup->i_target; + if (opcode == SETUP_WITH || opcode == SETUP_CLEANUP) { + target->b_preserve_lasti = 1; + } + stack->handlers[++stack->depth] = target; + return target; +} + +static basicblock * +pop_except_block(ExceptStack *stack) { + assert(stack->depth > 0); + return stack->handlers[--stack->depth]; +} + +static basicblock * +except_stack_top(ExceptStack *stack) { + return stack->handlers[stack->depth]; +} + +static ExceptStack * +make_except_stack(void) { + ExceptStack *new = PyMem_Malloc(sizeof(ExceptStack)); + if (new == NULL) { + PyErr_NoMemory(); + return NULL; + } + new->depth = 0; + new->handlers[0] = NULL; + return new; +} + +static ExceptStack * +copy_except_stack(ExceptStack *stack) { + ExceptStack *copy = PyMem_Malloc(sizeof(ExceptStack)); + if (copy == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(copy, stack, sizeof(ExceptStack)); + return copy; +} + +static basicblock** +make_cfg_traversal_stack(basicblock *entryblock) { + int nblocks = 0; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_visited = 0; + nblocks++; + } + basicblock **stack = (basicblock **)PyMem_Malloc(sizeof(basicblock *) * nblocks); + if (!stack) { + PyErr_NoMemory(); + } + return stack; +} + +Py_LOCAL_INLINE(void) +stackdepth_push(basicblock ***sp, basicblock *b, int depth) +{ + assert(b->b_startdepth < 0 || b->b_startdepth == depth); + if (b->b_startdepth < depth && b->b_startdepth < 100) { + assert(b->b_startdepth < 0); + b->b_startdepth = depth; + *(*sp)++ = b; + } +} + +/* Find the flow path that needs the largest stack. We assume that + * cycles in the flow graph have no net effect on the stack depth. + */ +int +_PyCfg_Stackdepth(basicblock *entryblock, int code_flags) +{ + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_startdepth = INT_MIN; + } + basicblock **stack = make_cfg_traversal_stack(entryblock); + if (!stack) { + return ERROR; + } + + int maxdepth = 0; + basicblock **sp = stack; + if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + stackdepth_push(&sp, entryblock, 1); + } else { + stackdepth_push(&sp, entryblock, 0); + } + + while (sp != stack) { + basicblock *b = *--sp; + int depth = b->b_startdepth; + assert(depth >= 0); + basicblock *next = b->b_next; + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + int effect = PyCompile_OpcodeStackEffectWithJump(instr->i_opcode, instr->i_oparg, 0); + if (effect == PY_INVALID_STACK_EFFECT) { + PyErr_Format(PyExc_SystemError, + "compiler PyCompile_OpcodeStackEffectWithJump(opcode=%d, arg=%i) failed", + instr->i_opcode, instr->i_oparg); + return ERROR; + } + int new_depth = depth + effect; + assert(new_depth >= 0); /* invalid code or bug in stackdepth() */ + if (new_depth > maxdepth) { + maxdepth = new_depth; + } + if (HAS_TARGET(instr->i_opcode)) { + effect = PyCompile_OpcodeStackEffectWithJump(instr->i_opcode, instr->i_oparg, 1); + assert(effect != PY_INVALID_STACK_EFFECT); + int target_depth = depth + effect; + assert(target_depth >= 0); /* invalid code or bug in stackdepth() */ + if (target_depth > maxdepth) { + maxdepth = target_depth; + } + stackdepth_push(&sp, instr->i_target, target_depth); + } + depth = new_depth; + assert(!IS_ASSEMBLER_OPCODE(instr->i_opcode)); + if (IS_UNCONDITIONAL_JUMP_OPCODE(instr->i_opcode) || + IS_SCOPE_EXIT_OPCODE(instr->i_opcode)) + { + /* remaining code is dead */ + next = NULL; + break; + } + } + if (next != NULL) { + assert(BB_HAS_FALLTHROUGH(b)); + stackdepth_push(&sp, next, depth); + } + } + PyMem_Free(stack); 
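+ /* Every reachable block now has its final b_startdepth, and maxdepth
+ is the deepest point reached on any simulated path. */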
+ return maxdepth; +} + +static int +label_exception_targets(basicblock *entryblock) { + basicblock **todo_stack = make_cfg_traversal_stack(entryblock); + if (todo_stack == NULL) { + return ERROR; + } + ExceptStack *except_stack = make_except_stack(); + if (except_stack == NULL) { + PyMem_Free(todo_stack); + PyErr_NoMemory(); + return ERROR; + } + except_stack->depth = 0; + todo_stack[0] = entryblock; + entryblock->b_visited = 1; + entryblock->b_exceptstack = except_stack; + basicblock **todo = &todo_stack[1]; + basicblock *handler = NULL; + while (todo > todo_stack) { + todo--; + basicblock *b = todo[0]; + assert(b->b_visited == 1); + except_stack = b->b_exceptstack; + assert(except_stack != NULL); + b->b_exceptstack = NULL; + handler = except_stack_top(except_stack); + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (is_block_push(instr)) { + if (!instr->i_target->b_visited) { + ExceptStack *copy = copy_except_stack(except_stack); + if (copy == NULL) { + goto error; + } + instr->i_target->b_exceptstack = copy; + todo[0] = instr->i_target; + instr->i_target->b_visited = 1; + todo++; + } + handler = push_except_block(except_stack, instr); + } + else if (instr->i_opcode == POP_BLOCK) { + handler = pop_except_block(except_stack); + } + else if (is_jump(instr)) { + instr->i_except = handler; + assert(i == b->b_iused -1); + if (!instr->i_target->b_visited) { + if (BB_HAS_FALLTHROUGH(b)) { + ExceptStack *copy = copy_except_stack(except_stack); + if (copy == NULL) { + goto error; + } + instr->i_target->b_exceptstack = copy; + } + else { + instr->i_target->b_exceptstack = except_stack; + except_stack = NULL; + } + todo[0] = instr->i_target; + instr->i_target->b_visited = 1; + todo++; + } + } + else { + if (instr->i_opcode == YIELD_VALUE) { + instr->i_oparg = except_stack->depth; + } + instr->i_except = handler; + } + } + if (BB_HAS_FALLTHROUGH(b) && !b->b_next->b_visited) { + assert(except_stack != NULL); + b->b_next->b_exceptstack = except_stack; + todo[0] = b->b_next; + b->b_next->b_visited = 1; + todo++; + } + else if (except_stack != NULL) { + PyMem_Free(except_stack); + } + } +#ifdef Py_DEBUG + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + assert(b->b_exceptstack == NULL); + } +#endif + PyMem_Free(todo_stack); + return SUCCESS; +error: + PyMem_Free(todo_stack); + PyMem_Free(except_stack); + return ERROR; +} + +/***** CFG optimizations *****/ + +static int +mark_reachable(basicblock *entryblock) { + basicblock **stack = make_cfg_traversal_stack(entryblock); + if (stack == NULL) { + return ERROR; + } + basicblock **sp = stack; + entryblock->b_predecessors = 1; + *sp++ = entryblock; + while (sp > stack) { + basicblock *b = *(--sp); + b->b_visited = 1; + if (b->b_next && BB_HAS_FALLTHROUGH(b)) { + if (!b->b_next->b_visited) { + assert(b->b_next->b_predecessors == 0); + *sp++ = b->b_next; + } + b->b_next->b_predecessors++; + } + for (int i = 0; i < b->b_iused; i++) { + basicblock *target; + cfg_instr *instr = &b->b_instr[i]; + if (is_jump(instr) || is_block_push(instr)) { + target = instr->i_target; + if (!target->b_visited) { + assert(target->b_predecessors == 0 || target == b->b_next); + *sp++ = target; + } + target->b_predecessors++; + } + } + } + PyMem_Free(stack); + return SUCCESS; +} + +static void +eliminate_empty_basic_blocks(cfg_builder *g) { + /* Eliminate empty blocks */ + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + basicblock *next = b->b_next; + while (next && next->b_iused == 0) { + next = next->b_next; + } + 
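+        /* e.g. a chain b -> empty -> empty -> b' is rewired to b -> b' */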
b->b_next = next; + } + while(g->g_entryblock && g->g_entryblock->b_iused == 0) { + g->g_entryblock = g->g_entryblock->b_next; + } + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + assert(b->b_iused > 0); + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (HAS_TARGET(instr->i_opcode)) { + basicblock *target = instr->i_target; + while (target->b_iused == 0) { + target = target->b_next; + } + instr->i_target = target; + assert(instr->i_target && instr->i_target->b_iused > 0); + } + } + } +} + +static int +remove_redundant_nops(basicblock *bb) { + /* Remove NOPs when legal to do so. */ + int dest = 0; + int prev_lineno = -1; + for (int src = 0; src < bb->b_iused; src++) { + int lineno = bb->b_instr[src].i_loc.lineno; + if (bb->b_instr[src].i_opcode == NOP) { + /* Eliminate no-op if it doesn't have a line number */ + if (lineno < 0) { + continue; + } + /* or, if the previous instruction had the same line number. */ + if (prev_lineno == lineno) { + continue; + } + /* or, if the next instruction has same line number or no line number */ + if (src < bb->b_iused - 1) { + int next_lineno = bb->b_instr[src+1].i_loc.lineno; + if (next_lineno == lineno) { + continue; + } + if (next_lineno < 0) { + bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc; + continue; + } + } + else { + basicblock* next = bb->b_next; + while (next && next->b_iused == 0) { + next = next->b_next; + } + /* or if last instruction in BB and next BB has same line number */ + if (next) { + if (lineno == next->b_instr[0].i_loc.lineno) { + continue; + } + } + } + + } + if (dest != src) { + bb->b_instr[dest] = bb->b_instr[src]; + } + dest++; + prev_lineno = lineno; + } + assert(dest <= bb->b_iused); + int num_removed = bb->b_iused - dest; + bb->b_iused = dest; + return num_removed; +} + +static int +remove_redundant_nops_and_pairs(basicblock *entryblock) +{ + bool done = false; + + while (! done) { + done = true; + cfg_instr *prev_instr = NULL; + cfg_instr *instr = NULL; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + remove_redundant_nops(b); + if (IS_LABEL(b->b_label)) { + /* this block is a jump target, forget instr */ + instr = NULL; + } + for (int i = 0; i < b->b_iused; i++) { + prev_instr = instr; + instr = &b->b_instr[i]; + int prev_opcode = prev_instr ? prev_instr->i_opcode : 0; + int prev_oparg = prev_instr ? prev_instr->i_oparg : 0; + int opcode = instr->i_opcode; + bool is_redundant_pair = false; + if (opcode == POP_TOP) { + if (prev_opcode == LOAD_CONST) { + is_redundant_pair = true; + } + else if (prev_opcode == COPY && prev_oparg == 1) { + is_redundant_pair = true; + } + } + if (is_redundant_pair) { + INSTR_SET_OP0(prev_instr, NOP); + INSTR_SET_OP0(instr, NOP); + done = false; + } + } + if ((instr && is_jump(instr)) || !BB_HAS_FALLTHROUGH(b)) { + instr = NULL; + } + } + } + return SUCCESS; +} + +static int +remove_redundant_jumps(cfg_builder *g) { + /* If a non-empty block ends with a jump instruction, check if the next + * non-empty block reached through normal flow control is the target + * of that jump. If it is, then the jump instruction is redundant and + * can be deleted. 
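+     * For example, an unconditional JUMP whose target is the very next
+     * non-empty block in the b_next chain is rewritten to a NOP.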
+     */
+    assert(no_empty_basic_blocks(g));
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+        assert(last != NULL);
+        assert(!IS_ASSEMBLER_OPCODE(last->i_opcode));
+        if (IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
+            if (last->i_target == NULL) {
+                PyErr_SetString(PyExc_SystemError, "jump with NULL target");
+                return ERROR;
+            }
+            if (last->i_target == b->b_next) {
+                assert(b->b_next->b_iused);
+                INSTR_SET_OP0(last, NOP);
+            }
+        }
+    }
+    return SUCCESS;
+}
+
+/* Maximum size of basic block that should be copied in optimizer */
+#define MAX_COPY_SIZE 4
+
+/* If this block ends with an unconditional jump to a small exit block, then
+ * remove the jump and extend this block with the target.
+ * Returns 1 if extended, 0 if no change, and -1 on error.
+ */
+static int
+inline_small_exit_blocks(basicblock *bb) {
+    cfg_instr *last = _PyCfg_BasicblockLastInstr(bb);
+    if (last == NULL) {
+        return 0;
+    }
+    if (!IS_UNCONDITIONAL_JUMP_OPCODE(last->i_opcode)) {
+        return 0;
+    }
+    basicblock *target = last->i_target;
+    if (basicblock_exits_scope(target) && target->b_iused <= MAX_COPY_SIZE) {
+        INSTR_SET_OP0(last, NOP);
+        RETURN_IF_ERROR(basicblock_append_instructions(bb, target));
+        return 1;
+    }
+    return 0;
+}
+
+// Attempt to eliminate jumps to jumps by updating inst to jump to
+// target->i_target using the provided opcode. Return whether or not the
+// optimization was successful.
+static bool
+jump_thread(cfg_instr *inst, cfg_instr *target, int opcode)
+{
+    assert(is_jump(inst));
+    assert(is_jump(target));
+    // bpo-45773: If inst->i_target == target->i_target, then nothing actually
+    // changes (and we fall into an infinite loop):
+    if ((inst->i_loc.lineno == target->i_loc.lineno || target->i_loc.lineno == -1) &&
+        inst->i_target != target->i_target)
+    {
+        inst->i_target = target->i_target;
+        inst->i_opcode = opcode;
+        return true;
+    }
+    return false;
+}
+
+static PyObject*
+get_const_value(int opcode, int oparg, PyObject *co_consts)
+{
+    PyObject *constant = NULL;
+    assert(HAS_CONST(opcode));
+    if (opcode == LOAD_CONST) {
+        constant = PyList_GET_ITEM(co_consts, oparg);
+    }
+
+    if (constant == NULL) {
+        PyErr_SetString(PyExc_SystemError,
+                        "Internal error: failed to get value of a constant");
+        return NULL;
+    }
+    return Py_NewRef(constant);
+}
+
+/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
+   with LOAD_CONST (c1, c2, ... cn).
+   The consts table must still be in list form so that the
+   new constant (c1, c2, ... cn) can be appended.
+   Called with inst pointing to the first LOAD_CONST.
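+   For example, LOAD_CONST 1; LOAD_CONST 2; BUILD_TUPLE 2 becomes
+   NOP; NOP; LOAD_CONST (1, 2).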
+*/
+static int
+fold_tuple_on_constants(PyObject *const_cache,
+                        cfg_instr *inst,
+                        int n, PyObject *consts)
+{
+    /* Pre-conditions */
+    assert(PyDict_CheckExact(const_cache));
+    assert(PyList_CheckExact(consts));
+    assert(inst[n].i_opcode == BUILD_TUPLE);
+    assert(inst[n].i_oparg == n);
+
+    for (int i = 0; i < n; i++) {
+        if (!HAS_CONST(inst[i].i_opcode)) {
+            return SUCCESS;
+        }
+    }
+
+    /* Build up the new tuple of constants */
+    PyObject *newconst = PyTuple_New(n);
+    if (newconst == NULL) {
+        return ERROR;
+    }
+    for (int i = 0; i < n; i++) {
+        int op = inst[i].i_opcode;
+        int arg = inst[i].i_oparg;
+        PyObject *constant = get_const_value(op, arg, consts);
+        if (constant == NULL) {
+            /* don't leak the partially built tuple */
+            Py_DECREF(newconst);
+            return ERROR;
+        }
+        PyTuple_SET_ITEM(newconst, i, constant);
+    }
+    if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
+        Py_DECREF(newconst);
+        return ERROR;
+    }
+
+    Py_ssize_t index;
+    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
+        if (PyList_GET_ITEM(consts, index) == newconst) {
+            break;
+        }
+    }
+    if (index == PyList_GET_SIZE(consts)) {
+        if ((size_t)index >= (size_t)INT_MAX - 1) {
+            Py_DECREF(newconst);
+            PyErr_SetString(PyExc_OverflowError, "too many constants");
+            return ERROR;
+        }
+        if (PyList_Append(consts, newconst)) {
+            Py_DECREF(newconst);
+            return ERROR;
+        }
+    }
+    Py_DECREF(newconst);
+    for (int i = 0; i < n; i++) {
+        INSTR_SET_OP0(&inst[i], NOP);
+    }
+    INSTR_SET_OP1(&inst[n], LOAD_CONST, (int)index);
+    return SUCCESS;
+}
+
+#define VISITED (-1)
+
+// Replace an arbitrary run of SWAPs and NOPs with an optimal one that has the
+// same effect.
+static int
+swaptimize(basicblock *block, int *ix)
+{
+    // NOTE: "./python -m test test_patma" serves as a good, quick stress test
+    // for this function. Make sure to blow away cached *.pyc files first!
+    assert(*ix < block->b_iused);
+    cfg_instr *instructions = &block->b_instr[*ix];
+    // Find the length of the current sequence of SWAPs and NOPs, and record the
+    // maximum depth of the stack manipulations:
+    assert(instructions[0].i_opcode == SWAP);
+    int depth = instructions[0].i_oparg;
+    int len = 0;
+    bool more = false;
+    int limit = block->b_iused - *ix;
+    while (++len < limit) {
+        int opcode = instructions[len].i_opcode;
+        if (opcode == SWAP) {
+            depth = Py_MAX(depth, instructions[len].i_oparg);
+            more = true;
+        }
+        else if (opcode != NOP) {
+            break;
+        }
+    }
+    // It's already optimal if there's only one SWAP:
+    if (!more) {
+        return SUCCESS;
+    }
+    // Create an array with elements {0, 1, 2, ..., depth - 1}:
+    int *stack = PyMem_Malloc(depth * sizeof(int));
+    if (stack == NULL) {
+        PyErr_NoMemory();
+        return ERROR;
+    }
+    for (int i = 0; i < depth; i++) {
+        stack[i] = i;
+    }
+    // Simulate the combined effect of these instructions by "running" them on
+    // our "stack":
+    for (int i = 0; i < len; i++) {
+        if (instructions[i].i_opcode == SWAP) {
+            int oparg = instructions[i].i_oparg;
+            int top = stack[0];
+            // SWAPs are 1-indexed:
+            stack[0] = stack[oparg - 1];
+            stack[oparg - 1] = top;
+        }
+    }
+    // Now we can begin! Our approach here is based on a solution to a closely
+    // related problem (https://cs.stackexchange.com/a/13938). It's easiest to
+    // think of this algorithm as determining the steps needed to efficiently
+    // "un-shuffle" our stack. By performing the moves in *reverse* order,
+    // though, we can efficiently *shuffle* it! For this reason, we will be
+    // replacing instructions starting from the *end* of the run.
Since the + // solution is optimal, we don't need to worry about running out of space: + int current = len - 1; + for (int i = 0; i < depth; i++) { + // Skip items that have already been visited, or just happen to be in + // the correct location: + if (stack[i] == VISITED || stack[i] == i) { + continue; + } + // Okay, we've found an item that hasn't been visited. It forms a cycle + // with other items; traversing the cycle and swapping each item with + // the next will put them all in the correct place. The weird + // loop-and-a-half is necessary to insert 0 into every cycle, since we + // can only swap from that position: + int j = i; + while (true) { + // Skip the actual swap if our item is zero, since swapping the top + // item with itself is pointless: + if (j) { + assert(0 <= current); + // SWAPs are 1-indexed: + instructions[current].i_opcode = SWAP; + instructions[current--].i_oparg = j + 1; + } + if (stack[j] == VISITED) { + // Completed the cycle: + assert(j == i); + break; + } + int next_j = stack[j]; + stack[j] = VISITED; + j = next_j; + } + } + // NOP out any unused instructions: + while (0 <= current) { + INSTR_SET_OP0(&instructions[current--], NOP); + } + PyMem_Free(stack); + *ix += len - 1; + return SUCCESS; +} + + +// This list is pretty small, since it's only okay to reorder opcodes that: +// - can't affect control flow (like jumping or raising exceptions) +// - can't invoke arbitrary code (besides finalizers) +// - only touch the TOS (and pop it when finished) +#define SWAPPABLE(opcode) \ + ((opcode) == STORE_FAST || (opcode) == POP_TOP) + +static int +next_swappable_instruction(basicblock *block, int i, int lineno) +{ + while (++i < block->b_iused) { + cfg_instr *instruction = &block->b_instr[i]; + if (0 <= lineno && instruction->i_loc.lineno != lineno) { + // Optimizing across this instruction could cause user-visible + // changes in the names bound between line tracing events! + return -1; + } + if (instruction->i_opcode == NOP) { + continue; + } + if (SWAPPABLE(instruction->i_opcode)) { + return i; + } + return -1; + } + return -1; +} + +// Attempt to apply SWAPs statically by swapping *instructions* rather than +// stack items. For example, we can replace SWAP(2), POP_TOP, STORE_FAST(42) +// with the more efficient NOP, STORE_FAST(42), POP_TOP. +static void +apply_static_swaps(basicblock *block, int i) +{ + // SWAPs are to our left, and potential swaperands are to our right: + for (; 0 <= i; i--) { + assert(i < block->b_iused); + cfg_instr *swap = &block->b_instr[i]; + if (swap->i_opcode != SWAP) { + if (swap->i_opcode == NOP || SWAPPABLE(swap->i_opcode)) { + // Nope, but we know how to handle these. Keep looking: + continue; + } + // We can't reason about what this instruction does. Bail: + return; + } + int j = next_swappable_instruction(block, i, -1); + if (j < 0) { + return; + } + int k = j; + int lineno = block->b_instr[j].i_loc.lineno; + for (int count = swap->i_oparg - 1; 0 < count; count--) { + k = next_swappable_instruction(block, k, lineno); + if (k < 0) { + return; + } + } + // Success! 
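+        // The SWAP becomes a NOP and the two affected instructions trade
+        // places; e.g. SWAP(2); STORE_FAST x; STORE_FAST y becomes
+        // NOP; STORE_FAST y; STORE_FAST x, which stores the same values.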
+ INSTR_SET_OP0(swap, NOP); + cfg_instr temp = block->b_instr[j]; + block->b_instr[j] = block->b_instr[k]; + block->b_instr[k] = temp; + } +} + +static int +optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) +{ + assert(PyDict_CheckExact(const_cache)); + assert(PyList_CheckExact(consts)); + cfg_instr nop; + INSTR_SET_OP0(&nop, NOP); + cfg_instr *target = &nop; + int opcode = 0; + int oparg = 0; + int nextop = 0; + for (int i = 0; i < bb->b_iused; i++) { + cfg_instr *inst = &bb->b_instr[i]; + bool is_copy_of_load_const = (opcode == LOAD_CONST && + inst->i_opcode == COPY && + inst->i_oparg == 1); + if (! is_copy_of_load_const) { + opcode = inst->i_opcode; + oparg = inst->i_oparg; + if (HAS_TARGET(opcode)) { + assert(inst->i_target->b_iused > 0); + target = &inst->i_target->b_instr[0]; + assert(!IS_ASSEMBLER_OPCODE(target->i_opcode)); + } + else { + target = &nop; + } + } + nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0; + assert(!IS_ASSEMBLER_OPCODE(opcode)); + switch (opcode) { + /* Remove LOAD_CONST const; conditional jump */ + case LOAD_CONST: + { + PyObject* cnt; + int is_true; + int jump_if_true; + switch(nextop) { + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + cnt = get_const_value(opcode, oparg, consts); + if (cnt == NULL) { + goto error; + } + is_true = PyObject_IsTrue(cnt); + Py_DECREF(cnt); + if (is_true == -1) { + goto error; + } + INSTR_SET_OP0(inst, NOP); + jump_if_true = nextop == POP_JUMP_IF_TRUE; + if (is_true == jump_if_true) { + bb->b_instr[i+1].i_opcode = JUMP; + } + else { + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); + } + break; + case IS_OP: + cnt = get_const_value(opcode, oparg, consts); + if (cnt == NULL) { + goto error; + } + int jump_op = i+2 < bb->b_iused ? bb->b_instr[i+2].i_opcode : 0; + if (Py_IsNone(cnt) && (jump_op == POP_JUMP_IF_FALSE || jump_op == POP_JUMP_IF_TRUE)) { + unsigned char nextarg = bb->b_instr[i+1].i_oparg; + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); + bb->b_instr[i+2].i_opcode = nextarg ^ (jump_op == POP_JUMP_IF_FALSE) ? + POP_JUMP_IF_NOT_NONE : POP_JUMP_IF_NONE; + } + Py_DECREF(cnt); + break; + case RETURN_VALUE: + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg); + break; + } + break; + } + /* Try to fold tuples of constants. + Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1). + Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2). + Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). 
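+           For example, the swap idiom  a, b = b, a  reaches this pass as
+           LOAD b; LOAD a; BUILD_TUPLE 2; UNPACK_SEQUENCE 2 and leaves it
+           as LOAD b; LOAD a; SWAP(2).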
+         */
+        case BUILD_TUPLE:
+            if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) {
+                switch(oparg) {
+                    case 1:
+                        INSTR_SET_OP0(inst, NOP);
+                        INSTR_SET_OP0(&bb->b_instr[i + 1], NOP);
+                        continue;
+                    case 2:
+                    case 3:
+                        INSTR_SET_OP0(inst, NOP);
+                        bb->b_instr[i+1].i_opcode = SWAP;
+                        continue;
+                }
+            }
+            if (i >= oparg) {
+                if (fold_tuple_on_constants(const_cache, inst-oparg, oparg, consts)) {
+                    goto error;
+                }
+            }
+            break;
+        case POP_JUMP_IF_NOT_NONE:
+        case POP_JUMP_IF_NONE:
+            switch (target->i_opcode) {
+                case JUMP:
+                    i -= jump_thread(inst, target, inst->i_opcode);
+            }
+            break;
+        case POP_JUMP_IF_FALSE:
+            switch (target->i_opcode) {
+                case JUMP:
+                    i -= jump_thread(inst, target, POP_JUMP_IF_FALSE);
+            }
+            break;
+        case POP_JUMP_IF_TRUE:
+            switch (target->i_opcode) {
+                case JUMP:
+                    i -= jump_thread(inst, target, POP_JUMP_IF_TRUE);
+            }
+            break;
+        case JUMP:
+            switch (target->i_opcode) {
+                case JUMP:
+                    i -= jump_thread(inst, target, JUMP);
+            }
+            break;
+        case FOR_ITER:
+            if (target->i_opcode == JUMP) {
+                /* This will not work now because the jump (at target) could
+                 * be forward or backward and FOR_ITER only jumps forward. We
+                 * can re-enable this if ever we implement a backward version
+                 * of FOR_ITER.
+                 */
+                /*
+                i -= jump_thread(inst, target, FOR_ITER);
+                */
+            }
+            break;
+        case SWAP:
+            if (oparg == 1) {
+                INSTR_SET_OP0(inst, NOP);
+                break;
+            }
+            if (swaptimize(bb, &i) < 0) {
+                goto error;
+            }
+            apply_static_swaps(bb, i);
+            break;
+        case KW_NAMES:
+            break;
+        case PUSH_NULL:
+            if (nextop == LOAD_GLOBAL && (inst[1].i_oparg & 1) == 0) {
+                INSTR_SET_OP0(inst, NOP);
+                inst[1].i_oparg |= 1;
+            }
+            break;
+        default:
+            /* All HAS_CONST opcodes should be handled with LOAD_CONST */
+            assert (!HAS_CONST(inst->i_opcode));
+        }
+    }
+    return SUCCESS;
+error:
+    return ERROR;
+}
+
+
+/* Perform optimizations on a control flow graph.
+   The consts object should still be in list form to allow new constants
+   to be appended.
+
+   Code transformations that reduce code size initially fill the gaps with
+   NOPs. Later those NOPs are removed.
+*/
+static int
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
+{
+    assert(PyDict_CheckExact(const_cache));
+    RETURN_IF_ERROR(check_cfg(g));
+    eliminate_empty_basic_blocks(g);
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        RETURN_IF_ERROR(inline_small_exit_blocks(b));
+    }
+    assert(no_empty_basic_blocks(g));
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
+        assert(b->b_predecessors == 0);
+    }
+    RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        RETURN_IF_ERROR(inline_small_exit_blocks(b));
+    }
+    RETURN_IF_ERROR(mark_reachable(g->g_entryblock));
+
+    /* Delete unreachable instructions */
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        if (b->b_predecessors == 0) {
+            b->b_iused = 0;
+        }
+    }
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        remove_redundant_nops(b);
+    }
+    eliminate_empty_basic_blocks(g);
+    assert(no_redundant_nops(g));
+    RETURN_IF_ERROR(remove_redundant_jumps(g));
+    return SUCCESS;
+}
+
+// helper functions for add_checks_for_loads_of_unknown_variables
+static inline void
+maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
+{
+    // Push b if the unsafe mask is giving us any new information.
+    // To avoid overflowing the stack, only allow each block once.
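+    // Since a block's mask only ever gains bits, each block can be
+    // (re)pushed at most 64 times, so the worklist loop terminates.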
+    // Use b->b_visited=1 to mean that b is currently on the stack.
+    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
+    if (b->b_unsafe_locals_mask != both) {
+        b->b_unsafe_locals_mask = both;
+        // More work left to do.
+        if (!b->b_visited) {
+            // not on the stack, so push it.
+            *(*sp)++ = b;
+            b->b_visited = 1;
+        }
+    }
+}
+
+static void
+scan_block_for_locals(basicblock *b, basicblock ***sp)
+{
+    // bit i is set if local i is potentially uninitialized
+    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
+    for (int i = 0; i < b->b_iused; i++) {
+        cfg_instr *instr = &b->b_instr[i];
+        assert(instr->i_opcode != EXTENDED_ARG);
+        assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+        if (instr->i_except != NULL) {
+            maybe_push(instr->i_except, unsafe_mask, sp);
+        }
+        if (instr->i_oparg >= 64) {
+            continue;
+        }
+        assert(instr->i_oparg >= 0);
+        uint64_t bit = (uint64_t)1 << instr->i_oparg;
+        switch (instr->i_opcode) {
+            case DELETE_FAST:
+                unsafe_mask |= bit;
+                break;
+            case STORE_FAST:
+                unsafe_mask &= ~bit;
+                break;
+            case LOAD_FAST_CHECK:
+                // If this doesn't raise, then the local is defined.
+                unsafe_mask &= ~bit;
+                break;
+            case LOAD_FAST:
+                if (unsafe_mask & bit) {
+                    instr->i_opcode = LOAD_FAST_CHECK;
+                }
+                unsafe_mask &= ~bit;
+                break;
+        }
+    }
+    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
+        maybe_push(b->b_next, unsafe_mask, sp);
+    }
+    cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+    if (last && is_jump(last)) {
+        assert(last->i_target != NULL);
+        maybe_push(last->i_target, unsafe_mask, sp);
+    }
+}
+
+static int
+fast_scan_many_locals(basicblock *entryblock, int nlocals)
+{
+    assert(nlocals > 64);
+    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
+    if (states == NULL) {
+        PyErr_NoMemory();
+        return ERROR;
+    }
+    Py_ssize_t blocknum = 0;
+    // state[i - 64] == blocknum if local i is guaranteed to
+    // be initialized, i.e., if it has had a previous LOAD_FAST or
+    // STORE_FAST within that basicblock (not followed by DELETE_FAST).
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        blocknum++;
+        for (int i = 0; i < b->b_iused; i++) {
+            cfg_instr *instr = &b->b_instr[i];
+            assert(instr->i_opcode != EXTENDED_ARG);
+            assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+            int arg = instr->i_oparg;
+            if (arg < 64) {
+                continue;
+            }
+            assert(arg >= 0);
+            switch (instr->i_opcode) {
+                case DELETE_FAST:
+                    states[arg - 64] = blocknum - 1;
+                    break;
+                case STORE_FAST:
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST:
+                    if (states[arg - 64] != blocknum) {
+                        instr->i_opcode = LOAD_FAST_CHECK;
+                    }
+                    states[arg - 64] = blocknum;
+                    break;
+            }
+        }
+    }
+    PyMem_Free(states);
+    return SUCCESS;
+}
+
+static int
+remove_unused_consts(basicblock *entryblock, PyObject *consts)
+{
+    assert(PyList_CheckExact(consts));
+    Py_ssize_t nconsts = PyList_GET_SIZE(consts);
+    if (nconsts == 0) {
+        return SUCCESS; /* nothing to do */
+    }
+
+    Py_ssize_t *index_map = NULL;
+    Py_ssize_t *reverse_index_map = NULL;
+    int err = ERROR;
+
+    index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t));
+    if (index_map == NULL) {
+        goto end;
+    }
+    for (Py_ssize_t i = 1; i < nconsts; i++) {
+        index_map[i] = -1;
+    }
+    // The first constant may be a docstring; always keep it.
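+    // After the scan below, index_map[i] == i if consts[i] is used and
+    // -1 otherwise; e.g. consts = [doc, 1, 2] with only 2 used gives
+    // index_map = {0, -1, 2}, which the condense step packs to {0, 2}.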
+ index_map[0] = 0; + + /* mark used consts */ + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + if (HAS_CONST(b->b_instr[i].i_opcode)) { + int index = b->b_instr[i].i_oparg; + index_map[index] = index; + } + } + } + /* now index_map[i] == i if consts[i] is used, -1 otherwise */ + /* condense consts */ + Py_ssize_t n_used_consts = 0; + for (int i = 0; i < nconsts; i++) { + if (index_map[i] != -1) { + assert(index_map[i] == i); + index_map[n_used_consts++] = index_map[i]; + } + } + if (n_used_consts == nconsts) { + /* nothing to do */ + err = SUCCESS; + goto end; + } + + /* move all used consts to the beginning of the consts list */ + assert(n_used_consts < nconsts); + for (Py_ssize_t i = 0; i < n_used_consts; i++) { + Py_ssize_t old_index = index_map[i]; + assert(i <= old_index && old_index < nconsts); + if (i != old_index) { + PyObject *value = PyList_GET_ITEM(consts, index_map[i]); + assert(value != NULL); + PyList_SetItem(consts, i, Py_NewRef(value)); + } + } + + /* truncate the consts list at its new size */ + if (PyList_SetSlice(consts, n_used_consts, nconsts, NULL) < 0) { + goto end; + } + /* adjust const indices in the bytecode */ + reverse_index_map = PyMem_Malloc(nconsts * sizeof(Py_ssize_t)); + if (reverse_index_map == NULL) { + goto end; + } + for (Py_ssize_t i = 0; i < nconsts; i++) { + reverse_index_map[i] = -1; + } + for (Py_ssize_t i = 0; i < n_used_consts; i++) { + assert(index_map[i] != -1); + assert(reverse_index_map[index_map[i]] == -1); + reverse_index_map[index_map[i]] = i; + } + + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + if (HAS_CONST(b->b_instr[i].i_opcode)) { + int index = b->b_instr[i].i_oparg; + assert(reverse_index_map[index] >= 0); + assert(reverse_index_map[index] < n_used_consts); + b->b_instr[i].i_oparg = (int)reverse_index_map[index]; + } + } + } + + err = SUCCESS; +end: + PyMem_Free(index_map); + PyMem_Free(reverse_index_map); + return err; +} + + + +static int +add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock, + int nlocals, + int nparams) +{ + if (nlocals == 0) { + return SUCCESS; + } + if (nlocals > 64) { + // To avoid O(nlocals**2) compilation, locals beyond the first + // 64 are only analyzed one basicblock at a time: initialization + // info is not passed between basicblocks. + if (fast_scan_many_locals(entryblock, nlocals) < 0) { + return ERROR; + } + nlocals = 64; + } + basicblock **stack = make_cfg_traversal_stack(entryblock); + if (stack == NULL) { + return ERROR; + } + basicblock **sp = stack; + + // First origin of being uninitialized: + // The non-parameter locals in the entry block. + uint64_t start_mask = 0; + for (int i = nparams; i < nlocals; i++) { + start_mask |= (uint64_t)1 << i; + } + maybe_push(entryblock, start_mask, &sp); + + // Second origin of being uninitialized: + // There could be DELETE_FAST somewhere, so + // be sure to scan each basicblock at least once. + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + scan_block_for_locals(b, &sp); + } + // Now propagate the uncertainty from the origins we found: Use + // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined. 
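+    // For example, in:
+    //     if cond:
+    //         x = 1
+    //     print(x)
+    // x may be unbound when the branch is not taken, so the LOAD_FAST
+    // of x on the last line is rewritten to LOAD_FAST_CHECK.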
+ while (sp > stack) { + basicblock *b = *--sp; + // mark as no longer on stack + b->b_visited = 0; + scan_block_for_locals(b, &sp); + } + PyMem_Free(stack); + return SUCCESS; +} + + +static int +mark_warm(basicblock *entryblock) { + basicblock **stack = make_cfg_traversal_stack(entryblock); + if (stack == NULL) { + return ERROR; + } + basicblock **sp = stack; + + *sp++ = entryblock; + entryblock->b_visited = 1; + while (sp > stack) { + basicblock *b = *(--sp); + assert(!b->b_except_handler); + b->b_warm = 1; + basicblock *next = b->b_next; + if (next && BB_HAS_FALLTHROUGH(b) && !next->b_visited) { + *sp++ = next; + next->b_visited = 1; + } + for (int i=0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (is_jump(instr) && !instr->i_target->b_visited) { + *sp++ = instr->i_target; + instr->i_target->b_visited = 1; + } + } + } + PyMem_Free(stack); + return SUCCESS; +} + +static int +mark_cold(basicblock *entryblock) { + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + assert(!b->b_cold && !b->b_warm); + } + if (mark_warm(entryblock) < 0) { + return ERROR; + } + + basicblock **stack = make_cfg_traversal_stack(entryblock); + if (stack == NULL) { + return ERROR; + } + + basicblock **sp = stack; + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + if (b->b_except_handler) { + assert(!b->b_warm); + *sp++ = b; + b->b_visited = 1; + } + } + + while (sp > stack) { + basicblock *b = *(--sp); + b->b_cold = 1; + basicblock *next = b->b_next; + if (next && BB_HAS_FALLTHROUGH(b)) { + if (!next->b_warm && !next->b_visited) { + *sp++ = next; + next->b_visited = 1; + } + } + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (is_jump(instr)) { + assert(i == b->b_iused - 1); + basicblock *target = b->b_instr[i].i_target; + if (!target->b_warm && !target->b_visited) { + *sp++ = target; + target->b_visited = 1; + } + } + } + } + PyMem_Free(stack); + return SUCCESS; +} + + +static int +push_cold_blocks_to_end(cfg_builder *g, int code_flags) { + basicblock *entryblock = g->g_entryblock; + if (entryblock->b_next == NULL) { + /* single basicblock, no need to reorder */ + return SUCCESS; + } + RETURN_IF_ERROR(mark_cold(entryblock)); + + /* If we have a cold block with fallthrough to a warm block, add */ + /* an explicit jump instead of fallthrough */ + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + if (b->b_cold && BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_next->b_warm) { + basicblock *explicit_jump = cfg_builder_new_block(g); + if (explicit_jump == NULL) { + return ERROR; + } + basicblock_addop(explicit_jump, JUMP, b->b_next->b_label.id, NO_LOCATION); + explicit_jump->b_cold = 1; + explicit_jump->b_next = b->b_next; + b->b_next = explicit_jump; + + /* set target */ + cfg_instr *last = _PyCfg_BasicblockLastInstr(explicit_jump); + last->i_target = explicit_jump->b_next; + } + } + + assert(!entryblock->b_cold); /* First block can't be cold */ + basicblock *cold_blocks = NULL; + basicblock *cold_blocks_tail = NULL; + + basicblock *b = entryblock; + while(b->b_next) { + assert(!b->b_cold); + while (b->b_next && !b->b_next->b_cold) { + b = b->b_next; + } + if (b->b_next == NULL) { + /* no more cold blocks */ + break; + } + + /* b->b_next is the beginning of a cold streak */ + assert(!b->b_cold && b->b_next->b_cold); + + basicblock *b_end = b->b_next; + while (b_end->b_next && b_end->b_next->b_cold) { + b_end = b_end->b_next; + } + + /* b_end is the end of the cold streak */ + assert(b_end && b_end->b_cold); + assert(b_end->b_next == 
NULL || !b_end->b_next->b_cold);
+
+        if (cold_blocks == NULL) {
+            cold_blocks = b->b_next;
+        }
+        else {
+            cold_blocks_tail->b_next = b->b_next;
+        }
+        cold_blocks_tail = b_end;
+        b->b_next = b_end->b_next;
+        b_end->b_next = NULL;
+    }
+    assert(b != NULL && b->b_next == NULL);
+    b->b_next = cold_blocks;
+
+    if (cold_blocks != NULL) {
+        RETURN_IF_ERROR(remove_redundant_jumps(g));
+    }
+    return SUCCESS;
+}
+
+int
+_PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
+                        int code_flags, int nlocals, int nparams)
+{
+    assert(cfg_builder_check(g));
+    /** Preprocessing **/
+    /* Map labels to targets and mark exception handlers */
+    RETURN_IF_ERROR(translate_jump_labels_to_targets(g->g_entryblock));
+    RETURN_IF_ERROR(mark_except_handlers(g->g_entryblock));
+    RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
+
+    /** Optimization **/
+    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache));
+    RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
+    RETURN_IF_ERROR(
+        add_checks_for_loads_of_uninitialized_variables(
+            g->g_entryblock, nlocals, nparams));
+
+    RETURN_IF_ERROR(push_cold_blocks_to_end(g, code_flags));
+    return SUCCESS;
+}
+
+void
+_PyCfg_ConvertExceptionHandlersToNops(basicblock *entryblock)
+{
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        for (int i = 0; i < b->b_iused; i++) {
+            cfg_instr *instr = &b->b_instr[i];
+            if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) {
+                INSTR_SET_OP0(instr, NOP);
+            }
+        }
+    }
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        remove_redundant_nops(b);
+    }
+}
+
+static inline bool
+is_exit_without_lineno(basicblock *b) {
+    if (!basicblock_exits_scope(b)) {
+        return false;
+    }
+    for (int i = 0; i < b->b_iused; i++) {
+        if (b->b_instr[i].i_loc.lineno >= 0) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/* PEP 626 mandates that the f_lineno of a frame is correct
+ * after a frame terminates. It would be prohibitively expensive
+ * to continuously update the f_lineno field at runtime,
+ * so we make sure that all exiting instructions (raises and returns)
+ * have a valid line number, allowing us to compute f_lineno lazily.
+ * We can do this by duplicating the exit blocks without a line number
+ * so that none have more than one predecessor. We can then safely
+ * copy the line number from the sole predecessor block.
+ */
+static int
+duplicate_exits_without_lineno(cfg_builder *g)
+{
+    assert(no_empty_basic_blocks(g));
+    /* Copy all exit blocks without line number that are targets of a jump.
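+     * For example, if two jumps target one RETURN_VALUE block that has
+     * no line number, the block is duplicated so that each copy has a
+     * single predecessor and can take that predecessor's location.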
+     */
+    basicblock *entryblock = g->g_entryblock;
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+        assert(last != NULL);
+        if (is_jump(last)) {
+            basicblock *target = last->i_target;
+            if (is_exit_without_lineno(target) && target->b_predecessors > 1) {
+                basicblock *new_target = copy_basicblock(g, target);
+                if (new_target == NULL) {
+                    return ERROR;
+                }
+                new_target->b_instr[0].i_loc = last->i_loc;
+                last->i_target = new_target;
+                target->b_predecessors--;
+                new_target->b_predecessors = 1;
+                new_target->b_next = target->b_next;
+                target->b_next = new_target;
+            }
+        }
+    }
+
+    /* Any remaining reachable exit blocks without line number can only be reached by
+     * fall through, and thus can only have a single predecessor */
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) {
+            if (is_exit_without_lineno(b->b_next)) {
+                cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+                assert(last != NULL);
+                b->b_next->b_instr[0].i_loc = last->i_loc;
+            }
+        }
+    }
+    return SUCCESS;
+}
+
+
+/* If an instruction has no line number, but its predecessor in the BB does,
+ * then copy the line number. If a successor block has no line number, and only
+ * one predecessor, then inherit the line number.
+ * This ensures that all exit blocks (with one predecessor) receive a line number.
+ * Also reduces the size of the line number table,
+ * but has no impact on the generated line number events.
+ */
+static void
+propagate_line_numbers(basicblock *entryblock) {
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+        if (last == NULL) {
+            continue;
+        }
+
+        location prev_location = NO_LOCATION;
+        for (int i = 0; i < b->b_iused; i++) {
+            if (b->b_instr[i].i_loc.lineno < 0) {
+                b->b_instr[i].i_loc = prev_location;
+            }
+            else {
+                prev_location = b->b_instr[i].i_loc;
+            }
+        }
+        if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) {
+            assert(b->b_next->b_iused);
+            if (b->b_next->b_instr[0].i_loc.lineno < 0) {
+                b->b_next->b_instr[0].i_loc = prev_location;
+            }
+        }
+        if (is_jump(last)) {
+            basicblock *target = last->i_target;
+            if (target->b_predecessors == 1) {
+                if (target->b_instr[0].i_loc.lineno < 0) {
+                    target->b_instr[0].i_loc = prev_location;
+                }
+            }
+        }
+    }
+}
+
+/* Make sure that all returns have a line number, even if early passes
+ * have failed to propagate a correct line number.
+ * The resulting line number may not be correct according to PEP 626,
+ * but should be "good enough", and no worse than in older versions.
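+ * A RETURN_VALUE without a location gets the line number of the most
+ * recent block-ending instruction that had one, falling back to the
+ * function's firstlineno.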
+ */
+static void
+guarantee_lineno_for_exits(basicblock *entryblock, int firstlineno) {
+    int lineno = firstlineno;
+    assert(lineno > 0);
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        cfg_instr *last = _PyCfg_BasicblockLastInstr(b);
+        if (last == NULL) {
+            continue;
+        }
+        if (last->i_loc.lineno < 0) {
+            if (last->i_opcode == RETURN_VALUE) {
+                for (int i = 0; i < b->b_iused; i++) {
+                    assert(b->b_instr[i].i_loc.lineno < 0);
+
+                    b->b_instr[i].i_loc.lineno = lineno;
+                }
+            }
+        }
+        else {
+            lineno = last->i_loc.lineno;
+        }
+    }
+}
+
+int
+_PyCfg_ResolveLineNumbers(cfg_builder *g, int firstlineno)
+{
+    RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
+    propagate_line_numbers(g->g_entryblock);
+    guarantee_lineno_for_exits(g->g_entryblock, firstlineno);
+    return SUCCESS;
+}
+
diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index 08032519f383ce..5c984eb4ae12e3 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -3,7 +3,7 @@
 // Python/bytecodes.c
 // Do not edit!
 
-#ifndef NEED_OPCODE_TABLES
+#ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
 #else
 int
@@ -349,7 +349,7 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
 }
 #endif
 
-#ifndef NEED_OPCODE_TABLES
+#ifndef NEED_OPCODE_METADATA
 extern int _PyOpcode_num_pushed(int opcode, int oparg, bool jump);
 #else
 int
@@ -701,7 +701,7 @@ struct opcode_metadata {
     enum InstructionFormat instr_format;
 };
 
-#ifndef NEED_OPCODE_TABLES
+#ifndef NEED_OPCODE_METADATA
 extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];
 #else
 const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py
index 9b2112f7f5f31d..614b17df740929 100644
--- a/Tools/build/generate_opcode_h.py
+++ b/Tools/build/generate_opcode_h.py
@@ -60,7 +60,7 @@ def write_int_array_from_ops(name, ops, out):
     bits = 0
     for op in ops:
         bits |= 1<<op
-    out.write(f"static const uint32_t {name}[9] = {{\n")
+    out.write(f"const uint32_t {name}[9] = {{\n")
    for i in range(9):
        out.write(f"    {bits & 0xffffffffU}U,\n")
        bits >>= 32
@@ -130,6 +130,8 @@ def main(opcode_py, outfile='Include/opcode.h', internaloutfile='Include/interna
     for name, op in specialized_opmap.items():
         fobj.write(DEFINE.format(name, op))
 
+    iobj.write("\nextern const uint32_t _PyOpcode_RelativeJump[9];\n")
+    iobj.write("\nextern const uint32_t _PyOpcode_Jump[9];\n")
     iobj.write("\nextern const uint8_t _PyOpcode_Caches[256];\n")
     iobj.write("\nextern const uint8_t _PyOpcode_Deopt[256];\n")
     iobj.write("\n#ifdef NEED_OPCODE_TABLES\n")
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index a0bba65545d4f8..62ddeac0265ad8 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -930,7 +930,7 @@ def write_function(
         direction: str, data: list[tuple[AnyInstruction, str]]
     ) -> None:
         self.out.emit("")
-        self.out.emit("#ifndef NEED_OPCODE_TABLES")
+        self.out.emit("#ifndef NEED_OPCODE_METADATA")
         self.out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);")
         self.out.emit("#else")
         self.out.emit("int")
@@ -999,7 +999,7 @@ def write_metadata(self) -> None:
         self.out.emit("")
 
         # Write metadata array declaration
-        self.out.emit("#ifndef NEED_OPCODE_TABLES")
+        self.out.emit("#ifndef NEED_OPCODE_METADATA")
         self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];")
         self.out.emit("#else")
         self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {")
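The NEED_OPCODE_METADATA guard renamed above follows the same single-definition pattern as NEED_OPCODE_TABLES: every file that includes the generated header sees only extern declarations, and exactly one translation unit defines the macro before including the header, so the arrays get exactly one definition. A minimal sketch of that pattern, with a hypothetical header and table name not taken from the patch:

    /* example_metadata.h -- hypothetical header mirroring opcode_metadata.h */
    #ifndef NEED_OPCODE_METADATA
    extern const int example_table[256];   /* ordinary includers: declaration only */
    #else
    const int example_table[256] = {0};    /* the single definer: actual storage */
    #endif

    /* exactly one .c file instantiates the data by compiling:
     *     #define NEED_OPCODE_METADATA
     *     #include "example_metadata.h"
     * every other includer links against that one definition. */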