From bf64c2017f4ee1a0f190859e2da3ad2d3bea4246 Mon Sep 17 00:00:00 2001 From: Ellipse0934 <7466364+Ellipse0934@users.noreply.github.com> Date: Wed, 19 Jan 2022 00:47:46 +0530 Subject: [PATCH 1/3] Implement PTX syntax highlighting in Julia. --- res/pygments/ptx.py | 72 ---------------------------- src/reflection.jl | 114 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 92 deletions(-) delete mode 100644 res/pygments/ptx.py diff --git a/res/pygments/ptx.py b/res/pygments/ptx.py deleted file mode 100644 index 52e437d3..00000000 --- a/res/pygments/ptx.py +++ /dev/null @@ -1,72 +0,0 @@ -from pygments.lexer import RegexLexer, include, words -from pygments.token import * - -# https://docs.nvidia.com/cuda/parallel-thread-execution/index.html - -class CustomLexer(RegexLexer): - string = r'"[^"]*?"' - - followsym = r'[a-zA-Z0-9_$]*' - identifier = r'(?:[a-zA-Z]' + followsym + r'| [_$%]' + followsym + r')' - - tokens = { - 'root': [ - include('whitespace'), - (r'%' + identifier, Name.Variable), - - include('definition'), - include('statement'), - include('type'), - - (identifier, Name.Variable), - - (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), - (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), - (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), - (r'0[0-7]+[LlUu]*', Number.Oct), - (r'\b\d+[LlUu]*\b', Number.Integer), - - (r'[&|^+*/%=~-]', Operator), - (r'[()\[\]\{\},.;<>@]', Punctuation), - ], - 'whitespace': [ - (r'(\n|\s)+', Text), - (r'/\*.*?\*/', Comment.Multiline), - (r'//.*?\n', Comment.Single), - ], - 'definition': [ - (words(('func', 'reg'), prefix=r'\.', suffix=r'\b'), Keyword.Reserved), - (r'^' + identifier + r':', Name.Label), - ], - 'statement': [ - # directive - (words(( - 'address_size', 'file', 'minnctapersm', 'target', 'align', 'func', 'param', - 'tex', 'branchtarget', 'global', 'pragma', 'version', 'callprototype', - 'loc', 'reg', 'visible', 'calltargets', 'local', 'reqntid', 'weak', 'const', - 'maxnctapersm', 'section', 
'entry', 'maxnreg', 'shared', 'extern', - 'maxntid', 'sreg', ), prefix=r'\.', suffix=r'\b'), Keyword), - # instruction - (words(( - 'abs', 'div', 'or', 'sin', 'add', 'ex2', 'pmevent', 'slct', 'vmad', 'addc', - 'exit', 'popc', 'sqrt', 'vmax', 'and', 'fma', 'prefetch', 'st', 'atom', - 'isspacep', 'prefetchu', 'sub', 'vmin', 'bar', 'ld', 'prmt', 'subc', 'bfe', - 'ldu', 'rcp', 'suld', 'vote', 'bfi', 'lg2', 'red', 'suq', 'vset', 'bfind', - 'mad', 'rem', 'sured', 'bret', 'sust', 'vshl', 'brev', 'madc', 'rsqrt', - 'testp', 'vshr', 'brkpt', 'max', 'sad', 'tex', 'vsub', 'call', 'membar', - 'selp', 'tld4', 'clz', 'min', 'set', 'trap', 'xor', 'cnot', 'mov', 'setp', - 'txq', 'copysign', 'mul', 'shf', 'vabsdiff', 'cos', 'shfl', 'cvta', 'not', - 'shr', 'cvt', 'neg', 'shl', 'vadd'), prefix=r'\b', suffix=r'[\.\w]+\b'), Keyword), - (words(( - 'vavrg', 'vmax', 'vmin', 'vset', 'mad', 'vsub', 'mul', 'vabsdiff', - 'vadd'), prefix=r'\b', suffix=r'[24]\b'), Keyword), - ], - 'type': [ - (words(( - 's8', 's16', 's32', 's64', - 'u8', 'u16', 'u32', 'u64', - 'f16', 'f16x2', 'f32', 'f64', - 'b8', 'b16', 'b32', 'b64', - 'pred'), prefix=r'\.', suffix=r'\b'), Keyword.Type), - ], - } diff --git a/src/reflection.jl b/src/reflection.jl index 48c001b0..d9520c1f 100644 --- a/src/reflection.jl +++ b/src/reflection.jl @@ -8,33 +8,107 @@ const Cthulhu = Base.PkgId(UUID("f68482b8-f384-11e8-15f7-abe071a5a75f"), "Cthulh # syntax highlighting # -const _pygmentize = Ref{Union{String,Nothing}}() -function pygmentize() - if !isassigned(_pygmentize) - _pygmentize[] = Sys.which("pygmentize") - end - return _pygmentize[] -end +# https://github.com/JuliaLang/julia/blob/dacd16f068fb27719b31effbe8929952ee2d5b32/stdlib/InteractiveUtils/src/codeview.jl +const hlscheme = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}( + :default => (false, :normal), # e.g. 
comma, equal sign, unknown token + :comment => (false, :light_black), + :label => (false, :light_red), + :instruction => ( true, :light_cyan), + :type => (false, :cyan), + :number => (false, :yellow), + :bracket => (false, :yellow), + :variable => (false, :normal), # e.g. variable, register + :keyword => (false, :light_magenta), + :funcname => (false, :light_yellow), +) function highlight(io::IO, code, lexer) - highlighter = pygmentize() - have_color = get(io, :color, false) - if highlighter === nothing || !have_color + if !get(io, :color, false) print(io, code) + elseif lexer == "llvm" + InteractiveUtils.print_llvm(io, code) + elseif lexer == "ptx" + highlight_ptx(io, code) else - custom_lexer = joinpath(dirname(@__DIR__), "res", "pygments", "$lexer.py") - if isfile(custom_lexer) - lexer = `$custom_lexer -x` - end - - pipe = open(`$highlighter -f terminal -P bg=dark -l $lexer`, "r+") - print(pipe, code) - close(pipe.in) - print(io, read(pipe, String)) + print(io, code) end - return end +const ptx_instructions = [ + "abs", "cvt", "min", "shfl", "vadd", "activemask", "cvta", "mma", "shl", "vadd2", + "add", "discard", "mov", "shr", "vadd4", "addc", "div", "mul", "sin", "vavrg2", + "alloca", "dp2a", "mul24", "slct", "vavrg4", "and", "dp4a", "nanosleep", "sqrt", + "vmad", "applypriority", "ex2", "neg", "st", "vmax", "atom", "exit", "not", + "stackrestore", "vmax2", "bar", "fence", "or", "stacksave", "vmax4", "barrier", + "fma", "pmevent", "sub", "vmin", "bfe", "fns", "popc", "subc", "vmin2", "bfi", + "isspacep", "prefetch", "suld", "vmin4", "bfind", "istypep", "prefetchu", "suq", + "vote", "bmsk", "ld", "prmt", "sured", "vset", "bra", "ldmatrix", "rcp", "sust", + "vset2", "brev", "ldu", "red", "szext", "vset4", "brkpt", "lg2", "redux", "tanh", + "vshl", "brx", "lop3", "rem", "testp", "vshr", "call", "mad", "ret", "tex", "vsub", + "clz", "mad24", "rsqrt", "tld4", "vsub2", "cnot", "madc", "sad", "trap", "vsub4", + "copysign", "match", "selp", "txq", "wmma", "cos", "max", 
"set", "vabsdiff", "xor", + "cp", "mbarrier", "setp", "vabsdiff2", "createpolicy", "membar", "shf", "vabsdiff4"] + +# simple regex-based highlighter +# https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +function highlight_ptx(io::IO, code) + function get_token(s) + # TODO: doesn't handle `ret;`, `{1`, etc; not properly tokenizing symbols + m = match(r"(\s*)([^\s]+)(.*)", s) + m !== nothing && (return m.captures[1:3]) + return nothing, nothing, nothing + end + print_tok(token, type) = Base.printstyled(io, + token, + bold = hlscheme[type][1], + color = hlscheme[type][2]) + buf = IOBuffer(code) + while !eof(buf) + line = readline(buf) + indent, tok, line = get_token(line) + istok(regex) = match(regex, tok) !== nothing + isinstr() = first(split(tok, '.')) in ptx_instructions + while (tok !== nothing) + print(io, indent) + + # comments + if istok(r"^\/\/") + print_tok(tok, :comment) + print_tok(line, :comment) + break + # labels + elseif istok(r"^[\w]+:") + print_tok(tok, :label) + # instructions + elseif isinstr() + print_tok(tok, :instruction) + # directives + elseif istok(r"^\.[\w]+") + print_tok(tok, :type) + # guard predicates + elseif istok(r"^@!?%p.+") + print_tok(tok, :keyword) + # registers + elseif istok(r"^%[\w]+") + print_tok(tok, :variable) + # constants + elseif istok(r"^0[xX][A-F]+U?") || # hexadecimal + istok(r"^0[0-8]+U?") || # octal + istok(r"^0[bB][01]+U?") || # binary + istok(r"^[0-9]+U?") || # decimal + istok(r"^0[fF]{hexdigit}{8}") || # single-precision floating point + istok(r"^0[dD]{hexdigit}{16}") # double-precision floating point + print_tok(tok, :number) + # TODO: function names + # TODO: labels as RHS + else + print_tok(tok, :default) + end + indent, tok, line = get_token(line) + end + print(io, '\n') + end +end # # code_* replacements From 89f3e3d478af31656247e6f8f045911f9a14bac2 Mon Sep 17 00:00:00 2001 From: Ellipse0934 <7466364+Ellipse0934@users.noreply.github.com> Date: Sat, 22 Jan 2022 01:43:48 +0530 Subject: [PATCH 2/3] 
more language awareness --- src/reflection.jl | 148 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 45 deletions(-) diff --git a/src/reflection.jl b/src/reflection.jl index d9520c1f..b0c2c1eb 100644 --- a/src/reflection.jl +++ b/src/reflection.jl @@ -34,73 +34,131 @@ function highlight(io::IO, code, lexer) end end -const ptx_instructions = [ - "abs", "cvt", "min", "shfl", "vadd", "activemask", "cvta", "mma", "shl", "vadd2", - "add", "discard", "mov", "shr", "vadd4", "addc", "div", "mul", "sin", "vavrg2", - "alloca", "dp2a", "mul24", "slct", "vavrg4", "and", "dp4a", "nanosleep", "sqrt", - "vmad", "applypriority", "ex2", "neg", "st", "vmax", "atom", "exit", "not", - "stackrestore", "vmax2", "bar", "fence", "or", "stacksave", "vmax4", "barrier", - "fma", "pmevent", "sub", "vmin", "bfe", "fns", "popc", "subc", "vmin2", "bfi", - "isspacep", "prefetch", "suld", "vmin4", "bfind", "istypep", "prefetchu", "suq", - "vote", "bmsk", "ld", "prmt", "sured", "vset", "bra", "ldmatrix", "rcp", "sust", - "vset2", "brev", "ldu", "red", "szext", "vset4", "brkpt", "lg2", "redux", "tanh", - "vshl", "brx", "lop3", "rem", "testp", "vshr", "call", "mad", "ret", "tex", "vsub", - "clz", "mad24", "rsqrt", "tld4", "vsub2", "cnot", "madc", "sad", "trap", "vsub4", - "copysign", "match", "selp", "txq", "wmma", "cos", "max", "set", "vabsdiff", "xor", - "cp", "mbarrier", "setp", "vabsdiff2", "createpolicy", "membar", "shf", "vabsdiff4"] +ptx_instructions = ["abs", "activemask", "add", "addc", "alloca", "and", + "applypriority", "atom", "bar", "barrier", "bfe", "bfi", + "bfind", "bmsk", "bra", "brev", "brkpt", "brx", "call", "clz", + "cnot", "copysign", "cos", "cp", "createpolicy", "cvt", "cvta", + "discard", "div", "dp2a", "dp4a", "ex2", "exit", "fence", + "fma", "fns", "isspacep", "istypep", "ld", "ldmatrix", "ldu", + "lg2", "lop3", "mad", "mad24", "madc", "match", "max", "mbarrier", + "membar", "min", "mma", "mov", "mul", "mul24", "nanosleep", "neg", + "not", 
"or", "pmevent", "popc", "prefetch", "prefetchu", "prmt", + "rcp", "red", "redux", "rem", "ret", "rsqrt", "sad", "selp", + "set", "setp", "shf", "shfl", "shl", "shr", "sin", "slct", "sqrt", + "st", "stackrestore", "stacksave", "sub", "subc", "suld", "suq", + "sured", "sust", "szext", "tanh", "testp", "tex", "tld4", "trap", + "txq", "vabsdiff", "vabsdiff2", "vabsdiff4", "vadd", "vadd2", "vadd4", + "vavrg2", "vavrg4", "vmad", "vmax", "vmax2", "vmax4", "vmin", "vmin2", + "vmin4", "vote", "vset", "vset2", "vset4", "vshl", "vshr", "vsub", + "vsub2", "vsub4", "wmma", "xor"] + +r_ptx_instruction = join(ptx_instructions, "|") + +types = ["s8", "s16", "s32", "s64", "u8", "u16", "u32", "u64", "f16x2", "f16", "f32", "f64", "b8", "b16", "b32", "b64", "pred"] # f16x2 precedes f16 so the alternation cannot stop at the shorter prefix +r_types = join(types, "|") + + +# https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-and-bit-size-comparisons + operators_comparison_sint = ["eq", "ne", "lt", "le", "gt", "ge"] + operators_comparison_uint = ["eq", "ne", "lo", "ls", "hi", "hs"] + operators_comparison_bit = ["eq", "ne"] + + operators_comparison_float = ["eq", "ne", "lt", "le", "gt", "ge"] + operators_comparison_nanfloat = ["equ", "neu", "ltu", "leu", "gtu", "geu"] + operators_comparison_nan = ["num", "nan"] + + modifiers_int = ["rni", "rzi", "rmi", "rpi"] + modifiers_float = ["rn", "rna", "rz", "rm", "rp"] + modifiers = sort(unique([modifiers_int...,modifiers_float...])) + + state_spaces = ["reg", "sreg", "const", "global", "local", "param", "shared", "tex"] + + + operators = sort(unique([operators_comparison_sint..., operators_comparison_uint..., + operators_comparison_bit..., operators_comparison_float..., + operators_comparison_nanfloat..., operators_comparison_nan..., + modifiers..., state_spaces..., types...])) + + +r_operators = join(operators, "|") + +# We can divide into types of instructions as all combinations of instructions, types and operators are not valid. 
+r_instruction = "(?:(?:$r_ptx_instruction)\\.(?:(?:$r_operators)(?:\\.))?(?:$(r_types)))" + +directives = ["address_size", "align", "branchtargets", "callprototype", + "calltargets", "const", "entry", "extern", "file", "func", "global", + "local", "loc", "maxnctapersm", "maxnreg", "maxntid", + "minnctapersm", "param", "pragma", "reg", "reqntid", "section", + "shared", "sreg", "target", "tex", "version", "visible", "weak"] + +r_directive = join(directives, "|") + + +r_hex = "0[xX][0-9a-fA-F]+U?" +r_octal = "0[0-7]+U?" +r_binary = "0[bB][01]+U?" +r_decimal = "[0-9]+U?" +r_float = "0[fF][0-9a-fA-F]{8}" +r_double = "0[dD][0-9a-fA-F]{16}" + +r_number = join(map(x -> "(?:" * x * ")", [r_hex, r_octal, r_binary, r_float, r_double, r_decimal]), "|") # float/double are tried before decimal, otherwise "0f..." lexes as the decimal "0" + +r_register_special = ["%clock64", "%clock_hi", "%clock", "%ctaid", "%dynamic_smem_size", "%envreg\\d{0,2}", # envreg0-31 + "%globaltimer_hi", "%globaltimer_lo", "%globaltimer", "%gridid", "%laneid", "%lanemask_eq", + "%lanemask_ge", "%lanemask_gt", "%lanemask_le", "%lanemask_lt", "%nctaid", "%nsmid", + "%ntid", "%nwarpid", "%pm\\d_64", "%pm\\d", "%reserved_smem_offset_[01]", + "%reserved_smem_offset_begin", "%reserved_smem_offset_cap", "%reserved_smem_offset_end", "%smid", + "%tid", "%total_smem_size", "%warpid", "%\\w{1,2}\\d{0,2}"] + +r_register = join(r_register_special, "|") + + +r_followsym = "[a-zA-Z0-9_\$]" +r_identifier= "[a-zA-Z]{$r_followsym}* | {[_\$%]{$r_followsym}+" + +r_guard_predicate = "@!?%p\\d{0,2}" +r_label = "[\\w_]+:" +r_comment = "//" +r_unknown = "[^\\s]+" + +r_line = "(?:(?:.$r_directive)|(?:$r_instruction)|(?:$r_register)|(?:$r_number)|(?:$r_label)|(?:$r_guard_predicate)|(?:$r_comment)|(?:$r_identifier)|(?:$r_unknown))" + +get_token(n::Nothing) = nothing, nothing, nothing # simple regex-based highlighter # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html -function highlight_ptx(io::IO, code) +function highlight_ptx(io::IO, code::AbstractString) function get_token(s) - # TODO: doesn't handle 
`ret;`, `{1`, etc; not properly tokenizing symbols - m = match(r"(\s*)([^\s]+)(.*)", s) + m = match(Regex("^(\\s*)($r_line)(.*)"), s) m !== nothing && (return m.captures[1:3]) return nothing, nothing, nothing end + get_token(n::Nothing) = nothing, nothing, nothing print_tok(token, type) = Base.printstyled(io, token, bold = hlscheme[type][1], color = hlscheme[type][2]) - buf = IOBuffer(code) - while !eof(buf) - line = readline(buf) + code = IOBuffer(code) + while !eof(code) + line = readline(code) indent, tok, line = get_token(line) - istok(regex) = match(regex, tok) !== nothing - isinstr() = first(split(tok, '.')) in ptx_instructions + is_tok(regex) = match(Regex("^(" * regex * ")"), tok) !== nothing while (tok !== nothing) print(io, indent) - - # comments - if istok(r"^\/\/") + if is_tok(r_comment) print_tok(tok, :comment) print_tok(line, :comment) break - # labels - elseif istok(r"^[\w]+:") + elseif is_tok(r_label) print_tok(tok, :label) - # instructions - elseif isinstr() + elseif is_tok(r_instruction) print_tok(tok, :instruction) - # directives - elseif istok(r"^\.[\w]+") + elseif is_tok(r_directive) print_tok(tok, :type) - # guard predicates - elseif istok(r"^@!?%p.+") + elseif is_tok(r_guard_predicate) print_tok(tok, :keyword) - # registers - elseif istok(r"^%[\w]+") - print_tok(tok, :variable) - # constants - elseif istok(r"^0[xX][A-F]+U?") || # hexadecimal - istok(r"^0[0-8]+U?") || # octal - istok(r"^0[bB][01]+U?") || # binary - istok(r"^[0-9]+U?") || # decimal - istok(r"^0[fF]{hexdigit}{8}") || # single-precision floating point - istok(r"^0[dD]{hexdigit}{16}") # double-precision floating point + elseif is_tok(r_register) print_tok(tok, :number) - # TODO: function names - # TODO: labels as RHS else print_tok(tok, :default) end From 64f6123abea4285a1f4ed1dbdea572d4ff3168ff Mon Sep 17 00:00:00 2001 From: Ellipse0934 <7466364+Ellipse0934@users.noreply.github.com> Date: Sat, 22 Jan 2022 02:04:42 +0530 Subject: [PATCH 3/3] fix directive --- 
src/reflection.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reflection.jl b/src/reflection.jl index b0c2c1eb..1fe0248c 100644 --- a/src/reflection.jl +++ b/src/reflection.jl @@ -91,7 +91,7 @@ directives = ["address_size", "align", "branchtargets", "callprototype", "minnctapersm", "param", "pragma", "reg", "reqntid", "section", "shared", "sreg", "target", "tex", "version", "visible", "weak"] -r_directive = join(directives, "|") +r_directive = "(?:\\.(?:" * join(directives, "|") * "))" r_hex = "0[xX][0-9a-fA-F]+U?" @@ -121,7 +121,7 @@ r_label = "[\\w_]+:" r_comment = "//" r_unknown = "[^\\s]+" -r_line = "(?:(?:.$r_directive)|(?:$r_instruction)|(?:$r_register)|(?:$r_number)|(?:$r_label)|(?:$r_guard_predicate)|(?:$r_comment)|(?:$r_identifier)|(?:$r_unknown))" +r_line = "(?:(?:$r_directive)|(?:$r_instruction)|(?:$r_register)|(?:$r_number)|(?:$r_label)|(?:$r_guard_predicate)|(?:$r_comment)|(?:$r_identifier)|(?:$r_unknown))" get_token(n::Nothing) = nothing, nothing, nothing