diff --git a/lib/rouge/demos/solidity b/lib/rouge/demos/solidity
new file mode 100644
index 0000000000..e8d9453639
--- /dev/null
+++ b/lib/rouge/demos/solidity
@@ -0,0 +1,13 @@
+pragma solidity ~0.4.15;
+
+interface IMirror {
+  function reflect() external payable returns(bool /* ain't I pretty?.. */);
+}
+
+contract Mirror is IMirror {
+  event logMessage(address indexed sender, uint256 value, uint256 gas, bytes data);
+
+  function () { // no funny stuff
+    revert();
+  }
+}
diff --git a/lib/rouge/lexers/solidity.rb b/lib/rouge/lexers/solidity.rb
new file mode 100644
index 0000000000..cd6000665e
--- /dev/null
+++ b/lib/rouge/lexers/solidity.rb
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*- #
+
+module Rouge
+  module Lexers
+    # Lexer for Solidity source code. Class-level word lists classify
+    # identifiers; the states below handle whitespace, comments, string
+    # and numeric literals, operators and punctuation.
+    class Solidity < RegexLexer
+      title "Solidity"
+      desc "Solidity, an Ethereum smart contract programming language"
+      tag 'solidity'
+      filenames '*.sol', '*.solidity'
+      mimetypes 'text/x-solidity'
+
+      # identifier: a letter, `$` or `_`, then word characters or `$`
+      id = /[a-zA-Z$_][\w$_]*/
+
+      # Source-based detection: true when the text opens with a
+      # `pragma solidity` directive, optionally preceded by whitespace
+      # and comments (e.g. a license header); nil otherwise.
+      def self.detect?(text)
+        return true if text =~ %r(\A(?:\s|//[^\n]*\n|/[*].*?[*]/)*pragma solidity)m
+      end
+
+      # Language keywords (memoized).
+      # TODO: separate by "type"
+      def self.keywords
+        @keywords ||= Set.new %w(
+          abstract anonymous as assembly break catch calldata constant
+          constructor continue contract do delete else emit enum event
+          external fallback for function hex if indexed interface
+          internal import is library mapping memory modifier new
+          override payable public pure pragma private receive return
+          returns storage struct throw try type using var view virtual
+          while
+        )
+      end
+
+      # Built-in names (memoized).
+      def self.builtins
+        return @builtins if @builtins
+
+        @builtins = Set.new %w(
+          now
+          false true
+          balance selector super this
+          blockhash gasleft
+          assert require revert
+          selfdestruct suicide
+          call callcode delegatecall
+          send transfer
+          addmod ecrecover keccak256 mulmod sha256 sha3 ripemd160
+        )
+
+        # TODO: use (currently shadowed by catch-all in :statements)
+        # `id` cannot match a `.`, so these dotted names are never looked
+        # up as written; each side of the dot is classified separately.
+        abi = %w(decode encode encodePacked encodeWithSelector encodeWithSignature)
+        @builtins.merge( abi.map { |i| "abi.#{i}" } )
+        block = %w(coinbase difficulty gaslimit hash number timestamp)
+        @builtins.merge( block.map { |i| "block.#{i}" } )
+        msg = %w(data gas sender sig value)
+        @builtins.merge( msg.map { |i| "msg.#{i}" } )
+        tx = %w(gasprice origin)
+        # Set#merge returns the set itself, which is the return value here
+        @builtins.merge( tx.map { |i| "tx.#{i}" } )
+      end
+
+      # Denomination and time-unit suffixes (memoized).
+      def self.constants
+        @constants ||= Set.new %w(
+          wei finney szabo ether
+          seconds minutes hours days weeks years
+        )
+      end
+
+      # Unsized type names (memoized); sized variants such as `uint256`
+      # are matched by dedicated rules in :statements.
+      def self.keywords_type
+        @keywords_type ||= Set.new %w(
+          address bool byte bytes int string uint
+        )
+      end
+
+      # Reserved words (memoized).
+      def self.reserved
+        @reserved ||= Set.new %w(
+          alias after apply auto case copyof default define final fixed
+          immutable implements in inline let macro match mutable null of
+          partial promise reference relocatable sealed sizeof static
+          supports switch typedef typeof ufixed unchecked
+        )
+      end
+
+      # begin every lex in :bol
+      start { push :bol }
+
+      state :expr_bol do
+        mixin :inline_whitespace
+
+        # empty match: leave this state once leading whitespace is consumed
+        rule(//) { pop! }
+      end
+
+      # :expr_bol is the same as :bol but without labels, since
+      # labels can only appear at the beginning of a statement.
+      state :bol do
+        mixin :expr_bol
+      end
+
+      # TODO: natspec in comments
+      state :inline_whitespace do
+        rule %r/[ \t\r]+/, Text
+        rule %r/\\\n/, Text # line continuation
+        rule %r(/\*), Comment::Multiline, :comment_multi
+      end
+
+      state :whitespace do
+        rule %r/\n+/m, Text, :bol
+        # a single-line comment runs to the newline, or to the end of the
+        # input when the last line is not newline-terminated
+        rule %r(//(\\.|.)*?(?:\n|\z)), Comment::Single, :bol
+        mixin :inline_whitespace
+      end
+
+      state :expr_whitespace do
+        rule %r/\n+/m, Text, :expr_bol
+        mixin :whitespace
+      end
+
+      state :statements do
+        mixin :whitespace
+        rule %r/(hex)?\"/, Str, :string_double
+        rule %r/(hex)?\'/, Str, :string_single
+        # exponents may be negative, e.g. `2e-10`
+        rule %r/\d\d*\.\d+([eE]-?\d+)?/i, Num::Float
+        rule %r/0x[0-9a-f]+/i, Num::Hex
+        rule %r/\d+([eE]-?\d+)?/i, Num::Integer
+        rule %r(\*/), Error # comment terminator outside of a comment
+        rule %r([~!%^&*+=\|?:<>/-]), Operator
+        rule %r/[()\[\],.]/, Punctuation
+        # sized types; the lookahead keeps a longer identifier such as
+        # `uint8_t` from being split after its type-like prefix
+        rule %r/u?fixed\d+x\d+(?![\w$])/, Keyword::Reserved
+        rule %r/bytes\d+(?![\w$])/, Keyword::Type
+        rule %r/u?int\d+(?![\w$])/, Keyword::Type
+        rule id do |m|
+          name = m[0]
+
+          if self.class.keywords.include? name
+            token Keyword
+          elsif self.class.builtins.include? name
+            token Name::Builtin
+          elsif self.class.constants.include? name
+            token Keyword::Constant
+          elsif self.class.keywords_type.include? name
+            token Keyword::Type
+          elsif self.class.reserved.include? name
+            token Keyword::Reserved
+          else
+            token Name
+          end
+        end
+      end
+
+      state :root do
+        mixin :expr_whitespace
+        # empty match: anything else begins a statement
+        rule(//) { push :statement }
+        # TODO: function declarations
+      end
+
+      state :statement do
+        rule %r/;/, Punctuation, :pop!
+        mixin :expr_whitespace
+        mixin :statements
+        rule %r/[{}]/, Punctuation
+      end
+
+      # rules shared by both string states
+      state :string_common do
+        rule %r/\\(u[a-fA-F0-9]{4}|x..|[^x])/, Str::Escape
+        rule %r/[^\\\"\'\n]+/, Str
+        rule %r/\\\n/, Str # line continuation
+        rule %r/\\/, Str # stray backslash
+      end
+
+      state :string_double do
+        mixin :string_common
+        rule %r/\"/, Str, :pop!
+        rule %r/\'/, Str
+      end
+
+      state :string_single do
+        mixin :string_common
+        rule %r/\'/, Str, :pop!
+        rule %r/\"/, Str
+      end
+
+      # multi-line comments do not nest: the first `*/` closes the comment
+      state :comment_multi do
+        rule %r(\*/), Comment::Multiline, :pop!
+        rule %r([^*/]+), Comment::Multiline
+        rule %r([*/]), Comment::Multiline
+      end
+    end
+  end
+end
diff --git a/spec/lexers/solidity_spec.rb b/spec/lexers/solidity_spec.rb
new file mode 100644
index 0000000000..9c8dccf11f
--- /dev/null
+++ b/spec/lexers/solidity_spec.rb
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*- #
+
+describe Rouge::Lexers::Solidity do
+  let(:subject) { Rouge::Lexers::Solidity.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.sol'
+      assert_guess :filename => 'foo.solidity'
+    end
+
+    it 'guesses by mimetype' do
+      assert_guess :mimetype => 'text/x-solidity'
+    end
+
+    it 'guesses by source' do
+      assert_guess :source => 'pragma solidity'
+    end
+  end
+end
diff --git a/spec/visual/samples/solidity b/spec/visual/samples/solidity
new file mode 100644
index 0000000000..6ea06598f4
--- /dev/null
+++ b/spec/visual/samples/solidity
@@ -0,0 +1,248 @@
+pragma solidity ^0.6.0;
+pragma ABIEncoderV2;
+pragma experimental SMTChecker;
+
+/**********************************************************************
+ * example.sol *
+ **********************************************************************/
+
+// Code in this contract is not meant to work (or be a good example).
+// It is meant to demonstrate good syntax highlighting by the lexer,
+// even if otherwise hazardous.
+
+// Comments relevant to the lexer are single-line.
+/* Comments relevant to the code are multi-line. */
+
+library Assembly {
+    function junk(address _addr) private returns (address _ret) {
+        assembly {
+            let tmp := 0
+
+            // nested code block
+            let mulmod_ := 0 { // evade collision with `mulmod`
+                let tmp:=sub(mulmod_,1) // `tmp` is not a label
+                mulmod_ := tmp
+            }
+            /* guess what mulmod_ is now... */
+        _loop: // JIC, dots are invalid in labels
+            let i := 0x10
+        loop:
+            // Escape sequences in comments are not parsed.
+            /* Not sure what's going on here, but it sure is funky!
+               \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ \o/ */
+            mulmod(_addr, mulmod_, 160)
+
+            0x1 i sub // instructional style
+            i =: tmp /* tmp not used */
+
+            jumpi(loop, not(iszero(i)))
+
+            mstore(0x0, _addr)
+            return(0x0, 160)
+        }
+    }
+}
+
+contract Strings {
+    // `double` is not a keyword (yet)
+    string double = "This\ is a string\nwith \"escapes\",\
+and it's multi-line. // no comment"; // comment ok // even nested :)
+    string single = 'This\ is a string\nwith "escapes",\
+and it\'s multi-line. // no comment'; // same thing, single-quote
+    string hexstr = hex'537472696e67732e73656e6428746869732e62616c616e6365293b';
+
+    fallback() external payable virtual {}
+
+    receive() external payable {
+        revert();
+    }
+}
+
+contract Types is Strings {
+    using Assembly for Assembly;
+
+    bytes stringsruntime = type(Strings).runtimeCode;
+
+    // typesM (compiler chokes on invalid)
+    int8 i8; // valid
+    //int10 i10; // invalid
+    uint256 ui256; // valid
+    //uint9001 ui9001; // invalid
+    bytes1 b1; //valid
+    //bytes42 b42; // invalid - M out of range for `bytes`
+
+    // typesMxN (compiler chokes on invalid)
+    fixed8x0 f8x0; // valid
+    fixed8x1 f8x1; // valid
+    fixed8x8 f8x8; // valid
+    //fixed0x8 f0x8; // invalid since MxN scheme changed
+    ufixed256x80 uf256x80; // valid
+    //ufixed42x217 uf42x217; // invalid - M must be multiple of 8, N <= 80
+
+    // special cases (internally not types)
+    string str; // dynamic array (not a value-type)
+    bytes bs; // same as above
+    //var v = 5; // `var` is a keyword, not a type, and compiler chokes
+    uint unu$ed; // `var` is highlighted, though, and `$` is a valid char
+
+    address a = "0x1"; // lexer parses as string
+    struct AddressMap {
+        address origin;
+        address result;
+        address sender;
+        bool touched;
+    }
+    mapping (address => AddressMap) touchedMe;
+
+    function failOnNegative(int8 _arg)
+        private
+        pure
+        returns (uint256)
+    {
+        /* implicit type conversion from `int8` to `uint256` */
+        return _arg;
+    }
+
+    // some arithmetic operators + built-in names
+    function opportunisticSend(address k) private {
+        /* `touchedMe[k].result` et al are addresses, so
+           `send()` available */
+        touchedMe[k].origin.send(uint256(k)**2 % 100 finney);
+        touchedMe[k].result.send(1 wei);
+        touchedMe[k].sender.send(mulmod(1 szabo, k, 42));
+    }
+
+    fallback() external payable override {
+        /* inferred type: address */
+        var k = msg.sender;
+        /* inferred type: `ufixed0x256` */
+        var v = 1/42;
+        /* can't be `var` - location specifier requires explicit type */
+        int negative = -1;
+
+        // valid syntax, unexpected result - not our problem
+        ui256 = failOnNegative(negative);
+
+        // logic operators
+        if ((!touchedMe[msg.sender].touched &&
+             !touchedMe[tx.origin].touched) ||
+            ((~(msg.sender * v + a)) % 256 == 42)
+        ) {
+            address garbled = Assembly.junk(a + msg.sender);
+
+            /* create a new AddressMap struct in storage */
+            AddressMap storage tmp;
+
+            // TODO: highlight all known internal keywords?
+            tmp.origin = tx.origin;
+            tmp.result = garbled;
+            tmp.sender = msg.sender;
+            tmp.touched = true;
+
+            /* does this link-by-reference as expected?.. */
+            touchedMe[msg.sender] = tmp;
+            touchedMe[tx.origin] = tmp;
+        }
+        else {
+            /* weak guard against re-entry */
+            touchedMe[k].touched = false;
+
+            opportunisticSend(k);
+
+            delete touchedMe[k];
+            /* these probably do nothing... */
+            delete touchedMe[msg.sender];
+            delete touchedMe[tx.origin];
+        }
+    }
+}
+
+/**
+   \brief Examples of bad practices.
+
+   TODO: This special doxygen natspec notation is not parsed yet.
+
+   @author Noel Maersk
+ */
+/// TODO: Neither is this one.
+
+contract BadPractices {
+    address constant creator; /* `internal` by default */
+    address private owner; /* forbid inheritance */
+    bool mutex;
+
+    modifier critical {
+        assert(!mutex);
+        mutex = true;
+        _;
+        mutex = false;
+    }
+
+    constructor() external {
+        creator = tx.origin;
+        owner = msg.sender;
+    }
+
+    /* Dangerous - function public, and doesn't check who's calling. */
+    function withdraw(uint _amount)
+        public
+        critical
+        returns (bool)
+    { /* `mutex` set via modifier */
+        /* Throwing on failed call may be dangerous. Consider
+           returning false instead?.. */
+        require(msg.sender.call.value(_amount)());
+        return true;
+    } /* `mutex` reset via modifier */
+
+    /* fallback */
+    fallback() external payable {
+        /* `i` will be `uint8`, so this is an endless loop
+           that will consume all gas and eventually throw.
+         */
+        for (var i = 0; i < 257; i++) {
+            owner++;
+        }
+    }
+
+    /* receive()?.. nah, why bother */
+}
+
+/* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /* /*
+// A regular multi-line comment closure, including an escaped variant as
+// demonstrated shortly, should close the comment; note that the lexer
+// should not be nesting multi-line comments.
+//
+// If the comment is still shown as "open", then a
+//
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// !!! MALICIOUS CODE SEGMENT !!!
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// can be erroneously thought of as inactive, and left unread.
+// In fact, the compiler will produce executable code from it, possibly
+// overriding the program above.
+//
+// It is imperative that syntax highlighters do parse it if either of
+// `* /` or `\* /` (with space removed) are present.
+//
+// Now, let's party! :) \*/
+
+contract MoreBadPractices is BadPractices {
+    uint balance;
+
+    fallback() external payable override {
+        balance += msg.value;
+        if (!msg.sender.send(this.balance / 10)) throw;
+        balance -= this.balance;
+    }
+}
+
+/*
+// Open comment to EOF. Compiler chokes on this, but it's useful for
+// highlighting to show that there's an unmatched multi-line comment
+// open.
+
+contract CommentToEndOfFile is MoreBadPractices {
+    fallback() external payable override {}
+}