Skip to content

Commit

Permalink
strip leading whitespace from triple-quoted strings (closes #70)
Browse files Browse the repository at this point in the history
For example,

    s = """
        a
         b
        """

is now equivalent to "a\n b".
  • Loading branch information
nolta committed Feb 16, 2013
1 parent b8eae0e commit 865ea16
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 21 deletions.
2 changes: 2 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,8 @@ export

# Macros
@str,
@mstr,
@imstr,
@I_str,
@E_str,
@B_str,
Expand Down
44 changes: 44 additions & 0 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -652,9 +652,53 @@ function interp_parse_bytes(s::String)
interp_parse(s, unescape_string, writer)
end

## multiline strings ##

# Return `line` with the leading `prefix` removed, or `line` unchanged when it
# does not begin with `prefix`.
#
# `nchars` is the number of characters in `prefix`, and `cut` is the string
# index of the first character that follows `prefix` in the line it was taken
# from.
function multiline_strip_prefix(line, prefix, nchars, cut)
    matched = 0
    for (p, c) in zip(prefix, line)
        if p != c break end
        matched += 1
    end
    # A full match means `line` starts with exactly the characters of `prefix`
    # (and therefore the same bytes), so `cut` is a valid index into `line`.
    return matched == nchars ? line[cut:end] : line
end

# Strip the common leading indentation from the text of a triple-quoted string.
#
# `s` is returned unchanged when it is empty, does not start with whitespace,
# consists of a single line, or contains nothing but a blank first and last
# line. Otherwise:
#   * a whitespace-only first line and a whitespace-only last line are dropped;
#   * the leading whitespace of the first remaining line is taken as the prefix;
#   * that prefix is removed from every remaining line that begins with it
#     (other lines are kept as they are);
#   * the lines are joined back together with '\n', without a trailing newline.
#
# For example, "\n    a\n     b\n    " becomes "a\n b".
function multiline_lstrip(s::String)
    if length(s) == 0 || !isspace(s[1])
        return s
    end
    lines = split(s, '\n')

    # drop a blank first line and a blank last line
    a, b = 1, length(lines)
    if b == 1 return s end
    if lstrip(lines[a]) == "" a += 1 end
    if lstrip(lines[b]) == "" b -= 1 end
    if a > b return s end

    # Measure the prefix. String indices are not character counts, so keep
    # both: `nchars` counts the whitespace characters and `stop` is the index
    # of the last one.
    first_line = lines[a]
    nchars = 0
    stop = 0
    for c in first_line
        if !isspace(c) break end
        nchars += 1
        stop = nextind(first_line, stop)
    end
    if nchars == 0
        # nothing to strip; only the blank first/last lines are removed
        return join(lines[a:b], "\n")
    end
    prefix = first_line[1:stop]
    cut = nextind(first_line, stop)

    # Joining the pieces avoids pre-sizing an output buffer, whose size
    # estimate could go negative when some lines are shorter than the prefix.
    stripped = [multiline_strip_prefix(lines[i], prefix, nchars, cut) for i = a:b]
    return join(stripped, "\n")
end

## core string macros ##

# "..." with interpolation: hand the raw text to interp_parse.
macro str(s)
    interp_parse(s)
end

# """...""" without interpolation: strip the indentation only.
macro mstr(s)
    multiline_lstrip(s)
end

# """...""" with interpolation: strip the indentation first, then parse the
# interpolations out of what is left.
macro imstr(s)
    stripped = multiline_lstrip(s)
    interp_parse(stripped)
end

# I"...": interp_parse, using unescape_chars restricted to the double quote
# as the unescaping function.
macro I_str(s)
    unquote = x -> unescape_chars(x, "\"")
    interp_parse(s, unquote)
end

# E"...": unescape the text, then pass it through check_utf8.
macro E_str(s)
    unescaped = unescape_string(s)
    check_utf8(unescaped)
end

# B"...": hand the raw text to interp_parse_bytes.
macro B_str(s)
    interp_parse_bytes(s)
end
Expand Down
50 changes: 30 additions & 20 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -800,16 +800,19 @@
(if (and (symbol? ex) (not (operator? ex))
(not (ts:space? s)))
;; custom prefixed string literals, x"s" => @x_str "s"
(let ((str (begin (take-token s)
(parse-string-literal s)))
(macname (symbol (string #\@ ex '_str))))
(let ((nxt (peek-token s)))
(if (and (symbol? nxt) (not (operator? nxt))
(not (ts:space? s)))
;; string literal suffix, "s"x
(loop `(macrocall ,macname ,(car str)
,(string (take-token s))))
(loop `(macrocall ,macname ,(car str))))))
(let* ((str (begin (take-token s)
(parse-string-literal s)))
(nxt (peek-token s))
(macname (symbol (string #\@ ex '_str)))
(macstr (if (triplequote-string-literal? str)
`(macrocall @mstr ,(car str))
(car str))))
(if (and (symbol? nxt) (not (operator? nxt))
(not (ts:space? s)))
;; string literal suffix, "s"x
(loop `(macrocall ,macname ,macstr
,(string (take-token s))))
(loop `(macrocall ,macname ,macstr))))
ex))
(else ex))))))))

Expand Down Expand Up @@ -1312,7 +1315,7 @@
c))

(define (take-char p)
(begin (read-char p) p))
(begin (read-char p) p))

; reads a raw string literal with no processing.
; quote can be escaped with \, but the \ is left in place.
Expand All @@ -1322,7 +1325,7 @@
(if (eqv? (peek-char p) #\")
(if (eqv? (peek-char (take-char p)) #\")
(parse-string-literal-3 (take-char p))
(cons "" #f))
(cons "" (cons #f #f)))
(parse-string-literal-1 p))))

(define (parse-string-literal-1 p)
Expand All @@ -1340,7 +1343,7 @@
(set! interpolate #t))
(write-char (not-eof-3 c) b)))
(loop (read-char p)))))
(cons (io.tostring! b) interpolate)))
(cons (io.tostring! b) (cons interpolate #f))))

(define (parse-string-literal-3 p)
(let ((b (open-output-string))
Expand All @@ -1366,7 +1369,10 @@
(set! interpolate #t))
(write-char (not-eof-3 c) b)))
(loop (read-char p)))))
(cons (io.tostring! b) interpolate)))
(cons (io.tostring! b) (cons interpolate #t))))

;; The string-literal readers return (text . (interpolate? . triple-quoted?)).
;; These accessors pick the two flags out of that pair.
(define (interpolate-string-literal? s)
  (car (cdr s)))
(define (triplequote-string-literal? s)
  (cdr (cdr s)))

(define (not-eof-1 c)
(if (eof-object? c)
Expand Down Expand Up @@ -1542,12 +1548,16 @@
((eqv? t #\")
(take-token s)
(let ((ps (parse-string-literal s)))
(if (cdr ps)
`(macrocall @str ,(car ps))
(let ((str (unescape-string (car ps))))
(if (not (string.isutf8 str))
(error "invalid UTF-8 sequence"))
str))))
(if (interpolate-string-literal? ps)
(if (triplequote-string-literal? ps)
`(macrocall @imstr ,(car ps))
`(macrocall @str ,(car ps)))
(let ((str (unescape-string (car ps))))
(if (not (string.isutf8 str))
(error "invalid UTF-8 sequence"))
(if (triplequote-string-literal? ps)
`(macrocall @mstr ,str)
str)))))

;; macro call
((eqv? t #\@)
Expand Down
3 changes: 2 additions & 1 deletion src/julia-syntax.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2121,7 +2121,8 @@ So far only the second case can actually occur.

(define (julia-expand-strs e)
(cond ((not (pair? e)) e)
((and (eq? (car e) 'macrocall) (eq? (cadr e) '@str))
((and (eq? (car e) 'macrocall) (or (eq? (cadr e) '@str)
(eq? (cadr e) '@mstr)))
;; expand macro
(let ((form
(apply invoke-julia-macro (cadr e) (cddr e))))
Expand Down
19 changes: 19 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,22 @@ str = "s\u2200"
# double quotes inside a triple-quoted string, unescaped and escaped
@test """ab""c""" == "ab\"\"c"
@test """ab"\"c""" == "ab\"\"c"
# an escaped quote directly before the closing delimiter
@test """abc\"""" == "abc\""
# interpolation inside triple-quoted strings, with and without the E prefix
n = 3
@test """$n""" == "$n"
@test E"""$n""" == E"$n"
# multi-line literal opening with a newline
# NOTE(review): the expected value has a blank line before "c" and the next
# test expects indented text, but the literals show neither here; the
# whitespace inside them looks lost in this view. Confirm against the repository.
@test """
a
b
c
""" == "a\nb\n\nc"
# literal whose first character is not whitespace; the expected value keeps
# the inner whitespace and the final line
@test """x
a
""" == "x\n a\n "
# interpolation combined with a multi-line literal
@test """
$n
""" == "3"
# E-prefixed multi-line literal compared against the single-line form
@test E"""
$n
""" == E"$n"

6 comments on commit 865ea16

@StefanKarpinski
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. This isn't quite the behavior I was thinking of, but it's close. The trailing newline should be kept, so that

julia> str = """
             Hello,
             world.
             """
"Hello,\nworld.\n"

Note the newline at the end. Also, with this it is impossible to express intentionally indented text. For example, I would want this:

julia> str = """
               Hello,
               world.
             """
"  Hello,\n  world.\n"

This can be done by only stripping the whitespace leading up to the closing triple quote. You can merge now though and I can tweak it later. Or we can debate a bit if people strongly disagree.

@JeffBezanson
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What should it do if the string doesn't end in a newline, e.g.

        world."""

@StefanKarpinski
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably use the indent of that line.

@nolta
Copy link
Member Author

@nolta nolta commented on 865ea16 Feb 17, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, i'll switch to using the last line to strip whitespace, instead of the first.

But i think we should still discard the trailing newline.

@StefanKarpinski
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But i think we should still discard the trailing newline.

Justification? I've found that you almost always want the trailing newline when generating text.

@nolta
Copy link
Member Author

@nolta nolta commented on 865ea16 Feb 17, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Personal preference: i almost always don't want the trailing newline.
  • Symmetry: leading and trailing whitespace are treated the same.
  • As @kmsquire pointed out, it would be impossible to custom indent a string w/o the trailing newline.

Please sign in to comment.