diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..edc6d3b --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.cov +*.mem +data/*.json diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..1bc2f60 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +language: julia +os: + - osx + - linux +julia: + - 0.7 + - 1.0 + - nightly +notifications: + email: false +after_success: + - julia -e 'import Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())'; diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d916e61 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,25 @@ +The Julia JSON package is licensed under the MIT Expat License: + +> Copyright (c) 2002: JSON.org, 2012–2016: Avik Sengupta, Stefan Karpinski, +> David de Laat, Dirk Gadsen, Milo Yip and other contributors +> – https://github.com/JuliaLang/JSON.jl/contributors +> and https://github.com/miloyip/nativejson-benchmark/contributors +> +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +> NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..2a0d1b3 --- /dev/null +++ b/Project.toml @@ -0,0 +1,22 @@ +name = "JSON" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.20.1" + +[deps] +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Mmap = "a63ad114-7e13-5084-954f-fe012c677804" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[extras] +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +julia = "0.7, 1" + +[targets] +test = ["DataStructures", "Distributed", "FixedPointNumbers", "OffsetArrays", "Sockets", "Test"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..9ccbd6d --- /dev/null +++ b/README.md @@ -0,0 +1,108 @@ +# JSON.jl +### Parsing and printing JSON in pure Julia. 
+ +[![Build Status](https://travis-ci.org/JuliaIO/JSON.jl.svg)](https://travis-ci.org/JuliaIO/JSON.jl) +[![Build status](https://ci.appveyor.com/api/projects/status/2sfomjwl29k6y6oy)](https://ci.appveyor.com/project/staticfloat/json-jl) +[![codecov.io](http://codecov.io/github/JuliaIO/JSON.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaIO/JSON.jl?branch=master) + +[![JSON](http://pkg.julialang.org/badges/JSON_0.3.svg)](http://pkg.julialang.org/?pkg=JSON&ver=0.3) +[![JSON](http://pkg.julialang.org/badges/JSON_0.4.svg)](http://pkg.julialang.org/?pkg=JSON&ver=0.4) +[![JSON](http://pkg.julialang.org/badges/JSON_0.5.svg)](http://pkg.julialang.org/?pkg=JSON&ver=0.5) +[![JSON](http://pkg.julialang.org/badges/JSON_0.6.svg)](http://pkg.julialang.org/?pkg=JSON&ver=0.6) + +**Installation**: `julia> Pkg.add("JSON")` + + +## Basic Usage + + +```julia +import JSON + +# JSON.parse - string or stream to Julia data structures +s = "{\"a_number\" : 5.0, \"an_array\" : [\"string\", 9]}" +j = JSON.parse(s) +# Dict{AbstractString,Any} with 2 entries: +# "an_array" => {"string",9} +# "a_number" => 5.0 + +# JSON.json - Julia data structures to a string +JSON.json([2,3]) +# "[2,3]" +JSON.json(j) +# "{\"an_array\":[\"string\",9],\"a_number\":5.0}" +``` + +## Documentation + + +```julia +JSON.print(io::IO, s::AbstractString) +JSON.print(io::IO, s::Union{Integer, AbstractFloat}) +JSON.print(io::IO, n::Nothing) +JSON.print(io::IO, b::Bool) +JSON.print(io::IO, a::AbstractDict) +JSON.print(io::IO, v::AbstractVector) +JSON.print{T, N}(io::IO, v::Array{T, N}) +``` + +Writes a compact (no extra whitespace or indentation) JSON representation +to the supplied IO. + +```julia +JSON.print(a::AbstractDict, indent) +JSON.print(io::IO, a::AbstractDict, indent) +``` + +Writes a JSON representation with newlines, and indentation if specified. Non-zero `indent` will be applied recursively to nested elements. 
+ + +```julia +json(a::Any) +``` + +Returns a compact JSON representation as an `AbstractString`. + +```julia +JSON.parse(s::AbstractString; dicttype=Dict, inttype=Int64) +JSON.parse(io::IO; dicttype=Dict, inttype=Int64) +JSON.parsefile(filename::AbstractString; dicttype=Dict, inttype=Int64, use_mmap=true) +``` + +Parses a JSON `AbstractString` or IO stream into a nested `Array` or `Dict`. + +The `dicttype` indicates the dictionary type (`<: Associative`), or a function that +returns an instance of a dictionary type, +that JSON objects are parsed to. It defaults to `Dict` (the built-in Julia +dictionary), but a different type can be passed for additional functionality. +For example, if you `import DataStructures` +(assuming the [DataStructures +package](https://github.com/JuliaLang/DataStructures.jl) is +installed) + + - you can pass `dicttype=DataStructures.OrderedDict` to maintain the insertion order + of the items in the object; + - or you can pass `()->DefaultDict{String,Any}(Missing)` to having any non-found keys + return `missing` when you index the result. + + +The `inttype` argument controls how integers are parsed. If a number in a JSON +file is recognized to be an integer, it is parsed as one; otherwise it is parsed +as a `Float64`. The `inttype` defaults to `Int64`, but, for example, if you know +that your integer numbers are all small and want to save space, you can pass +`inttype=Int32`. Alternatively, if your JSON input has integers which are too large +for Int64, you can pass `inttype=Int128` or `inttype=BigInt`. `inttype` can be any +subtype of `Real`. + +```julia +JSONText(s::AbstractString) +``` +A wrapper around a Julia string representing JSON-formatted text, +which is inserted *as-is* in the JSON output of `JSON.print` and `JSON.json`. + +```julia +JSON.lower(p::Point2D) = [p.x, p.y] +``` + +Define a custom serialization rule for a particular data type. Must return a +value that can be directly serialized; see help for more details. 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..912635f --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,43 @@ +environment: + matrix: + - julia_version: 0.7 + - julia_version: 1 + - julia_version: nightly + +platform: + - x86 # 32-bit + - x64 # 64-bit + +# # Uncomment the following lines to allow failures on nightly julia +# # (tests will run but not make your overall status red) +# matrix: +# allow_failures: +# - julia_version: nightly + +branches: + only: + - master + - /release-.*/ + +notifications: + - provider: Email + on_build_success: false + on_build_failure: false + on_build_status_changed: false + +install: + - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1")) + +build_script: + - echo "%JL_BUILD_SCRIPT%" + - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%" + +test_script: + - echo "%JL_TEST_SCRIPT%" + - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%" + +# # Uncomment to support code coverage upload. 
Should only be enabled for packages +# # which would have coverage gaps without running on Windows +# on_success: +# - echo "%JL_CODECOV_SCRIPT%" +# - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%" \ No newline at end of file diff --git a/bench/bench.jl b/bench/bench.jl new file mode 100644 index 0000000..a9b4be5 --- /dev/null +++ b/bench/bench.jl @@ -0,0 +1,92 @@ +#!/usr/bin/julia --color=yes + +using ArgParse +using JSON + + +function bench(f, simulate=false) + fp = joinpath(JSON_DATA_DIR, string(f, ".json")) + if !isfile(fp) + println("Downloading benchmark file...") + download(DATA_SOURCES[f], fp) + end + GC.gc() # run gc so it doesn't affect benchmarks + t = if args["parse"]["parse-file"] + @elapsed JSON.parsefile(fp) + else + data = read(fp, String) + @elapsed JSON.Parser.parse(data) + end + + if !simulate + printstyled(" [Bench$FLAGS] "; color=:yellow) + println(f, " ", t, " seconds") + end + t +end + + +const JSON_DATA_DIR = joinpath(dirname(dirname(@__FILE__)), "data") +const s = ArgParseSettings(description="Benchmark JSON.jl") + +const DATA_SOURCES = Dict( + "canada" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/canada.json", + "citm_catalog" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/citm_catalog.json", + "citylots" => "https://raw.githubusercontent.com/zemirco/sf-city-lots-json/master/citylots.json", + "twitter" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/twitter.json") + +@add_arg_table s begin + "parse" + action = :command + help = "Run a JSON parser benchmark" + "list" + action = :command + help = "List available JSON files for use" +end + +@add_arg_table s["parse"] begin + "--include-compile", "-c" + help = "If set, include the compile time in measurements" + action = :store_true + "--parse-file", "-f" + help = "If set, measure JSON.parsefile, hence including IO time" + action = :store_true + "file" + help = "The JSON file to benchmark (leave out to 
benchmark all)" + required = false +end + +const args = parse_args(ARGS, s) + +if args["%COMMAND%"] == "parse" + const FLAGS = string( + args["parse"]["include-compile"] ? "C" : "", + args["parse"]["parse-file"] ? "F" : "") + + if args["parse"]["file"] ≠ nothing + const file = args["parse"]["file"] + + if !args["parse"]["include-compile"] + bench(file, true) + end + bench(file) + else + times = 1.0 + if args["parse"]["include-compile"] + error("Option --include-compile can only be used for single file.") + end + for k in sort(collect(keys(DATA_SOURCES))) + bench(k, true) # warm up compiler + end + for k in sort(collect(keys(DATA_SOURCES))) + times *= bench(k) # do benchmark + end + print_with_color(:yellow, " [Bench$FLAGS] ") + println("Total (G.M.) ", times^(1/length(DATA_SOURCES)), " seconds") + end +elseif args["%COMMAND%"] == "list" + println("Available benchmarks are:") + for k in sort(collect(keys(DATA_SOURCES))) + println(" • $k") + end +end diff --git a/bench/micro.jl b/bench/micro.jl new file mode 100644 index 0000000..9c3f653 --- /dev/null +++ b/bench/micro.jl @@ -0,0 +1,56 @@ +# JSON Microbenchmarks +# 0.6 required for running benchmarks + +using JSON +using BenchmarkTools +using Dates + +const suite = BenchmarkGroup() + +suite["print"] = BenchmarkGroup(["serialize"]) +suite["pretty-print"] = BenchmarkGroup(["serialize"]) + +struct CustomListType + x::Int + y::Float64 + z::Union{CustomListType, Nothing} +end + +struct CustomTreeType + x::String + y::Union{CustomTreeType, Nothing} + z::Union{CustomTreeType, Nothing} +end + +list(x) = x == 0 ? nothing : CustomListType(1, 1.0, list(x - 1)) +tree(x) = x == 0 ? 
nothing : CustomTreeType("!!!", tree(x - 1), tree(x - 1)) + +const micros = Dict( + "integer" => 88, + "float" => -88.8, + "ascii" => "Hello World!", + "ascii-1024" => "x" ^ 1024, + "unicode" => "ສະບາຍດີຊາວໂລກ!", + "unicode-1024" => "ℜ" ^ 1024, + "bool" => true, + "null" => nothing, + "flat-homogenous-array-16" => collect(1:16), + "flat-homogenous-array-1024" => collect(1:1024), + "heterogenous-array" => [ + 1, 2, 3, 7, "A", "C", "E", "N", "Q", "R", "Shuttle to Grand Central"], + "nested-array-16^2" => [collect(1:16) for _ in 1:16], + "nested-array-16^3" => [[collect(1:16) for _ in 1:16] for _ in 1:16], + "small-dict" => Dict( + :a => :b, :c => "💙💙💙💙💙💙", :e => 10, :f => Dict(:a => :b)), + "flat-dict-128" => Dict(zip(collect(1:128), collect(1:128))), + "date" => Date(2016, 08, 09), + "matrix-16" => [i == j ? 1.0 : 0.0 for i in 1:16, j in 1:16], + "custom-list-128" => list(128), + "custom-tree-8" => tree(8)) + +for (k, v) in micros + io = IOBuffer() + suite["print"][k] = @benchmarkable JSON.print($(IOBuffer()), $v) + suite["pretty-print"][k] = @benchmarkable JSON.print( + $(IOBuffer()), $v, 4) +end diff --git a/data/jsonchecker/fail01.json b/data/jsonchecker/fail01.json new file mode 100644 index 0000000..92a451e --- /dev/null +++ b/data/jsonchecker/fail01.json @@ -0,0 +1 @@ +fable diff --git a/data/jsonchecker/fail02.json b/data/jsonchecker/fail02.json new file mode 100644 index 0000000..6b7c11e --- /dev/null +++ b/data/jsonchecker/fail02.json @@ -0,0 +1 @@ +["Unclosed array" \ No newline at end of file diff --git a/data/jsonchecker/fail03.json b/data/jsonchecker/fail03.json new file mode 100644 index 0000000..168c81e --- /dev/null +++ b/data/jsonchecker/fail03.json @@ -0,0 +1 @@ +{unquoted_key: "keys must be quoted"} \ No newline at end of file diff --git a/data/jsonchecker/fail04.json b/data/jsonchecker/fail04.json new file mode 100644 index 0000000..9de168b --- /dev/null +++ b/data/jsonchecker/fail04.json @@ -0,0 +1 @@ +["extra comma",] \ No newline at end of 
file diff --git a/data/jsonchecker/fail05.json b/data/jsonchecker/fail05.json new file mode 100644 index 0000000..ddf3ce3 --- /dev/null +++ b/data/jsonchecker/fail05.json @@ -0,0 +1 @@ +["double extra comma",,] \ No newline at end of file diff --git a/data/jsonchecker/fail06.json b/data/jsonchecker/fail06.json new file mode 100644 index 0000000..ed91580 --- /dev/null +++ b/data/jsonchecker/fail06.json @@ -0,0 +1 @@ +[ , "<-- missing value"] \ No newline at end of file diff --git a/data/jsonchecker/fail07.json b/data/jsonchecker/fail07.json new file mode 100644 index 0000000..8a96af3 --- /dev/null +++ b/data/jsonchecker/fail07.json @@ -0,0 +1 @@ +["Comma after the close"], \ No newline at end of file diff --git a/data/jsonchecker/fail08.json b/data/jsonchecker/fail08.json new file mode 100644 index 0000000..b28479c --- /dev/null +++ b/data/jsonchecker/fail08.json @@ -0,0 +1 @@ +["Extra close"]] \ No newline at end of file diff --git a/data/jsonchecker/fail09.json b/data/jsonchecker/fail09.json new file mode 100644 index 0000000..5815574 --- /dev/null +++ b/data/jsonchecker/fail09.json @@ -0,0 +1 @@ +{"Extra comma": true,} \ No newline at end of file diff --git a/data/jsonchecker/fail10.json b/data/jsonchecker/fail10.json new file mode 100644 index 0000000..5d8c004 --- /dev/null +++ b/data/jsonchecker/fail10.json @@ -0,0 +1 @@ +{"Extra value after close": true} "misplaced quoted value" \ No newline at end of file diff --git a/data/jsonchecker/fail11.json b/data/jsonchecker/fail11.json new file mode 100644 index 0000000..76eb95b --- /dev/null +++ b/data/jsonchecker/fail11.json @@ -0,0 +1 @@ +{"Illegal expression": 1 + 2} \ No newline at end of file diff --git a/data/jsonchecker/fail12.json b/data/jsonchecker/fail12.json new file mode 100644 index 0000000..77580a4 --- /dev/null +++ b/data/jsonchecker/fail12.json @@ -0,0 +1 @@ +{"Illegal invocation": alert()} \ No newline at end of file diff --git a/data/jsonchecker/fail13.json b/data/jsonchecker/fail13.json new file 
mode 100644 index 0000000..379406b --- /dev/null +++ b/data/jsonchecker/fail13.json @@ -0,0 +1 @@ +{"Numbers cannot have leading zeroes": 013} \ No newline at end of file diff --git a/data/jsonchecker/fail14.json b/data/jsonchecker/fail14.json new file mode 100644 index 0000000..0ed366b --- /dev/null +++ b/data/jsonchecker/fail14.json @@ -0,0 +1 @@ +{"Numbers cannot be hex": 0x14} \ No newline at end of file diff --git a/data/jsonchecker/fail15.json b/data/jsonchecker/fail15.json new file mode 100644 index 0000000..fc8376b --- /dev/null +++ b/data/jsonchecker/fail15.json @@ -0,0 +1 @@ +["Illegal backslash escape: \x15"] \ No newline at end of file diff --git a/data/jsonchecker/fail16.json b/data/jsonchecker/fail16.json new file mode 100644 index 0000000..3fe21d4 --- /dev/null +++ b/data/jsonchecker/fail16.json @@ -0,0 +1 @@ +[\naked] \ No newline at end of file diff --git a/data/jsonchecker/fail17.json b/data/jsonchecker/fail17.json new file mode 100644 index 0000000..62b9214 --- /dev/null +++ b/data/jsonchecker/fail17.json @@ -0,0 +1 @@ +["Illegal backslash escape: \017"] \ No newline at end of file diff --git a/data/jsonchecker/fail18.json b/data/jsonchecker/fail18.json new file mode 100644 index 0000000..bd7f1d6 --- /dev/null +++ b/data/jsonchecker/fail18.json @@ -0,0 +1,2 @@ +"mutliple" +"things" diff --git a/data/jsonchecker/fail19.json b/data/jsonchecker/fail19.json new file mode 100644 index 0000000..3b9c46f --- /dev/null +++ b/data/jsonchecker/fail19.json @@ -0,0 +1 @@ +{"Missing colon" null} \ No newline at end of file diff --git a/data/jsonchecker/fail20.json b/data/jsonchecker/fail20.json new file mode 100644 index 0000000..27c1af3 --- /dev/null +++ b/data/jsonchecker/fail20.json @@ -0,0 +1 @@ +{"Double colon":: null} \ No newline at end of file diff --git a/data/jsonchecker/fail21.json b/data/jsonchecker/fail21.json new file mode 100644 index 0000000..6247457 --- /dev/null +++ b/data/jsonchecker/fail21.json @@ -0,0 +1 @@ +{"Comma instead of colon", 
null} \ No newline at end of file diff --git a/data/jsonchecker/fail22.json b/data/jsonchecker/fail22.json new file mode 100644 index 0000000..a775258 --- /dev/null +++ b/data/jsonchecker/fail22.json @@ -0,0 +1 @@ +["Colon instead of comma": false] \ No newline at end of file diff --git a/data/jsonchecker/fail23.json b/data/jsonchecker/fail23.json new file mode 100644 index 0000000..494add1 --- /dev/null +++ b/data/jsonchecker/fail23.json @@ -0,0 +1 @@ +["Bad value", truth] \ No newline at end of file diff --git a/data/jsonchecker/fail24.json b/data/jsonchecker/fail24.json new file mode 100644 index 0000000..caff239 --- /dev/null +++ b/data/jsonchecker/fail24.json @@ -0,0 +1 @@ +['single quote'] \ No newline at end of file diff --git a/data/jsonchecker/fail25.json b/data/jsonchecker/fail25.json new file mode 100644 index 0000000..8b7ad23 --- /dev/null +++ b/data/jsonchecker/fail25.json @@ -0,0 +1 @@ +[" tab character in string "] \ No newline at end of file diff --git a/data/jsonchecker/fail26.json b/data/jsonchecker/fail26.json new file mode 100644 index 0000000..845d26a --- /dev/null +++ b/data/jsonchecker/fail26.json @@ -0,0 +1 @@ +["tab\ character\ in\ string\ "] \ No newline at end of file diff --git a/data/jsonchecker/fail27.json b/data/jsonchecker/fail27.json new file mode 100644 index 0000000..6b01a2c --- /dev/null +++ b/data/jsonchecker/fail27.json @@ -0,0 +1,2 @@ +["line +break"] \ No newline at end of file diff --git a/data/jsonchecker/fail28.json b/data/jsonchecker/fail28.json new file mode 100644 index 0000000..621a010 --- /dev/null +++ b/data/jsonchecker/fail28.json @@ -0,0 +1,2 @@ +["line\ +break"] \ No newline at end of file diff --git a/data/jsonchecker/fail29.json b/data/jsonchecker/fail29.json new file mode 100644 index 0000000..47ec421 --- /dev/null +++ b/data/jsonchecker/fail29.json @@ -0,0 +1 @@ +[0e] \ No newline at end of file diff --git a/data/jsonchecker/fail30.json b/data/jsonchecker/fail30.json new file mode 100644 index 0000000..8ab0bc4 
--- /dev/null +++ b/data/jsonchecker/fail30.json @@ -0,0 +1 @@ +[0e+] \ No newline at end of file diff --git a/data/jsonchecker/fail31.json b/data/jsonchecker/fail31.json new file mode 100644 index 0000000..1cce602 --- /dev/null +++ b/data/jsonchecker/fail31.json @@ -0,0 +1 @@ +[0e+-1] \ No newline at end of file diff --git a/data/jsonchecker/fail32.json b/data/jsonchecker/fail32.json new file mode 100644 index 0000000..cb1f560 --- /dev/null +++ b/data/jsonchecker/fail32.json @@ -0,0 +1 @@ +{"Comma instead of closing brace": true, diff --git a/data/jsonchecker/fail33.json b/data/jsonchecker/fail33.json new file mode 100644 index 0000000..ca5eb19 --- /dev/null +++ b/data/jsonchecker/fail33.json @@ -0,0 +1 @@ +["mismatch"} \ No newline at end of file diff --git a/data/jsonchecker/fail34.json b/data/jsonchecker/fail34.json new file mode 100644 index 0000000..7ce16bd --- /dev/null +++ b/data/jsonchecker/fail34.json @@ -0,0 +1 @@ +{"garbage" before : "separator"} diff --git a/data/jsonchecker/fail35.json b/data/jsonchecker/fail35.json new file mode 100644 index 0000000..7a46973 --- /dev/null +++ b/data/jsonchecker/fail35.json @@ -0,0 +1 @@ +{"no separator" diff --git a/data/jsonchecker/fail36.json b/data/jsonchecker/fail36.json new file mode 100644 index 0000000..bf08400 --- /dev/null +++ b/data/jsonchecker/fail36.json @@ -0,0 +1 @@ +{"no closing brace": true diff --git a/data/jsonchecker/fail37.json b/data/jsonchecker/fail37.json new file mode 100644 index 0000000..558ed37 --- /dev/null +++ b/data/jsonchecker/fail37.json @@ -0,0 +1 @@ +[ diff --git a/data/jsonchecker/fail38.json b/data/jsonchecker/fail38.json new file mode 100644 index 0000000..98232c6 --- /dev/null +++ b/data/jsonchecker/fail38.json @@ -0,0 +1 @@ +{ diff --git a/data/jsonchecker/pass01.json b/data/jsonchecker/pass01.json new file mode 100644 index 0000000..2c10f22 --- /dev/null +++ b/data/jsonchecker/pass01.json @@ -0,0 +1,58 @@ +[ + "JSON Test Pattern pass1", + {"object with 1 member":["array with 1 
element"]}, + {}, + [], + -42, + true, + false, + null, + { + "integer": 1234567890, + "real": -9876.543210, + "e": 0.123456789e-12, + "E": 1.234567890E+34, + "": 23456789012E66, + "zero": 0, + "one": 1, + "space": " ", + "quote": "\"", + "backslash": "\\", + "controls": "\b\f\n\r\t", + "slash": "/ & \/", + "alpha": "abcdefghijklmnopqrstuvwyz", + "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", + "digit": "0123456789", + "0123456789": "digit", + "special": "`1~!@#$%^&*()_+-={':[,]}|;.>?", + "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", + "true": true, + "false": false, + "null": null, + "array":[ ], + "object":{ }, + "address": "50 St. James Street", + "url": "http://www.JSON.org/", + "comment": "// /* */": " ", + " s p a c e d " :[1,2 , 3 + +, + +4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], + "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", + "quotes": "" \u0022 %22 0x22 034 "", + "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +: "A key can be any string" + }, + 0.5 ,98.6 +, +99.44 +, + +1066, +1e1, +0.1e1, +1e-1, +1e00,2e+00,2e-00 +,"rosebud"] diff --git a/data/jsonchecker/pass02.json b/data/jsonchecker/pass02.json new file mode 100644 index 0000000..fea5710 --- /dev/null +++ b/data/jsonchecker/pass02.json @@ -0,0 +1 @@ +[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] diff --git a/data/jsonchecker/pass03.json b/data/jsonchecker/pass03.json new file mode 100644 index 0000000..4528d51 --- /dev/null +++ b/data/jsonchecker/pass03.json @@ -0,0 +1,6 @@ +{ + "JSON Test Pattern pass3": { + "The outermost value": "must be an object or array.", + "In this test": "It is an object." + } +} diff --git a/data/jsonchecker/readme.txt b/data/jsonchecker/readme.txt new file mode 100644 index 0000000..321d89d --- /dev/null +++ b/data/jsonchecker/readme.txt @@ -0,0 +1,3 @@ +Test suite from http://json.org/JSON_checker/. 
+ +If the JSON_checker is working correctly, it must accept all of the pass*.json files and reject all of the fail*.json files. diff --git a/data/roundtrip/roundtrip01.json b/data/roundtrip/roundtrip01.json new file mode 100644 index 0000000..500db4a --- /dev/null +++ b/data/roundtrip/roundtrip01.json @@ -0,0 +1 @@ +[null] \ No newline at end of file diff --git a/data/roundtrip/roundtrip02.json b/data/roundtrip/roundtrip02.json new file mode 100644 index 0000000..de601e3 --- /dev/null +++ b/data/roundtrip/roundtrip02.json @@ -0,0 +1 @@ +[true] \ No newline at end of file diff --git a/data/roundtrip/roundtrip03.json b/data/roundtrip/roundtrip03.json new file mode 100644 index 0000000..67b2f07 --- /dev/null +++ b/data/roundtrip/roundtrip03.json @@ -0,0 +1 @@ +[false] \ No newline at end of file diff --git a/data/roundtrip/roundtrip04.json b/data/roundtrip/roundtrip04.json new file mode 100644 index 0000000..6e7ea63 --- /dev/null +++ b/data/roundtrip/roundtrip04.json @@ -0,0 +1 @@ +[0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip05.json b/data/roundtrip/roundtrip05.json new file mode 100644 index 0000000..6dfd298 --- /dev/null +++ b/data/roundtrip/roundtrip05.json @@ -0,0 +1 @@ +["foo"] \ No newline at end of file diff --git a/data/roundtrip/roundtrip06.json b/data/roundtrip/roundtrip06.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/data/roundtrip/roundtrip06.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/roundtrip/roundtrip07.json b/data/roundtrip/roundtrip07.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/data/roundtrip/roundtrip07.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/data/roundtrip/roundtrip08.json b/data/roundtrip/roundtrip08.json new file mode 100644 index 0000000..bfa3412 --- /dev/null +++ b/data/roundtrip/roundtrip08.json @@ -0,0 +1 @@ +[0,1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip09.json b/data/roundtrip/roundtrip09.json 
new file mode 100644 index 0000000..9f5dd4e --- /dev/null +++ b/data/roundtrip/roundtrip09.json @@ -0,0 +1 @@ +{"foo":"bar"} \ No newline at end of file diff --git a/data/roundtrip/roundtrip10.json b/data/roundtrip/roundtrip10.json new file mode 100644 index 0000000..2355b4d --- /dev/null +++ b/data/roundtrip/roundtrip10.json @@ -0,0 +1 @@ +{"a":null,"foo":"bar"} \ No newline at end of file diff --git a/data/roundtrip/roundtrip11.json b/data/roundtrip/roundtrip11.json new file mode 100644 index 0000000..99d21a2 --- /dev/null +++ b/data/roundtrip/roundtrip11.json @@ -0,0 +1 @@ +[-1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip12.json b/data/roundtrip/roundtrip12.json new file mode 100644 index 0000000..56c78be --- /dev/null +++ b/data/roundtrip/roundtrip12.json @@ -0,0 +1 @@ +[-2147483648] \ No newline at end of file diff --git a/data/roundtrip/roundtrip13.json b/data/roundtrip/roundtrip13.json new file mode 100644 index 0000000..029580f --- /dev/null +++ b/data/roundtrip/roundtrip13.json @@ -0,0 +1 @@ +[-1234567890123456789] \ No newline at end of file diff --git a/data/roundtrip/roundtrip14.json b/data/roundtrip/roundtrip14.json new file mode 100644 index 0000000..d865800 --- /dev/null +++ b/data/roundtrip/roundtrip14.json @@ -0,0 +1 @@ +[-9223372036854775808] \ No newline at end of file diff --git a/data/roundtrip/roundtrip15.json b/data/roundtrip/roundtrip15.json new file mode 100644 index 0000000..bace2a0 --- /dev/null +++ b/data/roundtrip/roundtrip15.json @@ -0,0 +1 @@ +[1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip16.json b/data/roundtrip/roundtrip16.json new file mode 100644 index 0000000..dfe696d --- /dev/null +++ b/data/roundtrip/roundtrip16.json @@ -0,0 +1 @@ +[2147483647] \ No newline at end of file diff --git a/data/roundtrip/roundtrip17.json b/data/roundtrip/roundtrip17.json new file mode 100644 index 0000000..6640b07 --- /dev/null +++ b/data/roundtrip/roundtrip17.json @@ -0,0 +1 @@ +[4294967295] \ No newline 
at end of file diff --git a/data/roundtrip/roundtrip18.json b/data/roundtrip/roundtrip18.json new file mode 100644 index 0000000..a3ab143 --- /dev/null +++ b/data/roundtrip/roundtrip18.json @@ -0,0 +1 @@ +[1234567890123456789] \ No newline at end of file diff --git a/data/roundtrip/roundtrip19.json b/data/roundtrip/roundtrip19.json new file mode 100644 index 0000000..8ab4a50 --- /dev/null +++ b/data/roundtrip/roundtrip19.json @@ -0,0 +1 @@ +[9223372036854775807] \ No newline at end of file diff --git a/data/roundtrip/roundtrip20.json b/data/roundtrip/roundtrip20.json new file mode 100644 index 0000000..92df1df --- /dev/null +++ b/data/roundtrip/roundtrip20.json @@ -0,0 +1 @@ +[0.0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip21.json b/data/roundtrip/roundtrip21.json new file mode 100644 index 0000000..cfef815 --- /dev/null +++ b/data/roundtrip/roundtrip21.json @@ -0,0 +1 @@ +[-0.0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip22.json b/data/roundtrip/roundtrip22.json new file mode 100644 index 0000000..a7b7eef --- /dev/null +++ b/data/roundtrip/roundtrip22.json @@ -0,0 +1 @@ +[1.2345] \ No newline at end of file diff --git a/data/roundtrip/roundtrip23.json b/data/roundtrip/roundtrip23.json new file mode 100644 index 0000000..b553e84 --- /dev/null +++ b/data/roundtrip/roundtrip23.json @@ -0,0 +1 @@ +[-1.2345] \ No newline at end of file diff --git a/data/roundtrip/roundtrip24.json b/data/roundtrip/roundtrip24.json new file mode 100644 index 0000000..f01efb6 --- /dev/null +++ b/data/roundtrip/roundtrip24.json @@ -0,0 +1 @@ +[5e-324] \ No newline at end of file diff --git a/data/roundtrip/roundtrip25.json b/data/roundtrip/roundtrip25.json new file mode 100644 index 0000000..cdef14d --- /dev/null +++ b/data/roundtrip/roundtrip25.json @@ -0,0 +1 @@ +[2.225073858507201e-308] \ No newline at end of file diff --git a/data/roundtrip/roundtrip26.json b/data/roundtrip/roundtrip26.json new file mode 100644 index 0000000..f4121b7 --- 
/dev/null +++ b/data/roundtrip/roundtrip26.json @@ -0,0 +1 @@ +[2.2250738585072014e-308] \ No newline at end of file diff --git a/data/roundtrip/roundtrip27.json b/data/roundtrip/roundtrip27.json new file mode 100644 index 0000000..17ce521 --- /dev/null +++ b/data/roundtrip/roundtrip27.json @@ -0,0 +1 @@ +[1.7976931348623157e308] \ No newline at end of file diff --git a/src/Common.jl b/src/Common.jl new file mode 100644 index 0000000..55b1fe5 --- /dev/null +++ b/src/Common.jl @@ -0,0 +1,11 @@ +""" +Internal implementation detail. +""" +module Common + +using Unicode + +include("bytes.jl") +include("errors.jl") + +end diff --git a/src/JSON.jl b/src/JSON.jl new file mode 100644 index 0000000..66fb855 --- /dev/null +++ b/src/JSON.jl @@ -0,0 +1,31 @@ +VERSION < v"0.7.0-beta2.199" && __precompile__() + +module JSON + +export json # returns a compact (or indented) JSON representation as a string +export JSONText # string wrapper to insert raw JSON into JSON output + +include("Common.jl") + +# Parser modules +include("Parser.jl") + +# Writer modules +include("Serializations.jl") +include("Writer.jl") + +# stuff to re-"export" +# note that this package does not actually export anything except `json` but +# all of the following are part of the public interface in one way or another +using .Parser: parse, parsefile +using .Writer: show_json, json, lower, print, StructuralContext, show_element, + show_string, show_key, show_pair, show_null, begin_array, + end_array, begin_object, end_object, indent, delimit, separate, + JSONText +using .Serializations: Serialization, CommonSerialization, + StandardSerialization + +# for pretty-printed (non-compact) output, JSONText must be re-parsed: +Writer.lower(json::JSONText) = parse(json.s) + +end # module diff --git a/src/Parser.jl b/src/Parser.jl new file mode 100644 index 0000000..b7556bb --- /dev/null +++ b/src/Parser.jl @@ -0,0 +1,444 @@ +module Parser # JSON + +using Mmap +using ..Common + +include("pushvector.jl") + +""" +Like 
`isspace`, but work on bytes and includes only the four whitespace +characters defined by the JSON standard: space, tab, line feed, and carriage +return. +""" +isjsonspace(b::UInt8) = b == SPACE || b == TAB || b == NEWLINE || b == RETURN + +""" +Like `isdigit`, but for bytes. +""" +isjsondigit(b::UInt8) = DIGIT_ZERO ≤ b ≤ DIGIT_NINE + +abstract type ParserState end + +mutable struct MemoryParserState <: ParserState + utf8::String + s::Int +end + +# it is convenient to access MemoryParserState like a Vector{UInt8} to avoid copies +Base.@propagate_inbounds Base.getindex(state::MemoryParserState, i::Int) = codeunit(state.utf8, i) +Base.length(state::MemoryParserState) = sizeof(state.utf8) +Base.unsafe_convert(::Type{Ptr{UInt8}}, state::MemoryParserState) = Base.unsafe_convert(Ptr{UInt8}, state.utf8) + +mutable struct StreamingParserState{T <: IO} <: ParserState + io::T + cur::UInt8 + used::Bool + utf8array::PushVector{UInt8, Vector{UInt8}} +end +StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, PushVector{UInt8}()) + +struct ParserContext{DictType, IntType} end + +""" +Return the byte at the current position of the `ParserState`. If there is no +byte (that is, the `ParserState` is done), then an error is thrown that the +input ended unexpectedly. +""" +@inline function byteat(ps::MemoryParserState) + @inbounds if hasmore(ps) + return ps[ps.s] + else + _error(E_UNEXPECTED_EOF, ps) + end +end + +@inline function byteat(ps::StreamingParserState) + if ps.used + ps.used = false + if eof(ps.io) + _error(E_UNEXPECTED_EOF, ps) + else + ps.cur = read(ps.io, UInt8) + end + end + ps.cur +end + +""" +Like `byteat`, but with no special bounds check and error message. Useful when +a current byte is known to exist. +""" +@inline current(ps::MemoryParserState) = ps[ps.s] +@inline current(ps::StreamingParserState) = byteat(ps) + +""" +Require the current byte of the `ParserState` to be the given byte, and then +skip past that byte. Otherwise, an error is thrown. 
"""
Require the current byte of the `ParserState` to be the given byte `c`, and
then skip past that byte. Otherwise, an error is thrown.
"""
@inline function skip!(ps::ParserState, c::UInt8)
    if byteat(ps) == c
        incr!(ps)
    else
        _error_expected_char(c, ps)
    end
end
# Kept @noinline so the cold error path does not bloat callers of skip!.
@noinline _error_expected_char(c, ps) = _error("Expected '$(Char(c))' here", ps)

"""
Require and skip, in order, each of the given bytes `cs`.
"""
function skip!(ps::ParserState, cs::UInt8...)
    for c in cs
        skip!(ps, c)
    end
end

"""
Move the `ParserState` to the next byte.
"""
@inline incr!(ps::MemoryParserState) = (ps.s += 1)
# For streaming input, marking the cached byte as used makes `byteat` fetch
# the next byte lazily on the following access.
@inline incr!(ps::StreamingParserState) = (ps.used = true)

"""
Move the `ParserState` to the next byte, and return the value at the byte before
the advancement. If the `ParserState` is already done, then throw an error.
"""
@inline advance!(ps::ParserState) = (b = byteat(ps); incr!(ps); b)

"""
Return `true` if there is a current byte, and `false` if all bytes have been
exhausted.
"""
@inline hasmore(ps::MemoryParserState) = ps.s ≤ length(ps)
@inline hasmore(ps::StreamingParserState) = true  # no more now ≠ no more ever

"""
Remove as many whitespace bytes as possible from the `ParserState` starting from
the current byte.
"""
@inline function chomp_space!(ps::ParserState)
    @inbounds while hasmore(ps) && isjsonspace(current(ps))
        incr!(ps)
    end
end


# Count occurrences of `needle` among the first `_end` characters of
# `haystack`; used only to report a line number in error messages.
# NOTE(review): `_end` is a byte index (ps.s) while `enumerate` counts
# characters, so the reported line can drift on multi-byte UTF-8 input —
# confirm whether exact line numbers matter for error reporting.
function _count_before(haystack::AbstractString, needle::Char, _end::Int)
    count = 0
    for (i, c) in enumerate(haystack)
        i >= _end && return count
        count += c == needle
    end
    return count
end
# Streaming input has no buffer to excerpt, so just report the offending byte.
@noinline function _error(message::AbstractString, ps::StreamingParserState)
    error("$message\n ...when parsing byte with value '$(current(ps))'")
end

# PARSING

"""
Given a `ParserState`, after possibly any amount of whitespace, return the next
parseable value.
"""
function parse_value(pc::ParserContext, ps::ParserState)
    chomp_space!(ps)

    @inbounds byte = byteat(ps)
    if byte == STRING_DELIM
        return parse_string(ps)
    elseif isjsondigit(byte) || byte == MINUS_SIGN
        return parse_number(pc, ps)
    elseif byte == OBJECT_BEGIN
        return parse_object(pc, ps)
    elseif byte == ARRAY_BEGIN
        return parse_array(pc, ps)
    else
        return parse_jsconstant(ps)
    end
end

# Parse one of the three JSON keyword literals: true, false, or null. The
# leading byte selects the keyword; the remaining bytes must follow exactly.
function parse_jsconstant(ps::ParserState)
    lead = advance!(ps)
    if lead == LATIN_T          # true
        skip!(ps, LATIN_R, LATIN_U, LATIN_E)
        return true
    elseif lead == LATIN_F      # false
        skip!(ps, LATIN_A, LATIN_L, LATIN_S, LATIN_E)
        return false
    elseif lead == LATIN_N      # null
        skip!(ps, LATIN_U, LATIN_L, LATIN_L)
        return nothing
    else
        _error(E_UNEXPECTED_CHAR, ps)
    end
end

# Parse a JSON array into a Vector{Any}. Elements are read until the closing
# bracket, separated by commas.
function parse_array(pc::ParserContext, ps::ParserState)
    items = Any[]
    @inbounds incr!(ps)             # consume opening '['
    chomp_space!(ps)
    if byteat(ps) != ARRAY_END      # special case for an empty array
        @inbounds while true
            push!(items, parse_value(pc, ps))
            chomp_space!(ps)
            byteat(ps) == ARRAY_END && break
            skip!(ps, DELIMITER)
        end
    end

    @inbounds incr!(ps)             # consume closing ']'
    return items
end
# True when `c` is either half of a UTF-16 surrogate pair (U+D800–U+DFFF).
utf16_is_surrogate(c::UInt16) = (c & 0xf800) == 0xd800

# Combine a surrogate pair into the code point it encodes. Note
# 0xd7f7 = 0xd800 - 9 and 9 << 10 = 0x10000 - 0xdc00, so this folds the
# standard surrogate bias into the lead term.
utf16_get_supplementary(lead::UInt16, trail::UInt16) =
    Char(UInt32(lead - 0xd7f7) << 10 + trail)

# Read exactly four hexadecimal digits (the XXXX of a \uXXXX escape) and
# return them as a UInt16. Throws E_BAD_ESCAPE on any non-hex byte.
function read_four_hex_digits!(ps::ParserState)
    local n::UInt16 = 0

    for _ in 1:4
        b = advance!(ps)
        nibble = if isjsondigit(b)
            b - DIGIT_ZERO
        elseif LATIN_A ≤ b ≤ LATIN_F
            b - (LATIN_A - UInt8(10))
        elseif LATIN_UPPER_A ≤ b ≤ LATIN_UPPER_F
            b - (LATIN_UPPER_A - UInt8(10))
        else
            _error(E_BAD_ESCAPE, ps)
        end
        n = n << 4 + nibble
    end

    return n
end

# Decode a \uXXXX escape (the "\u" has already been consumed). A lead
# surrogate must be immediately followed by a second "\uXXXX" trail escape.
function read_unicode_escape!(ps)
    lead = read_four_hex_digits!(ps)
    if !utf16_is_surrogate(lead)
        return Char(lead)
    end
    skip!(ps, BACKSLASH)
    skip!(ps, LATIN_U)
    trail = read_four_hex_digits!(ps)
    return utf16_get_supplementary(lead, trail)
end

# Generic (streaming-capable) string parser: accumulate decoded bytes into an
# IOBuffer until the closing quote.
function parse_string(ps::ParserState)
    buf = IOBuffer()
    incr!(ps)  # skip opening quote
    while true
        byte = advance!(ps)

        if byte == BACKSLASH
            esc = advance!(ps)
            if esc == LATIN_U                   # \uXXXX escape
                write(buf, read_unicode_escape!(ps))
            else
                unescaped = get(ESCAPES, esc, 0x00)
                unescaped == 0x00 && _error(E_BAD_ESCAPE, ps)
                write(buf, unescaped)
            end
        elseif byte < SPACE                     # raw control chars are invalid
            _error(E_BAD_CONTROL, ps)
        elseif byte == STRING_DELIM             # closing quote
            return String(take!(buf))
        else
            write(buf, byte)
        end
    end
end

"""
Return `true` if the given bytes vector, starting at `from` and ending at `to`,
has a leading zero.
"""
function hasleadingzero(bytes, from::Int, to::Int)
    c = bytes[from]
    if c == UInt8('-')
        # "-0" followed by another digit, e.g. "-01"
        return from + 1 < to && bytes[from + 1] == DIGIT_ZERO &&
               isjsondigit(bytes[from + 2])
    elseif c == DIGIT_ZERO
        # "0" followed by another digit, e.g. "01"
        return from < to && to > from + 1 && isjsondigit(bytes[from + 1])
    end
    return false
end
"""
Parse a float from the given bytes vector, starting at `from` and ending at
`to` (both inclusive). Bytes enclosed should all be ASCII characters.

Return the parsed `Float64`, or `nothing` if the bytes do not form a valid
float literal.
"""
function float_from_bytes(bytes, from::Int, to::Int)
    # The ccall is not ideal (Base.tryparse would be better), but it actually
    # makes a 2× difference to performance.
    hasvalue, val = ccall(:jl_try_substrtod, Tuple{Bool, Float64},
                          (Ptr{UInt8}, Csize_t, Csize_t),
                          bytes, from - 1, to - from + 1)
    return hasvalue ? val : nothing
end

"""
Parse an integer of type `IntType` from the given bytes vector, starting at
`from` and ending at `to` (both inclusive). Bytes enclosed should all be
ASCII digits, optionally preceded by a minus sign; anything else raises
`E_BAD_NUMBER`.
"""
function int_from_bytes(pc::ParserContext{<:Any,IntType},
                        ps::ParserState,
                        bytes,
                        from::Int,
                        to::Int) where IntType <: Real
    @inbounds isnegative = bytes[from] == MINUS_SIGN ? (from += 1; true) : false
    num = IntType(0)
    @inbounds for i in from:to
        c = bytes[i]
        dig = c - DIGIT_ZERO
        # dig < 0x10 would admit bytes 10–15, but those never reach here:
        # parse_number only buffers digits and '-' on the integer path, and
        # any '-' past the first position wraps far above 0x10.
        if dig < 0x10
            num = IntType(10) * num + IntType(dig)
        else
            _error(E_BAD_NUMBER, ps)
        end
    end
    return ifelse(isnegative, -num, num)
end

"""
Interpret `bytes[from:to]` as a JSON number: an integer of the context's
integer type when `isint` is true, otherwise a `Float64`. Raises a parse
error for leading zeros, a bare minus sign, or malformed literals.
"""
function number_from_bytes(pc::ParserContext,
                           ps::ParserState,
                           isint::Bool,
                           bytes,
                           from::Int,
                           to::Int)
    @inbounds if hasleadingzero(bytes, from, to)
        _error(E_LEADING_ZERO, ps)
    end

    if isint
        # A lone '-' is not a number.
        @inbounds if to == from && bytes[from] == MINUS_SIGN
            _error(E_BAD_NUMBER, ps)
        end
        return int_from_bytes(pc, ps, bytes, from, to)
    else
        res = float_from_bytes(bytes, from, to)
        return res === nothing ? _error(E_BAD_NUMBER, ps) : res
    end
end
# Fallback for non-type values (functions etc.): nothing to unparameterize.
unparameterize_type(x) = x

# If `T` can be narrowed to a string-keyed AbstractDict, prefer that
# intersection; otherwise leave `T` untouched.
function unparameterize_type(T::Type)
    narrowed = typeintersect(T, AbstractDict{String, Any})
    return narrowed <: Union{} ? T : narrowed
end

# Workaround for slow dynamic dispatch when creating parser contexts: the
# overwhelmingly common default configuration gets a pre-built singleton.
const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64}()

function _get_parsercontext(dicttype, inttype)
    if dicttype == Dict{String, Any} && inttype == Int64
        return DEFAULT_PARSERCONTEXT
    end
    return ParserContext{unparameterize_type(dicttype), inttype}.instance
end

"""
    parse(str; dicttype=Dict{String,Any}, inttype=Int64)

Parse the JSON document in `str` and return the corresponding Julia value.
JSON objects become `dicttype`s and JSON integers become `inttype`s. Any
trailing non-whitespace input raises an error.
"""
function parse(str::AbstractString;
               dicttype=Dict{String,Any},
               inttype::Type{<:Real}=Int64)
    pc = _get_parsercontext(dicttype, inttype)
    ps = MemoryParserState(str, 1)
    v = parse_value(pc, ps)
    chomp_space!(ps)
    hasmore(ps) && _error(E_EXPECTED_EOF, ps)
    return v
end

"""
    parse(io; dicttype=Dict{String,Any}, inttype=Int64)

Parse a single JSON value from the stream `io`.
"""
function parse(io::IO;
               dicttype=Dict{String,Any},
               inttype::Type{<:Real}=Int64)
    pc = _get_parsercontext(dicttype, inttype)
    ps = StreamingParserState(io)
    return parse_value(pc, ps)
end
String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String) + parse(s; dicttype=dicttype, inttype=inttype) + end +end + +# Efficient implementations of some of the above for in-memory parsing +include("specialized.jl") + +end # module Parser diff --git a/src/Serializations.jl b/src/Serializations.jl new file mode 100644 index 0000000..e4398ce --- /dev/null +++ b/src/Serializations.jl @@ -0,0 +1,39 @@ +""" +JSON writer serialization contexts. + +This module defines the `Serialization` abstract type and several concrete +implementations, as they relate to JSON. +""" +module Serializations + +using ..Common + +""" +A `Serialization` defines how objects are lowered to JSON format. +""" +abstract type Serialization end + +""" +The `CommonSerialization` comes with a default set of rules for serializing +Julia types to their JSON equivalents. Additional rules are provided either by +packages explicitly defining `JSON.show_json` for this serialization, or by the +`JSON.lower` method. Most concrete implementations of serializers should subtype +`CommonSerialization`, unless it is desirable to bypass the `lower` system, in +which case `Serialization` should be subtyped. +""" +abstract type CommonSerialization <: Serialization end + +""" +The `StandardSerialization` defines a common, standard JSON serialization format +that is optimized to: + +- strictly follow the JSON standard +- be useful in the greatest number of situations + +All serializations defined for `CommonSerialization` are inherited by +`StandardSerialization`. It is therefore generally advised to add new +serialization behaviour to `CommonSerialization`. 
+""" +struct StandardSerialization <: CommonSerialization end + +end diff --git a/src/Writer.jl b/src/Writer.jl new file mode 100644 index 0000000..5c4cc63 --- /dev/null +++ b/src/Writer.jl @@ -0,0 +1,357 @@ +module Writer + +using Dates +using ..Common +using ..Serializations: Serialization, StandardSerialization, + CommonSerialization + +using Unicode + + +""" +Internal JSON.jl implementation detail; do not depend on this type. + +A JSON primitive that wraps around any composite type to enable `Dict`-like +serialization. +""" +struct CompositeTypeWrapper{T} + wrapped::T + fns::Vector{Symbol} +end + +CompositeTypeWrapper(x, syms) = CompositeTypeWrapper(x, collect(syms)) +CompositeTypeWrapper(x) = CompositeTypeWrapper(x, fieldnames(typeof(x))) + +""" + lower(x) + +Return a value of a JSON-encodable primitive type that `x` should be lowered +into before encoding as JSON. Supported types are: `AbstractDict` to JSON +objects, `Tuple` and `AbstractVector` to JSON arrays, `AbstractArray` to nested +JSON arrays, `AbstractString`, `Symbol`, `Enum`, or `Char` to JSON string, +`Integer` and `AbstractFloat` to JSON number, `Bool` to JSON boolean, and +`Nothing` to JSON null, or any other types with a `show_json` method defined. + +Extensions of this method should preserve the property that the return value is +one of the aforementioned types. If first lowering to some intermediate type is +required, then extensions should call `lower` before returning a value. + +Note that the return value need not be *recursively* lowered—this function may +for instance return an `AbstractArray{Any, 1}` whose elements are not JSON +primitives. +""" +function lower(a) + if nfields(a) > 0 + CompositeTypeWrapper(a) + else + error("Cannot serialize type $(typeof(a))") + end +end + +# To avoid allocating an intermediate string, we directly define `show_json` +# for this type instead of lowering it to a string first (which would +# allocate). 
However, the `show_json` method does call `lower` so as to allow +# users to change the lowering of their `Enum` or even `AbstractString` +# subtypes if necessary. +const IsPrintedAsString = Union{ + Dates.TimeType, Char, Type, AbstractString, Enum, Symbol} +lower(x::IsPrintedAsString) = x + +lower(m::Module) = throw(ArgumentError("cannot serialize Module $m as JSON")) +lower(x::Real) = convert(Float64, x) +lower(x::Base.AbstractSet) = collect(x) + +""" +Abstract supertype of all JSON and JSON-like structural writer contexts. +""" +abstract type StructuralContext <: IO end + +""" +Internal implementation detail. + +A JSON structural context around an `IO` object. Structural writer contexts +define the behaviour of serializing JSON structural objects, such as objects, +arrays, and strings to JSON. The translation of Julia types to JSON structural +objects is not handled by a `JSONContext`, but by a `Serialization` wrapper +around it. Abstract supertype of `PrettyContext` and `CompactContext`. Data can +be written to a JSON context in the usual way, but often higher-level operations +such as `begin_array` or `begin_object` are preferred to directly writing bytes +to the stream. +""" +abstract type JSONContext <: StructuralContext end + +""" +Internal implementation detail. + +Keeps track of the current location in the array or object, which winds and +unwinds during serialization. +""" +mutable struct PrettyContext{T<:IO} <: JSONContext + io::T + step::Int # number of spaces to step + state::Int # number of steps at present + first::Bool # whether an object/array was just started +end +PrettyContext(io::IO, step) = PrettyContext(io, step, 0, false) + +""" +Internal implementation detail. + +For compact printing, which in JSON is fully recursive. +""" +mutable struct CompactContext{T<:IO} <: JSONContext + io::T + first::Bool +end +CompactContext(io::IO) = CompactContext(io, false) + +""" +Internal implementation detail. 
+ +Implements an IO context safe for printing into JSON strings. +""" +struct StringContext{T<:IO} <: IO + io::T +end + +# These aliases make defining additional methods on `show_json` easier. +const CS = CommonSerialization +const SC = StructuralContext + +# Low-level direct access +Base.write(io::JSONContext, byte::UInt8) = write(io.io, byte) +Base.write(io::StringContext, byte::UInt8) = + write(io.io, ESCAPED_ARRAY[byte + 0x01]) +#= turn on if there's a performance benefit +write(io::StringContext, char::Char) = + char <= '\x7f' ? write(io, ESCAPED_ARRAY[UInt8(c) + 0x01]) : + Base.print(io, c) +=# + +""" + indent(io::StructuralContext) + +If appropriate, write a newline to the given context, then indent it by the +appropriate number of spaces. Otherwise, do nothing. +""" +@inline function indent(io::PrettyContext) + write(io, NEWLINE) + for _ in 1:io.state + write(io, SPACE) + end +end +@inline indent(io::CompactContext) = nothing + +""" + separate(io::StructuralContext) + +Write a colon, followed by a space if appropriate, to the given context. +""" +@inline separate(io::PrettyContext) = write(io, SEPARATOR, SPACE) +@inline separate(io::CompactContext) = write(io, SEPARATOR) + +""" + delimit(io::StructuralContext) + +If this is not the first item written in a collection, write a comma in the +structural context. Otherwise, do not write a comma, but set a flag that the +first element has been written already. 
"""
    delimit(io::StructuralContext)

If this is not the first item written in a collection, write a comma in the
structural context. Otherwise, do not write a comma, but set a flag that the
first element has been written already.
"""
@inline function delimit(io::JSONContext)
    io.first || write(io, DELIMITER)
    io.first = false
end

# Opening and closing of JSON objects and arrays, written out explicitly for
# each context type. A pretty context additionally tracks indentation depth
# in `state`; both context kinds track the `first`-element flag so that
# delimiters and indentation are emitted correctly.

function begin_object(io::PrettyContext)
    write(io, OBJECT_BEGIN)
    io.state += io.step
    io.first = true
end
begin_object(io::CompactContext) = (write(io, OBJECT_BEGIN); io.first = true)

function end_object(io::PrettyContext)
    io.state -= io.step
    io.first || indent(io)
    write(io, OBJECT_END)
    io.first = false
end
end_object(io::CompactContext) = (write(io, OBJECT_END); io.first = false)

function begin_array(io::PrettyContext)
    write(io, ARRAY_BEGIN)
    io.state += io.step
    io.first = true
end
begin_array(io::CompactContext) = (write(io, ARRAY_BEGIN); io.first = true)

function end_array(io::PrettyContext)
    io.state -= io.step
    io.first || indent(io)
    write(io, ARRAY_END)
    io.first = false
end
end_array(io::CompactContext) = (write(io, ARRAY_END); io.first = false)

"""
    show_string(io::IO, str)

Print `str` as a JSON string (that is, properly escaped and wrapped by double
quotes) to the given IO object `io`.
"""
function show_string(io::IO, x)
    write(io, STRING_DELIM)
    Base.print(StringContext(io), x)
    write(io, STRING_DELIM)
end

"""
    show_null(io::IO)

Print the string `null` to the given IO object `io`.
"""
show_null(io::IO) = Base.print(io, "null")

"""
    show_element(io::StructuralContext, s, x)

Print object `x` as an element of a JSON array to context `io` using rules
defined by serialization `s`.
"""
function show_element(io::JSONContext, s, x)
    delimit(io)
    indent(io)
    show_json(io, s, x)
end

"""
    show_key(io::StructuralContext, k)

Print string `k` as the key of a JSON key-value pair to context `io`.
"""
function show_key(io::JSONContext, k)
    delimit(io)
    indent(io)
    show_string(io, k)
    separate(io)
end
+""" +function show_pair(io::JSONContext, s, k, v) + show_key(io, k) + show_json(io, s, v) +end +show_pair(io::JSONContext, s, kv) = show_pair(io, s, first(kv), last(kv)) + +# Default serialization rules for CommonSerialization (CS) +function show_json(io::SC, s::CS, x::IsPrintedAsString) + # We need this check to allow `lower(x::Enum)` overrides to work if needed; + # it should be optimized out if `lower` is a no-op + lx = lower(x) + if x === lx + show_string(io, x) + else + show_json(io, s, lx) + end +end + +function show_json(io::SC, s::CS, x::Union{Integer, AbstractFloat}) + if isfinite(x) + Base.print(io, x) + else + show_null(io) + end +end + +show_json(io::SC, ::CS, ::Nothing) = show_null(io) +show_json(io::SC, ::CS, ::Missing) = show_null(io) + +function show_json(io::SC, s::CS, a::AbstractDict) + begin_object(io) + for kv in a + show_pair(io, s, kv) + end + end_object(io) +end + +function show_json(io::SC, s::CS, kv::Pair) + begin_object(io) + show_pair(io, s, kv) + end_object(io) +end + +function show_json(io::SC, s::CS, x::CompositeTypeWrapper) + begin_object(io) + for fn in x.fns + show_pair(io, s, fn, getfield(x.wrapped, fn)) + end + end_object(io) +end + +function show_json(io::SC, s::CS, x::Union{AbstractVector, Tuple}) + begin_array(io) + for elt in x + show_element(io, s, elt) + end + end_array(io) +end + +""" +Serialize a multidimensional array to JSON in column-major format. That is, +`json([1 2 3; 4 5 6]) == "[[1,4],[2,5],[3,6]]"`. 
+""" +function show_json(io::SC, s::CS, A::AbstractArray{<:Any,n}) where n + begin_array(io) + newdims = ntuple(_ -> :, n - 1) + for j in axes(A, n) + show_element(io, s, view(A, newdims..., j)) + end + end_array(io) +end + +# special case for 0-dimensional arrays +show_json(io::SC, s::CS, A::AbstractArray{<:Any,0}) = show_json(io, s, A[]) + +show_json(io::SC, s::CS, a) = show_json(io, s, lower(a)) + +# Fallback show_json for non-SC types +""" +Serialize Julia object `obj` to IO `io` using the behaviour described by `s`. If +`indent` is provided, then the JSON will be pretty-printed; otherwise it will be +printed on one line. If pretty-printing is enabled, then a trailing newline will +be printed; otherwise there will be no trailing newline. +""" +function show_json(io::IO, s::Serialization, obj; indent=nothing) + ctx = indent === nothing ? CompactContext(io) : PrettyContext(io, indent) + show_json(ctx, s, obj) + if indent !== nothing + println(io) + end +end + +""" + JSONText(s::AbstractString) + +`JSONText` is a wrapper around a Julia string representing JSON-formatted +text, which is inserted *as-is* in the JSON output of `JSON.print` and `JSON.json` +for compact output, and is otherwise re-parsed for pretty-printed output. + +`s` *must* contain valid JSON text. Otherwise compact output will contain +the malformed `s` and other serialization output will throw a parsing exception. 
+""" +struct JSONText + s::String +end +show_json(io::CompactContext, s::CS, json::JSONText) = write(io, json.s) +# other contexts for JSONText are handled by lower(json) = parse(json.s) + +print(io::IO, obj, indent) = + show_json(io, StandardSerialization(), obj; indent=indent) +print(io::IO, obj) = show_json(io, StandardSerialization(), obj) + +print(a, indent) = print(stdout, a, indent) +print(a) = print(stdout, a) + +json(a) = sprint(print, a) +json(a, indent) = sprint(print, a, indent) + +end diff --git a/src/bytes.jl b/src/bytes.jl new file mode 100644 index 0000000..57b92a8 --- /dev/null +++ b/src/bytes.jl @@ -0,0 +1,67 @@ +# The following bytes have significant meaning in JSON +const BACKSPACE = UInt8('\b') +const TAB = UInt8('\t') +const NEWLINE = UInt8('\n') +const FORM_FEED = UInt8('\f') +const RETURN = UInt8('\r') +const SPACE = UInt8(' ') +const STRING_DELIM = UInt8('"') +const PLUS_SIGN = UInt8('+') +const DELIMITER = UInt8(',') +const MINUS_SIGN = UInt8('-') +const DECIMAL_POINT = UInt8('.') +const SOLIDUS = UInt8('/') +const DIGIT_ZERO = UInt8('0') +const DIGIT_NINE = UInt8('9') +const SEPARATOR = UInt8(':') +const LATIN_UPPER_A = UInt8('A') +const LATIN_UPPER_E = UInt8('E') +const LATIN_UPPER_F = UInt8('F') +const ARRAY_BEGIN = UInt8('[') +const BACKSLASH = UInt8('\\') +const ARRAY_END = UInt8(']') +const LATIN_A = UInt8('a') +const LATIN_B = UInt8('b') +const LATIN_E = UInt8('e') +const LATIN_F = UInt8('f') +const LATIN_L = UInt8('l') +const LATIN_N = UInt8('n') +const LATIN_R = UInt8('r') +const LATIN_S = UInt8('s') +const LATIN_T = UInt8('t') +const LATIN_U = UInt8('u') +const OBJECT_BEGIN = UInt8('{') +const OBJECT_END = UInt8('}') + +const ESCAPES = Dict( + STRING_DELIM => STRING_DELIM, + BACKSLASH => BACKSLASH, + SOLIDUS => SOLIDUS, + LATIN_B => BACKSPACE, + LATIN_F => FORM_FEED, + LATIN_N => NEWLINE, + LATIN_R => RETURN, + LATIN_T => TAB) + +const REVERSE_ESCAPES = Dict(reverse(p) for p in ESCAPES) +const ESCAPED_ARRAY = 
Vector{Vector{UInt8}}(undef, 256) +for c in 0x00:0xFF + ESCAPED_ARRAY[c + 1] = if c == SOLIDUS + [SOLIDUS] # don't escape this one + elseif c ≥ 0x80 + [c] # UTF-8 character copied verbatim + elseif haskey(REVERSE_ESCAPES, c) + [BACKSLASH, REVERSE_ESCAPES[c]] + elseif iscntrl(Char(c)) || !isprint(Char(c)) + UInt8[BACKSLASH, LATIN_U, string(c, base=16, pad=4)...] + else + [c] + end +end + +export BACKSPACE, TAB, NEWLINE, FORM_FEED, RETURN, SPACE, STRING_DELIM, + PLUS_SIGN, DELIMITER, MINUS_SIGN, DECIMAL_POINT, SOLIDUS, DIGIT_ZERO, + DIGIT_NINE, SEPARATOR, LATIN_UPPER_A, LATIN_UPPER_E, LATIN_UPPER_F, + ARRAY_BEGIN, BACKSLASH, ARRAY_END, LATIN_A, LATIN_B, LATIN_E, LATIN_F, + LATIN_L, LATIN_N, LATIN_R, LATIN_S, LATIN_T, LATIN_U, OBJECT_BEGIN, + OBJECT_END, ESCAPES, REVERSE_ESCAPES, ESCAPED_ARRAY diff --git a/src/errors.jl b/src/errors.jl new file mode 100644 index 0000000..c9c1c87 --- /dev/null +++ b/src/errors.jl @@ -0,0 +1,12 @@ +# The following errors may be thrown by the parser +const E_EXPECTED_EOF = "Expected end of input" +const E_UNEXPECTED_EOF = "Unexpected end of input" +const E_UNEXPECTED_CHAR = "Unexpected character" +const E_BAD_KEY = "Invalid object key" +const E_BAD_ESCAPE = "Invalid escape sequence" +const E_BAD_CONTROL = "ASCII control character in string" +const E_LEADING_ZERO = "Invalid leading zero in number" +const E_BAD_NUMBER = "Invalid number" + +export E_EXPECTED_EOF, E_UNEXPECTED_EOF, E_UNEXPECTED_CHAR, E_BAD_KEY, + E_BAD_ESCAPE, E_BAD_CONTROL, E_LEADING_ZERO, E_BAD_NUMBER diff --git a/src/pushvector.jl b/src/pushvector.jl new file mode 100644 index 0000000..01399f1 --- /dev/null +++ b/src/pushvector.jl @@ -0,0 +1,33 @@ +# This is a vector wrapper that we use as a workaround for `push!` +# being slow (it always calls into the runtime even if the underlying buffer, +# has enough space). 
# This is a vector wrapper that we use as a workaround for `push!` being slow
# (it always calls into the runtime even if the underlying buffer has enough
# space). Here we keep track of the length using an extra field.
mutable struct PushVector{T, A<:AbstractVector{T}} <: AbstractVector{T}
    v::A     # backing storage; may be longer than the logical length
    l::Int   # logical length
end

# Default capacity of 20 should be enough to never need to grow in most cases.
PushVector{T}() where {T} = PushVector(Vector{T}(undef, 20), 0)

Base.unsafe_convert(::Type{Ptr{UInt8}}, v::PushVector) = pointer(v.v)
Base.length(v::PushVector) = v.l
Base.size(v::PushVector) = (v.l,)

@inline function Base.getindex(v::PushVector, i)
    @boundscheck checkbounds(v, i)
    @inbounds v.v[i]
end

function Base.push!(v::PushVector, i)
    v.l += 1
    if v.l > length(v.v)
        # Grow geometrically to keep amortized push! cost constant.
        resize!(v.v, v.l * 2)
    end
    v.v[v.l] = i
    return v
end

function Base.resize!(v::PushVector, l::Integer)
    # Only support shrinking for now, since that is all we need.
    @assert l <= v.l
    v.l = l
    # Follow the Base.resize! contract of returning the collection
    # (previously this returned the new length).
    return v
end

# Pre-sized IOBuffer used when the exact output length is known in advance.
function maxsize_buffer(maxsize::Int)
    IOBuffer(maxsize=maxsize)
end

"""
Specialized in-memory string parser. The current position must be the opening
quote. A "dry run" (`predict_string`) first finds the exact byte length so the
result can be allocated up front; when the string contains no escapes it is
copied directly out of the source buffer.
"""
function parse_string(ps::MemoryParserState)
    # "Dry Run": find length of string so we can allocate the right amount of
    # memory from the start. Does not do full error checking.
    fastpath, len = predict_string(ps)

    # Now read the string itself:

    # Fast path occurs when the string has no escaped characters. This is
    # quite often the case in real-world data, especially when keys are short
    # strings. We can just copy the data from the buffer in this case.
    if fastpath
        s = ps.s
        ps.s = s + len + 2 # byte after closing quote
        # pointer(utf8) + s points one past the opening quote.
        return unsafe_string(pointer(ps.utf8) + s, len)
    else
        return String(take!(parse_string(ps, maxsize_buffer(len))))
    end
end
+ +The first element of the returned tuple is a boolean indicating whether the +string may be copied directly from the parser state. Special casing string +parsing when there are no escaped characters leads to substantially increased +performance in common situations. + +The second element of the returned tuple is an integer representing the exact +length of the string, in bytes when encoded as UTF-8. This information is useful +for pre-sizing a buffer to contain the parsed string. + +This function will throw an error if: + + - invalid control characters are found + - an invalid unicode escape is read + - the string is not terminated + +No error is thrown when other invalid backslash escapes are encountered. +""" +function predict_string(ps::MemoryParserState) + e = length(ps) + fastpath = true # true if no escapes in this string, so it can be copied + len = 0 # the number of UTF8 bytes the string contains + + s = ps.s + 1 # skip past opening string character " + @inbounds while s <= e + c = ps[s] + if c == BACKSLASH + fastpath = false + (s += 1) > e && break + if ps[s] == LATIN_U # Unicode escape + t = ps.s + ps.s = s + 1 + len += write(devnull, read_unicode_escape!(ps)) + s = ps.s + ps.s = t + continue + end + elseif c == STRING_DELIM + return fastpath, len + elseif c < SPACE + ps.s = s + _error(E_BAD_CONTROL, ps) + end + len += 1 + s += 1 + end + + ps.s = s + _error(E_UNEXPECTED_EOF, ps) +end + +""" +Parse the string starting at the parser state’s current location into the given +pre-sized IOBuffer. The only correctness checking is for escape sequences, so the +passed-in buffer must exactly represent the amount of space needed for parsing. 
+""" +function parse_string(ps::MemoryParserState, b::IOBuffer) + s = ps.s + e = length(ps) + + s += 1 # skip past opening string character " + len = b.maxsize + @inbounds while b.size < len + c = ps[s] + if c == BACKSLASH + s += 1 + s > e && break + c = ps[s] + if c == LATIN_U # Unicode escape + ps.s = s + 1 + write(b, read_unicode_escape!(ps)) + s = ps.s + continue + else + c = get(ESCAPES, c, 0x00) + if c == 0x00 + ps.s = s + _error(E_BAD_ESCAPE, ps) + end + end + end + + # UTF8-encoded non-ascii characters will be copied verbatim, which is + # the desired behaviour + write(b, c) + s += 1 + end + + # don't worry about non-termination or other edge cases; those should have + # been caught in the dry run. + ps.s = s + 1 + b +end + +function parse_number(pc::ParserContext, ps::MemoryParserState) + s = p = ps.s + e = length(ps) + isint = true + + # Determine the end of the floating point by skipping past ASCII values + # 0-9, +, -, e, E, and . + while p ≤ e + @inbounds c = ps[p] + if isjsondigit(c) || MINUS_SIGN == c # no-op + elseif PLUS_SIGN == c || LATIN_E == c || LATIN_UPPER_E == c || + DECIMAL_POINT == c + isint = false + else + break + end + p += 1 + end + ps.s = p + + number_from_bytes(pc, ps, isint, ps, s, p - 1) +end diff --git a/test/async.jl b/test/async.jl new file mode 100644 index 0000000..1612a6e --- /dev/null +++ b/test/async.jl @@ -0,0 +1,109 @@ +finished_async_tests = RemoteChannel() + +using Sockets + +@async begin + s = listen(7777) + s = accept(s) + + Base.start_reading(s) + + @test JSON.parse(s) != nothing # a + @test JSON.parse(s) != nothing # b + validate_c(s) # c + @test JSON.parse(s) != nothing # d + validate_svg_tviewer_menu(s) # svg_tviewer_menu + @test JSON.parse(s) != nothing # gmaps + @test JSON.parse(s) != nothing # colors1 + @test JSON.parse(s) != nothing # colors2 + @test JSON.parse(s) != nothing # colors3 + @test JSON.parse(s) != nothing # twitter + @test JSON.parse(s) != nothing # facebook + validate_flickr(s) # flickr + @test 
JSON.parse(s) != nothing # youtube + @test JSON.parse(s) != nothing # iphone + @test JSON.parse(s) != nothing # customer + @test JSON.parse(s) != nothing # product + @test JSON.parse(s) != nothing # interop + validate_unicode(s) # unicode + @test JSON.parse(s) != nothing # issue5 + @test JSON.parse(s) != nothing # dollars + @test JSON.parse(s) != nothing # brackets + + put!(finished_async_tests, nothing) +end + +w = connect("localhost", 7777) + +@test JSON.parse(a) != nothing +write(w, a) + +@test JSON.parse(b) != nothing +write(w, b) + +validate_c(c) +write(w, c) + +@test JSON.parse(d) != nothing +write(w, d) + +validate_svg_tviewer_menu(svg_tviewer_menu) +write(w, svg_tviewer_menu) + +@test JSON.parse(gmaps) != nothing +write(w, gmaps) + +@test JSON.parse(colors1) != nothing +write(w, colors1) + +@test JSON.parse(colors2) != nothing +write(w, colors2) + +@test JSON.parse(colors3) != nothing +write(w, colors3) + +@test JSON.parse(twitter) != nothing +write(w, twitter) + +@test JSON.parse(facebook) != nothing +write(w, facebook) + +validate_flickr(flickr) +write(w, flickr) + +@test JSON.parse(youtube) != nothing +write(w, youtube) + +@test JSON.parse(iphone) != nothing +write(w, iphone) + +@test JSON.parse(customer) != nothing +write(w, customer) + +@test JSON.parse(product) != nothing +write(w, product) + +@test JSON.parse(interop) != nothing +write(w, interop) + +validate_unicode(unicode) +write(w, unicode) + +# issue #5 +issue5 = "[\"A\",\"B\",\"C\\n\"]" +JSON.parse(issue5) +write(w, issue5) + +# $ escaping issue +dollars = ["all of the \$s", "µniçø∂\$"] +json_dollars = json(dollars) +@test JSON.parse(json_dollars) != nothing +write(w, json_dollars) + +# unmatched brackets +brackets = Dict("foo"=>"ba}r", "be}e]p"=>"boo{p") +json_brackets = json(brackets) +@test JSON.parse(json_brackets) != nothing +write(w, json_dollars) + +fetch(finished_async_tests) diff --git a/test/enum.jl b/test/enum.jl new file mode 100644 index 0000000..ead3d99 --- /dev/null +++ 
b/test/enum.jl @@ -0,0 +1,4 @@ +@enum Animal zebra aardvark horse +@test json(zebra) == "\"zebra\"" +@test json([aardvark, horse, Dict("z" => zebra)]) == + "[\"aardvark\",\"horse\",{\"z\":\"zebra\"}]" diff --git a/test/indentation.jl b/test/indentation.jl new file mode 100644 index 0000000..98fa5f0 --- /dev/null +++ b/test/indentation.jl @@ -0,0 +1,10 @@ +# check indented json has same final value as non indented +fb = JSON.parse(facebook) +fbjson1 = json(fb, 2) +fbjson2 = json(fb) +@test JSON.parse(fbjson1) == JSON.parse(fbjson2) + +ev = JSON.parse(svg_tviewer_menu) +ejson1 = json(ev, 2) +ejson2 = json(ev) +@test JSON.parse(ejson1) == JSON.parse(ejson2) diff --git a/test/json-checker.jl b/test/json-checker.jl new file mode 100644 index 0000000..7d0594b --- /dev/null +++ b/test/json-checker.jl @@ -0,0 +1,28 @@ +# Run modified JSON checker tests + +const JSON_DATA_DIR = joinpath(dirname(@__DIR__), "data") + +for i in 1:38 + file = "fail$(lpad(string(i), 2, "0")).json" + filepath = joinpath(JSON_DATA_DIR, "jsonchecker", file) + + @test_throws ErrorException JSON.parsefile(filepath) +end + +for i in 1:3 + # Test that the files parse successfully and match streaming parser + tf = joinpath(JSON_DATA_DIR, "jsonchecker", "pass$(lpad(string(i), 2, "0")).json") + @test JSON.parsefile(tf) == open(JSON.parse, tf) +end + +# Run JSON roundtrip tests (check consistency of .json) + +roundtrip(data) = JSON.json(JSON.Parser.parse(data)) + +for i in 1:27 + file = "roundtrip$(lpad(string(i), 2, "0")).json" + filepath = joinpath(JSON_DATA_DIR, "roundtrip", file) + + rt = roundtrip(read(filepath, String)) + @test rt == roundtrip(rt) +end diff --git a/test/json-samples.jl b/test/json-samples.jl new file mode 100644 index 0000000..2df326f --- /dev/null +++ b/test/json-samples.jl @@ -0,0 +1,644 @@ +#Examples from http://json.org/example.html +a="{\"menu\": { + \"id\": \"file\", + \"value\": \"File\", + \"popup\": { + \"menuitem\": [ + {\"value\": \"New\", \"onclick\": \"CreateNewDoc()\"}, 
+ {\"value\": \"Open\", \"onclick\": \"OpenDoc()\"}, + {\"value\": \"Close\", \"onclick\": \"CloseDoc()\"} + ] + } + }} + " + + +b="{ + \"glossary\": { + \"title\": \"example glossary\", + \"GlossDiv\": { + \"title\": \"S\", + \"GlossList\": { + \"GlossEntry\": { + \"ID\": \"SGML\", + \"SortAs\": \"SGML\", + \"GlossTerm\": \"Standard Generalized Markup Language\", + \"Acronym\": \"SGML\", + \"Abbrev\": \"ISO 8879:1986\", + \"GlossDef\": { + \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\", + \"GlossSeeAlso\": [\"GML\", \"XML\"] + }, + \"GlossSee\": \"markup\" + } + } + } + } +} +" + +const c = """ +{"widget": { + "debug": "on", + "window": { + "title": "Sample Konfabulator Widget", + "name": "main_window", + "width": 500, + "height": 500 + }, + "image": { + "src": "Images/Sun.png", + "name": "sun1", + "hOffset": 250, + "vOffset": 250, + "alignment": "center" + }, + "text": { + "data": "Click Here", + "size": 36.5, + "style": "bold", + "name": "text1", + "hOffset": 250, + "vOffset": 100, + "alignment": "center", + "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" + } +}}""" +function validate_c(c) + j = JSON.parse(c) + @test j != nothing + @test typeof(j["widget"]["image"]["hOffset"]) == Int64 + @test j["widget"]["image"]["hOffset"] == 250 + @test typeof(j["widget"]["text"]["size"]) == Float64 + @test j["widget"]["text"]["size"] == 36.5 +end + +d = "{\"web-app\": { + \"servlet\": [ + { + \"servlet-name\": \"cofaxCDS\", + \"servlet-class\": \"org.cofax.cds.CDSServlet\", + \"init-param\": { + \"configGlossary:installationAt\": \"Philadelphia, PA\", + \"configGlossary:adminEmail\": \"ksm@pobox.com\", + \"configGlossary:poweredBy\": \"Cofax\", + \"configGlossary:poweredByIcon\": \"/images/cofax.gif\", + \"configGlossary:staticPath\": \"/content/static\", + \"templateProcessorClass\": \"org.cofax.WysiwygTemplate\", + \"templateLoaderClass\": \"org.cofax.FilesTemplateLoader\", + \"templatePath\": \"templates\", + 
\"templateOverridePath\": \"\", + \"defaultListTemplate\": \"listTemplate.htm\", + \"defaultFileTemplate\": \"articleTemplate.htm\", + \"useJSP\": false, + \"jspListTemplate\": \"listTemplate.jsp\", + \"jspFileTemplate\": \"articleTemplate.jsp\", + \"cachePackageTagsTrack\": 200, + \"cachePackageTagsStore\": 200, + \"cachePackageTagsRefresh\": 60, + \"cacheTemplatesTrack\": 100, + \"cacheTemplatesStore\": 50, + \"cacheTemplatesRefresh\": 15, + \"cachePagesTrack\": 200, + \"cachePagesStore\": 100, + \"cachePagesRefresh\": 10, + \"cachePagesDirtyRead\": 10, + \"searchEngineListTemplate\": \"forSearchEnginesList.htm\", + \"searchEngineFileTemplate\": \"forSearchEngines.htm\", + \"searchEngineRobotsDb\": \"WEB-INF/robots.db\", + \"useDataStore\": true, + \"dataStoreClass\": \"org.cofax.SqlDataStore\", + \"redirectionClass\": \"org.cofax.SqlRedirection\", + \"dataStoreName\": \"cofax\", + \"dataStoreDriver\": \"com.microsoft.jdbc.sqlserver.SQLServerDriver\", + \"dataStoreUrl\": \"jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon\", + \"dataStoreUser\": \"sa\", + \"dataStorePassword\": \"dataStoreTestQuery\", + \"dataStoreTestQuery\": \"SET NOCOUNT ON;select test='test';\", + \"dataStoreLogFile\": \"/usr/local/tomcat/logs/datastore.log\", + \"dataStoreInitConns\": 10, + \"dataStoreMaxConns\": 100, + \"dataStoreConnUsageLimit\": 100, + \"dataStoreLogLevel\": \"debug\", + \"maxUrlLength\": 500}}, + { + \"servlet-name\": \"cofaxEmail\", + \"servlet-class\": \"org.cofax.cds.EmailServlet\", + \"init-param\": { + \"mailHost\": \"mail1\", + \"mailHostOverride\": \"mail2\"}}, + { + \"servlet-name\": \"cofaxAdmin\", + \"servlet-class\": \"org.cofax.cds.AdminServlet\"}, + + { + \"servlet-name\": \"fileServlet\", + \"servlet-class\": \"org.cofax.cds.FileServlet\"}, + { + \"servlet-name\": \"cofaxTools\", + \"servlet-class\": \"org.cofax.cms.CofaxToolsServlet\", + \"init-param\": { + \"templatePath\": \"toolstemplates/\", + \"log\": 1, + \"logLocation\": 
\"/usr/local/tomcat/logs/CofaxTools.log\", + \"logMaxSize\": \"\", + \"dataLog\": 1, + \"dataLogLocation\": \"/usr/local/tomcat/logs/dataLog.log\", + \"dataLogMaxSize\": \"\", + \"removePageCache\": \"/content/admin/remove?cache=pages&id=\", + \"removeTemplateCache\": \"/content/admin/remove?cache=templates&id=\", + \"fileTransferFolder\": \"/usr/local/tomcat/webapps/content/fileTransferFolder\", + \"lookInContext\": 1, + \"adminGroupID\": 4, + \"betaServer\": true}}], + \"servlet-mapping\": { + \"cofaxCDS\": \"/\", + \"cofaxEmail\": \"/cofaxutil/aemail/*\", + \"cofaxAdmin\": \"/admin/*\", + \"fileServlet\": \"/static/*\", + \"cofaxTools\": \"/tools/*\"}, + + \"taglib\": { + \"taglib-uri\": \"cofax.tld\", + \"taglib-location\": \"/WEB-INF/tlds/cofax.tld\"}}}" + +const svg_tviewer_menu = """ +{"menu": { + "header": "SVG\\tViewer\\u03b1", + "items": [ + {"id": "Open"}, + {"id": "OpenNew", "label": "Open New"}, + null, + {"id": "ZoomIn", "label": "Zoom In"}, + {"id": "ZoomOut", "label": "Zoom Out"}, + {"id": "OriginalView", "label": "Original View"}, + null, + {"id": "Quality"}, + {"id": "Pause"}, + {"id": "Mute"}, + null, + {"id": "Find", "label": "Find..."}, + {"id": "FindAgain", "label": "Find Again"}, + {"id": "Copy"}, + {"id": "CopyAgain", "label": "Copy Again"}, + {"id": "CopySVG", "label": "Copy SVG"}, + {"id": "ViewSVG", "label": "View SVG"}, + {"id": "ViewSource", "label": "View Source"}, + {"id": "SaveAs", "label": "Save As"}, + null, + {"id": "Help"}, + {"id": "About", "label": "About Adobe SVG Viewer..."} + ] +}}""" +function validate_svg_tviewer_menu(str) + j = JSON.parse(str) + @test j != nothing + @test typeof(j) == Dict{String, Any} + @test length(j) == 1 + @test typeof(j["menu"]) == Dict{String, Any} + @test length(j["menu"]) == 2 + @test j["menu"]["header"] == "SVG\tViewerα" + @test isa(j["menu"]["items"], Vector{Any}) + @test length(j["menu"]["items"]) == 22 + @test j["menu"]["items"][3] == nothing + @test j["menu"]["items"][2]["id"] == "OpenNew" + 
@test j["menu"]["items"][2]["label"] == "Open New" +end + + +#Example JSON strings from http://www.jquery4u.com/json/10-example-json-files/ + +gmaps= "{\"markers\": [ + { + \"point\":\"new GLatLng(40.266044,-74.718479)\", + \"homeTeam\":\"Lawrence Library\", + \"awayTeam\":\"LUGip\", + \"markerImage\":\"images/red.png\", + \"information\": \"Linux users group meets second Wednesday of each month.\", + \"fixture\":\"Wednesday 7pm\", + \"capacity\":\"\", + \"previousScore\":\"\" + }, + { + \"point\":\"new GLatLng(40.211600,-74.695702)\", + \"homeTeam\":\"Hamilton Library\", + \"awayTeam\":\"LUGip HW SIG\", + \"markerImage\":\"images/white.png\", + \"information\": \"Linux users can meet the first Tuesday of the month to work out harward and configuration issues.\", + \"fixture\":\"Tuesday 7pm\", + \"capacity\":\"\", + \"tv\":\"\" + }, + { + \"point\":\"new GLatLng(40.294535,-74.682012)\", + \"homeTeam\":\"Applebees\", + \"awayTeam\":\"After LUPip Mtg Spot\", + \"markerImage\":\"images/newcastle.png\", + \"information\": \"Some of us go there after the main LUGip meeting, drink brews, and talk.\", + \"fixture\":\"Wednesday whenever\", + \"capacity\":\"2 to 4 pints\", + \"tv\":\"\" + } +] }" + +colors1 = "{ + \"colorsArray\":[{ + \"colorName\":\"red\", + \"hexValue\":\"#f00\" + }, + { + \"colorName\":\"green\", + \"hexValue\":\"#0f0\" + }, + { + \"colorName\":\"blue\", + \"hexValue\":\"#00f\" + }, + { + \"colorName\":\"cyan\", + \"hexValue\":\"#0ff\" + }, + { + \"colorName\":\"magenta\", + \"hexValue\":\"#f0f\" + }, + { + \"colorName\":\"yellow\", + \"hexValue\":\"#ff0\" + }, + { + \"colorName\":\"black\", + \"hexValue\":\"#000\" + } + ] +}" + +colors2 = "{ + \"colorsArray\":[{ + \"red\":\"#f00\", + \"green\":\"#0f0\", + \"blue\":\"#00f\", + \"cyan\":\"#0ff\", + \"magenta\":\"#f0f\", + \"yellow\":\"#ff0\", + \"black\":\"#000\" + } + ] +}" + +colors3 = "{ + \"red\":\"#f00\", + \"green\":\"#0f0\", + \"blue\":\"#00f\", + \"cyan\":\"#0ff\", + \"magenta\":\"#f0f\", + 
\"yellow\":\"#ff0\", + \"black\":\"#000\" +}" + +twitter = "{\"results\":[ + + {\"text\":\"@twitterapi http://tinyurl.com/ctrefg\", + \"to_user_id\":396524, + \"to_user\":\"TwitterAPI\", + \"from_user\":\"jkoum\", + \"metadata\": + { + \"result_type\":\"popular\", + \"recent_retweets\": 109 + }, + \"id\":1478555574, + \"from_user_id\":1833773, + \"iso_language_code\":\"nl\", + \"source\":\"twitter\", + \"profile_image_url\":\"http://s3.amazonaws.com/twitter_production/profile_images/118412707/2522215727_a5f07da155_b_normal.jpg\", + \"created_at\":\"Wed, 08 Apr 2009 19:22:10 +0000\"}], + \"since_id\":0, + \"max_id\":1480307926, + \"refresh_url\":\"?since_id=1480307926&q=%40twitterapi\", + \"results_per_page\":15, + \"next_page\":\"?page=2&max_id=1480307926&q=%40twitterapi\", + \"completed_in\":0.031704, + \"page\":1, + \"query\":\"%40twitterapi\"}" + +facebook= "{ + \"data\": [ + { + \"id\": \"X999_Y999\", + \"from\": { + \"name\": \"Tom Brady\", \"id\": \"X12\" + }, + \"message\": \"Looking forward to 2010!\", + \"actions\": [ + { + \"name\": \"Comment\", + \"link\": \"http://www.facebook.com/X999/posts/Y999\" + }, + { + \"name\": \"Like\", + \"link\": \"http://www.facebook.com/X999/posts/Y999\" + } + ], + \"type\": \"status\", + \"created_time\": \"2010-08-02T21:27:44+0000\", + \"updated_time\": \"2010-08-02T21:27:44+0000\" + }, + { + \"id\": \"X998_Y998\", + \"from\": { + \"name\": \"Peyton Manning\", \"id\": \"X18\" + }, + \"message\": \"Where's my contract?\", + \"actions\": [ + { + \"name\": \"Comment\", + \"link\": \"http://www.facebook.com/X998/posts/Y998\" + }, + { + \"name\": \"Like\", + \"link\": \"http://www.facebook.com/X998/posts/Y998\" + } + ], + \"type\": \"status\", + \"created_time\": \"2010-08-02T21:27:44+0000\", + \"updated_time\": \"2010-08-02T21:27:44+0000\" + } + ] +}" + +const flickr = """{ + "title": "Talk On Travel Pool", + "link": "http://www.flickr.com/groups/talkontravel/pool/", + "description": "Travel and vacation photos from around 
the world.", + "modified": "2009-02-02T11:10:27Z", + "generator": "http://www.flickr.com/", + "totalItems":222, + "items": [ + { + "title": "View from the hotel", + "link": "http://www.flickr.com/photos/33112458@N08/3081564649/in/pool-998875@N22", + "media": {"m":"http://farm4.static.flickr.com/3037/3081564649_4a6569750c_m.jpg"}, + "date_taken": "2008-12-04T04:43:03-08:00", + "description": "
Talk On Travel has added a photo to the pool:
", + "published": "2008-12-04T12:43:03Z", + "author": "nobody@flickr.com (Talk On Travel)", + "author_id": "33112458@N08", + "tags": "spain dolphins tenerife canaries lagomera aqualand playadelasamericas junglepark losgigantos loscristines talkontravel" + } + ] +}""" +function validate_flickr(str) + k = JSON.parse(str) + @test k != nothing + @test k["totalItems"] == 222 + @test k["items"][1]["description"][12] == '\"' +end + +youtube = "{\"apiVersion\":\"2.0\", + \"data\":{ + \"updated\":\"2010-01-07T19:58:42.949Z\", + \"totalItems\":800, + \"startIndex\":1, + \"itemsPerPage\":1, + \"items\":[ + {\"id\":\"hYB0mn5zh2c\", + \"uploaded\":\"2007-06-05T22:07:03.000Z\", + \"updated\":\"2010-01-07T13:26:50.000Z\", + \"uploader\":\"GoogleDeveloperDay\", + \"category\":\"News\", + \"title\":\"Google Developers Day US - Maps API Introduction\", + \"description\":\"Google Maps API Introduction ...\", + \"tags\":[ + \"GDD07\",\"GDD07US\",\"Maps\" + ], + \"thumbnail\":{ + \"default\":\"http://i.ytimg.com/vi/hYB0mn5zh2c/default.jpg\", + \"hqDefault\":\"http://i.ytimg.com/vi/hYB0mn5zh2c/hqdefault.jpg\" + }, + \"player\":{ + \"default\":\"http://www.youtube.com/watch?v\u003dhYB0mn5zh2c\" + }, + \"content\":{ + \"1\":\"rtsp://v5.cache3.c.youtube.com/CiILENy.../0/0/0/video.3gp\", + \"5\":\"http://www.youtube.com/v/hYB0mn5zh2c?f...\", + \"6\":\"rtsp://v1.cache1.c.youtube.com/CiILENy.../0/0/0/video.3gp\" + }, + \"duration\":2840, + \"aspectRatio\":\"widescreen\", + \"rating\":4.63, + \"ratingCount\":68, + \"viewCount\":220101, + \"favoriteCount\":201, + \"commentCount\":22, + \"status\":{ + \"value\":\"restricted\", + \"reason\":\"limitedSyndication\" + }, + \"accessControl\":{ + \"syndicate\":\"allowed\", + \"commentVote\":\"allowed\", + \"rate\":\"allowed\", + \"list\":\"allowed\", + \"comment\":\"allowed\", + \"embed\":\"allowed\", + \"videoRespond\":\"moderated\" + } + } + ] + } +}" + +iphone = "{ + \"menu\": { + \"header\": \"xProgress SVG Viewer\", + \"items\": [ + { + \"id\": 
\"Open\" + }, + { + \"id\": \"OpenNew\", + \"label\": \"Open New\" + }, + null, + { + \"id\": \"ZoomIn\", + \"label\": \"Zoom In\" + }, + { + \"id\": \"ZoomOut\", + \"label\": \"Zoom Out\" + }, + { + \"id\": \"OriginalView\", + \"label\": \"Original View\" + }, + null, + { + \"id\": \"Quality\" + }, + { + \"id\": \"Pause\" + }, + { + \"id\": \"Mute\" + }, + null, + { + \"id\": \"Find\", + \"label\": \"Find...\" + }, + { + \"id\": \"FindAgain\", + \"label\": \"Find Again\" + }, + { + \"id\": \"Copy\" + }, + { + \"id\": \"CopyAgain\", + \"label\": \"Copy Again\" + }, + { + \"id\": \"CopySVG\", + \"label\": \"Copy SVG\" + }, + { + \"id\": \"ViewSVG\", + \"label\": \"View SVG\" + }, + { + \"id\": \"ViewSource\", + \"label\": \"View Source\" + }, + { + \"id\": \"SaveAs\", + \"label\": \"Save As\" + }, + null, + { + \"id\": \"Help\" + }, + { + \"id\": \"About\", + \"label\": \"About xProgress CVG Viewer...\" + } + ] + } +}" + +customer = "{ + \"firstName\": \"John\", + \"lastName\": \"Smith\", + \"age\": 25, + \"address\": + { + \"streetAddress\": \"21 2nd Street\", + \"city\": \"New York\", + \"state\": \"NY\", + \"postalCode\": \"10021\" + }, + \"phoneNumber\": + [ + { + \"type\": \"home\", + \"number\": \"212 555-1234\" + }, + { + \"type\": \"fax\", + \"number\": \"646 555-4567\" + } + ] + }" + + product = "{ + \"name\":\"Product\", + \"properties\": + { + \"id\": + { + \"type\":\"number\", + \"description\":\"Product identifier\", + \"required\":true + }, + \"name\": + { + \"description\":\"Name of the product\", + \"type\":\"string\", + \"required\":true + }, + \"price\": + { + \"type\":\"number\", + \"minimum\":0, + \"required\":true + }, + \"tags\": + { + \"type\":\"array\", + \"items\": + { + \"type\":\"string\" + } + } + } +}" + +interop = "{ + \"ResultSet\": { + \"totalResultsAvailable\": \"1827221\", + \"totalResultsReturned\": 2, + \"firstResultPosition\": 1, + \"Result\": [ + { + \"Title\": \"potato jpg\", + \"Summary\": \"Kentang Si bungsu dari keluarga 
Solanum tuberosum L ini ternyata memiliki khasiat untuk mengurangi kerutan jerawat bintik hitam dan kemerahan pada kulit Gunakan seminggu sekali sebagai\", + \"Url\": \"http://www.mediaindonesia.com/spaw/uploads/images/potato.jpg\", + \"ClickUrl\": \"http://www.mediaindonesia.com/spaw/uploads/images/potato.jpg\", + \"RefererUrl\": \"http://www.mediaindonesia.com/mediaperempuan/index.php?ar_id=Nzkw\", + \"FileSize\": 22630, + \"FileFormat\": \"jpeg\", + \"Height\": \"362\", + \"Width\": \"532\", + \"Thumbnail\": { + \"Url\": \"http://thm-a01.yimg.com/nimage/557094559c18f16a\", + \"Height\": \"98\", + \"Width\": \"145\" + } + }, + { + \"Title\": \"potato jpg\", + \"Summary\": \"Introduction of puneri aloo This is a traditional potato preparation flavoured with curry leaves and peanuts and can be eaten on fasting day Preparation time 10 min\", + \"Url\": \"http://www.infovisual.info/01/photo/potato.jpg\", + \"ClickUrl\": \"http://www.infovisual.info/01/photo/potato.jpg\", + \"RefererUrl\": \"http://sundayfood.com/puneri-aloo-indian-%20recipe\", + \"FileSize\": 119398, + \"FileFormat\": \"jpeg\", + \"Height\": \"685\", + \"Width\": \"1024\", + \"Thumbnail\": { + \"Url\": \"http://thm-a01.yimg.com/nimage/7fa23212efe84b64\", + \"Height\": \"107\", + \"Width\": \"160\" + } + } + ] + } +}" + +const unicode = """ +{"অলিম্পিকস": { + "অ্যাথলেট": "২২টি দেশ থেকে ২,০৩৫ জন প্রতিযোগী", + "ইভেন্ট": "২২টি ইভেন্টের মধ্যে ছিল দড়ি টানাটানি", + "রেকর্ড": [ + {"১০০মি. 
স্প্রিন্ট": "রেজি ওয়াকার, দক্ষিণ আফ্রিকা"}, + {"Marathon": "জনি হেইস"}, + {" ফ্রি-স্টাইল সাঁতার": "Henry Taylor, Britain"} + ] +}} +""" +function validate_unicode(str) + u = JSON.parse(str) + @test u != nothing + @test u["অলিম্পিকস"]["রেকর্ড"][2]["Marathon"] == "জনি হেইস" +end diff --git a/test/lowering.jl b/test/lowering.jl new file mode 100644 index 0000000..388cff1 --- /dev/null +++ b/test/lowering.jl @@ -0,0 +1,37 @@ +module TestLowering + +using JSON +using Test +using Dates +using FixedPointNumbers: Fixed + +@test JSON.json(Date(2016, 8, 3)) == "\"2016-08-03\"" + +@test JSON.json(:x) == "\"x\"" +@test_throws ArgumentError JSON.json(Base) + +struct Type151{T} + x::T +end + +@test JSON.parse(JSON.json(Type151)) == string(Type151) + +JSON.lower(v::Type151{T}) where {T} = Dict(:type => T, :value => v.x) +@test JSON.parse(JSON.json(Type151(1.0))) == Dict( + "type" => "Float64", + "value" => 1.0) + +fixednum = Fixed{Int16, 15}(0.1234) +@test JSON.parse(JSON.json(fixednum)) == convert(Float64, fixednum) + +# test that the default string-serialization of enums can be overriden by +# `lower` if needed +@enum Fruit apple orange banana +JSON.lower(x::Fruit) = string("Fruit: ", x) +@test JSON.json(apple) == "\"Fruit: apple\"" + +@enum Vegetable carrot tomato potato +JSON.lower(x::Vegetable) = Dict(string(x) => Int(x)) +@test JSON.json(potato) == "{\"potato\":2}" + +end diff --git a/test/parser/dicttype.jl b/test/parser/dicttype.jl new file mode 100644 index 0000000..6e4d328 --- /dev/null +++ b/test/parser/dicttype.jl @@ -0,0 +1,22 @@ +MissingDict() = DataStructures.DefaultDict{String,Any}(Missing) + +@testset for T in [ + DataStructures.OrderedDict, + Dict{Symbol, Int32}, + MissingDict +] + val = JSON.parse("{\"x\": 3}", dicttype=T) + @test length(val) == 1 + key = collect(keys(val))[1] + @test string(key) == "x" + @test val[key] == 3 + + if T == MissingDict + @test val isa DataStructures.DefaultDict{String} + @test val["y"] === missing + else + @test val isa T + 
@test_throws KeyError val["y"] + end +end + diff --git a/test/parser/inttype.jl b/test/parser/inttype.jl new file mode 100644 index 0000000..30e9ca1 --- /dev/null +++ b/test/parser/inttype.jl @@ -0,0 +1,16 @@ +@testset for T in [Int32, Int64, Int128, BigInt] + val = JSON.parse("{\"x\": 3}", inttype=T) + @test isa(val, Dict{String, Any}) + @test length(val) == 1 + key = collect(keys(val))[1] + @test string(key) == "x" + value = val[key] + @test value == 3 + @test typeof(value) == T +end + +@testset begin + teststr = """{"201736327611975630": 18005722827070440994}""" + val = JSON.parse(teststr, inttype=Int128) + @test val == Dict{String,Any}("201736327611975630"=> 18005722827070440994) +end diff --git a/test/parser/invalid-input.jl b/test/parser/invalid-input.jl new file mode 100644 index 0000000..924f225 --- /dev/null +++ b/test/parser/invalid-input.jl @@ -0,0 +1,33 @@ +const FAILURES = [ + # Unexpected character in array + "[1,2,3/4,5,6,7]", + # Unexpected character in object + "{\"1\":2, \"2\":3 _ \"4\":5}", + # Invalid escaped character + "[\"alpha\\α\"]", + "[\"\\u05AG\"]", + # Invalid 'simple' and 'unknown value' + "[tXXe]", + "[fail]", + "∞", + # Invalid number + "[5,2,-]", + "[5,2,+β]", + # Incomplete escape + "\"\\", + # Control character + "\"\0\"", + # Issue #99 + "[\"🍕\"_\"🍕\"", + # Issue #260 + "1997-03-03", + "1997.1-", +] + +@testset for fail in FAILURES + # Test memory parser + @test_throws ErrorException JSON.parse(fail) + + # Test streaming parser + @test_throws ErrorException JSON.parse(IOBuffer(fail)) +end diff --git a/test/parser/parsefile.jl b/test/parser/parsefile.jl new file mode 100644 index 0000000..f5b9f6c --- /dev/null +++ b/test/parser/parsefile.jl @@ -0,0 +1,10 @@ +tmppath, io = mktemp() +write(io, facebook) +close(io) +if Sys.iswindows() + # don't use mmap on Windows, to avoid ERROR: unlink: operation not permitted (EPERM) + @test haskey(JSON.parsefile(tmppath; use_mmap=false), "data") +else + @test haskey(JSON.parsefile(tmppath), 
"data") +end +rm(tmppath) diff --git a/test/regression/issue021.jl b/test/regression/issue021.jl new file mode 100644 index 0000000..856f820 --- /dev/null +++ b/test/regression/issue021.jl @@ -0,0 +1,4 @@ +test21 = "[\r\n{\r\n\"a\": 1,\r\n\"b\": 2\r\n},\r\n{\r\n\"a\": 3,\r\n\"b\": 4\r\n}\r\n]" +a = JSON.parse(test21) +@test isa(a, Vector{Any}) +@test length(a) == 2 diff --git a/test/regression/issue026.jl b/test/regression/issue026.jl new file mode 100644 index 0000000..ff9ea6d --- /dev/null +++ b/test/regression/issue026.jl @@ -0,0 +1,2 @@ +obj = JSON.parse("{\"a\":2e10}") +@test obj["a"] == 2e10 diff --git a/test/regression/issue057.jl b/test/regression/issue057.jl new file mode 100644 index 0000000..1797a8a --- /dev/null +++ b/test/regression/issue057.jl @@ -0,0 +1,2 @@ +obj = JSON.parse("{\"\U0001d712\":\"\\ud835\\udf12\"}") +@test(obj["𝜒"] == "𝜒") diff --git a/test/regression/issue109.jl b/test/regression/issue109.jl new file mode 100644 index 0000000..6dc2d9d --- /dev/null +++ b/test/regression/issue109.jl @@ -0,0 +1,8 @@ +mutable struct t109 + i::Int +end + +let iob = IOBuffer() + JSON.print(iob, t109(1)) + @test get(JSON.parse(String(take!(iob))), "i", 0) == 1 +end diff --git a/test/regression/issue152.jl b/test/regression/issue152.jl new file mode 100644 index 0000000..5b4a01b --- /dev/null +++ b/test/regression/issue152.jl @@ -0,0 +1,2 @@ +@test json([Int64[] Int64[]]) == "[[],[]]" +@test json([Int64[] Int64[]]') == "[]" diff --git a/test/regression/issue163.jl b/test/regression/issue163.jl new file mode 100644 index 0000000..5ace4fa --- /dev/null +++ b/test/regression/issue163.jl @@ -0,0 +1 @@ +@test Float32(JSON.parse(json(2.1f-8))) == 2.1f-8 diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..e732e5d --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,80 @@ +using JSON +using Test +using Dates +using Distributed: RemoteChannel +using OffsetArrays + +import DataStructures + +include("json-samples.jl") + +@testset "Parser" 
begin + @testset "Parser Failures" begin + include("parser/invalid-input.jl") + end + + @testset "parsefile" begin + include("parser/parsefile.jl") + end + + @testset "dicttype" begin + include("parser/dicttype.jl") + end + + @testset "inttype" begin + include("parser/inttype.jl") + end + + @testset "Miscellaneous" begin + # test for single values + @test JSON.parse("true") == true + @test JSON.parse("null") == nothing + @test JSON.parse("\"hello\"") == "hello" + @test JSON.parse("\"a\"") == "a" + @test JSON.parse("1") == 1 + @test JSON.parse("1.5") == 1.5 + @test JSON.parse("[true]") == [true] + end +end + +@testset "Serializer" begin + @testset "Standard Serializer" begin + include("standard-serializer.jl") + end + + @testset "Lowering" begin + include("lowering.jl") + end + + @testset "Custom Serializer" begin + include("serializer.jl") + end +end + +@testset "Integration" begin + # ::Nothing values should be encoded as null + testDict = Dict("a" => nothing) + nothingJson = JSON.json(testDict) + nothingDict = JSON.parse(nothingJson) + @test testDict == nothingDict + + @testset "async" begin + include("async.jl") + end + + @testset "indentation" begin + include("indentation.jl") + end + + @testset "JSON Checker" begin + include("json-checker.jl") + end +end + +@testset "Regression" begin + @testset "for issue #$i" for i in [21, 26, 57, 109, 152, 163] + include("regression/issue$(lpad(string(i), 3, "0")).jl") + end +end + +# Check that printing to the default stdout doesn't fail diff --git a/test/serializer.jl b/test/serializer.jl new file mode 100644 index 0000000..87927fe --- /dev/null +++ b/test/serializer.jl @@ -0,0 +1,95 @@ +module TestSerializer + +using JSON +using Test + +# to define a new serialization behaviour, import these first +import JSON.Serializations: CommonSerialization, StandardSerialization +import JSON: StructuralContext + +# those names are long so we can define some type aliases +const CS = CommonSerialization +const SC = StructuralContext 
+ +# for test harness purposes +function sprint_kwarg(f, args...; kwargs...) + b = IOBuffer() + f(b, args...; kwargs...) + String(take!(b)) +end + +# issue #168: Print NaN and Inf as Julia would +struct NaNSerialization <: CS end +JSON.show_json(io::SC, ::NaNSerialization, f::AbstractFloat) = Base.print(io, f) + +@test sprint(JSON.show_json, NaNSerialization(), [NaN, Inf, -Inf, 0.0]) == + "[NaN,Inf,-Inf,0.0]" + +@test sprint_kwarg( + JSON.show_json, + NaNSerialization(), + [NaN, Inf, -Inf, 0.0]; + indent=4 +) == """ +[ + NaN, + Inf, + -Inf, + 0.0 +] +""" + +# issue #170: Print JavaScript functions directly +struct JSSerialization <: CS end +struct JSFunction + data::String +end + +function JSON.show_json(io::SC, ::JSSerialization, f::JSFunction) + first = true + for line in split(f.data, '\n') + if !first + JSON.indent(io) + end + first = false + Base.print(io, line) + end +end + +@test sprint_kwarg(JSON.show_json, JSSerialization(), Any[ + 1, + 2, + JSFunction("function test() {\n return 1;\n}") +]; indent=2) == """ +[ + 1, + 2, + function test() { + return 1; + } +] +""" + +# test serializing a type without any fields +struct SingletonType end +@test_throws ErrorException json(SingletonType()) + +# test printing to stdout +let filename = tempname() + open(filename, "w") do f + redirect_stdout(f) do + JSON.print(Any[1, 2, 3.0]) + end + end + @test read(filename, String) == "[1,2,3.0]" + rm(filename) +end + +# issue #184: serializing a 0-dimensional array +@test sprint(JSON.show_json, JSON.StandardSerialization(), view([184], 1)) == "184" + +# test serializing with a JSONText object +@test json([JSONText("{\"bar\": [3,4,5]}"),314159]) == "[{\"bar\": [3,4,5]},314159]" +@test json([JSONText("{\"bar\": [3,4,5]}"),314159], 1) == "[\n {\n \"bar\": [\n 3,\n 4,\n 5\n ]\n },\n 314159\n]\n" + +end diff --git a/test/standard-serializer.jl b/test/standard-serializer.jl new file mode 100644 index 0000000..034bfc4 --- /dev/null +++ b/test/standard-serializer.jl @@ -0,0 +1,72 @@ 
+@testset "Symbol" begin + symtest = Dict(:symbolarray => [:apple, :pear], :symbolsingleton => :hello) + @test (JSON.json(symtest) == "{\"symbolarray\":[\"apple\",\"pear\"],\"symbolsingleton\":\"hello\"}" + || JSON.json(symtest) == "{\"symbolsingleton\":\"hello\",\"symbolarray\":[\"apple\",\"pear\"]}") +end + +@testset "Floats" begin + @test sprint(JSON.print, [NaN]) == "[null]" + @test sprint(JSON.print, [Inf]) == "[null]" +end + +@testset "Union{Nothing,T} (old Nullable)" begin + @test sprint(JSON.print, Union{Any,Nothing}[nothing]) == "[null]" + @test sprint(JSON.print, Union{Int64,Nothing}[nothing]) == "[null]" + @test sprint(JSON.print, Union{Int64,Nothing}[1]) == "[1]" +end + +@testset "Char" begin + @test json('a') == "\"a\"" + @test json('\\') == "\"\\\\\"" + @test json('\n') == "\"\\n\"" + @test json('🍩') =="\"🍩\"" +end + +@testset "Enum" begin + include("enum.jl") +end + +@testset "Type" begin + @test sprint(JSON.print, Float64) == string("\"Float64\"") +end + +@testset "Module" begin + @test_throws ArgumentError sprint(JSON.print, JSON) +end + +@testset "Dates" begin + @test json(Date("2016-04-13")) == "\"2016-04-13\"" + @test json([Date("2016-04-13"), Date("2016-04-12")]) == "[\"2016-04-13\",\"2016-04-12\"]" + @test json(DateTime("2016-04-13T00:00:00")) == "\"2016-04-13T00:00:00\"" + @test json([DateTime("2016-04-13T00:00:00"), DateTime("2016-04-12T00:00:00")]) == "[\"2016-04-13T00:00:00\",\"2016-04-12T00:00:00\"]" +end + +@testset "Null bytes" begin + zeros = Dict("\0" => "\0") + json_zeros = json(zeros) + @test occursin("\\u0000", json_zeros) + @test !occursin("\\0", json_zeros) + @test JSON.parse(json_zeros) == zeros +end + +@testset "Arrays" begin + # Printing an empty array or Dict shouldn't cause a BoundsError + @test json(String[]) == "[]" + @test json(Dict()) == "{}" + + #Multidimensional arrays + @test json([0 1; 2 0]) == "[[0,2],[1,0]]" + @test json(OffsetArray([0 1; 2 0], 0:1, 10:11)) == "[[0,2],[1,0]]" +end + +@testset "Pairs" begin + @test 
json(1 => 2) == "{\"1\":2}" + @test json(:foo => 2) == "{\"foo\":2}" + @test json([1, 2] => [3, 4]) == "{\"$([1, 2])\":[3,4]}" + @test json([1 => 2]) == "[{\"1\":2}]" +end + +@testset "Sets" begin + @test json(Set()) == "[]" + @test json(Set([1, 2])) in ["[1,2]", "[2,1]"] +end