This is an experimental project to improve my LPeg/LPeg.re/parser/lexer/... skills. I started with something simple: parsing a CSV file!
I am publishing my own code samples, built step by step... I hope they will be useful to someone else.
See the "Comma-Separated Values (CSV)" example in the LPeg documentation.
It gives us the first sample (try1a.lua):
-- NOTE: this snippet assumes `lpeg` is already in scope (the require is not shown here).
-- A field is either quoted or unquoted.
-- Quoted: a substitution capture (lpeg.Cs) of the text between the double
-- quotes, in which every doubled quote ("") is replaced by a single quote.
-- Unquoted: a simple capture (lpeg.C) of a possibly empty run of characters
-- other than comma, newline and double quote.
local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
lpeg.C((1 - lpeg.S',\n"')^0)
-- A record is one field followed by zero or more ",field" groups, and must be
-- terminated by a newline or by the end of the subject (-1 matches only when
-- no character is left).
local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
-- Parses one CSV record at the start of the string `s`.
-- Returns the results of lpeg.match: one captured string per field, as
-- multiple return values, or nil when `s` does not match `record`.
function csv (s)
return lpeg.match(record, s)
end
Run: lua try1/test.try1a.lua
And get:
foo bar baz
The documentation says we can capture the values into a table. Just change the record definition:
-local record = field * (',' * field)^0 * (lpeg.P'\n' + -1)
+local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
We get (try1b.lua) :
-- NOTE: this snippet assumes `lpeg` is already in scope (the require is not shown here).
-- A field is either quoted or unquoted.
-- Quoted: a substitution capture (lpeg.Cs) of the text between the double
-- quotes, in which every doubled quote ("") is replaced by a single quote.
-- Unquoted: a simple capture (lpeg.C) of a possibly empty run of characters
-- other than comma, newline and double quote.
local field = '"' * lpeg.Cs(((lpeg.P(1) - '"') + lpeg.P'""' / '"')^0) * '"' +
lpeg.C((1 - lpeg.S',\n"')^0)
-- Same record pattern as before, except that the field captures are wrapped
-- in a table capture (lpeg.Ct), so they are collected into one array-like
-- table. The terminator (newline or end of subject) stays outside lpeg.Ct.
local record = lpeg.Ct(field * (',' * field)^0) * (lpeg.P'\n' + -1)
-- Parses one CSV record at the start of the string `s`.
-- Returns a table holding one string per field, or nil when `s` does not
-- match `record`.
function csv (s)
return lpeg.match(record, s)
end
Run: lua try1/test.try1b.lua
And get:
{
[1] = "foo",
[2] = "bar",
[3] = "baz",
}
For now, we experiment with this CSV sample:
foo,bar,baz
1,2,"trois"
11,22,"trois trois"
For the next step we need the LPeg.re documentation: the same grammar is now written in `re` syntax.
Lua code is used to read the input line by line and to append each parsed line to a table.
local re = require"re"
local input = io.stdin

-- Grammar for ONE line of CSV, written in LPeg.re syntax:
--   record     : the fields of the line collected into a table ({| |}),
--                followed by a newline or the end of the subject (!.)
--   field      : a quoted field if possible, otherwise an unquoted one
--   nonescaped : simple capture of a (possibly empty) run of characters
--                other than comma, double quote and newline
--   escaped    : substitution capture ({~ ~}) of the text between double
--                quotes, where each "" is replaced by a single "
local record = re.compile[[
record <- {| field (',' field)* |} (%nl / !.)
field <- escaped / nonescaped
nonescaped <- { [^,"%nl]* }
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
]]

-- Parse the input line by line; `parsed` gets one table of fields per line.
local parsed = {}
local lineno = 0
for line in input:lines() do
  lineno = lineno + 1
  local fields = record:match(line)
  if fields then
    parsed[#parsed+1] = fields
  else
    -- record:match returns nil for a malformed line (e.g. an unterminated
    -- quote). Appending nil would drop the line without any trace, so report
    -- it on stderr; stdout still only carries the parsed result.
    io.stderr:write(("csv-parser-1: line %d is not a valid CSV record, skipped\n"):format(lineno))
  end
end

-- show the result
print("return "..require"tprint"(parsed)) -- in lua
--print(require"json".encode(parsed)) -- in json
Run: lua csv-parser-1.lua < sample.csv
See files :
Get the result:
return {
[1] = {
[1] = "foo",
[2] = "bar",
[3] = "baz",
},
[2] = {
[1] = "1",
[2] = "2",
[3] = "trois",
},
[3] = {
[1] = "11",
[2] = "22",
[3] = "trois trois",
},
}
We add a `records` rule (`records` = some `record`) to parse the entire file without extra Lua code.
local re = require"re"
local input = io.stdin

-- Grammar for a whole CSV file, written in LPeg.re syntax:
--   records    : zero or more records collected into a table, then the end
--                of the subject (!.)
--   record     : the fields of one line collected into a table, followed by
--                a mandatory newline
--   field      : a quoted field if possible, otherwise an unquoted one
--   nonescaped : simple capture of a (possibly empty) run of characters
--                other than comma, double quote and newline
--   escaped    : substitution capture of the text between double quotes,
--                where each "" is replaced by a single "
local csvfile = re.compile[[
records <- {| (record)* |} !.
record <- {| field (',' field)* |} %nl
field <- escaped / nonescaped
nonescaped <- { [^,"%nl]* }
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
]]

local text = input:read("*a")
-- Every record must end with a newline, so a file whose last line is not
-- newline-terminated would make the whole match fail. Add the missing final
-- newline; empty input is left alone and still yields an empty table.
if text ~= "" and text:sub(-1) ~= "\n" then
  text = text .. "\n"
end

-- `parsed` is a table of records (each a table of strings), or nil when the
-- input does not match the grammar.
local parsed = csvfile:match(text)
-- show the result
print("return "..require"tprint"(parsed)) -- in lua
--print(require"json".encode(parsed)) -- in json
Run: lua csv-parser-2.lua < sample.csv
See files :
Get the result:
return {
[1] = {
[1] = "foo",
[2] = "bar",
[3] = "baz",
},
[2] = {
[1] = "1",
[2] = "2",
[3] = "trois",
},
[3] = {
[1] = "11",
[2] = "22",
[3] = "trois trois",
},
}
local re = require"re"
local input = io.stdin

-- Grammar for a whole CSV file, written in LPeg.re syntax. Each table capture
-- also gets a named field `tag` (an empty match turned into a constant
-- string) that records which rule produced the table.
--   csvfile    : a header line followed by one or more rows, then the end of
--                the subject (!.)
--   hdr        : currently just an alias of `row`, so the header line is
--                captured and tagged exactly like any other row
--   row        : the fields of one line collected into a table, followed by
--                an end of line
--   field / nonescaped / escaped : quoted or unquoted field, as in the
--                previous parsers
local csvfile = re.compile[[
csvfile <- {| {:tag: '' -> "csvfile":} hdr (row)+ |} !.
hdr <- row
row <- {| {:tag: '' -> "row" :} field (',' field)* |} eol
eol <- %nl -- end of line (%nl is newline, "\n")
field <- escaped / nonescaped
nonescaped <- { [^,"%nl]* }
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
]]

local text = input:read("*a")
-- Every row must end with `eol`, so a file whose last line is not
-- newline-terminated would make the whole match fail. Add the missing final
-- newline before matching.
if text ~= "" and text:sub(-1) ~= "\n" then
  text = text .. "\n"
end

-- `parsed` is the tagged table of rows, or nil when the input does not match
-- the grammar (for example when it has fewer than two lines).
local parsed = csvfile:match(text)
-- show the result
print("return "..require"tprint"(parsed)) -- in lua
--print(require"json".encode(parsed)) -- in json
Run: lua csv-parser-3.lua < sample.csv
See files :
Get the result:
return {
[1] = {
[1] = "foo",
[2] = "bar",
[3] = "baz",
["tag"] = "row",
},
[2] = {
[1] = "1",
[2] = "2",
[3] = "trois",
["tag"] = "row",
},
[3] = {
[1] = "11",
[2] = "22",
[3] = "trois trois",
["tag"] = "row",
},
["tag"] = "csvfile",
}