Skip to content

Commit

Permalink
Absolutise pipespec paths (prepend CWD) when creating PipeSpec
Browse files Browse the repository at this point in the history
Should fix #41

This already happened in gen-sh, but not when just running from
.xml's. Now it should happen when actually reading the pipespec xml,
regardless of how it's used.

Note: When reading from archives, we assume paths are always
relative.
  • Loading branch information
unhammer committed Nov 17, 2020
1 parent a0533a7 commit 1ecfe54
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 49 deletions.
107 changes: 60 additions & 47 deletions src/pipespec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,40 @@

namespace divvun {

void PipeSpec::parsePipeSpec(pugi::xml_parse_result& result, const string& file) {
string abspath(const string& path) {
// TODO: would love to use <experimental/filesystem> here, but doesn't exist on Mac :(
// return std::experimental::filesystem::absolute(specfile).remove_filename();
char resolved_path[PATH_MAX];
if(realpath(path.c_str(), resolved_path) == NULL) {
throw std::runtime_error("libdivvun: ERROR: realpath(3) failed to resolve '" + path + "', got: " + std::strerror(errno));
}
return resolved_path;
}


/*
* return path1 / path2
*/
string pathconcat(const string& path1, const string& path2) {
// TODO: would love to use <experimental/filesystem> here, but doesn't exist on Mac :(
// TODO: \\ on Windows
if(path2.length() > 0 && path2[0] == '/') {
// second path is absolute, use second
return path2;
}
if(path1.length() > 0 && path1[path1.length() - 1] == '/') {
// first path ends in slash, just concatenate
return path1 + path2;
}
if(path1.length() == 0) {
// nothing to concatenate, keep relative if relative:
return path2;
}
// path1 is nonempty and doesn't end in slash already:
return path1 + "/" + path2;
}

void PipeSpec::parsePipeSpec(const string& dir, pugi::xml_parse_result& result, const string& file) {
if (result) {
language = doc.child("pipespec").attribute("language").value();
if(language == "") {
Expand All @@ -34,27 +67,34 @@ void PipeSpec::parsePipeSpec(pugi::xml_parse_result& result, const string& file)
// If no attribute, the first pipe is the default:
default_pipe = pipename;
}
auto pr = std::make_pair(pipename, pipeline);
pnodes[pipename] = pipeline;
for (const pugi::xml_node &cmd : pipeline.children()) {
for (const pugi::xml_node &arg : cmd.children()) {
arg.attribute("n").set_value(
pathconcat(dir,
arg.attribute("n").value()).c_str());
}
}
pnodes[pipename] = pipeline;
}
if(pnodes.find(default_pipe) == pnodes.end()) {
throw std::runtime_error("libdivvun: ERROR: Couldn't find pipeline with name of default-pipe " + toUtf8(default_pipe));
}
}
}
else {
std::cerr << file << ":" << result.offset << " ERROR: " << result.description() << "\n";
throw std::runtime_error("libdivvun: ERROR: Couldn't load the pipespec.xml from archive");
}
}

PipeSpec::PipeSpec(const string& file) {
const auto dir = dirname(abspath(file));
pugi::xml_parse_result result = doc.load_file(file.c_str());
parsePipeSpec(result, file);
parsePipeSpec(dir, result, file);
}

PipeSpec::PipeSpec(pugi::char_t* buff, size_t size) {
pugi::xml_parse_result result = doc.load_buffer(buff, size);
parsePipeSpec(result, "<buffer>");
parsePipeSpec("", result,"<buffer>");
}

// TODO: return optional string message instead?
Expand Down Expand Up @@ -135,38 +175,13 @@ string makeDebugSuff(string name, std::unordered_map<string, string> args) {
return name;
}

string abspath(const string& path) {
// TODO: would love to use <experimental/filesystem> here, but doesn't exist on Mac :(
// return std::experimental::filesystem::absolute(specfile).remove_filename();
char resolved_path[PATH_MAX];
if(realpath(path.c_str(), resolved_path) == NULL) {
throw std::runtime_error("libdivvun: ERROR: realpath(3) failed to resolve '" + path + "', got: " + std::strerror(errno));
}
return resolved_path;
}

string pathconcat(const string& path1, const string& path2) {
// TODO: would love to use <experimental/filesystem> here, but doesn't exist on Mac :(
// return path1 / path2;
if(path2.length() > 0 && path2[0] == '/') {
return path2;
}
if(path1.length() > 0 && path1[path1.length() - 1] == '/') {
return path1 + path2;
}
else {
// TODO: \\ on Windows
return path1 + "/" + path2;
}
}

string argprepare(const string& dir, string file) {
string argprepare(string file) {
// Wrap the whole thing in single-quotes, but put existing single-quotes in double-quotes
replaceAll(file, "'", "'\"'\"'");
return " '" + pathconcat(dir, file) + "'";
return " '" + file + "'";
}

vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const PipeSpec& spec, const u16string& pipename, bool trace, bool json) {
vector<std::pair<string,string>> toPipeSpecShVector(const PipeSpec& spec, const u16string& pipename, bool trace, bool json) {
vector<std::pair<string, string>> cmds = {};
for (const pugi::xml_node& cmd: spec.pnodes.at(pipename).children()) {
const auto& name = string(cmd.name());
Expand All @@ -185,7 +200,7 @@ vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const Pip
if(json) {
prog += " -S";
}
prog += argprepare(dir, args["tokenizer"]);
prog += argprepare(args["tokenizer"]);
}
else if(name == "cg") {
prog = "vislcg3";
Expand All @@ -195,7 +210,7 @@ vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const Pip
if(json) {
prog += " --quiet";
}
prog += " -g" + argprepare(dir, args["grammar"]);
prog += " -g" + argprepare(args["grammar"]);
}
else if(name == "cgspell") {
int limit = cmd.attribute("limit").as_int(10);
Expand All @@ -207,14 +222,14 @@ vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const Pip
prog += " -b " + std::to_string(beam);
prog += " -w " + std::to_string(max_weight);
prog += " -u " + std::to_string(max_sent_unknown_rate);
prog += " -l" + argprepare(dir, args["lexicon"]);
prog += " -m" + argprepare(dir, args["errmodel"]);
prog += " -l" + argprepare(args["lexicon"]);
prog += " -m" + argprepare(args["errmodel"]);
}
else if(name == "mwesplit") {
prog = "cg-mwesplit";
}
else if(name == "blanktag") {
prog = "divvun-blanktag" + argprepare(dir, args["blanktagger"]);
prog = "divvun-blanktag" + argprepare(args["blanktagger"]);
}
else if(name == "suggest") {
bool generate_all_readings = cmd.attribute("generate-all").as_bool(false);
Expand All @@ -225,14 +240,14 @@ vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const Pip
if(generate_all_readings) {
prog += " --generate-all";
}
prog += " -g" + argprepare(dir, args["generator"]);
prog += " -m" + argprepare(dir, args["messages"]);
prog += " -g" + argprepare(args["generator"]);
prog += " -m" + argprepare(args["messages"]);
prog += " -l " + spec.language;
}
else if(name == "sh") {
prog = cmd.attribute("prog").value();
for(auto& a : args) {
prog += argprepare(dir, a.second);
prog += argprepare(a.second);
}
}
else if(name == "prefs") {
Expand All @@ -251,8 +266,7 @@ vector<std::pair<string,string>> toPipeSpecShVector(const string& dir, const Pip

void writePipeSpecSh(const string& specfile, const u16string& pipename, bool json, std::ostream& os) {
const std::unique_ptr<PipeSpec> spec(new PipeSpec(specfile));
const auto dir = dirname(abspath(specfile));
const auto cmds = toPipeSpecShVector(dir, *spec, pipename, false, json);
const auto cmds = toPipeSpecShVector(*spec, pipename, false, json);
bool first = true;
os << "#!/bin/sh" << std::endl << std::endl;
for (const auto& cmd: cmds) {
Expand Down Expand Up @@ -307,15 +321,14 @@ void writePipeSpecShDirOne(const vector<std::pair<string, string>> cmds, const s

void writePipeSpecShDir(const string& specfile, bool json, const string& modesdir, bool nodebug) {
const std::unique_ptr<PipeSpec> spec(new PipeSpec(specfile));
const auto dir = dirname(abspath(specfile));
for(const auto& p : spec->pnodes) {
const auto& pipename = toUtf8(p.first);
writePipeSpecShDirOne(toPipeSpecShVector(dir, *spec, p.first, false, json),
writePipeSpecShDirOne(toPipeSpecShVector(*spec, p.first, false, json),
pipename,
modesdir,
nodebug);
if(!nodebug) {
writePipeSpecShDirOne(toPipeSpecShVector(dir, *spec, p.first, true, json),
writePipeSpecShDirOne(toPipeSpecShVector(*spec, p.first, true, json),
"trace-" + pipename,
modesdir,
nodebug);
Expand Down
2 changes: 1 addition & 1 deletion src/pipespec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class PipeSpec {
string language;
u16string default_pipe;
private:
void parsePipeSpec(pugi::xml_parse_result& result, const string& file);
void parsePipeSpec(const string& dir, pugi::xml_parse_result& result, const string& file);
pugi::xml_document doc; // needs to be alive for as long as we're referring to nodes in it
};

Expand Down
2 changes: 1 addition & 1 deletion test/checker/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ blanktagger.hfst: blanktagger.xfst
check_DATA = sme.zcheck tokeniser.pmhfst generator.hfstol errors.xml blanktagger.hfst

if HAVE_CGSPELL
TESTS = ./run.xml ./run.archive ./run.spell
TESTS = ./run.xml ./run.archive ./run.spell ./run.workingdir
if HAVE_PYTHON_BINDINGS
TESTS += ./run-python-bindings
endif # HAVE_PYTHON_BINDINGS
Expand Down
25 changes: 25 additions & 0 deletions test/checker/run.workingdir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Test that we can run pipespec.xml's from other working directories.

set -e
if [[ ${BASH_VERSINFO[0]} -gt 4
|| ${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -ge 4 ]]; then
set -u
# With bash <4.4, using ${a[@]} or ${a[*]} with an array without
# any assigned elements when the nounset option is enabled will
# throw an unbound variable error; so don't -u there
fi

cd "$(dirname "$0")"/../..

n="xml"
src/divvun-validate-pipespec test/checker/pipespec.xml
(
set -x
src/divvun-checker -s test/checker/pipespec.xml -n smegram < test/checker/input."$n".txt > test/checker/output."$n".json
)
if ! diff test/checker/output."$n".json test/checker/expected."$n".json; then
echo diff test/checker/output."$n".json test/checker/expected."$n".json
exit 1
fi

0 comments on commit 1ecfe54

Please sign in to comment.