diff --git a/worker/export.go b/worker/export.go index 424c02e81f6..a1c338a770e 100644 --- a/worker/export.go +++ b/worker/export.go @@ -20,11 +20,11 @@ import ( "bufio" "bytes" "compress/gzip" + "encoding/json" "fmt" "os" "path" "path/filepath" - "strconv" "strings" "time" @@ -54,6 +54,20 @@ var rdfTypeMap = map[types.TypeID]string{ types.PasswordID: "xs:password", } +// escapedString converts a string into an escaped string for exporting. +func escapedString(str string) string { + // We use the Marshal function in the JSON package for all export formats + // because it properly escapes strings. + byt, err := json.Marshal(str) + if err != nil { + // All valid strings should be able to be escaped to a JSON string so + // it's safe to panic here. Marshal has to return an error because it + // accepts an interface. + panic("Could not marshal string to JSON string") + } + return string(byt) +} + func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error) { var buf bytes.Buffer @@ -76,7 +90,7 @@ func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error) // trim null character at end trimmed := strings.TrimRight(str.Value.(string), "\x00") - buf.WriteString(strconv.Quote(trimmed)) + buf.WriteString(escapedString(trimmed)) if p.PostingType == pb.Posting_VALUE_LANG { buf.WriteByte('@') buf.WriteString(string(p.LangTag)) @@ -121,7 +135,7 @@ func toRDF(pl *posting.List, prefix string, readTs uint64) (*bpb.KVList, error) } if facetTid == types.StringID { - buf.WriteString(strconv.Quote(fStringVal.Value.(string))) + buf.WriteString(escapedString(fStringVal.Value.(string))) } else { buf.WriteString(fStringVal.Value.(string)) } diff --git a/worker/export_test.go b/worker/export_test.go index 75d5630659e..2ed37b1f1cc 100644 --- a/worker/export_test.go +++ b/worker/export_test.go @@ -54,6 +54,7 @@ func populateGraphExport(t *testing.T) { `<3> "First Line\nSecondLine" .`, "<1> <5> .", `<5> "" .`, + `<6> "Ding!\u0007Ding!\u0007Ding!\u0007" 
.`, } idMap := map[string]uint64{ "1": 1, @@ -61,6 +62,7 @@ func populateGraphExport(t *testing.T) { "3": 3, "4": 4, "5": 5, + "6": 6, } for _, edge := range rdfEdges { @@ -149,7 +151,8 @@ func TestExport(t *testing.T) { for scanner.Scan() { nq, err := rdf.Parse(scanner.Text()) require.NoError(t, err) - require.Contains(t, []string{"_:uid1", "_:uid2", "_:uid3", "_:uid4", "_:uid5"}, nq.Subject) + require.Contains(t, []string{"_:uid1", "_:uid2", "_:uid3", "_:uid4", "_:uid5", "_:uid6"}, + nq.Subject) if nq.ObjectValue != nil { switch nq.Subject { case "_:uid1", "_:uid2": @@ -161,6 +164,9 @@ func TestExport(t *testing.T) { case "_:uid4": case "_:uid5": require.Equal(t, `<_:uid5> "" .`, scanner.Text()) + case "_:uid6": + require.Equal(t, `<_:uid6> "Ding!\u0007Ding!\u0007Ding!\u0007" .`, + scanner.Text()) default: t.Errorf("Unexpected subject: %v", nq.Subject) } @@ -203,7 +209,7 @@ func TestExport(t *testing.T) { } require.NoError(t, scanner.Err()) // This order will be preserved due to file naming. - require.Equal(t, 8, count) + require.Equal(t, 9, count) require.Equal(t, 1, len(schemaFileList)) file = schemaFileList[0]