From 40ae65db58aa5a97b33cd33312245de629051917 Mon Sep 17 00:00:00 2001 From: Jason Kulatunga Date: Fri, 6 Mar 2020 00:14:42 -0800 Subject: [PATCH] set the correct header for requesting plain text from tika service. re-add trim operation before storing content. --- pkg/processor/document/document.go | 5 ++--- pkg/processor/document/tika_client.go | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/processor/document/document.go b/pkg/processor/document/document.go index c685520..cde2448 100644 --- a/pkg/processor/document/document.go +++ b/pkg/processor/document/document.go @@ -189,9 +189,8 @@ func (dp *DocumentProcessor) parseDocument(bucketName string, bucketPath string, return model.Document{}, err } //trim whitespace/newline characters - dp.logger.Debugf("docContent: %s", docContent) - dp.logger.Debugf("trim Content: %s", strings.TrimSpace(docContent)) - //docContent = strings.TrimSpace(docContent) + docContent = strings.TrimSpace(docContent) + dp.logger.Debugf("docContent: '%s'", docContent) metaFile, err := os.Open(localFilePath) if err != nil { diff --git a/pkg/processor/document/tika_client.go b/pkg/processor/document/tika_client.go index a2131d2..4b3b617 100644 --- a/pkg/processor/document/tika_client.go +++ b/pkg/processor/document/tika_client.go @@ -13,6 +13,9 @@ func (mrt TikaRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) { if r.URL.Path == "/meta" { r.Header.Add("Accept", "application/json") } + if r.URL.Path == "/tika" { + r.Header.Add("Accept", "text/plain") + } return mrt.r.RoundTrip(r) }