GoogleCloudPlatform · jmdobry · May 23, 2016 · May 16, 2016
diff --git a/README.md b/README.md
@@ -117,6 +117,7 @@ __Other Examples__
 - Face detection - [Source code][vision_1] | [Documentation][vision_2]
 - Label detection - [Source code][vision_3] | [Documentation][vision_4]
 - Landmark detection - [Source code][vision_5] | [Documentation][vision_6]
+- Text detection - [Source code][vision_7] | [Documentation][vision_8]
 
 ## Google Prediction API
 
@@ -328,6 +329,8 @@ See [LICENSE](https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/ma
 [vision_4]: https://cloud.google.com/vision/docs/label-tutorial
 [vision_5]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/vision/landmarkDetection.js
 [vision_6]: https://cloud.google.com/vision/docs/landmark-tutorial
+[vision_7]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/vision/textDetection.js
+[vision_8]: https://cloud.google.com/vision/docs/text-tutorial
 
 [predictionapi_1]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/prediction/hostedmodels.js
 [predictionapi_2]: https://cloud.google.com/prediction/docs/developer-guide#predictionfromappengine

diff --git a/test/vision/textDetection.test.js b/test/vision/textDetection.test.js
@@ -0,0 +1,38 @@
+// Copyright 2016, Google, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+var test = require('ava');
+var path = require('path');
+var textDetectionSample = require('../../vision/textDetection');
+
+// Anchor the fixture directory to this file instead of process.cwd(), so the
+// images are found no matter which directory the test runner is started from
+// (the require() above is already resolved relative to this file).
+var inputDir = path.resolve(__dirname, '../../vision/resources');
+
+// End-to-end check of the sample: analyze every file in the fixture directory,
+// then query the resulting index for words expected to appear in the images.
+// Callback-style test: it only finishes once t.end() is reached.
+test.cb('should detect texts', function (t) {
+  textDetectionSample.main(
+    inputDir,
+    function (err, textResponse) {
+      t.ifError(err);
+      // main() reports one entry per file it analyzed.
+      t.ok(Object.keys(textResponse).length > 0);
+      textDetectionSample.lookup(
+        ['the', 'sunbeams', 'in'],
+        function (err, hits) {
+          t.ifError(err);
+          // hits holds one list of filenames per looked-up word.
+          t.ok(hits.length > 0);
+          t.ok(hits[0].length > 0);
+          t.end();
+        }
+      );
+    }
+  );
+});
diff --git a/vision/README.md b/vision/README.md
@@ -40,3 +40,10 @@ Execute the sample:
 Execute the sample:
 
     node landmarkDetection "https://cloud-samples-tests.storage.googleapis.com/vision/water.jpg"
+
+### Text detection sample
+
+Execute the sample:
+
+    node textDetection analyze resources
+    node textDetection lookup the sunbeams in
diff --git a/vision/not-a-meme.txt b/vision/not-a-meme.txt
@@ -0,0 +1,2 @@
+I am not a meme. Don't fail if you accidentally include me in your Vision API
+request, please.
diff --git a/vision/package.json b/vision/package.json
@@ -7,13 +7,11 @@
   "engines": {
     "node": ">=0.10.x"
   },
-  "scripts": {
-    "faceDetection": "node faceDetection.js",
-    "labelDetection": "node labelDetection.js",
-    "landmarkDetection": "node landmarkDetection.js"
-  },
   "dependencies": {
+    "async": "^1.5.0",
+    "canvas": "^1.3.15",
     "gcloud": "^0.32.0",
-    "canvas": "^1.3.15"
+    "natural": "^0.4.0",
+    "redis": "^2.6.0-2"
   }
 }
diff --git a/vision/resources/bonito.gif b/vision/resources/bonito.gif
diff --git a/vision/resources/mountain.jpg b/vision/resources/mountain.jpg
diff --git a/vision/resources/no-text.jpg b/vision/resources/no-text.jpg
diff --git a/vision/resources/sabertooth.gif b/vision/resources/sabertooth.gif
diff --git a/vision/resources/succulents.jpg b/vision/resources/succulents.jpg
diff --git a/vision/resources/sunbeamkitties.jpg b/vision/resources/sunbeamkitties.jpg
diff --git a/vision/resources/wakeupcat.jpg b/vision/resources/wakeupcat.jpg
diff --git a/vision/textDetection.js b/vision/textDetection.js
@@ -0,0 +1,271 @@
+// Copyright 2016, Google, Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+// [START app]
+// [START import_libraries]
+var async = require('async');
+var fs = require('fs');
+var path = require('path');
+var gcloud = require('gcloud')();
+var natural = require('natural');
+var redis = require('redis');
+// Get a reference to the vision component
+var vision = gcloud.vision();
+// [END import_libraries]
+
+// Index of the text found in images, backed by two redis connections:
+//   tokenClient (database 0) - sets of filenames keyed by token
+//   docsClient  (database 1) - per-file records used to tell which files
+//                              have already been examined
+// Host and port come from REDIS_HOST / REDIS_PORT, defaulting to a local
+// server on the standard port.
+function Index() {
+  var port = process.env.REDIS_PORT || '6379';
+  var host = process.env.REDIS_HOST || '127.0.0.1';
+
+  // Opens a client bound to the given database number. Connection errors are
+  // logged rather than thrown.
+  var connect = function (database) {
+    var client = redis.createClient(port, host, { db: database });
+    client.on('error', function (err) {
+      console.error('ERR:REDIS: ' + err);
+    });
+    return client;
+  };
+
+  this.tokenClient = connect(0);
+  this.docsClient = connect(1);
+}
+
+// Closes both redis connections (token client first, then docs client).
+Index.prototype.quit = function () {
+  [this.tokenClient, this.docsClient].forEach(function (client) {
+    client.quit();
+  });
+};
+
+// Adds a document to the index.
+//
+// Every token of `document` is added to the token database as a member of
+// the set keyed by that token, and the document text itself is stored in the
+// docs database under `filename`.
+//
+//   filename - key identifying the document (the image path)
+//   document - text extracted from the image
+//   callback - called as callback(err, results) once every redis write has
+//              completed, or with the first error encountered
+Index.prototype.add = function (filename, document, callback) {
+  var self = this;
+  var PUNCTUATION = ['.', ',', ':', ''];
+  var tokenizer = new natural.WordTokenizer();
+
+  // TODO: Remove stop words
+
+  var tasks = tokenizer.tokenize(document).map(function (token) {
+    // Index.prototype.lookup lower-cases every query word, so tokens must be
+    // stored lower-cased too or mixed-case text could never be found.
+    return token.toLowerCase();
+  }).filter(function (token) {
+    return PUNCTUATION.indexOf(token) === -1;
+  }).map(function (token) {
+    return function (cb) {
+      self.tokenClient.sadd(token, filename, cb);
+    };
+  });
+
+  // The document record belongs in the docs database: that is where
+  // documentIsProcessed() looks, so writing it anywhere else causes the file
+  // to be re-analyzed on every run.
+  tasks.push(function (cb) {
+    self.docsClient.set(filename, document, cb);
+  });
+
+  async.parallel(tasks, callback);
+};
+
+// Looks up the filenames recorded for each of the given words.
+//
+//   words    - array of query words; each is lower-cased before the lookup
+//   callback - called as callback(err, hits), where hits[i] is the list of
+//              set members stored under words[i]
+Index.prototype.lookup = function (words, callback) {
+  var index = this;
+  var queries = words.map(function (word) {
+    var key = word.toLowerCase();
+    return function (done) {
+      index.tokenClient.smembers(key, done);
+    };
+  });
+  async.parallel(queries, callback);
+};
+
+// Reports whether a file already has a record in the docs database.
+//
+//   filename - key to check
+//   callback - called as callback(err, processed); processed is true when a
+//              non-empty record exists (already indexed) or when the record
+//              is the empty string (checked earlier, no text found), and
+//              false when there is no record
+Index.prototype.documentIsProcessed = function (filename, callback) {
+  this.docsClient.GET(filename, function (err, value) {
+    if (err) {
+      return callback(err);
+    }
+    // An empty string is the marker written by setContainsNoText().
+    if (value === '') {
+      console.log(filename + ' was already checked, and contains no text.');
+      callback(null, true);
+      return;
+    }
+    if (value) {
+      console.log(filename + ' already added to index.');
+      callback(null, true);
+      return;
+    }
+    callback(null, false);
+  });
+};
+
+// Records in the docs database that a file was examined and had no text, by
+// storing an empty string under its filename.
+Index.prototype.setContainsNoText = function (filename, callback) {
+  var noTextMarker = '';
+  this.docsClient.set(filename, noTextMarker, callback);
+};
+
+// Looks up the given words in a freshly opened index, prints the hits for
+// each word, and passes the raw hits on.
+//
+//   words    - array of words to search for
+//   callback - called as callback(err) on failure, callback(null, hits) on
+//              success, where hits[i] is the list of matches for words[i]
+function lookup(words, callback) {
+  var index = new Index();
+  index.lookup(words, function (err, hits) {
+    // The connections are only needed for this one query.
+    index.quit();
+    if (!err) {
+      words.forEach(function (word, i) {
+        console.log('hits for \"' + word + '\":', hits[i].join(', '));
+      });
+      callback(null, hits);
+      return;
+    }
+    return callback(err);
+  });
+}
+
+// [START extract_descrs]
+// Concatenates the `desc` field of every text annotation into one string.
+// Annotations without a (truthy) `desc` contribute nothing; no separator is
+// inserted between pieces.
+function extractDescription(texts) {
+  return texts.reduce(function (combined, text) {
+    return combined + (text.desc || '');
+  }, '');
+}
+
+// Stores the outcome of text detection for one file in the index.
+//
+//   filename - the file the annotations belong to
+//   index    - Index instance to write to
+//   texts    - text annotations returned for the file
+//   callback - forwarded to the index write that is performed
+//
+// Files with annotations are added to the index with their combined text;
+// files without any are marked as containing no text.
+function extractDescriptions(filename, index, texts, callback) {
+  if (!texts.length) {
+    console.log(filename + ' had no discernable text.');
+    index.setContainsNoText(filename, callback);
+    return;
+  }
+  index.add(filename, extractDescription(texts), callback);
+}
+// [END extract_descrs]
+
+// [START get_text]
+// Runs text detection on a batch of files and records the results in the
+// index.
+//
+//   index      - Index instance that receives the results
+//   inputFiles - array of file paths to analyze
+//   callback   - called as callback(err) on failure, or
+//                callback(null, textResponse) where textResponse maps each
+//                filename to 1 (an array of annotations came back) or 0
+//                (anything else); files whose response carries an error are
+//                logged and left out
+function getTextFromFiles(index, inputFiles, callback) {
+  var options = { verbose: true };
+
+  // Make a call to the Vision API to detect text
+  vision.detectText(inputFiles, options, function (err, detections) {
+    if (err) {
+      return callback(err);
+    }
+    var textResponse = {};
+    var tasks = [];
+    // NOTE(review): assumes detections[i] is the result for inputFiles[i],
+    // including when only one file is passed - confirm against the client
+    // library's behavior.
+    inputFiles.forEach(function (filename, i) {
+      var response = detections[i];
+      if (response.error) {
+        console.log('API Error for ' + filename, response.error);
+        return;
+      }
+      textResponse[filename] = Array.isArray(response) ? 1 : 0;
+      tasks.push(function (done) {
+        extractDescriptions(filename, index, response, done);
+      });
+    });
+    async.parallel(tasks, function (err) {
+      if (!err) {
+        callback(null, textResponse);
+        return;
+      }
+      return callback(err);
+    });
+  });
+}
+
+// Run the example
+// Run the example: analyze every not-yet-processed file directly inside
+// inputDir and add the detected text to the index.
+//
+//   inputDir - directory to scan (sub-directories are skipped)
+//   callback - called as callback(err, result); result is whatever
+//              getTextFromFiles produced, or undefined when there was
+//              nothing left to process
+function main(inputDir, callback) {
+  var index = new Index();
+
+  // Steps signal "skip this entry" by completing without a value; this drops
+  // those empty slots from a result list.
+  function withoutBlanks(list) {
+    return list.filter(function (entry) {
+      return entry;
+    });
+  }
+
+  // Scan the specified directory for files
+  function listDirectory(next) {
+    fs.readdir(inputDir, next);
+  }
+
+  // Separate directories from files
+  function keepFiles(entries, next) {
+    var statTasks = entries.map(function (entry) {
+      var fullPath = path.join(inputDir, entry);
+      return function (done) {
+        fs.stat(fullPath, function (err, stats) {
+          if (err) {
+            return done(err);
+          }
+          if (stats.isDirectory()) {
+            return done();
+          }
+          return done(null, fullPath);
+        });
+      };
+    });
+    async.parallel(statTasks, next);
+  }
+
+  // Figure out which files have already been processed
+  function keepUnprocessed(allImageFiles, next) {
+    var checks = withoutBlanks(allImageFiles).map(function (filename) {
+      return function (done) {
+        index.documentIsProcessed(filename, function (err, processed) {
+          if (err) {
+            return done(err);
+          }
+          if (processed) {
+            return done();
+          }
+          // Forward this filename on for further processing
+          return done(null, filename);
+        });
+      };
+    });
+    async.parallel(checks, next);
+  }
+
+  // Analyze any remaining unprocessed files
+  function analyzeRemaining(imageFilesToProcess, next) {
+    var pending = withoutBlanks(imageFilesToProcess);
+    if (!pending.length) {
+      console.log('All files processed!');
+      next();
+      return;
+    }
+    return getTextFromFiles(index, pending, next);
+  }
+
+  async.waterfall([
+    listDirectory,
+    keepFiles,
+    keepUnprocessed,
+    analyzeRemaining
+  ], function (err, result) {
+    // Always release the redis connections, whether or not a step failed.
+    index.quit();
+    callback(err, result);
+  });
+}
+// [END get_text]
+
+// [START run_application]
+// Command-line entry point; only runs when this file is executed directly.
+//   node textDetection analyze <dir>
+//   node textDetection lookup <word> ...
+// Any other invocation prints a usage message and exits with status 1.
+if (module === require.main) {
+  var generalUsage = 'Usage: node textDetection <command> <arg> ...\n\n' +
+    '\tCommands: analyze, lookup';
+  // Prints a usage message and terminates the process with a failure status.
+  var exitWithUsage = function (message) {
+    console.log(message);
+    process.exit(1);
+  };
+
+  var cliArgs = process.argv.slice(2);
+  if (!cliArgs.length) {
+    exitWithUsage(generalUsage);
+  }
+  var command = cliArgs.shift();
+
+  switch (command) {
+    case 'analyze':
+      if (!cliArgs.length) {
+        exitWithUsage('Usage: node textDetection analyze <dir>');
+      }
+      main(cliArgs[0], console.log);
+      break;
+    case 'lookup':
+      if (!cliArgs.length) {
+        exitWithUsage('Usage: node textDetection lookup <word> ...');
+      }
+      lookup(cliArgs, console.log);
+      break;
+    default:
+      exitWithUsage(generalUsage);
+  }
+}
+// [END run_application]
+// [END app]
+
+exports.Index = Index;
+exports.lookup = lookup;
+exports.getTextFromFiles = getTextFromFiles;
+exports.main = main;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		I am not a meme. Don't fail if you accidentally include me in your Vision API
		request, please.