diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index a512c4f68..06a00883f 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -466,6 +466,9 @@ jobs: with: fetch-depth: 0 submodules: 'recursive' + - uses: actions/setup-node@v2 + with: + node-version: 16 - name: Install dependencies run: | apt-get update @@ -477,10 +480,42 @@ jobs: - run: ./ci_scripts/tf-setup.sh - run: ./ci_scripts/wasm-build.sh - run: ./ci_scripts/wasm-package.sh + - run: BAZEL_WASM_EXTRA_FLAGS="--//native_client:wasm_emit=es6" ./ci_scripts/wasm-build.sh + - run: make -C native_client/wasm clean pack + shell: bash - uses: actions/upload-artifact@v2 with: name: "libstt.tflite.wasm.zip" path: ${{ github.workspace }}/artifacts/libstt.zip + - uses: actions/upload-artifact@v2 + with: + name: "libstt.tflite.wasm.es6.tgz" + path: ${{ github.workspace }}/native_client/wasm/stt-wasm-*.tgz + test-wasm: + name: "Wasm|Test libstt" + runs-on: ubuntu-20.04 + needs: [ build-wasm ] + container: + image: quay.io/pypa/manylinux_2_24_x86_64:2021-12-05-142ef77 + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + env: + CI_TMP_DIR: ${{ github.workspace }}/tmp/ + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: 'recursive' + - uses: actions/setup-node@v2 + with: + node-version: 16 + - uses: actions/download-artifact@v2 + with: + # Must be the exact artifact name uploaded by build-wasm; download-artifact@v2 does not expand globs. + name: "libstt.tflite.wasm.es6.tgz" + path: ${{ env.CI_TMP_DIR }} + - run: make -C native_client/wasm/test clean test + shell: bash test-cpp-Linux: name: "Lin|Test C++ binary" runs-on: ubuntu-20.04 diff --git a/.gitignore b/.gitignore index a60ac55ac..38fd44bb6 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ /runs /logs /exports +/artifacts /data/ldc93s1 /native_client/setup.cfg /native_client/build @@ -29,6 +30,8 @@ /native_client/python/dist /native_client/python/impl.py /native_client/python/impl_wrap.cpp +/native_client/wasm/dist +/native_client/wasm/package.json 
/doc/.build/ /doc/xml-c/ /doc/xml-java/ diff --git a/ci_scripts/package-utils.sh b/ci_scripts/package-utils.sh index a9596bf86..145964df0 100755 --- a/ci_scripts/package-utils.sh +++ b/ci_scripts/package-utils.sh @@ -133,5 +133,7 @@ package_libstt_wasm() ${ZIP} -r9 --junk-paths "${artifacts_dir}/${artifact_name}" \ ${stt_dir}/native_client/kenlm/COPYING \ - ${tensorflow_dir}/bazel-bin/native_client/stt_wasm_bindings/* + ${tensorflow_dir}/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.js \ + ${tensorflow_dir}/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.worker.js \ + ${tensorflow_dir}/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.wasm } diff --git a/ci_scripts/wasm-build.sh b/ci_scripts/wasm-build.sh index 611e4e923..25fb2ffd2 100755 --- a/ci_scripts/wasm-build.sh +++ b/ci_scripts/wasm-build.sh @@ -16,7 +16,7 @@ BAZEL_OPT_FLAGS="--copt=-pthread --copt=-fexceptions" # Bazel caching and emsdk do not play nice together: unless path # is explicitly passed, emsdk would end up using an old version of # Python which does not support f-strings, making build fail. -BAZEL_EXTRA_FLAGS="${BAZEL_EXTRA_FLAGS} --action_env=PATH" +BAZEL_EXTRA_FLAGS="${BAZEL_EXTRA_FLAGS} ${BAZEL_WASM_EXTRA_FLAGS} --action_env=PATH" BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" SYSTEM_TARGET= diff --git a/native_client/BUILD b/native_client/BUILD index f3fec9cb3..4127108ca 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -1,5 +1,6 @@ # Description: Coqui STT native client library. 
+load("@bazel_skylib//rules:common_settings.bzl", "string_flag") load("@org_tensorflow//tensorflow:tensorflow.bzl", "lrt_if_needed") load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework") @@ -352,6 +353,37 @@ cc_binary( linkopts = DECODER_LINKOPTS, ) +DECODER_LINKOPTS_WASM = DECODER_LINKOPTS + [ + "-Os", + "--bind", + "-sWASM=1", + # Allow to grow memory allocation when needed (for example for loading models). + "-sALLOW_MEMORY_GROWTH", + "-sMAXIMUM_MEMORY=4GB", + "-sMALLOC=emmalloc", + "-sMODULARIZE=1", + "-sEXPORT_NAME=STT", + # We need to specify a threadpool size, otherwise TFLite will just do nothing + # and deadlock. + "-sPTHREAD_POOL_SIZE=4", + # This is a library, so no 'main' is expected. + "--no-entry", + # Experiencing problems? Uncomment the flags below to have better + # error messages and sanity checks. + # "-sASSERTIONS=2", + # "-g3", + # "-gsource-map", +] + +string_flag(name = "wasm_emit", build_setting_default = "wasm_es5") + +config_setting( + name = "wasm_es6", + flag_values = { + ":wasm_emit": "es6", + } +) + cc_binary( # Note that the .js suffix is significant here. See the `-o` # option at https://emscripten.org/docs/tools_reference/emcc.html @@ -361,24 +393,12 @@ cc_binary( ], deps = [":decoder", ":coqui_stt_bundle"], copts = ["-std=c++14", "-fno-exceptions", "-fwrapv", "-pthread"], - linkopts = DECODER_LINKOPTS + [ - "--bind", - "-sWASM=1", - # Allow to grow memory allocation when needed (for example for loading models). - "-sALLOW_MEMORY_GROWTH", - "-sMAXIMUM_MEMORY=4GB", - "-sMALLOC=emmalloc", - # We need to specify a threadpool size, otherwise TFLite will just do nothing - # and deadlock. - "-sPTHREAD_POOL_SIZE=4", - # This is a library, so no 'main' is expected. - "--no-entry", - # Experiencing problems? Uncomment the flags below to have better - # error messages and sanity checks. 
- # "-sASSERTIONS=2", - # "-g3", - # "-gsource-map", - ], + linkopts = select({ + ":wasm_es6": DECODER_LINKOPTS_WASM + [ + "-sEXPORT_ES6=1", + ], + "//conditions:default": DECODER_LINKOPTS_WASM, + }), ) wasm_cc_binary( diff --git a/native_client/wasm/Makefile b/native_client/wasm/Makefile new file mode 100644 index 000000000..788f587b5 --- /dev/null +++ b/native_client/wasm/Makefile @@ -0,0 +1,30 @@ +NPM_TOOL ?= npm +PROJECT_NAME ?= stt-wasm +PROJECT_VERSION ?= $(shell cat ../../training/coqui_stt_training/VERSION | tr -d '\n') + +clean: + rm -rf ./stt-wasm-*.tgz package.json + rm -rf dist + +package.json: package.json.in + sed \ + -e 's/$$(PROJECT_NAME)/$(PROJECT_NAME)/' \ + -e 's/$$(PROJECT_VERSION)/$(PROJECT_VERSION)/' \ + package.json.in > package.json && cat package.json + +dist: + mkdir -p dist + +dist/stt_wasm.js: dist ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.js + cp ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.js dist + +dist/stt_wasm.wasm: dist ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.wasm + cp ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.wasm dist + +dist/stt_wasm.worker.js: dist ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.worker.js + cp ../../tensorflow/bazel-bin/native_client/stt_wasm_bindings/stt_wasm.worker.js dist + +build: package.json dist/stt_wasm.js dist/stt_wasm.wasm dist/stt_wasm.worker.js + +pack: build + ${NPM_TOOL} pack diff --git a/native_client/wasm/package.json.in b/native_client/wasm/package.json.in new file mode 100644 index 000000000..91de61a87 --- /dev/null +++ b/native_client/wasm/package.json.in @@ -0,0 +1,14 @@ +{ + "name" : "$(PROJECT_NAME)", + "version" : "$(PROJECT_VERSION)", + "description": "A Webassembly build for doing speech recognition using a Coqui STT model", + "main": "./dist/stt_wasm.js", + "files": [ + "dist" + ], + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + 
"author": "Coqui.ai", + "license": "LGPL-2.1-only" +} diff --git a/native_client/wasm/test/.gitignore b/native_client/wasm/test/.gitignore new file mode 100644 index 000000000..134d9def4 --- /dev/null +++ b/native_client/wasm/test/.gitignore @@ -0,0 +1,3 @@ +/node_modules +package.json +package-lock.json \ No newline at end of file diff --git a/native_client/wasm/test/Makefile b/native_client/wasm/test/Makefile new file mode 100644 index 000000000..86e7da8f5 --- /dev/null +++ b/native_client/wasm/test/Makefile @@ -0,0 +1,19 @@ +NPM_TOOL ?= npm +TEST_PROJECT_NAME ?= stt-wasm +TEST_PROJECT_VERSION ?= $(shell cat ../../../training/coqui_stt_training/VERSION | tr -d '\n') + +clean: + rm -rf package.json package-lock.json + rm -rf node_modules + +package.json: package.json.in + cp package.json.in package.json + +install-dependencies: package.json + ${NPM_TOOL} install + +install-stt-wasm: package.json + ${NPM_TOOL} install ${CI_TMP_DIR}/${TEST_PROJECT_NAME}-${TEST_PROJECT_VERSION}.tgz + +test: install-dependencies install-stt-wasm + ${NPM_TOOL} run test diff --git a/native_client/wasm/test/babel.config.cjs b/native_client/wasm/test/babel.config.cjs new file mode 100644 index 000000000..04844b8d9 --- /dev/null +++ b/native_client/wasm/test/babel.config.cjs @@ -0,0 +1,8 @@ +module.exports = { + presets: [['@babel/preset-env', { + targets: { + node: 'current' + }, + }]], + plugins: ['babel-plugin-transform-import-meta'], + } \ No newline at end of file diff --git a/native_client/wasm/test/jest.config.js b/native_client/wasm/test/jest.config.js new file mode 100644 index 000000000..583f46dca --- /dev/null +++ b/native_client/wasm/test/jest.config.js @@ -0,0 +1,195 @@ +/* + * For a detailed explanation regarding each configuration property, visit: + * https://jestjs.io/docs/configuration + */ + +module.exports = { + // All imported modules in your tests should be mocked automatically + // automock: false, + + // Stop running tests after `n` failures + // bail: 0, + + // The 
directory where Jest should store its cached dependency information + // cacheDirectory: "/private/var/folders/0v/5zh_zyy95jnflnkr1bvst61r0000gn/T/jest_dx", + + // Automatically clear mock calls, instances, contexts and results before every test + clearMocks: true, + + // Indicates whether the coverage information should be collected while executing the test + // collectCoverage: false, + + // An array of glob patterns indicating a set of files for which coverage information should be collected + // collectCoverageFrom: undefined, + + // The directory where Jest should output its coverage files + // coverageDirectory: undefined, + + // An array of regexp pattern strings used to skip coverage collection + // coveragePathIgnorePatterns: [ + // "/node_modules/" + // ], + + // Indicates which provider should be used to instrument code for coverage + coverageProvider: "v8", + + // A list of reporter names that Jest uses when writing coverage reports + // coverageReporters: [ + // "json", + // "text", + // "lcov", + // "clover" + // ], + + // An object that configures minimum threshold enforcement for coverage results + // coverageThreshold: undefined, + + // A path to a custom dependency extractor + // dependencyExtractor: undefined, + + // Make calling deprecated APIs throw helpful error messages + // errorOnDeprecated: false, + + // The default configuration for fake timers + // fakeTimers: { + // "enableGlobally": false + // }, + + // Force coverage collection from ignored files using an array of glob patterns + // forceCoverageMatch: [], + + // A path to a module which exports an async function that is triggered once before all test suites + // globalSetup: undefined, + + // A path to a module which exports an async function that is triggered once after all test suites + // globalTeardown: undefined, + + // A set of global variables that need to be available in all test environments + // globals: {}, + + // The maximum amount of workers used to run your tests. 
Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers. + // maxWorkers: "50%", + + // An array of directory names to be searched recursively up from the requiring module's location + // moduleDirectories: [ + // "node_modules" + // ], + + // An array of file extensions your modules use + // moduleFileExtensions: [ + // "js", + // "mjs", + // "cjs", + // "jsx", + // "ts", + // "tsx", + // "json", + // "node" + // ], + + // A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module + // moduleNameMapper: {}, + + // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader + // modulePathIgnorePatterns: [], + + // Activates notifications for test results + // notify: false, + + // An enum that specifies notification mode. Requires { notify: true } + // notifyMode: "failure-change", + + // A preset that is used as a base for Jest's configuration + // preset: undefined, + + // Run tests from one or more projects + // projects: undefined, + + // Use this configuration option to add custom reporters to Jest + // reporters: undefined, + + // Automatically reset mock state before every test + // resetMocks: false, + + // Reset the module registry before running each individual test + // resetModules: false, + + // A path to a custom resolver + // resolver: undefined, + + // Automatically restore mock state and implementation before every test + // restoreMocks: false, + + // The root directory that Jest should scan for tests and modules within + // rootDir: undefined, + + // A list of paths to directories that Jest should use to search for files in + // roots: [ + // "" + // ], + + // Allows you to use a custom runner instead of Jest's default test runner + // runner: "jest-runner", + + // The paths to modules that run some code to 
configure or set up the testing environment before each test + // setupFiles: [], + + // A list of paths to modules that run some code to configure or set up the testing framework before each test + // setupFilesAfterEnv: [], + + // The number of seconds after which a test is considered as slow and reported as such in the results. + // slowTestThreshold: 5, + + // A list of paths to snapshot serializer modules Jest should use for snapshot testing + // snapshotSerializers: [], + + // The test environment that will be used for testing + testEnvironment: "jsdom", + + // Options that will be passed to the testEnvironment + // testEnvironmentOptions: {}, + + // Adds a location field to test results + // testLocationInResults: false, + + // The glob patterns Jest uses to detect test files + // testMatch: [ + // "**/__tests__/**/*.[jt]s?(x)", + // "**/?(*.)+(spec|test).[tj]s?(x)" + // ], + + // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped + // testPathIgnorePatterns: [ + // "/node_modules/" + // ], + + // The regexp pattern or array of patterns that Jest uses to detect test files + // testRegex: [], + + // This option allows the use of a custom results processor + // testResultsProcessor: undefined, + + // This option allows use of a custom test runner + // testRunner: "jest-circus/runner", + + // A map from regular expressions to paths to transformers + // transform: undefined, + + // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation + // transformIgnorePatterns: [ + // "/node_modules/", + // "\\.pnp\\.[^\\/]+$" + // ], + + // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them + // unmockedModulePathPatterns: undefined, + + // Indicates whether each individual test should be reported during the run + // verbose: undefined, + + // An array of regexp patterns 
that are matched against all source file paths before re-running tests in watch mode + // watchPathIgnorePatterns: [], + + // Whether to use watchman for file crawling + // watchman: true, +}; diff --git a/native_client/wasm/test/mock.worker.js b/native_client/wasm/test/mock.worker.js new file mode 100644 index 000000000..1f015f115 --- /dev/null +++ b/native_client/wasm/test/mock.worker.js @@ -0,0 +1,7 @@ +// Mocks stt_wasm.worker.js as it is currently not being transpiled by babel +var nodeWorkerThreads = require('worker_threads'); + +var parentPort = nodeWorkerThreads.parentPort; +parentPort.postMessage({ + "cmd": "loaded" +}); \ No newline at end of file diff --git a/native_client/wasm/test/package.json.in b/native_client/wasm/test/package.json.in new file mode 100644 index 000000000..9c6cd385c --- /dev/null +++ b/native_client/wasm/test/package.json.in @@ -0,0 +1,23 @@ +{ + "name": "stt-wasm-test", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "jest" + }, + "jest": { + "testEnvironment": "node", + "transform": { + "\\.js$": "babel-jest" + }, + "transformIgnorePatterns": [] + }, + "author": "", + "license": "ISC", + "devDependencies": { + "jest": "28.1.3", + "babel-plugin-transform-import-meta": "^2.2.0", + "@babel/preset-env": "7.18.9" + } +} diff --git a/native_client/wasm/test/stt-wasm.test.js b/native_client/wasm/test/stt-wasm.test.js new file mode 100644 index 000000000..366eb086c --- /dev/null +++ b/native_client/wasm/test/stt-wasm.test.js @@ -0,0 +1,19 @@ +import STT from 'stt-wasm'; + +describe('STT WASM', () => { + it('Checks stt-wasm was imported', () => { + expect(STT).toBeDefined(); + }) + it.skip('Instantiates correctly the module', async () => { + // Built with -sMODULARIZE=1: the factory promise resolves once the runtime is ready, so no separate readiness flag is needed. + const instance = await STT({ + 'locateFile': (filename) => { + if (filename === 'stt_wasm.worker.js') { + return `${__dirname}/mock.worker.js`; + } + return `${__dirname}/node_modules/stt-wasm/dist/${filename}` + } + }); + 
expect(instance).toBeDefined(); + }) +}) \ No newline at end of file