Skip to content

Commit

Permalink
feat: html parsing depends on striptags lib
Browse files (browse the repository at this point in the history)
  • Loading branch information
朱志洋 committed Nov 15, 2018
1 parent 49c4d3f commit 674070d
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 85 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,6 @@
"typescript": "^3.0.3"
},
"dependencies": {
"html-to-text": "^4.0.0"
"striptags": "^3.1.1"
}
}
19 changes: 9 additions & 10 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
const htmlToText = require('html-to-text')
const striptags = require('striptags')

const CN_PATTERN = /[\u4E00-\u9FA5]/g
const EN_PATTERN = /[a-zA-Z0-9_\u0392-\u03c9\u0400-\u04FF]+|[\u4E00-\u9FFF\u3400-\u4dbf\uf900-\ufaff\u3040-\u309f\uac00-\ud7af\u0400-\u04FF]+|[\u00E4\u00C4\u00E5\u00C5\u00F6\u00D6]+|\w+/g

function countContent(content: any = ''): [number, number] {
function countContent(content: any): [number, number] {
if (typeof content !== 'string') {
throw new Error('[word-counter] content must be string type')
}
let cn = 0
let en = 0
if (typeof content === 'string' && content.length > 0) {
content = htmlToText.fromString(content, {
wordwrap: false,
ignoreImage: true,
ignoreHref: true
})
if (content.length > 0) {
content = striptags(content)
cn = (content.match(CN_PATTERN) || []).length
en = (content.replace(CN_PATTERN, '').match(EN_PATTERN) || []).length
}
return [cn, en]
}

export function wordCount(content: string, transformFn?: (count: number) => any): number | string {
export function wordCount(content?: any, transformFn?: (count: number) => any): any {
const [cn, en] = countContent(content)
const count = cn + en
if (typeof transformFn === 'function') {
Expand All @@ -32,7 +31,7 @@ interface TimeConfig {
en?: number
}

export function timeCalc(content: string, { cn = 300, en = 160 }: TimeConfig = {}): number {
export function timeCalc(content?: any, { cn = 300, en = 160 }: TimeConfig = {}): number {
const [cnCount, enCount] = countContent(content)
const time = cnCount / cn + enCount / en
return time === 0 ? 0 : time < 1 ? 1 : Math.ceil(time)
Expand Down
6 changes: 6 additions & 0 deletions test/word-counter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,10 @@ describe('word-counter test', () => {
})
).toEqual(3)
})

it('content be counted should be string type', () => {
;[null, undefined, {}, [], function() {}, Symbol(), true].forEach(type => {
expect(() => wordCount(type)).toThrow()
})
})
})
84 changes: 10 additions & 74 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1563,44 +1563,12 @@ doctrine@0.7.2:
esutils "^1.1.6"
isarray "0.0.1"

dom-serializer@0:
version "0.1.0"
resolved "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz#073c697546ce0780ce23be4a28e293e40bc30c82"
dependencies:
domelementtype "~1.1.1"
entities "~1.1.1"

domelementtype@1:
version "1.2.1"
resolved "https://registry.npmjs.org/domelementtype/-/domelementtype-1.2.1.tgz#578558ef23befac043a1abb0db07635509393479"

domelementtype@^1.3.0:
version "1.3.0"
resolved "http://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz#b17aed82e8ab59e52dd9c19b1756e0fc187204c2"

domelementtype@~1.1.1:
version "1.1.3"
resolved "http://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz#bd28773e2642881aec51544924299c5cd822185b"

domexception@^1.0.1:
version "1.0.1"
resolved "https://registry.npmjs.org/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90"
dependencies:
webidl-conversions "^4.0.2"

domhandler@^2.3.0:
version "2.4.2"
resolved "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz#8805097e933d65e85546f726d60f5eb88b44f803"
dependencies:
domelementtype "1"

domutils@^1.5.1:
version "1.7.0"
resolved "https://registry.npmjs.org/domutils/-/domutils-1.7.0.tgz#56ea341e834e06e6748af7a1cb25da67ea9f8c2a"
dependencies:
dom-serializer "0"
domelementtype "1"

dot-prop@^3.0.0:
version "3.0.0"
resolved "https://registry.npmjs.org/dot-prop/-/dot-prop-3.0.0.tgz#1b708af094a49c9a0e7dbcad790aba539dac1177"
Expand Down Expand Up @@ -1663,10 +1631,6 @@ end-of-stream@^1.0.0, end-of-stream@^1.1.0:
dependencies:
once "^1.4.0"

entities@^1.1.1, entities@~1.1.1:
version "1.1.2"
resolved "https://registry.npmjs.org/entities/-/entities-1.1.2.tgz#bdfa735299664dfafd34529ed4f8522a275fea56"

env-ci@^3.0.0:
version "3.1.0"
resolved "https://registry.npmjs.org/env-ci/-/env-ci-3.1.0.tgz#8aef2340389ae17e27623988ae1002f130491185"
Expand Down Expand Up @@ -2426,10 +2390,6 @@ has@^1.0.1:
dependencies:
function-bind "^1.1.1"

he@^1.0.0:
version "1.2.0"
resolved "https://registry.npmjs.org/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"

home-or-tmp@^2.0.0:
version "2.0.0"
resolved "https://registry.npmjs.org/home-or-tmp/-/home-or-tmp-2.0.0.tgz#e36c3f2d2cae7d746a857e38d18d5f32a7882db8"
Expand Down Expand Up @@ -2457,26 +2417,6 @@ html-encoding-sniffer@^1.0.2:
dependencies:
whatwg-encoding "^1.0.1"

html-to-text@^4.0.0:
version "4.0.0"
resolved "https://registry.npmjs.org/html-to-text/-/html-to-text-4.0.0.tgz#c1f4e100d74e9feab5b152d7b6b3be3c1c6412b0"
dependencies:
he "^1.0.0"
htmlparser2 "^3.9.2"
lodash "^4.17.4"
optimist "^0.6.1"

htmlparser2@^3.9.2:
version "3.10.0"
resolved "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.10.0.tgz#5f5e422dcf6119c0d983ed36260ce9ded0bee464"
dependencies:
domelementtype "^1.3.0"
domhandler "^2.3.0"
domutils "^1.5.1"
entities "^1.1.1"
inherits "^2.0.1"
readable-stream "^3.0.6"

http-cache-semantics@^3.8.1:
version "3.8.1"
resolved "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-3.8.1.tgz#39b0e16add9b605bf0a9ef3d9daaf4843b4cacd2"
Expand Down Expand Up @@ -5190,14 +5130,6 @@ read@1, read@1.0.x, read@~1.0.1, read@~1.0.7:
string_decoder "~1.1.1"
util-deprecate "~1.0.1"

readable-stream@^3.0.6:
version "3.0.6"
resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-3.0.6.tgz#351302e4c68b5abd6a2ed55376a7f9a25be3057a"
dependencies:
inherits "^2.0.3"
string_decoder "^1.1.1"
util-deprecate "^1.0.1"

readable-stream@~1.1.10:
version "1.1.14"
resolved "http://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz#7cf4c54ef648e3813084c636dd2079e166c081d9"
Expand Down Expand Up @@ -5955,16 +5887,16 @@ string-width@^1.0.1:
is-fullwidth-code-point "^2.0.0"
strip-ansi "^4.0.0"

string_decoder@^1.1.1, string_decoder@~1.1.1:
string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"

string_decoder@~1.1.1:
version "1.1.1"
resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
dependencies:
safe-buffer "~5.1.0"

string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"

stringify-object@^3.2.2:
version "3.3.0"
resolved "https://registry.npmjs.org/stringify-object/-/stringify-object-3.3.0.tgz#703065aefca19300d3ce88af4f5b3956d7556629"
Expand Down Expand Up @@ -6011,6 +5943,10 @@ strip-json-comments@2.0.1, strip-json-comments@~2.0.1:
version "2.0.1"
resolved "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a"

striptags@^3.1.1:
version "3.1.1"
resolved "https://registry.npmjs.org/striptags/-/striptags-3.1.1.tgz#c8c3e7fdd6fb4bb3a32a3b752e5b5e3e38093ebd"

supports-color@^2.0.0:
version "2.0.0"
resolved "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz#535d045ce6b6363fa40117084629995e9df324c7"
Expand Down Expand Up @@ -6382,7 +6318,7 @@ use@^3.1.0:
version "3.1.1"
resolved "https://registry.npmjs.org/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"

util-deprecate@^1.0.1, util-deprecate@~1.0.1:
util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"

Expand Down

0 comments on commit 674070d

Please sign in to comment.