-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f60248e
Showing
12 changed files
with
415 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
files: | ||
- package.json | ||
plugins: | ||
prerelease: | ||
Adding authors: | ||
plugin: bumped-terminal | ||
command: npx git-authors-cli | ||
Linting config files: | ||
plugin: bumped-finepack | ||
postrelease: | ||
Generating CHANGELOG file: | ||
plugin: bumped-changelog | ||
Committing new version: | ||
plugin: bumped-terminal | ||
command: 'git add CHANGELOG.md package.json && git commit -m "Release $newVersion"' | ||
Detecting problems before publish: | ||
plugin: bumped-terminal | ||
command: 'git-dirty && npm test' | ||
Publishing tag to GitHub: | ||
plugin: bumped-terminal | ||
command: 'git tag $newVersion && git push && git push --tags' | ||
Publishing to NPM: | ||
plugin: bumped-terminal | ||
command: npm publish |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# http://editorconfig.org | ||
|
||
root = true | ||
|
||
[*] | ||
indent_style = space | ||
indent_size = 2 | ||
end_of_line = lf | ||
charset = utf-8 | ||
trim_trailing_whitespace = true | ||
insert_final_newline = true | ||
max_line_length = 100 | ||
indent_brace_style = 1TBS | ||
spaces_around_operators = true | ||
quote_type = auto | ||
|
||
[package.json] | ||
indent_style = space | ||
indent_size = 2 | ||
|
||
[*.md] | ||
trim_trailing_whitespace = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
* text=auto |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
############################ | ||
# npm | ||
############################ | ||
node_modules | ||
npm-debug.log | ||
.node_history | ||
yarn.lock | ||
package-lock.json | ||
|
||
############################ | ||
# tmp, editor & OS files | ||
############################ | ||
.tmp | ||
*.swo | ||
*.swp | ||
*.swn | ||
*.swm | ||
.DS_Store | ||
*# | ||
*~ | ||
.idea | ||
*sublime* | ||
nbproject | ||
|
||
############################ | ||
# Tests | ||
############################ | ||
testApp | ||
coverage | ||
.nyc_output | ||
|
||
############################ | ||
# Other | ||
############################ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
unsafe-perm=true | ||
save-prefix=~ | ||
shrinkwrap=false | ||
save=false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
language: node_js | ||
node_js: | ||
- "node" | ||
- "lts/*" | ||
after_success: npm run coveralls |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright © 2018 Kiko Beats <josefrancisco.verdu@gmail.com> (kikobeats.com) | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# get-html | ||
|
||
![Last version](https://img.shields.io/github/tag/Kikobeats/get-html.svg?style=flat-square) | ||
[![Build Status](https://img.shields.io/travis/Kikobeats/get-html/master.svg?style=flat-square)](https://travis-ci.org/Kikobeats/get-html) | ||
[![Coverage Status](https://img.shields.io/coveralls/Kikobeats/get-html.svg?style=flat-square)](https://coveralls.io/github/Kikobeats/get-html) | ||
[![Dependency status](https://img.shields.io/david/Kikobeats/get-html.svg?style=flat-square)](https://david-dm.org/Kikobeats/get-html) | ||
[![Dev Dependencies Status](https://img.shields.io/david/dev/Kikobeats/get-html.svg?style=flat-square)](https://david-dm.org/Kikobeats/get-html#info=devDependencies) | ||
[![NPM Status](https://img.shields.io/npm/dm/get-html.svg?style=flat-square)](https://www.npmjs.org/package/get-html) | ||
[![Donate](https://img.shields.io/badge/donate-paypal-blue.svg?style=flat-square)](https://paypal.me/Kikobeats) | ||
|
||
> Get the HTML from any website, using prerendering when necessary. | ||
## Features | ||
|
||
- Get HTML markup from any website (client side apps as well) | ||
- Prerendering detection based on domains whitelist. | ||
- Speeds up the process by blocking ad trackers. | ||
- Encoding body response properly. | ||
|
||
## Install | ||
|
||
```bash | ||
$ npm install get-html --save | ||
``` | ||
|
||
## Usage | ||
|
||
```js | ||
'use strict' | ||
|
||
const getHTML = require('get-html') | ||
;(async () => { | ||
const url = 'https://kikobeats.com' | ||
const { html, stats } = await getHTML(url) | ||
console.log(html) | ||
})() | ||
``` | ||
|
||
## API | ||
|
||
### getHTML(url, [options]) | ||
|
||
#### url | ||
|
||
*Required*<br> | ||
Type: `string` | ||
|
||
The target URL for getting the HTML markup. | ||
|
||
#### options | ||
|
||
##### prerender | ||
|
||
Type: `boolean|string`<br> | ||
Default: `'auto'` | ||
|
||
Explicitly enable or disable prerendering as the mechanism for getting the HTML markup. | ||
|
||
The value `auto` means that the library internally uses a whitelist of websites that don't need prerendering by default. This list is used to speed up the process, using `fetch` mode for these websites. | ||
|
||
See [fetchMode parameter](#fetchMode) to learn more. | ||
|
||
##### browserless | ||
|
||
Type: `object`<br> | ||
|
||
A [browserless](https://browserless.js.org/) instance to be used for interacting with puppeteer. If you don't provide one, a browser instance will be created on each library call. | ||
|
||
##### encoding | ||
|
||
Type: `string`<br> | ||
Default: `'utf-8'` | ||
|
||
Encoding the HTML markup properly from the body response. | ||
|
||
It determines the target encoding to use. Internally, a Node.js library is used for converting HTML documents of arbitrary encoding into a target encoding (utf8, utf16, etc). | ||
|
||
##### fetchMode | ||
|
||
Type: `function`<br> | ||
|
||
A function that will be invoked to determine the `mode` used for getting the HTML markup from the target URL. | ||
|
||
The default `fetchMode` is: | ||
|
||
```js | ||
const getFetchMode = (url, { prerender }) => { | ||
if (prerender === false) return 'fetch' | ||
if (prerender !== 'auto') return 'prerender' | ||
return autoDomains.includes(parseDomain(url).domain) ? 'fetch' : 'prerender' | ||
} | ||
``` | ||
|
||
##### gotOptions | ||
|
||
Type: `object`<br> | ||
|
||
Under `mode=fetch`, pass configuration object to [got](https://www.npmjs.com/package/got). | ||
|
||
##### puppeteerOpts | ||
|
||
Type: `object` | ||
|
||
When `mode` is not `fetch`, pass configuration object to [puppeteer](https://www.npmjs.com/package/puppeteer). | ||
|
||
## License | ||
|
||
**get-html** © [Kiko Beats](https://kikobeats.com), released under the [MIT](https://github.com/Kikobeats/get-html/blob/master/LICENSE.md) License.<br> | ||
Authored and maintained by Kiko Beats with help from [contributors](https://github.com/Kikobeats/get-html/contributors). | ||
|
||
> [kikobeats.com](https://kikobeats.com) · GitHub [Kiko Beats](https://github.com/Kikobeats) · Twitter [@Kikobeats](https://twitter.com/Kikobeats) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
{ | ||
"name": "get-html", | ||
"description": "Get the HTML from any website, using prerendering when is necessary.", | ||
"homepage": "https://documentup.com/Kikobeats/get-html", | ||
"version": "0.0.0", | ||
"main": "src/index.js", | ||
"author": { | ||
"email": "josefrancisco.verdu@gmail.com", | ||
"name": "Kiko Beats", | ||
"url": "https://kikobeats.com" | ||
}, | ||
"repository": { | ||
"type": "git", | ||
"url": "git+https://github.com/Kikobeats/get-html.git" | ||
}, | ||
"bugs": { | ||
"url": "https://github.com/Kikobeats/get-html/issues" | ||
}, | ||
"keywords": [ | ||
"fetch", | ||
"get", | ||
"got", | ||
"headless", | ||
"html", | ||
"prerender", | ||
"request" | ||
], | ||
"dependencies": { | ||
"browserless": "~3.6.1", | ||
"got": "~8.3.1", | ||
"html-encode": "~2.0.1", | ||
"parse-domain": "~2.1.2", | ||
"puppeteer": "~1.5.0", | ||
"time-span": "~2.0.0" | ||
}, | ||
"devDependencies": { | ||
"ava": "latest", | ||
"coveralls": "latest", | ||
"finepack": "latest", | ||
"git-authors-cli": "latest", | ||
"git-dirty": "latest", | ||
"husky": "latest", | ||
"lint-staged": "latest", | ||
"nyc": "latest", | ||
"prettier-standard": "latest", | ||
"standard": "latest", | ||
"standard-markdown": "latest" | ||
}, | ||
"engines": { | ||
"node": ">= 8" | ||
}, | ||
"files": [ | ||
"src" | ||
], | ||
"scripts": { | ||
"clean": "rm -rf node_modules", | ||
"coveralls": "nyc report --reporter=text-lcov | coveralls", | ||
"lint": "standard-markdown && standard", | ||
"precommit": "lint-staged", | ||
"pretest": "npm run lint", | ||
"pretty": "prettier-standard index.js {core,test,bin,scripts}/**/*.js --single-quote --print-width 100", | ||
"test": "nyc ava" | ||
}, | ||
"license": "MIT", | ||
"lint-staged": { | ||
"package.json": [ | ||
"finepack", | ||
"git add" | ||
], | ||
"*.js": [ | ||
"prettier-standard", | ||
"git add" | ||
], | ||
"*.md": [ | ||
"standard-markdown", | ||
"git add" | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Exported list of bare domain names (no TLD). NOTE(review): presumably the `autoDomains` whitelist that the README's default `fetchMode` checks to choose 'fetch' over 'prerender' — confirm against src/index.js. | ||
module.exports = [ | ||
'apple', | ||
'bbc', | ||
'bloomberg', | ||
'digg', | ||
'engadget', | ||
'etsy', | ||
'eventbrite', | ||
'facebook', | ||
'flickr', | ||
'github', | ||
'gizmodo', | ||
'huffingtonpost', | ||
'imdb', | ||
'instagram', | ||
'medium', | ||
'microsoft', | ||
'nytimes', | ||
'pinterest', | ||
'reddit', | ||
'slideshare', | ||
'sourceforge', | ||
'techcrunch', | ||
'telegraph', | ||
'theverge', | ||
'twitter', | ||
'vimeo', | ||
'yelp', | ||
'youtube' | ||
] |
Oops, something went wrong.