From 6fc6fb2c99fb9a7d8b51a7a674eae74960f98069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ionic=C4=83=20Biz=C4=83u?= Date: Sun, 19 Mar 2023 15:33:19 +0100 Subject: [PATCH] Updated docs --- LICENSE | 2 +- README.md | 257 +++++++++++++++++++++++----------------------- package-lock.json | 4 +- 3 files changed, 134 insertions(+), 129 deletions(-) diff --git a/LICENSE b/LICENSE index 2496334..7fd198a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2016-21 Ionică Bizău (https://ionicabizau.net) +Copyright (c) 2016-23 Ionică Bizău (https://ionicabizau.net) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 5d65337..7ff559e 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,6 @@ -Want to save time or not using Node.js? Try our [hosted API](https://scrape-it.saasify.sh). - @@ -117,93 +115,95 @@ scrapeIt("https://ionicabizau.net", { selector: ".header img" , attr: "src" } -}).then(({ data, response }) => { - console.log(`Status Code: ${response.statusCode}`) +}).then(({ data, status }) => { + console.log(`Status Code: ${status}`) console.log(data) -}) - -// Callback interface -scrapeIt("https://ionicabizau.net", { - // Fetch the articles - articles: { - listItem: ".article" - , data: { - - // Get the article date and convert it into a Date object - createdAt: { - selector: ".date" - , convert: x => new Date(x) - } - - // Get the title - , title: "a.article-title" - - // Nested list - , tags: { - listItem: ".tags > span" - } - - // Get the content - , content: { - selector: ".article-content" - , how: "html" - } - - // Get attribute value of root listItem by omitting the selector - , classes: { - attr: "class" +}); + + +// Async-Await +(async () => { + const { data } = await scrapeIt("https://ionicabizau.net", { + // Fetch the articles + articles: { + listItem: ".article" + , data: { + + // Get the article date and convert it into a Date object + createdAt: { + selector: ".date" + , convert: x => new Date(x) + } + + // Get the title + , title: "a.article-title" + + // Nested list + , tags: { + listItem: ".tags > span" + } + + // Get the content + , content: { + selector: ".article-content" + , how: "html" + } + + // Get attribute value of root listItem by omitting the selector + , classes: { + attr: "class" + } } } - } - // Fetch the blog pages - , pages: { - listItem: "li.page" - , name: "pages" - , data: { - title: "a" - , url: { - selector: "a" - , attr: "href" + // Fetch the blog pages + , pages: { + listItem: "li.page" + , name: "pages" + , data: { + title: "a" + , url: { + selector: "a" + , attr: "href" + } } } - } - // Fetch some other data from the page - , title: ".header h1" - , desc: ".header h2" - , avatar: { - selector: ".header img" - , attr: "src" - } -}, (err, { data }) => { - console.log(err || data) -}) -// { articles: -// [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET), -// title: 'Pi Day, Raspberry Pi and Command Line', -// tags: [Object], -// content: '

Everyone knows (or should know)...a" alt="">

\n', -// classes: [Object] }, -// { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET), -// title: 'How I ported Memory Blocks to modern web', -// tags: [Object], -// content: '

Playing computer games is a lot of fun. ...', -// classes: [Object] }, -// { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET), -// title: 'How to convert JSON to Markdown using json2md', -// tags: [Object], -// content: '

I love and ...', -// classes: [Object] } ], -// pages: -// [ { title: 'Blog', url: '/' }, -// { title: 'About', url: '/about' }, -// { title: 'FAQ', url: '/faq' }, -// { title: 'Training', url: '/training' }, -// { title: 'Contact', url: '/contact' } ], -// title: 'Ionică Bizău', -// desc: 'Web Developer, Linux geek and Musician', -// avatar: '/images/logo.png' } + // Fetch some other data from the page + , title: ".header h1" + , desc: ".header h2" + , avatar: { + selector: ".header img" + , attr: "src" + } + }) + console.log(data) + // { articles: + // [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET), + // title: 'Pi Day, Raspberry Pi and Command Line', + // tags: [Object], + // content: '

Everyone knows (or should know)...a" alt="">

\n', + // classes: [Object] }, + // { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET), + // title: 'How I ported Memory Blocks to modern web', + // tags: [Object], + // content: '

Playing computer games is a lot of fun. ...', + // classes: [Object] }, + // { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET), + // title: 'How to convert JSON to Markdown using json2md', + // tags: [Object], + // content: '

I love and ...', + // classes: [Object] } ], + // pages: + // [ { title: 'Blog', url: '/' }, + // { title: 'About', url: '/about' }, + // { title: 'FAQ', url: '/faq' }, + // { title: 'Training', url: '/training' }, + // { title: 'Contact', url: '/contact' } ], + // title: 'Ionică Bizău', + // desc: 'Web Developer, Linux geek and Musician', + // avatar: '/images/logo.png' } +})() ``` @@ -380,66 +380,71 @@ Thanks! :heart: If you are using this library in one of your projects, add it in this list. :sparkles: - `@web-master/node-web-scraper` - - `proxylist` + - `macoolka-network` + - `@tryghost/mg-webscraper` - `mit-ocw-scraper` - - `beervana-scraper` - `cnn-market` - `bandcamp-scraper` - - `@tryghost/mg-webscraper` - - `blockchain-notifier` - - `dncli` - - `degusta-scrapper` - - `trump-cabinet-picks` - - `cevo-lookup` - - `camaleon` - - `scrape-vinmonopolet` - - `do-fn` + - `scrapos-worker` - `university-news-notifier` - - `selfrefactor` - - `parn` - - `picarto-lib` + - `fa.js` - `mix-dl` - - `jishon` - - `sahibinden` - - `sahibindenServer` - - `sgdq-collector` - - `ubersetzung` - - `ui-studentsearch` - - `paklek-cli` - - `egg-crawler` - - `@thetrg/gibson` - - `jobs-fetcher` - - `fmgo-marketdata` - `rayko-tools` - - `leximaven` + - `beervana-scraper` - `codinglove-scraper` + - `sgdq-collector` - `vandalen.rhyme.js` - - `uniwue-lernplaetze-scraper` + - `node-red-contrib-getdata-website` + - `startpage-quick-search` + - `wikitools` - `spon-market` - - `macoolka-net-scrape` + - `fmgo-marketdata` - `gatsby-source-bandcamp` - - `salesforcerelease-parser` - - `yu-ncov-scrape-dxy` - - `rs-api` - - `startpage-quick-search` + - `carirs` - `helyesiras` - - `covidau` - `3abn` - - `scrape-it-cli` + - `cevo-lookup` + - `sahibinden` + - `dncli` + - `flamescraper` - `codementor` + - `scrape-it-cli` + - `jishon` + - `@thetrg/gibson` + - `blockchain-notifier` + - `camaleon` + - `parn` + - `@lukekarrys/ebp` + - `selfrefactor` + - `yu-ncov-scrape-dxy` - `u-pull-it-ne-parts-finder` - - `blankningsregistret` - - `scrapos-worker` - - `@ben-wormald/bandcamp-scraper` - - `bible-scraper` - - `flamescraper` - - `fa.js` + - `apixpress` - `growapi` - - `node-red-contrib-scrape-it` - - `carirs` - `steam-workshop-scraper` - - `macoolka-network` - - `apixpress` + - `scrape-vinmonopolet` + - `paklek-cli` + - `rs-api` + - `sahibindenServer` + - `salesforcerelease-parser` + - `picarto-lib` + - `ui-studentsearch` + - `macoolka-net-scrape` + - `node-red-contrib-scrape-it` + - `egg-crawler` + - `uniwue-lernplaetze-scraper` + - `simple-ai-alpha` + - `ubersetzung` + - `blankningsregistret` + - `do-fn` + - `bible-scraper` + - `covidau` + - `jobs-fetcher` + - `trump-cabinet-picks` + - `leximaven` + - `proxylist` + - `@ben-wormald/bandcamp-scraper` + - `degusta-scrapper` + - `nurlresolver` diff --git a/package-lock.json b/package-lock.json index a05ce67..d319d92 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scrape-it", - "version": "5.4.0", + "version": "6.0.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scrape-it", - "version": "5.4.0", + "version": "6.0.0", "license": "MIT", "dependencies": { "@types/cheerio": "^0.22.31",