From 72f5e13039bfe93b0b5d9b2f60736208f139bfd5 Mon Sep 17 00:00:00 2001 From: Eran Sakal Date: Sat, 19 Feb 2022 09:53:32 +0200 Subject: [PATCH] feat(beyahad-bishvilha): support new scraper for the histadrut site beyahad bishvilha (#642) --- .eslintrc.js | 1 + README.md | 11 ++ src/definitions.ts | 7 +- src/scrapers/base-scraper-with-browser.ts | 24 ++- src/scrapers/base-scraper.ts | 3 +- src/scrapers/beyahad-bishvilha.test.ts | 52 +++++++ src/scrapers/beyahad-bishvilha.ts | 176 ++++++++++++++++++++++ src/scrapers/factory.ts | 3 + src/tests/.tests-config.tpl.js | 3 +- src/transactions.ts | 2 +- 10 files changed, 272 insertions(+), 10 deletions(-) create mode 100644 src/scrapers/beyahad-bishvilha.test.ts create mode 100644 src/scrapers/beyahad-bishvilha.ts diff --git a/.eslintrc.js b/.eslintrc.js index ad5aea2b..b046c36e 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -3,6 +3,7 @@ module.exports = { "rules": { "import/prefer-default-export": 0, "no-nested-ternary": 0, + "class-methods-use-this": 0, "arrow-body-style": 0, "no-shadow": 0, "no-await-in-loop": 0, diff --git a/README.md b/README.md index fbf9ca30..612004a4 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Currently only the following banks are supported: - Beinleumi (Thanks to [@dudiventura](https://github.com/dudiventura) from the Intuit FDP OpenSource Team) - Massad - Yahav (Thanks to [@gczobel](https://github.com/gczobel)) +- Beyahad Bishvilha - [ביחד בשבילך](https://www.hist.org.il/) (Thanks to [@esakal](https://github.com/esakal)) # Prerequisites To use this you will need to have [Node.js](https://nodejs.org) >= 10.x installed. @@ -282,6 +283,16 @@ const credentials = { ``` This scraper supports fetching transaction from up to six months. 
+## Beyahad Bishvilha +This scraper expects the following credentials object: +```node +const credentials = { + id: <user id>, + password: <user password> +}; +``` + + # Known projects These are the projects known to be using this module: - [Israeli YNAB updater](https://github.com/eshaham/israeli-ynab-updater) - A command line tool for exporting banks data to CSVs, formatted specifically for [YNAB](https://www.youneedabudget.com) diff --git a/src/definitions.ts b/src/definitions.ts index 252bb215..bd2251bb 100644 --- a/src/definitions.ts +++ b/src/definitions.ts @@ -17,7 +17,8 @@ export enum CompanyTypes { mizrahi = 'mizrahi', leumi = 'leumi', massad = 'massad', - yahav = 'yahav' + yahav = 'yahav', + beyahadBishvilha = 'beyahadBishvilha' } export const SCRAPERS = { @@ -81,4 +82,8 @@ export const SCRAPERS = { name: 'Bank Yahav', loginFields: ['username', 'nationalID', PASSWORD_FIELD], }, + [CompanyTypes.beyahadBishvilha]: { + name: 'Beyahad Bishvilha', + loginFields: ['id', PASSWORD_FIELD], + }, }; diff --git a/src/scrapers/base-scraper-with-browser.ts b/src/scrapers/base-scraper-with-browser.ts index 884d0801..b85e6f22 100644 --- a/src/scrapers/base-scraper-with-browser.ts +++ b/src/scrapers/base-scraper-with-browser.ts @@ -41,7 +41,7 @@ export interface LoginOptions { loginUrl: string; checkReadiness?: () => Promise; fields: {selector: string, value: string}[]; - submitButtonSelector: string; + submitButtonSelector: string | (() => Promise); preAction?: () => Promise; postAction?: () => Promise; possibleResults: PossibleLoginResults; @@ -116,6 +116,13 @@ class BaseScraperWithBrowser extends BaseScraper { // all the classes that inherit from this base assume is it mandatory. 
protected page!: Page; + protected getViewPort() { + return { + width: VIEWPORT_WIDTH, + height: VIEWPORT_HEIGHT, + }; + } + async initialize() { debug('initialize scraper'); this.emitProgress(ScaperProgressTypes.Initializing); @@ -166,10 +173,11 @@ class BaseScraperWithBrowser extends BaseScraper { await this.options.preparePage(this.page); } - debug(`set viewport to width ${VIEWPORT_WIDTH}, height ${VIEWPORT_HEIGHT}`); + const viewport = this.getViewPort(); + debug(`set viewport to width ${viewport.width}, height ${viewport.height}`); await this.page.setViewport({ - width: VIEWPORT_WIDTH, - height: VIEWPORT_HEIGHT, + width: viewport.width, + height: viewport.height, }); this.page.on('requestfailed', (request) => { @@ -229,7 +237,7 @@ class BaseScraperWithBrowser extends BaseScraper { if (loginOptions.checkReadiness) { debug('execute \'checkReadiness\' interceptor provided in login options'); await loginOptions.checkReadiness(); - } else { + } else if (typeof loginOptions.submitButtonSelector === 'string') { debug('wait until submit button is available'); await waitUntilElementFound(this.page, loginOptions.submitButtonSelector); } @@ -243,7 +251,11 @@ class BaseScraperWithBrowser extends BaseScraper { debug('fill login components input with relevant values'); await this.fillInputs(loginFrameOrPage, loginOptions.fields); debug('click on login submit button'); - await clickButton(loginFrameOrPage, loginOptions.submitButtonSelector); + if (typeof loginOptions.submitButtonSelector === 'string') { + await clickButton(loginFrameOrPage, loginOptions.submitButtonSelector); + } else { + await loginOptions.submitButtonSelector(); + } this.emitProgress(ScaperProgressTypes.LoggingIn); if (loginOptions.postAction) { diff --git a/src/scrapers/base-scraper.ts b/src/scrapers/base-scraper.ts index 718f62ee..93f6a6c7 100644 --- a/src/scrapers/base-scraper.ts +++ b/src/scrapers/base-scraper.ts @@ -97,9 +97,10 @@ export interface ScaperOptions { preparePage?: (page: Page) => Promise; 
/** - * if set, store a screnshot if failed to scrape. Used for debug purposes + * if set, store a screenshot if failed to scrape. Used for debug purposes */ storeFailureScreenShotPath?: string; + } export enum ScaperProgressTypes { diff --git a/src/scrapers/beyahad-bishvilha.test.ts b/src/scrapers/beyahad-bishvilha.test.ts new file mode 100644 index 00000000..820cb0b4 --- /dev/null +++ b/src/scrapers/beyahad-bishvilha.test.ts @@ -0,0 +1,52 @@ +import BeyahadBishvilhaScraper from './beyahad-bishvilha'; +import { + maybeTestCompanyAPI, extendAsyncTimeout, getTestsConfig, exportTransactions, +} from '../tests/tests-utils'; +import { SCRAPERS } from '../definitions'; +import { LoginResults } from './base-scraper-with-browser'; + +const COMPANY_ID = 'beyahadBishvilha'; // TODO this property should be hard-coded in the provider +const testsConfig = getTestsConfig(); + +describe('Beyahad Bishvilha scraper', () => { + beforeAll(() => { + extendAsyncTimeout(); // The default timeout is 5 seconds per async test, this function extends the timeout value + }); + + test('should expose login fields in scrapers constant', () => { + expect(SCRAPERS.beyahadBishvilha).toBeDefined(); + expect(SCRAPERS.beyahadBishvilha.loginFields).toContain('id'); + expect(SCRAPERS.beyahadBishvilha.loginFields).toContain('password'); + }); + + maybeTestCompanyAPI(COMPANY_ID, (config) => config.companyAPI.invalidPassword)('should fail on invalid user/password"', async () => { + const options = { + ...testsConfig.options, + companyId: COMPANY_ID, + }; + + const scraper = new BeyahadBishvilhaScraper(options); + + const result = await scraper.scrape({ id: 'e10s12', password: '3f3ss3d' }); + + expect(result).toBeDefined(); + expect(result.success).toBeFalsy(); + expect(result.errorType).toBe(LoginResults.InvalidPassword); + }); + + maybeTestCompanyAPI(COMPANY_ID)('should scrape transactions"', async () => { + const options = { + ...testsConfig.options, + companyId: COMPANY_ID, + }; + + const scraper = new 
BeyahadBishvilhaScraper(options); + const result = await scraper.scrape(testsConfig.credentials.beyahadBishvilha); + expect(result).toBeDefined(); + const error = `${result.errorType || ''} ${result.errorMessage || ''}`.trim(); + expect(error).toBe(''); + expect(result.success).toBeTruthy(); + + exportTransactions(COMPANY_ID, result.accounts || []); + }); +}); diff --git a/src/scrapers/beyahad-bishvilha.ts b/src/scrapers/beyahad-bishvilha.ts new file mode 100644 index 00000000..e2fd1cb2 --- /dev/null +++ b/src/scrapers/beyahad-bishvilha.ts @@ -0,0 +1,176 @@ +import { Page } from 'puppeteer'; +import moment from 'moment'; +import { BaseScraperWithBrowser, LoginResults, PossibleLoginResults } from './base-scraper-with-browser'; +import { ScaperOptions, ScraperCredentials } from './base-scraper'; +import { Transaction, TransactionStatuses, TransactionTypes } from '../transactions'; +import { pageEval, pageEvalAll, waitUntilElementFound } from '../helpers/elements-interactions'; +import { getDebug } from '../helpers/debug'; +import { filterOldTransactions } from '../helpers/transactions'; +import { + DOLLAR_CURRENCY, + DOLLAR_CURRENCY_SYMBOL, EURO_CURRENCY, + EURO_CURRENCY_SYMBOL, + SHEKEL_CURRENCY, + SHEKEL_CURRENCY_SYMBOL, +} from '../constants'; + +const debug = getDebug('beyahadBishvilha'); + +const DATE_FORMAT = 'DD/MM/YY'; +const LOGIN_URL = 'https://www.hist.org.il/login'; +const SUCCESS_URL = 'https://www.hist.org.il/'; +const CARD_URL = 'https://www.hist.org.il/card/balanceAndUses'; + +interface ScrapedTransaction { + date: string; + description: string; + type: string; + chargedAmount: string; + identifier: string; +} + +function getAmountData(amountStr: string) { + const amountStrCln = amountStr.replace(',', ''); + let currency: string | null = null; + let amount: number | null = null; + if (amountStrCln.includes(SHEKEL_CURRENCY_SYMBOL)) { + amount = parseFloat(amountStrCln.replace(SHEKEL_CURRENCY_SYMBOL, '')); + currency = SHEKEL_CURRENCY; + } else if 
(amountStrCln.includes(DOLLAR_CURRENCY_SYMBOL)) { + amount = parseFloat(amountStrCln.replace(DOLLAR_CURRENCY_SYMBOL, '')); + currency = DOLLAR_CURRENCY; + } else if (amountStrCln.includes(EURO_CURRENCY_SYMBOL)) { + amount = parseFloat(amountStrCln.replace(EURO_CURRENCY_SYMBOL, '')); + currency = EURO_CURRENCY; + } else { + const parts = amountStrCln.split(' '); + [currency] = parts; + amount = parseFloat(parts[1]); + } + + return { + amount, + currency, + }; +} + +function convertTransactions(txns: ScrapedTransaction[]): Transaction[] { + debug(`convert ${txns.length} raw transactions to official Transaction structure`); + return txns.map((txn) => { + const chargedAmountTuple = getAmountData(txn.chargedAmount || ''); + const txnProcessedDate = moment(txn.date, DATE_FORMAT); + + const result: Transaction = { + type: TransactionTypes.Normal, + status: TransactionStatuses.Completed, + date: txnProcessedDate.toISOString(), + processedDate: txnProcessedDate.toISOString(), + originalAmount: chargedAmountTuple.amount, + originalCurrency: chargedAmountTuple.currency, + chargedAmount: chargedAmountTuple.amount, + chargedCurrency: chargedAmountTuple.currency, + description: txn.description || '', + memo: '', + identifier: txn.identifier, + }; + + return result; + }); +} + + +async function fetchTransactions(page: Page, options: ScaperOptions) { + await page.goto(CARD_URL); + await waitUntilElementFound(page, '.react-loading.hide', false); + const defaultStartMoment = moment().subtract(1, 'years'); + const startDate = options.startDate || defaultStartMoment.toDate(); + const startMoment = moment.max(defaultStartMoment, moment(startDate)); + + const accountNumber = await pageEval(page, '.wallet-details div:nth-of-type(2)', null, (element) => { + return (element as any).innerText.replace('מספר כרטיס ', ''); + }); + + const balance = await pageEval(page, '.wallet-details div:nth-of-type(4) > span:nth-of-type(2)', null, (element) => { + return (element as any).innerText; + }); + 
+ debug('fetch raw transactions from page'); + + const rawTransactions: (ScrapedTransaction | null)[] = await pageEvalAll<(ScrapedTransaction | null)[]>(page, '.transaction-container, .transaction-component-container', [], (items) => { + return (items).map((el) => { + const columns: NodeListOf = el.querySelectorAll('.transaction-item > span'); + if (columns.length === 7) { + return { + date: columns[0].innerText, + identifier: columns[1].innerText, + description: columns[3].innerText, + type: columns[5].innerText, + chargedAmount: columns[6].innerText, + }; + } + return null; + }); + }); + debug(`fetched ${rawTransactions.length} raw transactions from page`); + + const accountTransactions = convertTransactions(rawTransactions.filter((item) => !!item) as ScrapedTransaction[]); + + debug('filer out old transactions'); + const txns = filterOldTransactions(accountTransactions, startMoment, false); + debug(`found ${txns.length} valid transactions out of ${accountTransactions.length} transactions for account ending with ${accountNumber.substring(accountNumber.length - 2)}`); + + return { + accountNumber, + balance: getAmountData(balance).amount, + txns, + }; +} + +function getPossibleLoginResults(): PossibleLoginResults { + const urls: PossibleLoginResults = {}; + urls[LoginResults.Success] = [SUCCESS_URL]; + urls[LoginResults.ChangePassword] = []; // TODO + urls[LoginResults.InvalidPassword] = []; // TODO + urls[LoginResults.UnknownError] = []; // TODO + return urls; +} + +function createLoginFields(credentials: ScraperCredentials) { + return [ + { selector: '#loginId', value: credentials.id }, + { selector: '#loginPassword', value: credentials.password }, + ]; +} + +class BeyahadBishvilhaScraper extends BaseScraperWithBrowser { + protected getViewPort(): { width: number, height: number } { + return { + width: 1500, + height: 800, + }; + } + + getLoginOptions(credentials: ScraperCredentials) { + return { + loginUrl: LOGIN_URL, + fields: createLoginFields(credentials), + 
submitButtonSelector: async () => { + const [button] = await this.page.$x("//button[contains(., 'התחבר')]"); + if (button) { + await button.click(); + } + }, + possibleResults: getPossibleLoginResults(), + }; + } + + async fetchData() { + const account = await fetchTransactions(this.page, this.options); + return { + success: true, + accounts: [account], + }; + } +} + +export default BeyahadBishvilhaScraper; diff --git a/src/scrapers/factory.ts b/src/scrapers/factory.ts index a2f6ee1b..aff1a38e 100644 --- a/src/scrapers/factory.ts +++ b/src/scrapers/factory.ts @@ -13,6 +13,7 @@ import MassadScraper from './massad'; import YahavScraper from './yahav'; import { ScaperOptions } from './base-scraper'; import { CompanyTypes } from '../definitions'; +import BeyahadBishvilhaScraper from './beyahad-bishvilha'; export default function createScraper(options: ScaperOptions) { switch (options.companyId) { @@ -24,6 +25,8 @@ export default function createScraper(options: ScaperOptions) { return new HapoalimScraper(options); case CompanyTypes.leumi: return new LeumiScraper(options); + case CompanyTypes.beyahadBishvilha: + return new BeyahadBishvilhaScraper(options); case CompanyTypes.mizrahi: return new MizrahiScraper(options); case CompanyTypes.discount: diff --git a/src/tests/.tests-config.tpl.js b/src/tests/.tests-config.tpl.js index 0274c462..c24b146b 100644 --- a/src/tests/.tests-config.tpl.js +++ b/src/tests/.tests-config.tpl.js @@ -25,7 +25,8 @@ module.exports = { // mizrahi: { username: '', password: ''}, // union: {username:'',password:''} // beinleumi: { username: '', password: ''}, - //yahav: {username: '', nationalID: '', password: ''} + // yahav: {username: '', nationalID: '', password: ''} + // beyahadBishvilha: { id: '', password: ''}, }, companyAPI: { // enable companyAPI to execute tests against the real companies api enabled: true, diff --git a/src/transactions.ts b/src/transactions.ts index 81380f93..8d15d1c6 100644 --- a/src/transactions.ts +++ 
b/src/transactions.ts @@ -32,7 +32,7 @@ export interface Transaction { /** * sometimes called Asmachta */ - identifier?: number; + identifier?: string | number; /** * ISO date string */