-
Notifications
You must be signed in to change notification settings - Fork 164
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(beyahad-bishvilha): support new scraper for the histadrut site beyahad bishvilha (#642)
- Loading branch information
Showing 10 changed files with 272 additions and 10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import BeyahadBishvilhaScraper from './beyahad-bishvilha'; | ||
import { | ||
maybeTestCompanyAPI, extendAsyncTimeout, getTestsConfig, exportTransactions, | ||
} from '../tests/tests-utils'; | ||
import { SCRAPERS } from '../definitions'; | ||
import { LoginResults } from './base-scraper-with-browser'; | ||
|
||
// Key for this scraper: passed to maybeTestCompanyAPI, set as the `companyId` option and used when exporting results. | ||
const COMPANY_ID = 'beyahadBishvilha'; // TODO this property should be hard-coded in the provider | ||
// Shared tests configuration; the suite below reads `options` and `credentials` from it. | ||
const testsConfig = getTestsConfig(); | ||
|
||
/**
 * Test suite for the Beyahad Bishvilha scraper.
 *
 * The first test only inspects the SCRAPERS definition; the other two drive a real
 * scraper instance and are registered through maybeTestCompanyAPI.
 */
describe('Beyahad Bishvilha scraper', () => {
  // Both scraping tests build the scraper from the shared test options plus this company id.
  const createScraper = () => new BeyahadBishvilhaScraper({
    ...testsConfig.options,
    companyId: COMPANY_ID,
  });

  beforeAll(() => {
    // The default timeout is 5 seconds per async test, this function extends the timeout value
    extendAsyncTimeout();
  });

  test('should expose login fields in scrapers constant', () => {
    const scraperDefinition = SCRAPERS.beyahadBishvilha;

    expect(scraperDefinition).toBeDefined();
    expect(scraperDefinition.loginFields).toContain('id');
    expect(scraperDefinition.loginFields).toContain('password');
  });

  const testInvalidPassword = maybeTestCompanyAPI(COMPANY_ID, (config) => config.companyAPI.invalidPassword);
  testInvalidPassword('should fail on invalid user/password"', async () => {
    const scraper = createScraper();

    // Deliberately bogus credentials: the scrape is expected to report an invalid password.
    const result = await scraper.scrape({ id: 'e10s12', password: '3f3ss3d' });

    expect(result).toBeDefined();
    expect(result.success).toBeFalsy();
    expect(result.errorType).toBe(LoginResults.InvalidPassword);
  });

  const testScrape = maybeTestCompanyAPI(COMPANY_ID);
  testScrape('should scrape transactions"', async () => {
    const scraper = createScraper();
    const result = await scraper.scrape(testsConfig.credentials.beyahadBishvilha);
    expect(result).toBeDefined();

    // Fold error type and message into one string so a failure shows both in the assertion diff.
    const error = [result.errorType || '', result.errorMessage || ''].join(' ').trim();
    expect(error).toBe('');
    expect(result.success).toBeTruthy();

    exportTransactions(COMPANY_ID, result.accounts || []);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import { Page } from 'puppeteer'; | ||
import moment from 'moment'; | ||
import { BaseScraperWithBrowser, LoginResults, PossibleLoginResults } from './base-scraper-with-browser'; | ||
import { ScaperOptions, ScraperCredentials } from './base-scraper'; | ||
import { Transaction, TransactionStatuses, TransactionTypes } from '../transactions'; | ||
import { pageEval, pageEvalAll, waitUntilElementFound } from '../helpers/elements-interactions'; | ||
import { getDebug } from '../helpers/debug'; | ||
import { filterOldTransactions } from '../helpers/transactions'; | ||
import { | ||
DOLLAR_CURRENCY, | ||
DOLLAR_CURRENCY_SYMBOL, EURO_CURRENCY, | ||
EURO_CURRENCY_SYMBOL, | ||
SHEKEL_CURRENCY, | ||
SHEKEL_CURRENCY_SYMBOL, | ||
} from '../constants'; | ||
|
||
// Logger obtained from the debug helper for this scraper; called below with progress messages. | ||
const debug = getDebug('beyahadBishvilha'); | ||
|
||
// moment format string used to parse the `date` text of each scraped transaction. | ||
const DATE_FORMAT = 'DD/MM/YY'; | ||
// URL handed to the login flow as `loginUrl`. | ||
const LOGIN_URL = 'https://www.hist.org.il/login'; | ||
// URL registered as the LoginResults.Success outcome. | ||
const SUCCESS_URL = 'https://www.hist.org.il/'; | ||
// Page that fetchTransactions navigates to in order to read the card number, balance and transactions. | ||
const CARD_URL = 'https://www.hist.org.il/card/balanceAndUses'; | ||
|
||
// Raw text of one transaction row as read from the card page, before any parsing. | ||
interface ScrapedTransaction { | ||
// Date text (first column of the row); parsed later with DATE_FORMAT. | ||
date: string; | ||
// Description text (fourth column of the row). | ||
description: string; | ||
// Text of the sixth column; not read by convertTransactions. | ||
type: string; | ||
// Amount text (seventh column), parsed later by getAmountData. | ||
chargedAmount: string; | ||
// Identifier text (second column of the row). | ||
identifier: string; | ||
} | ||
|
||
/**
 * Parses an amount string scraped from the page into a numeric amount and a currency.
 *
 * All commas (thousands separators) are stripped before parsing. If the text contains the
 * shekel, dollar or euro symbol, that symbol is removed and the matching currency constant
 * is returned. Otherwise the text is split on a single space and read as
 * "<currency> <amount>".
 *
 * @param amountStr raw amount text as shown on the page
 * @returns the parsed amount (NaN when no number can be read) and the currency
 */
function getAmountData(amountStr: string): { amount: number; currency: string } {
  // A string pattern would only replace the FIRST comma, turning "1,234,567.89" into 1234;
  // the global regex strips every separator.
  const amountStrCln = amountStr.replace(/,/g, '');

  // Checked in the same order as before: shekel, dollar, euro.
  const knownCurrencies: [string, string][] = [
    [SHEKEL_CURRENCY_SYMBOL, SHEKEL_CURRENCY],
    [DOLLAR_CURRENCY_SYMBOL, DOLLAR_CURRENCY],
    [EURO_CURRENCY_SYMBOL, EURO_CURRENCY],
  ];

  for (const [symbol, currency] of knownCurrencies) {
    if (amountStrCln.includes(symbol)) {
      return {
        amount: parseFloat(amountStrCln.replace(symbol, '')),
        currency,
      };
    }
  }

  // No known symbol: fall back to the "<currency> <amount>" layout.
  const parts = amountStrCln.split(' ');
  return {
    amount: parseFloat(parts[1]),
    currency: parts[0],
  };
}
|
||
/**
 * Maps raw scraped rows onto the library's Transaction structure.
 *
 * Every row becomes a Normal / Completed transaction. The single scraped date is used for
 * both `date` and `processedDate`, and the single scraped amount is used for both the
 * original and the charged amount/currency.
 *
 * @param txns raw rows read from the page
 * @returns one Transaction per input row, in the same order
 */
function convertTransactions(txns: ScrapedTransaction[]): Transaction[] {
  debug(`convert ${txns.length} raw transactions to official Transaction structure`);

  const toTransaction = (raw: ScrapedTransaction): Transaction => {
    const { amount, currency } = getAmountData(raw.chargedAmount || '');
    // The page exposes one date per row, so it serves as both transaction and processed date.
    const isoDate = moment(raw.date, DATE_FORMAT).toISOString();

    return {
      type: TransactionTypes.Normal,
      status: TransactionStatuses.Completed,
      date: isoDate,
      processedDate: isoDate,
      originalAmount: amount,
      originalCurrency: currency,
      chargedAmount: amount,
      chargedCurrency: currency,
      description: raw.description || '',
      memo: '',
      identifier: raw.identifier,
    };
  };

  return txns.map((raw) => toTransaction(raw));
}
|
||
|
||
/**
 * Navigates to the card page and scrapes the card number, the balance and the
 * transaction rows, then converts and date-filters the transactions.
 *
 * The effective start date is the later of `options.startDate` and one year ago.
 *
 * @param page browser page used for navigation and DOM evaluation
 * @param options scraper options; only `startDate` is read here
 * @returns the card number, the numeric balance and the filtered transactions
 */
async function fetchTransactions(page: Page, options: ScaperOptions) {
  await page.goto(CARD_URL);
  await waitUntilElementFound(page, '.react-loading.hide', false);

  // Clamp the requested start date so it never reaches back more than one year.
  const oneYearAgo = moment().subtract(1, 'years');
  const requestedStart = options.startDate || oneYearAgo.toDate();
  const startMoment = moment.max(oneYearAgo, moment(requestedStart));

  // Second block of the wallet details holds the card number behind a Hebrew label; strip the label.
  const accountNumber = await pageEval(
    page,
    '.wallet-details div:nth-of-type(2)',
    null,
    (element) => (element as any).innerText.replace('מספר כרטיס ', ''),
  );

  const balance = await pageEval(
    page,
    '.wallet-details div:nth-of-type(4) > span:nth-of-type(2)',
    null,
    (element) => (element as any).innerText,
  );

  debug('fetch raw transactions from page');

  const rawTransactions: (ScrapedTransaction | null)[] = await pageEvalAll<(ScrapedTransaction | null)[]>(
    page,
    '.transaction-container, .transaction-component-container',
    [],
    (items) => items.map((el) => {
      const columns: NodeListOf<HTMLSpanElement> = el.querySelectorAll('.transaction-item > span');
      // Only containers with exactly seven columns are read as transactions; others yield null.
      if (columns.length !== 7) {
        return null;
      }
      return {
        date: columns[0].innerText,
        identifier: columns[1].innerText,
        description: columns[3].innerText,
        type: columns[5].innerText,
        chargedAmount: columns[6].innerText,
      };
    }),
  );
  debug(`fetched ${rawTransactions.length} raw transactions from page`);

  const scrapedRows = rawTransactions.filter((item): item is ScrapedTransaction => Boolean(item));
  const accountTransactions = convertTransactions(scrapedRows);

  debug('filer out old transactions');
  const txns = filterOldTransactions(accountTransactions, startMoment, false);
  // Only the last two characters of the card number are written to the log.
  const accountSuffix = accountNumber.substring(accountNumber.length - 2);
  debug(`found ${txns.length} valid transactions out of ${accountTransactions.length} transactions for account ending with ${accountSuffix}`);

  const { amount } = getAmountData(balance);
  return {
    accountNumber,
    balance: amount,
    txns,
  };
}
|
||
/**
 * Builds the table of login outcomes and the URLs that identify each of them.
 *
 * Only the success outcome has a URL; the remaining outcomes are registered with
 * empty lists until their URLs are known.
 *
 * @returns mapping from login result to its list of URLs
 */
function getPossibleLoginResults(): PossibleLoginResults {
  const urls: PossibleLoginResults = {
    [LoginResults.Success]: [SUCCESS_URL],
    [LoginResults.ChangePassword]: [], // TODO
    [LoginResults.InvalidPassword]: [], // TODO
    [LoginResults.UnknownError]: [], // TODO
  };
  return urls;
}
|
||
/**
 * Pairs each login form input selector with the credential value to type into it.
 *
 * @param credentials credentials supplied by the caller; `id` and `password` are read
 * @returns selector/value pairs for the id and password inputs, in that order
 */
function createLoginFields(credentials: ScraperCredentials) {
  const idField = { selector: '#loginId', value: credentials.id };
  const passwordField = { selector: '#loginPassword', value: credentials.password };
  return [idField, passwordField];
}
|
||
/**
 * Browser-based scraper for the Histadrut "Beyahad Bishvilha" site (www.hist.org.il).
 * Supplies the viewport, the login options and the data-fetching step to the base scraper.
 */
class BeyahadBishvilhaScraper extends BaseScraperWithBrowser {
  /** Viewport size used for this scraper's browser page. */
  protected getViewPort(): { width: number, height: number } {
    return { width: 1500, height: 800 };
  }

  /**
   * Describes how to log in: the login URL, the form fields to fill, how to submit
   * the form and which outcomes to look for.
   *
   * @param credentials credentials used to fill the login form
   */
  getLoginOptions(credentials: ScraperCredentials) {
    // The login button is located by its Hebrew caption via XPath; nothing happens if it is missing.
    const clickLoginButton = async () => {
      const matches = await this.page.$x("//button[contains(., 'התחבר')]");
      const button = matches[0];
      if (button) {
        await button.click();
      }
    };

    return {
      loginUrl: LOGIN_URL,
      fields: createLoginFields(credentials),
      submitButtonSelector: clickLoginButton,
      possibleResults: getPossibleLoginResults(),
    };
  }

  /** Scrapes the single card account and wraps it in a successful result. */
  async fetchData() {
    const account = await fetchTransactions(this.page, this.options);
    const accounts = [account];
    return {
      success: true,
      accounts,
    };
  }
}
|
||
export default BeyahadBishvilhaScraper; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters