-
Notifications
You must be signed in to change notification settings - Fork 0
/
web_scraper.ps1
60 lines (51 loc) · 1.95 KB
/
web_scraper.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Work relative to the script's own folder so the emails-*.csv files land next to it.
# $PSScriptRoot is empty when the code is pasted into an interactive session; skip the
# directory change in that case instead of failing on an empty path.
if ($PSScriptRoot) {
    Set-Location -LiteralPath $PSScriptRoot
}

# Employee search landing page: http://www.wccnet.edu/search/employee/search/search/
# System.Web supplies [System.Web.HttpUtility], used later to URL-encode department names.
Add-Type -AssemblyName System.Web

# Drive Internet Explorer through COM automation with dialogs suppressed.
$ie = New-Object -ComObject InternetExplorer.Application
$ie.Silent = $true
$ie.Navigate2("http://www.wccnet.edu/search/employee/search/search/")

# Busy alone can read $false before the document has finished loading, so also wait
# for ReadyState 4 (READYSTATE_COMPLETE) before touching the DOM.
while ($ie.Busy -or $ie.ReadyState -ne 4) {
    Start-Sleep -Seconds 1
}

$doc = $ie.Document
$departmentElements = $doc.getElementById("department")

# Enumerate the children of the "department" element and keep every non-empty value.
# Collecting the loop output avoids re-allocating the array on each "+=".
$departments = @(
    foreach ($option in $departmentElements) {
        if ($option.value) {
            $option.value
        }
    }
)

if ($departments.Count -eq 0) {
    Write-Warning "No departments were found on the employee search page."
}
# Alternate department selection, kept for reference (disabled):
<#
$filter = @(
    "Business & Computer Technologies"
    "Industrial Technology"
    "Math, Science & Engineering Technology"
    "Hum, Soc & Behav Sciences"
    "Health Sciences"
    "PR & Marketing"
    "Learning Resources"
)
#>

# Active department selection: only departments whose value appears in this list
# are processed by the scraping loop.
$filter = @(
    "Administration & Finance"
    "Business & Computer Technologies"
    "Entrepreneurship Center"
    "Pooled Business Services - SBDC"
    "Small Business Development Ctr"
    "PR & Marketing"
)
# Loop through the departments and gather e-mail addresses.
# For every department named in $filter, open its listing page, click each entry open,
# wait for the entry's mailto: link to appear, and append the address to
# ./emails-<department>.csv (also echoed to the console).
# The try/finally guarantees the hidden Internet Explorer process is closed even when
# a step throws part-way through.
try {
    foreach ($dep in $departments) {
        if ($dep -notin $filter) {
            continue
        }

        $csvPath = "./emails-${dep}.csv"

        # Start from an empty file. Set-Content creates the file when it is missing,
        # whereas Clear-Content reports an error on the first run for a department.
        Set-Content -LiteralPath $csvPath -Value $null -Force

        Write-Host ""
        Write-Host "Department: [${dep}]"

        # NOTE(review): UrlEncode emits '+' for spaces (form-style encoding); this is kept
        # as-is because the site was evidently queried this way - confirm it is intended.
        $ie.Navigate2("http://www.wccnet.edu/search/employee/department/" + [System.Web.HttpUtility]::UrlEncode(${dep}) + "/search/search/")

        # Busy alone can read $false before loading completes; also require
        # ReadyState 4 (READYSTATE_COMPLETE).
        while ($ie.Busy -or $ie.ReadyState -ne 4) {
            Start-Sleep -Seconds 1
        }

        $emailElements = $ie.Document.getElementsByClassName('more')
        foreach ($detail in $emailElements) {
            # The clickable node sits four siblings before the 'more' container.
            $detail.previousSibling.previousSibling.previousSibling.previousSibling.click()

            # Poll until the mailto: link shows up inside the container, but give up after
            # 30 seconds so an entry without an address cannot hang the whole run.
            # -Milliseconds is used because -Seconds is an integer on Windows PowerShell,
            # where ".1" rounds to 0 and turns the wait into a hot spin.
            $email = $null
            $deadline = (Get-Date).AddSeconds(30)
            do {
                $email = ($detail.getElementsByTagName('a') | Where-Object { $_.protocol -eq 'mailto:' }).pathname
                if ($null -ne $email) {
                    break
                }
                Start-Sleep -Milliseconds 100
            } while ((Get-Date) -lt $deadline)

            if ($null -eq $email) {
                Write-Warning "No e-mail link appeared for an entry in [${dep}]; skipping it."
                continue
            }

            Add-Content -Value $email -LiteralPath $csvPath -Force
            Write-Host $email
        }
    }
}
finally {
    # Always shut the browser down; otherwise an invisible iexplore.exe is left running.
    if ($ie) {
        $ie.Quit()
    }
}