-
Notifications
You must be signed in to change notification settings - Fork 3
/
get-answer.ps1
58 lines (46 loc) · 1.74 KB
/
get-answer.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
## Get-Answer.ps1 (this header formerly read Get-Answer.msh)
## Use Encarta's Instant Answers to answer your question.
##
## Parameters:
##   question - the question text to look up. When it is omitted, the default
##              value expression throws "Please ask a question.".
param([string] $question = $(throw "Please ask a question."))
## Main
## Looks up the script-level $question through the Encarta results page and
## writes the extracted answer text to the output stream, or the string
## "No answer found." when the expected page markers are not present.
function Main
{
    ## Load System.Web so that HttpUtility is available for URL encoding.
    ## Add-Type replaces the obsolete Assembly.LoadWithPartialName call.
    Add-Type -AssemblyName System.Web

    ## Get the web page into a single string. The [string] cast turns a null
    ## result into an empty string so the marker search below cannot fail on
    ## a null-valued expression.
    $encoded = [System.Web.HttpUtility]::UrlEncode($question)
    $text = [string] (Get-WebPage "http://search.msn.com/encarta/results.aspx?q=$encoded")

    ## Locate the answer section. The markers are literal markup, so compare
    ## ordinally, and only look for the end marker after the start marker so
    ## that the Substring length below can never be negative.
    $startMarker = '<div id="results">'
    $endMarker = '</div></div><h2>Results</h2>'
    $ordinal = [System.StringComparison]::Ordinal

    $startIndex = $text.IndexOf($startMarker, $ordinal)
    $endIndex = -1
    if($startIndex -ge 0)
    {
        $endIndex = $text.IndexOf($endMarker, $startIndex, $ordinal)
    }

    ## Both markers are required; bail out early otherwise.
    if($endIndex -lt 0)
    {
        "No answer found."
        return
    }

    $partialText = $text.Substring($startIndex, $endIndex - $startIndex)

    ## Very fragile, voodoo screen scraping here:
    ## drop whole <a ...>...</a> elements, including their link text
    $regex = "<\s*a\s*[^>]*?href\s*=\s*[`"']*[^`"'>]+[^>]*>.*?</a>"
    $partialText = [Regex]::Replace("$partialText", $regex, "")

    ## Turn block/span boundaries into line breaks before stripping the tags
    $partialText = $partialText -replace "</div>", "`n"
    $partialText = $partialText -replace "</span>", "`n"
    $partialText = Clean-Html $partialText

    ## Collapse every run of consecutive newlines into one (a single-pass
    ## replace of exactly two newlines leaves blank lines behind for runs of
    ## three or more).
    $partialText = $partialText -replace "`n{2,}", "`n"
    $partialText.TrimEnd()
}
## Get a web page
## Downloads $url with System.Net.WebClient and returns the response body as
## a single string. A URL that does not start with "<scheme>://" is prefixed
## with "http://". Throws when no URL is supplied.
function Get-WebPage ($url=$(throw "need to specify the URL to fetch"))
{
    # canonicalize the url
    if ($url -notmatch "^[a-z]+://") { $url = "http://$url" }

    $wc = New-Object System.Net.WebClient
    try
    {
        # $userAgent is not defined anywhere in this script; only send the
        # header when a value is visible from an enclosing scope, instead of
        # adding an empty user-agent header.
        if ($userAgent) { $wc.Headers.Add("user-agent", $userAgent) }

        $wc.DownloadString($url)
    }
    finally
    {
        # WebClient is disposable; release it even when the download throws.
        $wc.Dispose()
    }
}
## Strip HTML markup from a text chunk
## Removes every "<...>" sequence from $htmlInput and returns what is left.
function Clean-Html ($htmlInput)
{
    # A tag is "<", any run of characters other than ">", then ">"
    $tagPattern = "<[^>]*>"
    $stripped = [Regex]::Replace($htmlInput, $tagPattern, "")
    return $stripped
}
## Entry point: run Main dot-sourced (in the current scope rather than a child
## scope). The call sits at the bottom of the file so the function definitions
## above have already been processed when it executes.
. Main