forked from amolsharma99/Scripts
english.py
103 lines (90 loc) · 2.93 KB
import re
import os
import urllib
import urllib2
from bs4 import BeautifulSoup
import requests
import json
import psycopg2

# Connection to the local Postgres database (not used further in this script).
try:
    conn = psycopg2.connect("dbname='english' user='postgres' host='localhost' password='postgres'")
except psycopg2.Error:
    print "I am unable to connect to the database"

def getSoup(url):
    # Fetch a URL and return its parsed HTML.
    html = urllib2.urlopen(url)
    return BeautifulSoup(html, 'html.parser')

def getMeaning(word):
    # Scrape the Merriam-Webster definitions for a word.
    soup = getSoup('http://www.merriam-webster.com/dictionary/' + word)
    all_definitions = soup.find_all('span', class_='ssens')
    meaning = []
    for p in all_definitions:
        meaning.append(p.text)
    return meaning

def getAntonyms(word):
    # Scrape antonyms from thesaurus.com; returns [] if none are listed.
    soup = getSoup('http://www.thesaurus.com/browse/' + word)
    all_antonyms = soup.find_all('div', class_='list-holder')
    if len(all_antonyms) > 0:
        all_antonyms = all_antonyms[0].find_all('span', class_='text')
    antonyms = []
    for p in all_antonyms:
        antonyms.append(p.text)
    return antonyms

def getSynomyms(word):
    # Scrape synonyms from thesaurus.com (relevancy-ordered list).
    soup = getSoup('http://www.thesaurus.com/browse/' + word)
    all_synomyms = soup.find_all('div', class_='relevancy-list')[0].find_all('span', class_='text')
    synomyms = []
    for p in all_synomyms:
        synomyms.append(p.text)
    return synomyms

def getSynonymsDiscussion(word):
    # Scrape the synonym-discussion section from Merriam-Webster.
    soup = getSoup('http://www.merriam-webster.com/dictionary/' + word)
    return soup.find_all('div', class_='synonyms-discussion')

def getEtymology(word):
    # Scrape the word's etymology from etymonline.com.
    url = 'http://www.etymonline.com/index.php?term=' + word + '&allowed_in_frame=0'
    soup = getSoup(url)
    return soup.find_all('dd', class_='highlight')[0].text

def getMnenomonic(word):
    # Scrape a mnemonic for the word from mnemonicdictionary.com.
    url = 'http://mnemonicdictionary.com/word/' + word
    soup = getSoup(url)
    return soup.find_all('div', class_='span9')[0].text

def getExamples(word):
    # Fetch example sentences from the vocabulary.com corpus API.
    url = 'http://corpus.vocabulary.com/api/1.0/examples.json?maxResult=24&query=' + word
    data = json.load(urllib2.urlopen(url))
    examples = []
    for s in data['result']['sentences']:
        examples.append(s['sentence'])
    return examples

def getImage(word):
    # Download the illustrative image for the word from wordpandit.com.
    url = 'http://wordpandit.com/2012/' + word + '/'
    soup = getSoup(url)
    content = soup.find_all('img', title=word.title())
    imgUrl = content[0]['src']
    urllib.urlretrieve(imgUrl, os.path.basename('/images/' + imgUrl))

def getWordDetails(word, title, desc):
    # Collect everything known about a word into one dict, print it, and return it.
    data = {}
    data['word'] = word
    data['theme-title'] = title
    data['theme-desc'] = desc
    data['meaning'] = getMeaning(word)
    data['Antonyms'] = getAntonyms(word)
    data['Synomyms'] = getSynomyms(word)
    data['etymology'] = getEtymology(word)
    data['mnemonic'] = getMnenomonic(word)
    data['examples'] = getExamples(word)
    for k in data:
        print(k)
        print(data[k])
    return data

def allWordsByGroup(url):
    # Scrape one greedge word-list page: theme title, description and each word.
    soup = getSoup(url)
    title = soup.find_all(id='themetitle')[0].string
    desc = soup.find_all(id='themedesc')[0].string
    wordList = soup.find_all('a', class_='positive')
    for word in wordList:
        # Strip any parenthesised annotation from the word text.
        word = re.sub(r"\(.*?\)", "", word.string)
        wordData = getWordDetails(word, title, desc)

def main():
    # Maximum possible group index is 95.
    for i in range(1, 2):
        url = 'https://www.greedge.com/grewordlist/words/' + str(i)
        allWordsByGroup(url)

main()
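
# Usage sketch (illustrative, not part of the original script): look up a
# single word directly instead of crawling the greedge word-list pages.
# 'laconic', 'sample theme' and 'sample description' are placeholder values;
# the call assumes the scraped sites are reachable and that their markup
# still matches the selectors used above.
#
#   details = getWordDetails('laconic', 'sample theme', 'sample description')
#   print(details['examples'][:3])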