-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcloud.py
89 lines (77 loc) · 3.49 KB
/
cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from requests import get
from config import wordsDB, artistsDB
from etl import db_connection
from boto3.dynamodb.conditions import Attr
def wordCount(lyrics):
    """Count how often each whitespace-separated word occurs in ``lyrics``.

    Words are produced by ``str.split()`` with no arguments; no case folding
    or punctuation stripping is applied.

    Args:
        lyrics: Text to tokenize. A falsy value (``None`` or ``""``) is
            treated as "no lyrics".

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences, keyed in order of first appearance. ``{}`` when there
        are no words.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    if not lyrics:
        return {}
    # Counter tallies in one pass; calling list.count() once per word was
    # quadratic in the number of words. Converted back to a plain dict so
    # callers keep receiving the same type as before.
    return dict(Counter(lyrics.split()))
def artistsToDynamo():
    """Copy every row of the ``artists`` table into ``artistsDB``.

    Rows come from ``db_connection`` via ``SELECT * FROM artists`` and are
    unpacked as exactly three columns. For each row an item is written with
    the artist name, the third column as ``cleaned_lyrics`` and a
    ``word_count`` map computed from that same text by ``wordCount``.
    """
    artistRows = db_connection.execute('SELECT * FROM artists').fetchall()
    for name, _unusedLyrics, lyrics in artistRows:
        # The second column is not uploaded (a 'kaggle_lyrics' attribute for
        # it is currently disabled).
        record = {
            'artist_name': name,
            'cleaned_lyrics': lyrics,
            'word_count': wordCount(lyrics),
        }
        artistsDB.put_item(Item=record)
def wordsToDynamo():
    """Copy every row of the ``words`` table into ``wordsDB``.

    Rows come from ``db_connection`` via ``SELECT * FROM words`` and are
    unpacked as exactly four columns, written to the item attributes
    ``word``, ``syllables``, ``rhyming_words`` and ``datamuse_searched``
    in that order.
    """
    wordRows = db_connection.execute('SELECT * FROM words').fetchall()
    for text, syllableTotal, rhymes, wasSearched in wordRows:
        record = {
            'word': text,
            'syllables': syllableTotal,
            'rhyming_words': rhymes,
            'datamuse_searched': wasSearched,
        }
        wordsDB.put_item(Item=record)
def getWordFromDynamoWords(getWord):
    """Look up ``getWord`` in ``wordsDB`` by its ``word`` key.

    Returns:
        The stored item when the ``get_item`` response contains an
        ``'Item'`` entry, otherwise ``None``.
    """
    response = wordsDB.get_item(Key={'word': getWord})
    if 'Item' not in response:
        return None
    return response['Item']
def insertIntoDynamoWords(thisWord, syllableCount=None, rhymeList=None, searched=False):
    """Insert ``thisWord`` into ``wordsDB`` or merge new details into its record.

    If no record exists for ``thisWord`` a new item is written with the given
    values. Otherwise only the attributes that gain information are updated:

    * ``syllables`` is set when the stored value is falsy or missing and
      ``syllableCount`` is truthy;
    * ``datamuse_searched`` is set to ``True`` when the stored flag is falsy
      or missing and ``searched`` is truthy;
    * ``rhyming_words`` is replaced by the union of stored and new rhymes
      when ``rhymeList`` contains at least one rhyme not already stored.

    Args:
        thisWord: Key of the record (attribute ``word``).
        syllableCount: Syllable count to store, or ``None`` if unknown.
        rhymeList: Rhymes to store/merge; ``None`` means no rhymes.
        searched: Whether Datamuse has been queried for this word.

    Returns:
        None. Progress is reported with ``print``.
    """
    # ``None`` replaces the former shared mutable default ``[]``; callers that
    # omit the argument still get an empty rhyme list.
    newRhymes = [] if rhymeList is None else rhymeList

    existing = getWordFromDynamoWords(thisWord)
    if not existing:
        print('inserting:', thisWord)
        wordsDB.put_item(
            Item={
                'word': thisWord,
                'syllables': syllableCount,
                'rhyming_words': newRhymes,
                'datamuse_searched': searched,
            }
        )
        return

    updateFields = []
    expressionAttributeValues = {}

    # .get() tolerates records that lack an attribute instead of raising
    # KeyError and aborting the whole run.
    if not existing.get('syllables') and syllableCount:
        updateFields.append('syllables=:s')
        expressionAttributeValues[':s'] = syllableCount

    if not existing.get('datamuse_searched') and searched:
        updateFields.append('datamuse_searched=:d')
        expressionAttributeValues[':d'] = True

    # A missing or null rhyme attribute is treated as "no rhymes stored".
    # NOTE(review): assumes stored rhyming_words is a collection of strings —
    # confirm for rows imported from the SQL `words` table.
    storedRhymes = set(existing.get('rhyming_words') or [])
    # Compare by membership rather than by length: a length comparison misses
    # new rhymes whenever the stored list contains duplicates.
    if not storedRhymes.issuperset(newRhymes):
        updateFields.append('rhyming_words=:r')
        # Sorted so the stored list is deterministic between runs.
        expressionAttributeValues[':r'] = sorted(storedRhymes.union(newRhymes))

    if updateFields:
        print('updating:', thisWord, updateFields)
        wordsDB.update_item(
            Key={'word': thisWord},
            UpdateExpression='SET ' + ','.join(updateFields),
            ExpressionAttributeValues=expressionAttributeValues,
        )
def getRhymes(responseItems):
    """Fetch rhymes from the Datamuse API for each record and store them.

    For every record in ``responseItems`` the record's ``word`` is sent to
    Datamuse (``rel_rhy`` query, up to 1000 results). The word itself is then
    inserted/updated with the returned rhyme list and marked as searched, and
    each returned rhyme is inserted/updated with its syllable count and the
    same rhyme list.

    Args:
        responseItems: Iterable of mappings that each contain a ``'word'`` key.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-success HTTP status from Datamuse.
    """
    for rhymeRecord in responseItems:
        word = rhymeRecord['word']
        # Passing the word through ``params`` URL-encodes it, so words that
        # contain spaces, '&', '#', etc. no longer corrupt the query string.
        # The timeout keeps one stalled request from hanging the whole run.
        response = get(
            'https://api.datamuse.com/words',
            params={'max': 1000, 'rel_rhy': word},
            timeout=30,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error body.
        response.raise_for_status()
        rhymes = response.json()
        rhymeList = [rhyme['word'] for rhyme in rhymes]
        insertIntoDynamoWords(word, rhymeList=rhymeList, searched=True)
        # NOTE(review): every rhyme is stored with the full rhymeList, which
        # contains the rhyme itself but not `word` — confirm this is intended.
        for rhyme in rhymes:
            # .get(): a result without 'numSyllables' is stored with an
            # unknown syllable count rather than raising KeyError.
            insertIntoDynamoWords(rhyme['word'], rhyme.get('numSyllables'), rhymeList)
def getWords():
    """Process every ``wordsDB`` item whose ``datamuse_searched`` is ``False``.

    The table is scanned page by page; each page's ``'Items'`` are handed to
    ``getRhymes`` before the next page is requested. Scanning continues from
    ``'LastEvaluatedKey'`` until a response no longer contains one.
    """
    scanArgs = {'FilterExpression': Attr('datamuse_searched').eq(False)}
    while True:
        page = wordsDB.scan(**scanArgs)
        getRhymes(page['Items'])
        if 'LastEvaluatedKey' not in page:
            break
        # Resume the next scan right after the last key of this page.
        scanArgs['ExclusiveStartKey'] = page['LastEvaluatedKey']
# Script entry point: executed whenever this module is run or imported.
# Any Exception raised while processing is reported and swallowed; the
# closing message is printed whether or not processing succeeded.
try:
    getWords()
except Exception as error:
    print('Something broke...', error)
finally:
    print('done :)')