-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
70 lines (55 loc) · 2.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pickle
from gensim.models.keyedvectors import KeyedVectors
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import argparse
import json
import subprocess
# Command-line interface: a single required positional argument naming the
# word to look for; its value is kept in `input_word` for the rest of the
# script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "word",
    help="this is the word you want to skip to",
)
args = parser.parse_args()
input_word = args.word
# Load the pickled word-vector model used for similarity scoring
# (presumably a gensim KeyedVectors instance -- confirm against the script
# that produced the pickle).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so 'glove_model.pickle' must only ever come from a trusted source.
filename = 'glove_model.pickle'
# The context manager guarantees the handle is closed even if unpickling
# raises, which the manual open()/close() pair did not.
with open(filename, 'rb') as infile:
    glove_model = pickle.load(infile)
#lis = [['hey',0,1],['matrix', 0, 1],['row', 0 ,1],['matrix',1, 0],['addition',1 ,1],['algebra',1 ,2],['convolution',2,3],['matrix',2 ,3],['google',2,3],['forward',1,4],['matrix',2,3],['is',3,4]]
# Read the transcript from data.json and build `l`, a list of
# [content, start_time, end_time] triples (times converted to float) for
# every entry of results.items that has usable timestamps.
with open('data.json') as r:
    dat = json.load(r)
a = dat['results']['items']
n = len(a)
l = []
for item in a:
    # The first alternative's text is taken as the item's content.
    content = item["alternatives"][0]["content"]
    try:
        start = float(item["start_time"])
        end = float(item["end_time"])
    except (KeyError, TypeError, ValueError):
        # Items with a missing or non-numeric start/end time cannot be
        # seeked to, so they are skipped. Only these expected errors are
        # caught; anything else (e.g. KeyboardInterrupt) propagates.
        continue
    l.append([content, start, end])
#lis = [['hey','0','1'],['matrix', '0', '1'],['row', '0' ,'1'],['matrix','1', '0'],['addition','1' ,'1'],['algebra','1' ,'2'],['convolution','2','3'],['matrix','2' ,'3'],['google','2','3'],['forward','1','4'],['matrix','2','3'],['is','3','4']]
# Accumulators shared with the rest of the script:
#   filtered_list -- [token, start_time, end_time] for every non-stop-word
#   pos           -- indices into filtered_list where input_word occurs
#   counter       -- one similarity score per window between occurrences
filtered_list, pos, counter = [], [], []
distance, count = 0, 0

# Build the English stop-word set once; it does not change between items,
# so rebuilding it inside the loop was wasted work.
stop_words = set(stopwords.words('english'))

# Tokenize each transcript entry and keep the tokens that are not stop
# words, each tagged with the start/end time of the entry it came from.
for word in l:
    example_sent = word[0]
    for w in word_tokenize(example_sent):
        if w not in stop_words:
            filtered_list.append([w, word[1], word[2]])
# Record the index within filtered_list of every token that exactly equals
# input_word (the comparison is case-sensitive). enumerate() replaces the
# hand-maintained running counter.
pos.extend(
    index
    for index, element in enumerate(filtered_list)
    if element[0] == input_word
)
# Keep the module-level counter at the value the manual loop left it with.
count = len(filtered_list)
# Score each window of tokens lying strictly between two consecutive
# occurrences of input_word by its mean similarity to input_word, then open
# the video at the occurrence that starts the best-scoring window.
if not pos:
    # Nothing to seek to; previously this surfaced as an opaque
    # "max() arg is an empty sequence" ValueError.
    raise SystemExit("'" + input_word + "' was not found in the transcript")

for window_start, window_end in zip(pos, pos[1:]):
    window = filtered_list[window_start + 1:window_end]
    distance = 0
    for entry in window:
        try:
            distance = distance + glove_model.similarity(entry[0], input_word)
        except KeyError:
            # A word missing from the model's vocabulary contributes
            # nothing to the score (assumes the model raises KeyError for
            # unknown words, as gensim KeyedVectors does).
            continue
    # Average over the window length. The earlier code divided by the inner
    # loop variable (an absolute index into filtered_list), which was
    # undefined or stale for an empty window and penalised later windows.
    counter.append(distance / len(window) if window else 0)

if counter:
    maximum_value = max(counter)
    best_window = counter.index(maximum_value)
else:
    # Only one occurrence exists, so there is no window to rank; seek to it.
    best_window = 0

start_time = filtered_list[pos[best_window]][1]
# Pass an argument list instead of a shell string so no shell parsing is
# involved in building the command line.
subprocess.call(['vlc', '--start-time=' + str(start_time), 'vh.mp4'])