-
Notifications
You must be signed in to change notification settings - Fork 0
/
Yelp_Crawling_Analysis.py
76 lines (50 loc) · 1.74 KB
/
Yelp_Crawling_Analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python
# coding: utf-8
# Code for cleaning ratings, reducing dates to just the year, and finding the mean and median rating
# In[92]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# In[93]:
def clean_and_summarize(reviews, output_csv):
    """Derive rating/year columns for one location, export them, print stats.

    The DataFrame is modified in place: a ``ratingNum`` column (integer taken
    from the first character of each ``rating`` string) and a ``dateYear``
    column (year parsed from ``date``) are added. The enriched frame is then
    written to ``output_csv`` and the mean and median of ``ratingNum`` are
    printed, in that order.

    Args:
        reviews: DataFrame holding a string ``rating`` column and a ``date``
            column that ``pd.DatetimeIndex`` can parse.
        output_csv: Path of the CSV file to write the enriched frame to.

    Raises:
        ValueError: If the first character of a rating cannot be converted
            to an integer (raised by ``astype(int)``).
    """
    # Only the leading character of the rating text is used as the score.
    reviews['ratingNum'] = reviews['rating'].str[0].astype(int)
    # Keep just the year component of each review date.
    reviews['dateYear'] = pd.DatetimeIndex(reviews['date']).year
    # to_csv returns None when given a path, so its result is not kept.
    reviews.to_csv(output_csv)
    # Mean first, then median, one value per line.
    print(reviews.ratingNum.mean())
    print(reviews.ratingNum.median())


# Load the cleaned Nolita crawl from CSV.
nolita = pd.read_csv("Tacombi Nolita Yelp Crawl-Cleaned Data.csv")
# Preview the first 5 rows (only displayed when run as a notebook cell).
nolita.head()
# In[155]:
# Add ratingNum/dateYear to the Nolita data, export it, print mean and median.
clean_and_summarize(nolita, 'nolita.csv')
# In[156]:
# Load the cleaned Bleecker crawl from CSV.
bleeker = pd.read_csv("Tacombi Bleecker Yelp Crawl-Cleaned Data.csv")
# Add ratingNum/dateYear to the Bleecker data, export it, print mean and median.
clean_and_summarize(bleeker, 'bleeker.csv')
# In[ ]: