Skip to content

Commit

Permalink
added permanent logfiles
Browse files Browse the repository at this point in the history
  • Loading branch information
rayidghani committed Oct 3, 2019
1 parent cf731da commit 2f16ce2
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 42 deletions.
163 changes: 139 additions & 24 deletions app/latteart_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import time
import random
import urllib3
import datetime
import csv
from . import yelp_helper

logging.basicConfig(level=logging.INFO)
Expand All @@ -30,14 +32,14 @@
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()

print(model_file)
with open(model_file, "rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)

tf.import_graph_def(graph_def, name='')
return graph


def load_labels(label_file):
label = []
proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
Expand All @@ -50,9 +52,10 @@ def label_image(image_path, model_dir):
Args:
argv[1]: path to image
argv[2]: directory where trained model is stored
Returns:
Returns a score
Returns a score for the image
Todo:
test with non jpeg images
Expand All @@ -61,18 +64,19 @@ def label_image(image_path, model_dir):
# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()

# Load label file and strip off carriage return
#Load label file and strip off carriage return
label_lines = [line.rstrip() for line
in tf.gfile.GFile(model_dir + "retrained_labels.txt")]
logger.info('Loaded labels %s', label_lines)

logger.info('Loaded labels %s from %s', label_lines, model_dir)

# Unpersist graph from file
with tf.gfile.FastGFile(model_dir + "retrained_graph.pb", 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
_ = tf.import_graph_def(graph_def, name='')

#load_labels_and_graph("retrained_labels.txt", "retrained_graph.pb")

with tf.Session() as sess:
# Feed the image_data as input to the graph and get first prediction
softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
Expand All @@ -86,7 +90,7 @@ def label_image(image_path, model_dir):
# Sort to show labels of first prediction in order of confidence
top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]

# Get prediction score for positive class
# Get prediction score for positive class - round to 2 decimals
positive_score = round(predictions[0][0],2)
logger.info('Score is %s', positive_score)
return positive_score
Expand All @@ -103,7 +107,62 @@ def label_directory(image_path, model_dir, threshold):
Args:
argv[1]: path to image directory
argv[2]: model dir
argv[3]: threshold above which to classify as art
argv[3]: threshold above which to classify as art
Returns:
Returns two numbers: # of latte art images, total # of images
todo:
modify to work with non jpeg images
"""

imgFiles = glob.glob(image_path+'/*.jpg')
# load urls for each image
url_file = image_path + '/log.txt'
url_for_imgfile = dict(line.rstrip('\n').split(',') for line in open(url_file))

#Load label file and strip off carriage return
label_lines = load_labels(model_dir + "retrained_labels.txt")
logger.info('Loaded labels %s from %s', label_lines, model_dir)
graph = tf.Graph()
graph = load_graph(model_dir + "retrained_graph.pb")

#load_labels_and_graph("retrained_labels.txt", "retrained_graph.pb")

with tf.Session(graph=graph) as sess:
# Feed the image_data as input to the graph and get first prediction
softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
img_count = 0
positive_count = 0
score_for_url = {}
output_list= []
for imageFile in imgFiles:
image_data = tf.gfile.FastGFile(imageFile, 'rb').read()
predictions = sess.run(softmax_tensor, \
{'DecodeJpeg/contents:0': image_data})
# Sort to show labels of first prediction in order of confidence
top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
# Get prediction score for positive class
positive_score = round(predictions[0][0],2)
logger.info('Score for %s is %s', imageFile, positive_score)
#positive_score = label_image(imageFile, model_dir)
score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
if (positive_score > threshold):
positive_count+=1
img_count += 1

return score_for_url, positive_count, img_count




def label_directory_old(image_path, model_dir, threshold):
"""Function used to label all images in a directory
Args:
argv[1]: path to image directory
argv[2]: model dir
argv[3]: threshold above which to classify as art
Returns:
Returns two numbers: # of latte art images, total # of images
Expand Down Expand Up @@ -147,14 +206,16 @@ def label_directory(image_path, model_dir, threshold):
score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
if (positive_score > threshold):
positive_count+=1
#score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
img_count += 1

return score_for_url, positive_count, img_count

def is_ascii(s):
    """Return True if every character of *s* is a 7-bit ASCII character."""
    return not any(ord(ch) > 127 for ch in s)

def log_business():
    """Placeholder hook for per-business logging; currently a no-op stub.

    Returns:
        int: always 1 (success sentinel).
    """
    return 1

def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgdir, threshold):
"""Function used to get scores for num_of_businesses_to_get businesses in a location
Expand All @@ -169,12 +230,18 @@ def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgd
if location is None:
location = "chicago"

logger.info("loading log file")
datescored, numpositiveimages, numimages = load_logs("bizscores.log")
logger.info('loaded %s lines from log file', len(datescored))

logger.info('Starting to get %s businesses in %s from Yelp', num_of_businesses_to_get, location)
all_bizids = yelp_helper.get_business_ids_from_api(location, num_of_businesses_to_get)

# remove businesses with non ascii characters
clean_bizids = [b for b in all_bizids if is_ascii(b)]
logger.info('Got %s businesses in %s', len(clean_bizids), location)
biz_count = 0


if len(clean_bizids) > 0:
positive_counts = {} #store number of positive images for the business
Expand All @@ -183,28 +250,76 @@ def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgd
for biz in clean_bizids:
bizresponse = yelp_helper.get_business(API_KEY, biz)
bizname = bizresponse['name']
bizalias = bizresponse['alias']
logger.info('Processing %s', bizname)
bizurl = 'http://www.yelp.com/biz/' + biz
num_images = 0
positive_count = 0
logger.info('Getting images for id %s name %s and putting them in %s', biz, bizname, tmpimgdir)
num_images = yelp_helper.get_business_images(biz, tmpimgdir)
logger.info('Labeling %s images in directory %s with threshold %s', num_images, tmpimgdir, threshold)
if num_images:
score_for_url, positive_count, img_count = label_directory(tmpimgdir, model_dir, threshold)

if biz in datescored:
# if this business has already been scored earlier, skip it
# todo: put time limit
positive_count=int(numpositiveimages[biz])
img_count=int(numimages[biz])
logger.info('business %s already scored in %s %s', biz, datescored[biz], numpositiveimages[biz])
positive_counts[bizurl]=positive_count
total_counts[bizurl]= img_count
biz_names[bizurl] = bizname
else:
bizresponse = yelp_helper.get_business(API_KEY, biz)
bizname = bizresponse['name']
logger.info('Processing %s', bizname)
bizurl = 'http://www.yelp.com/biz/' + biz
num_images = 0
positive_count = 0

positive_counts[bizurl]= positive_count
total_counts[bizurl]= num_images
biz_names[bizurl] = bizname
logger.info('%s has %s//%s arts', bizname, positive_count, img_count)
logger.info('Getting images for id %s name %s and putting them in %s', biz, bizname, tmpimgdir)
# check if we need to pass bizid or biz alias
num_images = yelp_helper.get_business_images(bizalias, tmpimgdir)
logger.info('Labeling %s images in directory %s with threshold %s', num_images, tmpimgdir, threshold)
if num_images:
score_for_url, positive_count, img_count = label_directory(tmpimgdir, model_dir, threshold)
else:
positive_count = 0

positive_counts[bizurl]= int(positive_count)
total_counts[bizurl]= num_images
biz_names[bizurl] = bizname

# permanent logging
with open("imgscores.log", "a+") as f:
for imgurl, score in score_for_url.items():
f.write(str(datetime.datetime.today().strftime('%Y-%m-%d')) + ',' + biz + ',' + bizname + ',' + imgurl + ',' + str(score) + '\n')

with open("bizscores.log", "a+", newline='') as f:
writer = csv.writer(f, delimiter=',')
line = [str(datetime.datetime.today().strftime('%Y-%m-%d')),biz ,bizname , str(positive_count), str(img_count)]
writer.writerow(line)
#f.write(str(datetime.datetime.today().strftime('%Y-%m-%d')) + ',' + biz + ',' + bizname + ',' + str(positive_count) + ',' + str(img_count) + '\n')

logger.info('%s has %s out of %s arts', bizname, positive_count, img_count)
wait_time = random.randint(1, 5)
logger.info('waiting %s seconds to process next business...',wait_time)
time.sleep(wait_time)
biz_count += 1
logger.info('Processed %s out of %s businesses', biz_count, len(clean_bizids))
if biz not in datescored:
logger.info('waiting %s seconds to process next business...',wait_time)
time.sleep(wait_time)
return positive_counts, total_counts, biz_names
else:
logger.error('No businesses returned by get_business_ids_from_api', exc_info=True)
return 0;

def load_logs(bizlogfile):
    """Load previously scored businesses from the permanent business log.

    Each row of *bizlogfile* is expected to be CSV with five fields:
    date scored, business id, business name, positive image count,
    total image count (the format written by rank_bizs_in_location).

    Args:
        bizlogfile: path to the business score log (e.g. "bizscores.log").

    Returns:
        Tuple of three dicts, each keyed by business id:
        (date scored, number of positive images, total number of images).
        All three are empty when the log file does not exist yet.
    """
    datescored = dict()
    numpositiveimages = dict()
    numimages = dict()
    try:
        with open(bizlogfile, mode='r') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for rows in csv_reader:
                # Skip blank or malformed lines so one corrupt log entry
                # does not abort the whole load with an IndexError.
                if len(rows) < 5:
                    continue
                datescored[rows[1]] = rows[0]
                numpositiveimages[rows[1]] = rows[3]
                numimages[rows[1]] = rows[4]
    except FileNotFoundError:
        # First run: no log has been written yet; return empty lookups
        # instead of crashing the caller.
        pass
    logger.info('function loaded %s lines from log file', len(datescored))
    return datescored, numpositiveimages, numimages


7 changes: 7 additions & 0 deletions app/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from . import latteart_helpers
import logging
import requests
import os


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -12,6 +14,7 @@
imgdir ='images/'
threshold = 0.6


def is_ascii(s):
    """Return True if every character of *s* is a 7-bit ASCII character."""
    return not any(ord(ch) > 127 for ch in s)

Expand All @@ -22,6 +25,9 @@ def score_imageurl(image_url):
yelp_helper.get_image_from_url(image_url, image_name)
# score image
positive_score = latteart_helpers.label_image(image_name, model_dir)

# log time, image_url, positive_score

return positive_score

def score_yelpbiz(bizid, verbose):
Expand All @@ -31,6 +37,7 @@ def score_yelpbiz(bizid, verbose):
score_for_url, positive_count, img_count = latteart_helpers.label_directory(imgdir, model_dir, threshold)
else:
positive_count = 0

return positive_count, img_count, score_for_url
else:
logger.error('bizid %s has non ascii characters', bizid)
Expand Down
2 changes: 1 addition & 1 deletion app/templates/location.html
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ <h3>Art Finder</h4>
{% if scores %}

{% for item in scores|dictsort(false, 'value')|reverse %}
<a href="{{ item[0] }}">{{ names[item[0]] }}</a> {{ scores[item[0]] }} (out of {{ counts[item[0]] }})<br>
<a href="{{ item[0] }}" target="_blank">{{ names[item[0]] }}</a> {{ scores[item[0]] }} (out of {{ counts[item[0]] }})<br>


{% endfor %}
Expand Down
15 changes: 4 additions & 11 deletions app/yelp_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup


from . import creds

# This client code can run on Python 2.x or 3.x. Your imports can be
# simpler if you only need one of those.
try:
Expand All @@ -47,11 +44,7 @@
# You can find them on
# https://www.yelp.com/developers/v3/manage_app

# client_id and client_secret are now deprecated
#CLIENT_ID = creds.login['client_id']
#CLIENT_SECRET = creds.login['app_secret']
API_KEY = creds.login['api_key']
#API_KEY = os.environ.get('API_KEY')
API_KEY = os.environ.get('API_KEY')
if API_KEY:
logger.debug('Loaded Yelp API Key %s', API_KEY)
else:
Expand Down Expand Up @@ -178,7 +171,7 @@ def get_business_images(biz_name,image_download_path):
Returns:
Returns the number of images downloaded.
"""
logger.info('Grabbing images for %s and putting them in %s', biz_name, image_download_path)
logger.info('Downloading images for %s and putting them in %s', biz_name, image_download_path)

# delete if the directory already exists from last run
shutil.rmtree(image_download_path)
Expand All @@ -199,7 +192,7 @@ def get_business_images(biz_name,image_download_path):
page = requests.get(url, verify=False)
soup = BeautifulSoup(page.text, 'html.parser')
photos = soup.findAll ('img', {'class' : 'photo-box-img'}, limit=None)
logger.info('Found %s images for the business overall', len(photos))
logger.info('No drink images found. Getting %s images for the business overall', len(photos))
i=0
if len(photos) > 0:
for photo in photos:
Expand All @@ -212,6 +205,6 @@ def get_business_images(biz_name,image_download_path):
log_file.close()
return i
else:
logger.error('No photos found', exc_info=True)
logger.error('No images found', exc_info=True)
return 0

Binary file modified images/0.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/3.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/4.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 17 additions & 5 deletions images/log.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
0.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/dj4NBFtwzQc1s9S_5s1JRA/30s.jpg
1.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/dj4NBFtwzQc1s9S_5s1JRA/258s.jpg
2.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/nSrubwUAoT-6dRARB4tEow/258s.jpg
3.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/CGH6kgpC9Ra-E49yUaEYRQ/258s.jpg
4.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/4vqq15Mba0djNQneFpKX7w/258s.jpg
0.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/kra3eCscMMmumUl-WV05zA/30s.jpg
1.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/X0Q5LafbOEFTRrrfafeRgQ/258s.jpg
2.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/VlxpM90lZT-Yim3vpSZJuw/258s.jpg
3.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/zQmPkE7Qnt0yvmIVdwn4jg/258s.jpg
4.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/jNcUBvdNbcned-JbS68AJA/258s.jpg
5.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/Dx_6-S_9CwAq0JFQYu23GQ/258s.jpg
6.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/q56cFqxVLk4Z2HrXNqhrag/258s.jpg
7.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/yQI-qQhxPu59U8o9gq3v8g/258s.jpg
8.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/cSwnVK-eajvMLx78HaC3Fw/258s.jpg
9.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/j7H5c6TKYb-FfU3wNOVupw/258s.jpg
10.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/_DwlQ9b2aTT9Kpv5PGCChg/258s.jpg
11.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/GPNoO623Tz0ILuPuV8-n5w/258s.jpg
12.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/HXHvJrI4BQ9Nx7kddMi0jg/258s.jpg
13.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/U0qE3pyo588svSlRu552Ng/258s.jpg
14.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/QxOYeGpePSBK7xC3TLkktQ/258s.jpg
15.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/orCyYwGp7FoDlBhQMn5Yjw/258s.jpg
16.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/SE3bYBaDjSIsPYbVKFONGw/258s.jpg
Binary file modified model_trainer/latteart_model/retrained_graph.pb
Binary file not shown.
2 changes: 1 addition & 1 deletion model_trainer/latteart_model/retrained_labels.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
notart
art
notart

0 comments on commit 2f16ce2

Please sign in to comment.