Skip to content

Commit

Permalink
added permanent logfiles
Browse files Browse the repository at this point in the history
  • Loading branch information
rayidghani committed Oct 3, 2019
1 parent cf731da commit 2f16ce2
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 42 deletions.
163 changes: 139 additions & 24 deletions app/latteart_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import time
import random
import urllib3
import datetime
import csv
from . import yelp_helper

logging.basicConfig(level=logging.INFO)
Expand All @@ -30,14 +32,14 @@
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()

print(model_file)
with open(model_file, "rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)

tf.import_graph_def(graph_def, name='')
return graph


def load_labels(label_file):
label = []
proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
Expand All @@ -50,9 +52,10 @@ def label_image(image_path, model_dir):
Args:
argv[1]: path to image
argv[2]: directory where trained model is stored
Returns:
Returns a score
Returns a score for the image
Todo:
test with non jpeg images
Expand All @@ -61,18 +64,19 @@ def label_image(image_path, model_dir):
# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()

# Load label file and strip off carriage return
#Load label file and strip off carriage return
label_lines = [line.rstrip() for line
in tf.gfile.GFile(model_dir + "retrained_labels.txt")]
logger.info('Loaded labels %s', label_lines)

logger.info('Loaded labels %s from %s', label_lines, model_dir)

# Unpersist graph from file
with tf.gfile.FastGFile(model_dir + "retrained_graph.pb", 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
_ = tf.import_graph_def(graph_def, name='')

#load_labels_and_graph("retrained_labels.txt", "retrained_graph.pb")

with tf.Session() as sess:
# Feed the image_data as input to the graph and get first prediction
softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
Expand All @@ -86,7 +90,7 @@ def label_image(image_path, model_dir):
# Sort to show labels of first prediction in order of confidence
top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]

# Get prediction score for positive class
# Get prediction score for positive class - round to 2 decimals
positive_score = round(predictions[0][0],2)
logger.info('Score is %s', positive_score)
return positive_score
Expand All @@ -103,7 +107,62 @@ def label_directory(image_path, model_dir, threshold):
Args:
argv[1]: path to image directory
argv[2]: model dir
argv[3]: threshold above which to classify as art
argv[3]: threshold above which to classify as art
Returns:
Returns two numbers: # of latte art images, total # of images
todo:
modify to work with non jpeg images
"""

imgFiles = glob.glob(image_path+'/*.jpg')
# load urls for each image
url_file = image_path + '/log.txt'
url_for_imgfile = dict(line.rstrip('\n').split(',') for line in open(url_file))

#Load label file and strip off carriage return
label_lines = load_labels(model_dir + "retrained_labels.txt")
logger.info('Loaded labels %s from %s', label_lines, model_dir)
graph = tf.Graph()
graph = load_graph(model_dir + "retrained_graph.pb")

#load_labels_and_graph("retrained_labels.txt", "retrained_graph.pb")

with tf.Session(graph=graph) as sess:
# Feed the image_data as input to the graph and get first prediction
softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
img_count = 0
positive_count = 0
score_for_url = {}
output_list= []
for imageFile in imgFiles:
image_data = tf.gfile.FastGFile(imageFile, 'rb').read()
predictions = sess.run(softmax_tensor, \
{'DecodeJpeg/contents:0': image_data})
# Sort to show labels of first prediction in order of confidence
top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
# Get prediction score for positive class
positive_score = round(predictions[0][0],2)
logger.info('Score for %s is %s', imageFile, positive_score)
#positive_score = label_image(imageFile, model_dir)
score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
if (positive_score > threshold):
positive_count+=1
img_count += 1

return score_for_url, positive_count, img_count




def label_directory_old(image_path, model_dir, threshold):
"""Function used to label all images in a directory
Args:
argv[1]: path to image directory
argv[2]: model dir
argv[3]: threshold above which to classify as art
Returns:
Returns two numbers: # of latte art images, total # of images
Expand Down Expand Up @@ -147,14 +206,16 @@ def label_directory(image_path, model_dir, threshold):
score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
if (positive_score > threshold):
positive_count+=1
#score_for_url[url_for_imgfile[os.path.basename(imageFile)]] = positive_score
img_count += 1

return score_for_url, positive_count, img_count

def is_ascii(s):
    """Return True if every character of *s* is a 7-bit ASCII character."""
    return not any(ord(ch) > 127 for ch in s)

def log_business():
    """Placeholder hook for per-business logging; currently a no-op stub.

    Returns:
        int: always 1 (success sentinel).
    """
    return 1

def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgdir, threshold):
"""Function used to get scores for num_of_businesses_to_get businesses in a location
Expand All @@ -169,12 +230,18 @@ def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgd
if location is None:
location = "chicago"

logger.info("loading log file")
datescored, numpositiveimages, numimages = load_logs("bizscores.log")
logger.info('loaded %s lines from log file', len(datescored))

logger.info('Starting to get %s businesses in %s from Yelp', num_of_businesses_to_get, location)
all_bizids = yelp_helper.get_business_ids_from_api(location, num_of_businesses_to_get)

# remove businesses with non ascii characters
clean_bizids = [b for b in all_bizids if is_ascii(b)]
logger.info('Got %s businesses in %s', len(clean_bizids), location)
biz_count = 0


if len(clean_bizids) > 0:
positive_counts = {} #store number of positive images for the business
Expand All @@ -183,28 +250,76 @@ def rank_bizs_in_location(location, num_of_businesses_to_get, model_dir, tmpimgd
for biz in clean_bizids:
bizresponse = yelp_helper.get_business(API_KEY, biz)
bizname = bizresponse['name']
bizalias = bizresponse['alias']
logger.info('Processing %s', bizname)
bizurl = 'http://www.yelp.com/biz/' + biz
num_images = 0
positive_count = 0
logger.info('Getting images for id %s name %s and putting them in %s', biz, bizname, tmpimgdir)
num_images = yelp_helper.get_business_images(biz, tmpimgdir)
logger.info('Labeling %s images in directory %s with threshold %s', num_images, tmpimgdir, threshold)
if num_images:
score_for_url, positive_count, img_count = label_directory(tmpimgdir, model_dir, threshold)

if biz in datescored:
# if this business has already been scored earlier, skip it
# todo: put time limit
positive_count=int(numpositiveimages[biz])
img_count=int(numimages[biz])
logger.info('business %s already scored in %s %s', biz, datescored[biz], numpositiveimages[biz])
positive_counts[bizurl]=positive_count
total_counts[bizurl]= img_count
biz_names[bizurl] = bizname
else:
bizresponse = yelp_helper.get_business(API_KEY, biz)
bizname = bizresponse['name']
logger.info('Processing %s', bizname)
bizurl = 'http://www.yelp.com/biz/' + biz
num_images = 0
positive_count = 0

positive_counts[bizurl]= positive_count
total_counts[bizurl]= num_images
biz_names[bizurl] = bizname
logger.info('%s has %s//%s arts', bizname, positive_count, img_count)
logger.info('Getting images for id %s name %s and putting them in %s', biz, bizname, tmpimgdir)
# check if we need to pass bizid or biz alias
num_images = yelp_helper.get_business_images(bizalias, tmpimgdir)
logger.info('Labeling %s images in directory %s with threshold %s', num_images, tmpimgdir, threshold)
if num_images:
score_for_url, positive_count, img_count = label_directory(tmpimgdir, model_dir, threshold)
else:
positive_count = 0

positive_counts[bizurl]= int(positive_count)
total_counts[bizurl]= num_images
biz_names[bizurl] = bizname

# permanent logging
with open("imgscores.log", "a+") as f:
for imgurl, score in score_for_url.items():
f.write(str(datetime.datetime.today().strftime('%Y-%m-%d')) + ',' + biz + ',' + bizname + ',' + imgurl + ',' + str(score) + '\n')

with open("bizscores.log", "a+", newline='') as f:
writer = csv.writer(f, delimiter=',')
line = [str(datetime.datetime.today().strftime('%Y-%m-%d')),biz ,bizname , str(positive_count), str(img_count)]
writer.writerow(line)
#f.write(str(datetime.datetime.today().strftime('%Y-%m-%d')) + ',' + biz + ',' + bizname + ',' + str(positive_count) + ',' + str(img_count) + '\n')

logger.info('%s has %s out of %s arts', bizname, positive_count, img_count)
wait_time = random.randint(1, 5)
logger.info('waiting %s seconds to process next business...',wait_time)
time.sleep(wait_time)
biz_count += 1
logger.info('Processed %s out of %s businesses', biz_count, len(clean_bizids))
if biz not in datescored:
logger.info('waiting %s seconds to process next business...',wait_time)
time.sleep(wait_time)
return positive_counts, total_counts, biz_names
else:
logger.error('No businesses returned by get_business_ids_from_api', exc_info=True)
return 0;

def load_logs(bizlogfile):
    """Load previously scored businesses from the permanent business log.

    Each row of *bizlogfile* is expected to be CSV with five fields:
    date scored, business id, business name, positive image count,
    total image count (the format written by rank_bizs_in_location).

    Args:
        bizlogfile: path to the business score log (e.g. "bizscores.log").

    Returns:
        Tuple of three dicts, each keyed by business id:
        (date scored, number of positive images, total number of images).
        All three are empty when the log file does not exist yet.
    """
    datescored = dict()
    numpositiveimages = dict()
    numimages = dict()
    try:
        with open(bizlogfile, mode='r') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for rows in csv_reader:
                # Skip blank or malformed lines so one corrupt log entry
                # does not abort the whole load with an IndexError.
                if len(rows) < 5:
                    continue
                datescored[rows[1]] = rows[0]
                numpositiveimages[rows[1]] = rows[3]
                numimages[rows[1]] = rows[4]
    except FileNotFoundError:
        # First run: no log has been written yet; return empty lookups
        # instead of crashing the caller.
        pass
    logger.info('function loaded %s lines from log file', len(datescored))
    return datescored, numpositiveimages, numimages


7 changes: 7 additions & 0 deletions app/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from . import latteart_helpers
import logging
import requests
import os


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -12,6 +14,7 @@
imgdir ='images/'
threshold = 0.6


def is_ascii(s):
    """Return True if every character of *s* is a 7-bit ASCII character."""
    return not any(ord(ch) > 127 for ch in s)

Expand All @@ -22,6 +25,9 @@ def score_imageurl(image_url):
yelp_helper.get_image_from_url(image_url, image_name)
# score image
positive_score = latteart_helpers.label_image(image_name, model_dir)

# log time, image_url, positive_score

return positive_score

def score_yelpbiz(bizid, verbose):
Expand All @@ -31,6 +37,7 @@ def score_yelpbiz(bizid, verbose):
score_for_url, positive_count, img_count = latteart_helpers.label_directory(imgdir, model_dir, threshold)
else:
positive_count = 0

return positive_count, img_count, score_for_url
else:
logger.error('bizid %s has non ascii characters', bizid)
Expand Down
2 changes: 1 addition & 1 deletion app/templates/location.html
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ <h3>Art Finder</h4>
{% if scores %}

{% for item in scores|dictsort(false, 'value')|reverse %}
<a href="{{ item[0] }}">{{ names[item[0]] }}</a> {{ scores[item[0]] }} (out of {{ counts[item[0]] }})<br>
<a href="{{ item[0] }}" target="_blank">{{ names[item[0]] }}</a> {{ scores[item[0]] }} (out of {{ counts[item[0]] }})<br>


{% endfor %}
Expand Down
15 changes: 4 additions & 11 deletions app/yelp_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
from urllib.request import urlopen
from bs4 import BeautifulSoup


from . import creds

# This client code can run on Python 2.x or 3.x. Your imports can be
# simpler if you only need one of those.
try:
Expand All @@ -47,11 +44,7 @@
# You can find them on
# https://www.yelp.com/developers/v3/manage_app

# client_id and client_secret are now deprecated
#CLIENT_ID = creds.login['client_id']
#CLIENT_SECRET = creds.login['app_secret']
API_KEY = creds.login['api_key']
#API_KEY = os.environ.get('API_KEY')
API_KEY = os.environ.get('API_KEY')
if API_KEY:
logger.debug('Loaded Yelp API Key %s', API_KEY)
else:
Expand Down Expand Up @@ -178,7 +171,7 @@ def get_business_images(biz_name,image_download_path):
Returns:
Returns the number of images downloaded.
"""
logger.info('Grabbing images for %s and putting them in %s', biz_name, image_download_path)
logger.info('Downloading images for %s and putting them in %s', biz_name, image_download_path)

# delete if the directory already exists from last run
shutil.rmtree(image_download_path)
Expand All @@ -199,7 +192,7 @@ def get_business_images(biz_name,image_download_path):
page = requests.get(url, verify=False)
soup = BeautifulSoup(page.text, 'html.parser')
photos = soup.findAll ('img', {'class' : 'photo-box-img'}, limit=None)
logger.info('Found %s images for the business overall', len(photos))
logger.info('No drink images found. Getting %s images for the business overall', len(photos))
i=0
if len(photos) > 0:
for photo in photos:
Expand All @@ -212,6 +205,6 @@ def get_business_images(biz_name,image_download_path):
log_file.close()
return i
else:
logger.error('No photos found', exc_info=True)
logger.error('No images found', exc_info=True)
return 0

Binary file modified images/0.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/3.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/4.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 17 additions & 5 deletions images/log.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
0.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/dj4NBFtwzQc1s9S_5s1JRA/30s.jpg
1.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/dj4NBFtwzQc1s9S_5s1JRA/258s.jpg
2.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/nSrubwUAoT-6dRARB4tEow/258s.jpg
3.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/CGH6kgpC9Ra-E49yUaEYRQ/258s.jpg
4.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/4vqq15Mba0djNQneFpKX7w/258s.jpg
0.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/kra3eCscMMmumUl-WV05zA/30s.jpg
1.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/X0Q5LafbOEFTRrrfafeRgQ/258s.jpg
2.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/VlxpM90lZT-Yim3vpSZJuw/258s.jpg
3.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/zQmPkE7Qnt0yvmIVdwn4jg/258s.jpg
4.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/jNcUBvdNbcned-JbS68AJA/258s.jpg
5.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/Dx_6-S_9CwAq0JFQYu23GQ/258s.jpg
6.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/q56cFqxVLk4Z2HrXNqhrag/258s.jpg
7.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/yQI-qQhxPu59U8o9gq3v8g/258s.jpg
8.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/cSwnVK-eajvMLx78HaC3Fw/258s.jpg
9.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/j7H5c6TKYb-FfU3wNOVupw/258s.jpg
10.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/_DwlQ9b2aTT9Kpv5PGCChg/258s.jpg
11.jpg,https://s3-media3.fl.yelpcdn.com/bphoto/GPNoO623Tz0ILuPuV8-n5w/258s.jpg
12.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/HXHvJrI4BQ9Nx7kddMi0jg/258s.jpg
13.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/U0qE3pyo588svSlRu552Ng/258s.jpg
14.jpg,https://s3-media1.fl.yelpcdn.com/bphoto/QxOYeGpePSBK7xC3TLkktQ/258s.jpg
15.jpg,https://s3-media4.fl.yelpcdn.com/bphoto/orCyYwGp7FoDlBhQMn5Yjw/258s.jpg
16.jpg,https://s3-media2.fl.yelpcdn.com/bphoto/SE3bYBaDjSIsPYbVKFONGw/258s.jpg
Binary file modified model_trainer/latteart_model/retrained_graph.pb
Binary file not shown.
2 changes: 1 addition & 1 deletion model_trainer/latteart_model/retrained_labels.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
notart
art
notart

0 comments on commit 2f16ce2

Please sign in to comment.