Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apple WWDC Scraped Data #230

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions Sample_python_Scripts/Apple WWDC/attending.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import requests
import json
import os
import io
from bs4 import BeautifulSoup
# Python 2/3 text-type shim: prefer the Py2 `unicode` builtin when it
# exists, otherwise keep Py3's `str`.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- defined only on Python 2
except NameError:
    pass


def clean_data(text):
    """Return *text* with every tab character removed."""
    return "".join(text.split("\t"))


# Scrape the WWDC "Attending" page and dump title / topic / description /
# time lists to Data/Attending/attending_data.json.
site = requests.get("https://developer.apple.com/wwdc/attending/")
data = site.content.decode('utf-8')
soup = BeautifulSoup(data, 'lxml')
path = "Apple WWDC/Data/Attending/"

if not os.path.exists(path):
    os.makedirs(path)

title = soup.find_all("h2", {"class": "typography-subsection-headline"})
topic = soup.find_all("strong")
# BUG FIX: the original filter `x != ('typography-caption' and 'date-time')`
# reduced to `x != 'date-time'` because `and` of two truthy strings returns
# the second operand -- so 'typography-caption' paragraphs were never
# excluded.  Exclude both classes, as the two literals clearly intended.
# The trailing [:-2] drops the last two paragraphs (presumably page footer
# boilerplate -- TODO confirm against the live page).
description = soup.find_all(
    "p",
    class_=lambda x: x not in ('typography-caption', 'date-time'))[:-2]
time = soup.find_all("p", {"class": "date-time"})

# Reuse `path` instead of repeating the directory as a second literal.
with io.open(os.path.join(path, 'attending_data.json'),
             'w', encoding='utf8') as outfile:
    Attending = {"title": [], "topic": [],
                 "description": [], "time": []}
    for ele in title:
        Attending["title"].append(ele.text.strip())
    for ele in topic:
        Attending["topic"].append(ele.text.strip())
    for ele in description:
        # Collapse tabs and newlines so each description is one line.
        Attending["description"].append(ele.text.strip().
                                        replace("\t", "").replace("\n", ""))
    for ele in time:
        # Multi-line date/time text becomes "date, time".
        Attending["time"].append(
            clean_data(ele.text.strip()).replace("\n", ", "))

    str_ = json.dumps(Attending, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
46 changes: 46 additions & 0 deletions Sample_python_Scripts/Apple WWDC/consultations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import requests
import json
import os
import io
from bs4 import BeautifulSoup
# Py2/Py3 shim: use the Py2-only `unicode` builtin if present, else `str`.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass


def clean_data(text):
    """Strip all tab characters from *text* and return the result."""
    return "".join(ch for ch in text if ch != "\t")


# Scrape the WWDC "Consultations" page: section headlines, bold topic
# names, each topic's sibling description paragraph, and the date/time
# entries, all dumped to one JSON file.
site = requests.get("https://developer.apple.com/wwdc/consultations/")
data = site.content.decode('utf-8')
soup = BeautifulSoup(data, 'lxml')
path = "Apple WWDC/Data/Consultations/"

if not os.path.exists(path):
    os.makedirs(path)

title = soup.find_all("h2", {"class": "typography-subsection-headline"})
# [:-1] drops the final <strong>, presumably a non-topic element at the
# bottom of the page -- TODO confirm against the live markup.
topic = soup.find_all("strong")[:-1]
time = soup.find_all("p", {"class": "date-time"})

with io.open('Apple WWDC/Data/Consultations/consultations_data.json',
             'w', encoding='utf8') as outfile:
    consultation = {"title": [], "topic": [],
                    "description": [], "time": []}
    for ele in title:
        consultation["title"].append(ele.text.strip())
    for ele in topic:
        consultation["topic"].append(ele.text.strip())
        # Assumes the description text sits two siblings after each
        # <strong> (whitespace text node, then the description).
        # NOTE(review): fragile -- a markup change breaks this; verify.
        consultation["description"].append(
            clean_data(ele.next_sibling.next_sibling).strip())
    # The last section's description is taken from the first <p> following
    # the final headline rather than from a <strong> sibling.
    consultation["description"].append(
        clean_data(title[-1].findNext("p").text.strip()))
    for ele in time:
        # Multi-line date/time text becomes "date, time".
        consultation["time"].append(
            clean_data(ele.text.strip()).replace("\n", ", "))

    str_ = json.dumps(consultation, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
31 changes: 31 additions & 0 deletions Sample_python_Scripts/Apple WWDC/eveningEvents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import json
import os
import io
import requests
from bs4 import BeautifulSoup as BS
# Py2/Py3 shim: fall back to `str` when the Py2 `unicode` builtin is absent.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass
# Scrape the WWDC special-events page and write name / description /
# date-time lists as pretty-printed JSON.
site = requests.get('https://developer.apple.com/wwdc/events/')
data = site.content.decode('utf-8')
Soup = BS(data, 'lxml')

path = "Apple WWDC/Data/Special Events"
if not os.path.exists(path):
    os.makedirs(path)

filename = 'Apple WWDC/Data/Special Events/evening_events_data.json'
with io.open(filename, 'w', encoding='utf8') as outfile:
    events = {
        # Skip decorative <h2> tags whose text is just two newlines.
        'name': [h2.text for h2 in Soup.find_all('h2')
                 if h2.text != "\n\n"],
        # Class-less paragraphs hold the descriptions; flatten whitespace.
        'description': [p.text.replace("\t", "").replace("\n", "")
                        for p in Soup.find_all('p', {'class': None})],
        'date-time': [p.text.replace("\t", "").replace("\n", "")
                      for p in Soup.find_all('p', {'class': 'date-time'})],
    }

    str_ = json.dumps(events, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
37 changes: 37 additions & 0 deletions Sample_python_Scripts/Apple WWDC/getTogether.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
import json
from bs4 import BeautifulSoup
import requests
import io
# Py2/Py3 shim: prefer the Py2 `unicode` builtin; otherwise keep `str`.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass


def clean_data(text):
    """Return a copy of *text* with all tab characters deleted."""
    return "".join(text.split("\t"))


# Scrape the WWDC get-togethers page into parallel title / description /
# location-and-time lists and dump them to JSON.
site = requests.get('https://developer.apple.com/wwdc/get-togethers/')
data = site.content.decode('utf-8')
soup = BeautifulSoup(data, 'lxml')

title = soup.find_all(attrs={'class': 'typography-subsection-headline'})
# Every <p> that is not a date/time entry; the leading one is skipped via
# [1:] (presumably intro boilerplate -- verify against the live page).
description = soup.find_all('p', class_=lambda x: x != 'date-time')[1:]
locationAndTime = soup.find_all('p', attrs={'class': 'date-time'})

path = "Apple WWDC/Data/Get Together/"
if not os.path.exists(path):
    os.makedirs(path)

dictionary = {"title": [], "description": [], "location and time": []}
# zip() stops at the shortest list, keeping the three columns aligned.
for heading, blurb, where_when in zip(title, description, locationAndTime):
    dictionary["title"].append(heading.text)
    dictionary["description"].append(clean_data(blurb.text))
    dictionary["location and time"].append(clean_data(where_when.text))

with io.open('Apple WWDC/Data/Get Together/get_togethers_data.json',
             'w', encoding='utf8') as outfile:
    str_ = json.dumps(dictionary, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
35 changes: 35 additions & 0 deletions Sample_python_Scripts/Apple WWDC/guestSpeakers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json
import os
import io
import requests
import subprocess
from bs4 import BeautifulSoup as BS
# Py2/Py3 shim: use the Py2-only `unicode` builtin if it exists.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass
# Scrape the WWDC guest-speakers page: speaker names, descriptions and
# date/time entries go to JSON; speaker photos are downloaded with wget.
site = requests.get('https://developer.apple.com/wwdc/guest-speakers/')
data = site.content.decode('utf-8')
Soup = BS(data, 'lxml')
path = "Apple WWDC/Data/Guest Speakers/"
if not os.path.exists(path):
    os.makedirs(path)
with io.open('Apple WWDC/Data/Guest Speakers/guest_speakers_data.json',
             'w', encoding='utf8') as outfile:
    guest = {'speakers': [], 'description': [], 'date-time': []}
    for ele in Soup.find_all('p', {'class': 'heading'}):
        guest['speakers'].append(ele.text)
    for ele in Soup.find_all('p', {'class': 'description'}):
        # Flatten tabs/newlines so each description is a single line.
        guest['description'].append(
            ele.text.replace("\t", "").replace("\n", ""))
    for ele in Soup.find_all('p', {'class': 'date-time'}):
        guest['date-time'].append(ele.text.replace("\t", "").replace("\n", ""))

    str_ = json.dumps(guest, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
# [1:] skips the first <img>, presumably the page banner -- TODO confirm.
speaker_pictures = Soup.find_all('img')[1:]
for image in speaker_pictures:
    url = "https://developer.apple.com" + image.get('src')
    # SECURITY FIX: the original interpolated the scraped URL into a
    # shell=True command string, so a crafted `src` attribute could inject
    # arbitrary shell commands.  Pass an argv list with shell=False; no
    # quoting/escaping of the path or user-agent is needed either.
    subprocess.call(
        ['wget', '-P', 'Apple WWDC/Data/Guest Speakers/',
         '--user-agent',
         'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:11.0) '
         'Gecko/20100101 Firefox/11.0',
         url])
44 changes: 44 additions & 0 deletions Sample_python_Scripts/Apple WWDC/schedule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json
import os
import io
from selenium import webdriver
from bs4 import BeautifulSoup
# Py2/Py3 shim: `unicode` exists only on Python 2; default to `str`.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass


def clean_data(text):
    """Delete every tab character from *text*."""
    pieces = text.split("\t")
    return "".join(pieces)


# The schedule page is rendered client-side, so a headless browser is used
# to obtain the final DOM before scraping day / title / venue lists.
# NOTE(review): PhantomJS is unmaintained; consider headless Chrome/Firefox.
driver = webdriver.PhantomJS()
driver.set_window_size(1120, 520)
driver.get("https://developer.apple.com/wwdc/schedule/#/")
data = driver.page_source
# FIX: quit the browser once the page source is captured -- the original
# leaked the PhantomJS process.
driver.quit()
soup = BeautifulSoup(data, 'lxml')
path = "Apple WWDC/Data/schedule/"

if not os.path.exists(path):
    os.makedirs(path)

day = soup.find_all("h4", {"class": "small-caps"})
title = soup.find_all("h4", {"class": "event-item-title"})
venue = soup.find_all("span", {"class": "event-item-byline block smaller"})

with io.open('Apple WWDC/Data/schedule/schedule_data.json',
             'w', encoding='utf8') as outfile:
    # FIX: the dict was copy-pasted from consultations.py and still named
    # `consultation`; renamed to describe what it actually holds.
    schedule = {"day": [], "title": [], "venue": []}
    for ele in title:
        schedule["title"].append(ele.text.strip())
    for ele in day:
        schedule["day"].append(ele.text.strip())
    for ele in venue:
        # Multi-line venue text becomes "venue, detail".
        schedule["venue"].append(
            clean_data(ele.text.strip()).replace("\n", ", "))

    str_ = json.dumps(schedule, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
82 changes: 82 additions & 0 deletions Sample_python_Scripts/Apple WWDC/scholarships.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json
import os
import io
import re
import requests
from bs4 import BeautifulSoup as BS

# Py2/Py3 shim: keep `str` unless the Py2 `unicode` builtin is available.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass
# Scrape the WWDC scholarships page.  For every section headline (h2) and
# sub-headline (h4), collect the text of the sibling nodes that follow it;
# deadline captions are gathered separately.  Result is one JSON object
# keyed by headline text.
site = requests.get('https://developer.apple.com/wwdc/scholarships/')
data = site.content.decode('utf-8')
Soup = BS(data, 'lxml')
path = os.path.join(os.getcwd(), "Data/Scholarships/")
if not os.path.exists(path):
    os.makedirs(path)
with io.open(os.path.join(os.getcwd(), "Data/Scholarships/scholarships.json"),
             'w', encoding='utf8') as outfile:
    scholarships = {}

    # NOTE(review): the regex class [a-z,A-Z] also matches a literal comma;
    # harmless here but probably unintended.
    for ele in Soup.find_all('h2',
                             {'class': re.compile(
                                 "typography-section-headline [a-z,A-Z]*")}):
        scholarships[ele.string] = []
        nextNode = ele
        # Walk every following sibling to the end of the parent, keeping
        # any non-empty flattened text.  NOTE(review): siblings are not
        # stopped at the next h2, so sections may overlap -- verify.
        while nextNode is not None:
            nextNode = nextNode.nextSibling
            if nextNode is None:
                break
            if nextNode.string is None and nextNode.text is not None:
                # Composite tag: use .text (concatenated descendants).
                string = nextNode.text.replace("\t", "").replace("\n", "")
                if len(string) != 0:
                    scholarships[ele.string].append(string)
            else:
                # Plain string node (or single-string tag): use .string.
                string = nextNode.string.replace("\t", "").replace("\n", "")
                if len(string) != 0:
                    scholarships[ele.string].append(string)

    for ele in Soup.find_all('h4',
                             {'class': re.compile(
                                 "typography-subsection-headline [a-z,A-Z]*")}):
        scholarships[ele.string] = []
        nextNode = ele
        # Same sibling walk, but stop at the next h4 sub-headline.
        while nextNode is not None:
            nextNode = nextNode.nextSibling
            if nextNode is None:
                break
            try:
                tag_name = nextNode.name
            except AttributeError:
                # Node without a .name attribute (defensive; NavigableString
                # actually reports name=None, handled below).
                tag_name = ""
            if tag_name != "h4" and tag_name is not None:
                if tag_name == "ul":
                    # Bullet lists: keep each non-empty line separately.
                    scholarships[ele.string].extend(
                        filter(lambda x: len(x) > 0,
                               nextNode.text.split('\n')))
                # NOTE(review): for <ul> nodes this second branch appends
                # the joined text AGAIN after the per-line extend above --
                # looks like an unintended double-append; confirm whether
                # this should be `elif`.
                if nextNode.string is None and nextNode.text is not None:
                    scholarships[ele.string]\
                        .append(nextNode.text.replace("\t", "")
                                .replace("\n", ""))
                else:
                    scholarships[ele.string]\
                        .append(nextNode.string.replace("\t", "")
                                .replace("\n", ""))
            elif tag_name == "h4":
                # Next sub-section reached; stop collecting for this one.
                break

    # Deadline captions are collected page-wide under a fixed key.
    scholarships['Deadline'] = []
    for ele in Soup.find_all('p', {'class': 'typography-caption'}):
        if ele.string is None and ele.text is not None:
            string = ele.text.replace("\t", "").replace("\n", "")
            if len(string) != 0:
                scholarships['Deadline'].append(string)
        else:
            string = ele.string.replace("\t", "").replace("\n", "")
            if len(string) != 0:
                scholarships['Deadline'].append(string)

    str_ = json.dumps(scholarships, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
35 changes: 35 additions & 0 deletions Sample_python_Scripts/Apple WWDC/thirdPartyEvents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json
import os
import io
import requests
from bs4 import BeautifulSoup as BS
# Py2/Py3 shim: `unicode` on Python 2, `str` everywhere else.
to_unicode = str
try:
    to_unicode = unicode  # noqa: F821 -- Python 2 only
except NameError:
    pass
# Scrape the WWDC "more" page for third-party events: name, description,
# location, and a combined "date, day" string per event, saved as JSON.
site = requests.get('https://developer.apple.com/wwdc/more/')
data = site.content.decode('utf-8')
Soup = BS(data, 'lxml')
path = "Apple WWDC/Data/Special Events"
if not os.path.exists(path):
    os.makedirs(path)
filename = 'Apple WWDC/Data/Special Events/third_party_events_data.json'
with io.open(filename, 'w', encoding='utf8') as outfile:
    events = {'name': [], 'description': [], 'location': [], 'date-day': []}
    for ele in Soup.find_all('h4'):
        events['name'].append(ele.text)
    for ele in Soup.find_all('p', {'class': 'description'}):
        # Flatten tabs/newlines into single-line descriptions.
        events['description'].append(
            ele.text.replace("\t", "").replace("\n", ""))
    for ele in Soup.find_all('p', {'class': 'location'}):
        events['location'].append(ele.text.replace("\t", "").replace("\n", ""))
    dates = Soup.find_all('p', {'class': 'date'})
    days = Soup.find_all('p', {'class': 'day'})
    # FIX: pair dates and days with zip() instead of indexing dates[x]
    # inside range(len(days)) -- the old loop raised IndexError whenever
    # the page carried fewer date entries than day entries.
    for date_tag, day_tag in zip(dates, days):
        date = date_tag.text.replace("\t", "").replace("\n", "")
        day = day_tag.text.replace("\t", "").replace("\n", "")
        events['date-day'].append(date + ", " + day)

    str_ = json.dumps(events, indent=2, sort_keys=False,
                      separators=(',', ': '), ensure_ascii=False)
    outfile.write(to_unicode(str_))
Binary file added sample/Apple WWDC.zip
Binary file not shown.
Loading