-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunsplash.py
82 lines (68 loc) · 3.04 KB
/
unsplash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#coding=utf-8
import requests,re,os
from bs4 import BeautifulSoup
# Browser-like HTTP headers; passed as ``headers=headers`` to the
# ``requests.get`` calls in this script.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Referer': 'https://unsplash.com/',
    # Split across adjacent literals for readability; the value is one string.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/46.0.2490.71 Safari/537.36'
    ),
}
def get_max_num():
    """Return the collection number shown on the curated-collections page.

    Fetches ``https://unsplash.com/collections/curated``, reads the text of
    the first ``<h2 class="collection__title">`` element and extracts the
    digits that follow the ``#`` sign.  The digits are returned as a string;
    ``unsplash()`` converts them with ``int()``.

    Raises:
        requests.HTTPError: if the page is answered with an error status.
        ValueError: if the heading is missing or holds no ``#<digits>``.
    """
    collection_page = requests.get("https://unsplash.com/collections/curated", headers=headers)
    # Surface an HTTP failure as such instead of as a parse error further down.
    collection_page.raise_for_status()
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(collection_page.text, "html.parser")
    title = soup.find("h2", class_="collection__title")
    if title is None:
        raise ValueError("collection title heading not found on the curated page")
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    match = re.search(r'#(\d+)', title.text)
    if match is None:
        raise ValueError("no collection number found in title: %r" % title.text)
    return match.group(1)
def mkdir(path):
    """Create directory *path* (including parents) and report the outcome.

    Prints one line saying whether the directory was created or was already
    present.

    Raises:
        OSError: if the directory cannot be created and *path* is not an
            existing directory (for example, it names a regular file).
    """
    # Try first and inspect the failure afterwards: a separate existence
    # check could be invalidated before makedirs() runs.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            # Something other than "directory already there" went wrong.
            raise
        print(path + " is already existed!")
    else:
        print("Create dirictoty " + path + " succeed!")
def download_image(image_url, image_name):
    """Download *image_url* and write it to ``image_name + '.jpeg'``.

    Args:
        image_url: URL to fetch.
        image_name: destination path without the ``.jpeg`` extension.

    Raises:
        requests.RequestException: if the request fails or is answered with
            an error status; no file is created in that case.
    """
    # Fetch before opening the file so a failed request does not leave an
    # empty .jpeg behind.
    response = requests.get(image_url, headers=headers)
    # Do not save an HTTP error page under an image file name.
    response.raise_for_status()
    with open(image_name + '.jpeg', 'wb') as f:
        f.write(response.content)
def _stripped_text(tag):
    """Return the whitespace-stripped text of *tag*, or '' when *tag* is None."""
    return tag.text.strip() if tag is not None else ''


def get_image_description(image_description_url):
    """Build the description suffix used in a photo's file name.

    Fetches the photo page at *image_description_url* and looks up author,
    date, dimensions and location by their CSS classes.  Each field is looked
    up independently and left empty when its element is absent, so one
    missing element does not blank the others.

    Returns:
        ``" Photo by <author> <date> <location> <dimensions>"`` with missing
        fields replaced by empty strings.
    """
    r = requests.get(image_description_url, headers=headers)
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(r.text, "html.parser")

    author = _stripped_text(
        soup.find("span", class_="single-photo__user-heading text-overflow"))
    # The first matching entry is used as the date, the second as the dimensions.
    info = soup.find_all("h3", class_="epsilon heading-zeroed single-info__entry-content")
    date = _stripped_text(info[0]) if len(info) > 0 else ''
    dimensions = _stripped_text(info[1]) if len(info) > 1 else ''
    location = _stripped_text(
        soup.find("h3", class_="single-photo__location text-overflow"))

    return " Photo by " + author + " " + date + " " + location + " " + dimensions
def unsplash(save_path):
    """Download the photos of every curated collection page into *save_path*.

    Walks the collection pages from ``int(get_max_num())`` down to 1.  For
    every ``<a class="photo__image-container">`` link on a page it builds a
    file name from the collection number, the fifth ``/``-separated segment
    of the photo URL and the text returned by ``get_image_description()``,
    then fetches ``<photo url>/download`` via ``download_image()``.
    Progress and a final total are printed.

    Args:
        save_path: directory the images are written to.
    """
    max_num = int(get_max_num())
    # Count completed downloads from zero so the final total is exact.
    downloaded = 0
    for page_num in range(max_num, 0, -1):
        collection_url = "https://unsplash.com/collections/curated/" + str(page_num)
        collection_html = requests.get(collection_url, headers=headers).text
        # Name the parser explicitly so the parse tree does not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(collection_html, "html.parser")
        for link in soup.find_all("a", class_="photo__image-container"):
            image_description_url = "https://unsplash.com" + link['href']
            image_description = get_image_description(image_description_url)
            image_name = ("Collection#" + str(page_num)
                          + " (" + image_description_url.split('/')[4] + ")"
                          + image_description)
            # The description is scraped text; a path separator inside it
            # would make the file name point into a directory that does
            # not exist.
            for separator in (os.sep, os.altsep):
                if separator:
                    image_name = image_name.replace(separator, '_')
            download_url = image_description_url + "/download"
            print("Downloading the " + str(downloaded + 1) + "th image: " + image_name)
            print("From " + download_url)
            download_image(download_url, os.path.join(save_path, image_name))
            downloaded += 1
    print("Finished!")
    print("Total downloaded " + str(downloaded) + " photos")
if __name__ == "__main__":
    # Ask where to store the photos; an empty answer selects ../unsplash.
    answer = str(raw_input("Please Input the save path(Empty to ../unsplash): "))
    save_path = answer or '../unsplash'
    mkdir(save_path)
    unsplash(save_path)