-
Notifications
You must be signed in to change notification settings - Fork 1
/
rhfd.py
executable file
·58 lines (49 loc) · 1.6 KB
/
rhfd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#! /usr/bin/env python
#RHFD: Recursive Http Files Downloader
from lxml import html
import requests
import os, errno
import wget
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
import argparse
#def recursive downloader for apache download pages URLs
#dirUrl is encoded
def download(dirUrl, parentPath):
#create dir
encDirName=dirUrl[:-1].rpartition('/')[2]
downloadDir=parentPath+urllib.parse.unquote(encDirName)+'/'
os.makedirs(downloadDir, exist_ok=True)
print("\nDownloading: "+downloadDir)
#parse page
response = s.get(dirUrl)
page = response.text
tree = html.fromstring(page)
entries = tree.xpath('//td/a/@href')
#iterate over entries
iterLink = iter(entries)
next(iterLink) #skip first link (parent directory)
for entry in iterLink: #iterate over file and directories
if(entry[-1])=='/':
#This is a directory -> Recurse
download(dirUrl+entry, downloadDir)
else:
#This is a file -> Download
destFile=downloadDir+urllib.parse.unquote(entry)
fileURL=dirUrl+entry
print('\nDownloading: '+ urllib.parse.unquote(fileURL))
#print('Downloading: '+ fileURL+" to: "+destFile)
wget.download(fileURL, destFile)
#run from command line
parser = argparse.ArgumentParser(description='Welcome to RHFD: Recursive Http Files Downloader')
parser = argparse.ArgumentParser(epilog='author: DoNcK')
parser.add_argument('url', type=str, help='URL of the directory to download')
argUrl = parser.parse_args().url
if argUrl[-1] == '/':
s = requests.Session()
download(argUrl, './')
print('\n[DONE]')
else:
print('Error: this is not a directory')