-
Notifications
You must be signed in to change notification settings - Fork 1
/
fileinspector.py
216 lines (175 loc) · 5.87 KB
/
fileinspector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# -*- coding: utf-8 -*-
"""
This file is part of fileinspector.
fileinspector is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
fileinspector is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fileinspector. If not, see <http://www.gnu.org/licenses/>.
Created on Wed Mar 2 11:37:36 2016
@author: Daniel Schreij
"""
# Python3 compatibility
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
__version__ = '1.0.3a1'
import sys
import mimetypes
import warnings
try:
import magic
except ImportError as e:
warnings.warn("python-magic could not be imported so its features will be "
"unavailable.\n{}".format(e), ImportWarning)
magic = None
def determine_category(mimetype):
""" Determines the category to which the file can be attributed.
Parameters
----------
mimetype : string
The mimetype specified as <type>/<subtype>
Returns
-------
string : the category or None if no match was found.
"""
if 'image' in mimetype:
return 'image'
elif 'pdf' in mimetype:
return 'pdf'
elif "text/x-" in mimetype:
return 'code'
elif "text/plain" in mimetype:
return 'text'
elif "msword" in mimetype or \
"officedocument.wordprocessingml" in mimetype or \
"opendocument.text" in mimetype:
return 'doc'
elif "powerpoint" in mimetype or \
"presentation" in mimetype:
return 'presentation'
elif "excel" in mimetype or \
"spreadsheet" in mimetype:
return 'spreadsheet'
elif "zip" in mimetype or "x-tar" in mimetype\
or "compressed" in mimetype:
return 'archive'
elif "video" in mimetype:
return 'video'
elif "audio" in mimetype:
return 'audio'
# If nothing matched, simply return False
return None
def determine_type_with_magic(filename, mime=True):
""" Determines the filetype using the python-magic library.
Parameters
----------
filename : string
The path to or name of the file (including extension)
format : string (optional)
The output format of the function.
The default is 'mime', in which case the mimetype is returned
in the format of <type>/<subtype>
Other options:
'verbose' for the standard more wordy python-magic description of files.
Returns
-------
string : the mimetype in the specified format.
"""
if magic is None:
raise ImportError("python-magic is not available. This function cannot be used")
try:
ftype = magic.from_file(filename,mime=mime)
# In some cases, magic doesn't have the from_file() function, which is why
# we also catch AttributeErrors.
except (IOError, AttributeError):
ftype = None
if type(ftype) == bytes:
ftype = ftype.decode('utf-8')
return ftype
def determine_type_with_mimetypes(filename):
""" Determines the filetype using mimetypes.
Parameters
----------
filename : string
The path to or name of the file (including extension)
Returns
-------
string : the determined mimetype as <type>/<subtype> or False if no type
could be determined.
"""
mime, encoding = mimetypes.guess_type(filename)
return mime
def determine_type(filename, output="mime"):
""" Determines the filetype. Tries to use python-magic first, but if this
doesn't work out (because the file for instance cannot be accessed locally,
or python-magic is not available for other reasons), it falls back to the
mimetypes modules, which uses the filename + extension to make an educated
guess about the filetype.
Parameters
----------
filename : string
The path to or name of the file (including extension)
format : string (optional)
The output format of the function.
The default is 'mime', in which case the mimetype is returned
in the format of <type>/<subtype>
Other options are:
- 'xdg' for a freedesktop specification (<type>-<subtype>).
- 'verbose' for the standard python-magic output, if the module is \
available. If not, it defaults back to 'mime'
Returns
-------
found_type : string/boolean
the mimetype in the specified format or None if nothing could be found.
"""
# Initialize ftype as None
ftype = None
# First try to use python-magic to determine the filetype, as it is not
# fooled by incorrect or absent file extensions.
# Only do this is python-magic is available
if not magic is None:
ftype = determine_type_with_magic(filename, (output!="verbose") )
# If python-magic is not available, or it could not determine the filetype,
# use mimetypes
if ftype == None:
ftype = determine_type_with_mimetypes(filename)
# freedesktop doesn't use <type>/<subtype> but <type>-<subtype> as mime
# format. Translate if requested.
if output=="xdg" and not ftype is None:
ftype = translate_to_xdg(ftype)
return ftype
def translate_to_xdg(mimetype):
""" Translates the mimetype into the xdg format of
<type>-<subtype>.
Parameters
----------
mimetype : string
The specified mimetype specified as <type>/<subtype>
Returns
-------
xdg-type : string
the mimetype translated to freedesktop.org format
"""
return mimetype.replace("/","-")
__all__ = ['translate_to_xdg', 'determine_type', 'determine_type_with_mimetypes',
'determine_type_with_magic', 'determine_category']
if __name__ == "__main__":
import os
if len(sys.argv) < 2:
files = filter(lambda x: not x in [".",".."], os.listdir("."))
print("Inspecting files in current folder.\nYou can also check specific files"
" by passing them as arguments.\n")
else:
files = sys.argv[1:]
for f in files:
f_full = os.path.abspath(f)
if(os.path.isdir(f_full)):
continue
print("{}:\t\t{}".format(f, determine_type(f_full,'verbose')))