-
Notifications
You must be signed in to change notification settings - Fork 2
/
cashe.py
1250 lines (1085 loc) · 41.1 KB
/
cashe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#! /usr/bin/python -tt
__version__ = '0.99.3'
# Numeric form of __version__, e.g. (0, 99, 3), convenient for comparisons.
# A generator expression is used so that no loop variable is left behind in
# the module namespace (a list comprehension leaks it on Python 2).
__version_info__ = tuple(int(part) for part in __version__.split('.'))
import sys # Only needed for stderr and exit()
import os
import os.path
import shutil
import tempfile
import errno
# Alternative spellings accepted for checksum types -> canonical name.
_checksum_aliases = {'sha': 'sha1',
                     'sha2': 'sha256'}

# Length, in hex digits, of the hexdigest for each supported checksum type.
_checksum_d_len = {"md5": 32,
                   "sha1": 40,
                   "sha256": 64,
                   "sha512": 128}

try:
    import hashlib
    # Add sha384 ?? When we don't support it above?
    _available_checksums = set(['md5', 'sha1', 'sha256', 'sha512'])
except ImportError:
    # Python-2.4.z ... gah!
    import sha
    import md5
    _available_checksums = set(['md5', 'sha1'])

    class hashlib:
        """ Minimal stand-in for the hashlib module (md5 and sha1 only). """

        @staticmethod
        def new(algo):
            if algo == 'md5':
                return md5.new()
            if algo == 'sha1':
                return sha.new()
            raise ValueError("Bad checksum type")

# Some checksum types might be disabled at runtime, so probe each one and
# drop those that cannot be instantiated.  Iterate over a copy because the
# set is modified inside the loop.
for ctype in list(_available_checksums):
    try:
        hashlib.new(ctype)
    except Exception:
        sys.stderr.write('Checksum type %s disabled\n' % repr(ctype))
        _available_checksums.remove(ctype)

# Choose the default checksum: the strongest of sha256/sha1 that works.
for ctype in 'sha256', 'sha1':
    if ctype in _available_checksums:
        _default_checksums = [ctype]
        break
else:
    raise ImportError('broken hashlib')
del ctype
def _listdir(D):
    """ Call os.listdir(), but return an empty list when the directory is
        missing, is not a directory, or cannot be read. Any other OSError is
        re-raised. """
    try:
        return os.listdir(D)
    except OSError as err:
        if err.errno not in (errno.ENOENT, errno.ENOTDIR, errno.EACCES):
            raise
    return []
def _unlink_f(filename):
    """ Call os.unlink, in the spirit of "rm -f": return True if the file was
        removed and False if it could not be removed because it is missing
        (ENOENT) or removal is not permitted (EPERM, EACCES, EROFS). Any other
        OSError is re-raised. """
    ignorable = (errno.ENOENT, errno.EPERM, errno.EACCES, errno.EROFS)
    try:
        os.unlink(filename)
    except OSError as err:
        if err.errno in ignorable:
            return False
        raise
    return True
def _try_rmdir(dirname):
    """ Call os.rmdir, but don't die if the dir. isn't empty.

    :param dirname: a string specifying the directory to remove
    :return: True if the directory was removed, False if it is not empty.
             Any other OSError (eg. the directory is missing) is re-raised.
    """
    try:
        os.rmdir(dirname)
    except OSError as e:
        # POSIX lets rmdir() report a non-empty directory as either
        # ENOTEMPTY or EEXIST, depending on the platform.
        if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
            raise
        return False
    return True
def _stat_f(filename, ignore_EACCES=False):
    """ Call os.stat() and return its result, or None when the path does not
        exist (ENOENT / ENOTDIR). With ignore_EACCES set, a permission error
        also yields None. Any other OSError is re-raised. """
    try:
        st = os.stat(filename)
    except OSError as err:
        missing = err.errno in (errno.ENOENT, errno.ENOTDIR)
        denied = ignore_EACCES and err.errno == errno.EACCES
        if missing or denied:
            return None
        raise
    return st
def _link_xdev(src, dst):
    """ Hard link src to dst, replacing dst if it already exists.

    :param src: a string specifying the existing file to link from
    :param dst: a string specifying the path of the link to create
    :return: True if dst is now a hard link to src; False if a link cannot
             be made because src and dst are on different devices (EXDEV) or
             src has too many links (EMLINK), so the caller should copy.
             Any other OSError is re-raised.
    """
    try:
        os.link(src, dst)
    except OSError as e:
        if e.errno == errno.EXDEV:
            return False
        if e.errno == errno.EMLINK: # FIXME: should probably start new?
            return False
        if e.errno != errno.EEXIST:
            raise
    else:
        return True

    # dst already exists: link to a unique temporary name in the same
    # directory and then move that over dst.  mkstemp() is only used to
    # reserve a unique name; link() needs the name to be absent, so the
    # placeholder file is removed again before linking.
    fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(dst))
    os.close(fd)
    _unlink_f(tmpname)
    if not _link_xdev(src, tmpname):
        return False

    # From man 2 rename:
    #   If oldpath and newpath are existing hard links referring
    #   to the same file, then rename() does nothing, and returns
    #   a success status.
    # ...yes, this is stupid.  So remove dst first.
    _unlink_f(dst)
    try:
        os.rename(tmpname, dst)
    except OSError:
        # Don't leave the temporary link behind in the destination dir.
        _unlink_f(tmpname)
        raise
    return True
def _copy_atomic(src, dst):
    """ Copy src to dst so that dst is never seen half written.

    The data is copied (via shutil.copy) to a temporary file in the
    directory of dst, which is then renamed over dst.  The directory is
    created first if it does not exist.  If the copy or the rename fails the
    temporary file is removed and the error is re-raised.

    :param src: a string specifying the file to copy from
    :param dst: a string specifying the path to create/replace
    """
    dname = os.path.dirname(dst)
    try:
        fd, tmpname = tempfile.mkstemp(dir=dname)
    except OSError as e:
        if e.errno == errno.ENOENT:
            # Destination directory is missing: create it and start over.
            os.makedirs(dname)
            return _copy_atomic(src, dst)
        raise
    # Only the unique name is needed, shutil.copy() opens the file itself.
    os.close(fd)

    renamed = False
    try:
        shutil.copy(src, tmpname)
        os.rename(tmpname, dst)
        renamed = True
    finally:
        if not renamed:
            try:
                os.unlink(tmpname)
            except OSError:
                pass
class Checksums:
    """ Generate checksum(s), on given pieces of data. Producing the
        Length and the result(s) when complete. """

    def __init__(self, checksums=None, ignore_missing=False, ignore_none=False):
        """ Setup one hash object per requested checksum type.

        :param checksums: a list of checksum type names (aliases from
                          _checksum_aliases are accepted), or None for the
                          module default
        :param ignore_missing: a boolean, skip unavailable checksum types
                               instead of raising
        :param ignore_none: a boolean, don't raise if no checksum type at
                            all ended up being usable
        """
        # NOTE(review): MiscError is not defined in the visible part of this
        # module; confirm it is defined or imported elsewhere in the file.
        if checksums is None:
            checksums = _default_checksums
        self._sumalgos = []
        self._sumtypes = []
        self._len = 0

        done = set()
        for sumtype in checksums:
            # Same alias table as the rest of the module uses.
            sumtype = _checksum_aliases.get(sumtype, sumtype)
            if sumtype in done:
                continue

            if sumtype in _available_checksums:
                sumalgo = hashlib.new(sumtype)
            elif ignore_missing:
                continue
            else:
                raise MiscError('Error Checksumming, bad checksum type %s'
                                % sumtype)
            done.add(sumtype)
            self._sumtypes.append(sumtype)
            self._sumalgos.append(sumalgo)
        if not done and not ignore_none:
            raise MiscError('Error Checksumming, no valid checksum type')

    def __len__(self):
        return self._len

    # Note that len(x) is assert limited to INT_MAX, which is 2GB on i686.
    length = property(fget=lambda self: self._len)

    def update(self, data):
        """ Feed data to every hash object, and count its length. """
        self._len += len(data)
        for sumalgo in self._sumalgos:
            sumalgo.update(data)

    def read(self, fo, size=2**16):
        """ Read up to size from the file object, feed it to update() and
            return it. """
        data = fo.read(size)
        self.update(data)
        return data

    def hexdigests(self):
        """ Return a dict of checksum type => hexdigest, for the data so far. """
        ret = {}
        for sumtype, sumdata in zip(self._sumtypes, self._sumalgos):
            ret[sumtype] = sumdata.hexdigest()
        return ret

    def hexdigest(self, checksum=None):
        """ Return the hexdigest for one checksum type. With no type given
            the first configured type is used, or None is returned if there
            isn't one. An unconfigured type raises KeyError. """
        if checksum is None:
            if not self._sumtypes:
                return None
            checksum = self._sumtypes[0]
        checksum = _checksum_aliases.get(checksum, checksum)
        return self.hexdigests()[checksum]

    def digests(self):
        """ Return a dict of checksum type => binary digest, for the data so
            far. """
        ret = {}
        for sumtype, sumdata in zip(self._sumtypes, self._sumalgos):
            ret[sumtype] = sumdata.digest()
        return ret

    def digest(self, checksum=None):
        """ Same as hexdigest(), but for the binary digest. """
        if checksum is None:
            if not self._sumtypes:
                return None
            checksum = self._sumtypes[0]
        checksum = _checksum_aliases.get(checksum, checksum)
        return self.digests()[checksum]
class AutoFileChecksums:
    """ File object wrapper that checksums everything read through it.
        read() is overridden to feed the data to a Checksums() instance
        (available as .checksums), every other attribute is looked up on the
        wrapped file object. """

    def __init__(self, fo, checksums, ignore_missing=False, ignore_none=False):
        self._fo = fo
        self.checksums = Checksums(checksums,
                                   ignore_missing=ignore_missing,
                                   ignore_none=ignore_none)

    def __getattr__(self, attr):
        # Only called for names not found on the wrapper itself.
        wrapped = self._fo
        return getattr(wrapped, attr)

    def read(self, size=-1):
        """ Read from the wrapped file object, checksumming the data. """
        sums = self.checksums
        return sums.read(self._fo, size)
def _file2hexdigest(checksum_type, filename, datasize=None, utime=None):
    """ Checksum the contents of a file.

    :param checksum_type: a string specifying the type of checksum
    :param filename: a string specifying the path of the file to read
    :param datasize: if given, stop reading as soon as more than this many
                     bytes were read (the result then covers only the data
                     read so far)
    :param utime: if given, an (atime, mtime) tuple to put back on the file
                  after reading it, failures to do so are ignored
    :return: the hexdigest, or None if the file could not be read
    """
    data = Checksums([checksum_type])
    CHUNK = 1024 * 8
    try:
        # Binary mode: the checksum must be over the raw bytes.
        fo = open(filename, 'rb')
        try:
            while data.read(fo, CHUNK):
                if datasize is not None and data.length > datasize:
                    break
        finally:
            fo.close()
        if utime is not None:
            try:
                os.utime(filename, utime)
            except Exception:
                # Best effort only, the checksum is still good.
                pass
    except Exception:
        # Deliberately broad: any failure means "no checksum available".
        return None
    return data.hexdigest(checksum_type)
def _valid_checksum_data(checksum_data):
    """ Return True if every character is a lower case hex digit (so the
        empty string is valid), False otherwise. """
    hexdigits = "0123456789abcdef"
    for char in checksum_data:
        if char not in hexdigits:
            return False
    return True
class CASheObj(object):
    """ A checksum type and its hexdigest, validated and normalized: the
        type has aliases resolved and the hexdigest is lower cased. Two
        objects are equal when both the type and the hexdigest are equal. """

    __slots__ = ['checksum_type', 'checksum_data']

    def __init__(self, checksum_type, checksum_data):
        """ Validate and store the checksum.

        :param checksum_type: a string specifying the type of checksum,
                              Eg. md5, sha256
        :param checksum_data: a string specifying the hexdigest of the checksum.
        :raises TypeError: for an unknown type, a hexdigest of the wrong
                           length or one with non-hex characters
        """
        checksum_type = _checksum_aliases.get(checksum_type, checksum_type)
        if checksum_type not in _checksum_d_len:
            raise TypeError("Not a valid Checksum Type: %s" % checksum_type)
        expected_len = _checksum_d_len[checksum_type]
        if len(checksum_data) != expected_len:
            raise TypeError("Not a valid Checksum Length: %s (%d != %d)" %
                            (checksum_type, len(checksum_data), expected_len))
        checksum_data = checksum_data.lower()
        if not _valid_checksum_data(checksum_data):
            raise TypeError("Not a valid Checksum: %s (%s)" %
                            (checksum_type, checksum_data))
        self.checksum_type = checksum_type
        self.checksum_data = checksum_data

    def __eq__(self, other):
        # Duck typed: anything with both attributes can be compared, for
        # anything else let Python fall back to its default comparison
        # instead of raising AttributeError.
        try:
            return (self.checksum_type == other.checksum_type and
                    self.checksum_data == other.checksum_data)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so without this two equal
        # objects would still compare as "not equal".
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def x__ne__x(self, other):
        # Historical name, kept for anything that calls it directly.
        if self == other:
            return False
        return True

    def __hash__(self):
        # Keep hashing consistent with __eq__.
        return hash((self.checksum_type, self.checksum_data))

    def __str__(self):
        return "%s:%s" % (self.checksum_type, self.checksum_data)

    def __repr__(self):
        return "<%s : %s (%s)>" % (self.__class__.__name__, str(self),
                                   hex(id(self)))
class CASheFileObj(CASheObj):
    """ A checksummed object stored as a file below a CAShe root directory.

        The file lives at <root>/<checksum_type>/<first 4 hex digits>/<hexdigest>.
        The result of stat() and the existence flag are cached on the
        instance: "del obj.size" (or any other stat property) drops the
        cached stat() result, "del obj.exists" drops both. """

    __slots__ = ['_exists', '_filename', '_stat', 'link', 'root']

    def __init__(self, root, checksum_type, checksum_data, *args, **kwargs):
        CASheObj.__init__(self, checksum_type, checksum_data, *args, **kwargs)
        self.root = root
        # Default for the link argument of save()/load().
        self.link = True

    def __len__(self):
        " Same as .size "
        return self.size

    def __nonzero__(self):
        """ Always True, even if the len() is unknown (and thus 0). """
        return True
    # Python 3 spelling of __nonzero__.
    __bool__ = __nonzero__

    def _getFilename(self):
        if getattr(self, "_filename", None) is None:
            self._filename = "%s/%s/%s/%s" % (self.root, self.checksum_type,
                                              self.checksum_data[:4],
                                              self.checksum_data)
        return self._filename
    filename = property(fget=lambda self: self._getFilename(),
                        doc="Full path to filename for the cached object")

    def _getDirname(self):
        return "%s/%s/%s" % (self.root, self.checksum_type,
                             self.checksum_data[:4])
    dirname = property(fget=lambda self: self._getDirname(),
                       doc="Full path to dirname for the cached object")

    def _getCheckedFilename(self):
        # Reading the file to checksum it changes the atime, so pass the
        # cached times along to have them put back afterwards.
        checksum_data = _file2hexdigest(self.checksum_type, self.filename,
                                        utime=(self.atime, self.mtime))
        if checksum_data is None or checksum_data != self.checksum_data:
            # Unreadable or corrupt: drop it from the cache.
            self.unlink()
            return None
        self.exists = True
        return self.filename
    checked_filename = property(fget=lambda self: self._getCheckedFilename(),
                                doc="Full path to filename for the cached object, checked")

    def _getExists(self):
        if getattr(self, "_exists", None) is None:
            # A successful stat() records the existence as a side effect.
            # The size itself must not be the answer, as an existing but
            # empty file would then look like it doesn't exist.
            self.size
        return bool(getattr(self, "_exists", None))

    def _setExists(self, value):
        if not value:
            # Forget what we know, so the next access looks at the file.
            if hasattr(self, "_exists"):
                del self._exists
            self._delStatVal()
        else:
            self._exists = value
        return value
    exists = property(fget=lambda self: self._getExists(),
                      fset=lambda self, value: self._setExists(value),
                      fdel=lambda self: self._setExists(None),
                      doc="Does the checksummed object exist in the cache (cached)")

    def _delStatVal(self):
        self._stat = None

    def _getStatVal(self, mem, zero=0):
        """ Return member mem of the (cached) stat() result for the file, or
            zero if the file doesn't exist. """
        if getattr(self, "_stat", None) is None:
            self._stat = _stat_f(self.filename)
        if self._stat is None:
            return zero
        self.exists = True
        return getattr(self._stat, mem)

    def _getSize(self):
        return self._getStatVal("st_size")
    size = property(fget=lambda self: self._getSize(),
                    fdel=lambda self: self._delStatVal(),
                    doc="Size of the checksummed object in the cache (cached)")

    def _getATime(self):
        return self._getStatVal("st_atime")
    atime = property(fget=lambda self: self._getATime(),
                     fdel=lambda self: self._delStatVal(),
                     doc="Access time of the checksummed object in the cache (cached)")

    def _getCTime(self):
        return self._getStatVal("st_ctime")
    ctime = property(fget=lambda self: self._getCTime(),
                     fdel=lambda self: self._delStatVal(),
                     doc="Change time of the checksummed object in the cache (cached)")

    def _getMTime(self):
        return self._getStatVal("st_mtime")
    mtime = property(fget=lambda self: self._getMTime(),
                     fdel=lambda self: self._delStatVal(),
                     doc="Modified time of the checksummed object in the cache (cached)")

    def _getNlink(self):
        return self._getStatVal("st_nlink")
    nlink = property(fget=lambda self: self._getNlink(),
                     fdel=lambda self: self._delStatVal(),
                     doc="Number of links to the checksummed object in the cache (cached)")

    def _getIno(self):
        return self._getStatVal("st_ino")
    st_ino = property(fget=lambda self: self._getIno(),
                      fdel=lambda self: self._delStatVal(),
                      doc="Inode of underlying checksummed object in the cache (cached)")

    def _getDev(self):
        return self._getStatVal("st_dev")
    st_dev = property(fget=lambda self: self._getDev(),
                      fdel=lambda self: self._delStatVal(),
                      doc="Device of underlying checksummed object in the cache (cached)")

    def save(self, filename, checksum=True, link=None):
        """ Save the file, as an object, into the CAShe storage.

        :param filename: a string specifying the path to link/read from
        :param checksum: a boolean specifying if we should perform a
                         checksum of the data (default True)
        :param link: should we try using link to store the data
        :return: the path of the object in the cache, or None if the
                 checksum was requested and did not match (the object is
                 then removed from the cache again)
        """
        if link is None:
            link = self.link

        try:
            if not link:
                tst = False
            else:
                tst = _link_xdev(filename, self.filename)
        except OSError as e:
            cache_dir = os.path.dirname(self.filename)
            # ENOENT is either the cache directory or the source file.  Only
            # the former can be fixed here; if the directory is already
            # there report the original error for the missing source.
            if e.errno != errno.ENOENT or os.path.isdir(cache_dir):
                raise
            os.makedirs(cache_dir)
            return self.save(filename, checksum=checksum, link=link)

        if not tst:
            _copy_atomic(filename, self.filename)

        if checksum:
            return self.checked_filename # Sets exists internally
        self.exists = True
        return self.filename

    def load(self, filename, checksum=False, link=None):
        """ Load the object, from the CAShe storage, to a file.

        :param filename: a string specifying the path to link/write to
        :param checksum: a boolean specifying if we should perform a
                         checksum of the data (default False)
        :param link: should we try using link to retrieve the data
        :return: None if the object is not available; the path of the object
                 in the cache if filename was hard linked to it; filename if
                 the data was copied
        """
        if checksum: # FIXME: This can load it twice ... meh.
            if self.checked_filename is None:
                return None

        if link is None:
            link = self.link

        src = self.filename
        try:
            if link and _link_xdev(src, filename):
                return src
        except OSError as e:
            if e.errno == errno.ENOENT:
                return None
            raise

        try:
            _copy_atomic(src, filename)
        except (IOError, OSError) as e:
            # Same answer as the link path gives for an object that isn't
            # in the cache.
            if e.errno == errno.ENOENT and not os.path.exists(src):
                return None
            raise
        self.exists = True
        return filename

    def get(self, *args, **kwargs):
        " Same as .load() "
        return self.load(*args, **kwargs)

    def put(self, *args, **kwargs):
        " Same as .save() "
        return self.save(*args, **kwargs)

    def unlink(self):
        """ Remove the checksummed object from the cache. """
        if _unlink_f(self.filename):
            # Also remove the sub-directory, if that was the last object.
            _try_rmdir(self.dirname)
        self.exists = False
class CAShe(object):
    """ A content addressed store of files, below a single directory.

        Objects are files named after their checksum, see CASheFileObj for
        the layout. An optional "config" file in the root directory holds
        the limits used by cleanup(). """

    # __slots__ = ['_objs', 'path', 'link']
    def __init__(self, path="."):
        """ :param path: a string specifying the root directory of the
                         storage, it is created if it does not exist """
        self.path = path
        if not os.path.exists(path):
            os.makedirs(path)
        # Registry of the objects handed out: type => hexdigest => object.
        self._objs = {}
        for T in _checksum_d_len:
            self._objs[T] = {}
        # Default .link value for the objects created by get().
        self.link = True

    def __contains__(self, other):
        """ Has an object with this checksum been handed out by this
            instance. Note that this only looks at the in-memory registry,
            not at the files on disk. """
        T = other.checksum_type
        if T not in self._objs:
            return False
        return other.checksum_data in self._objs[T]

    def get(self, checksum_type, checksum_data):
        """ Get an object for the specified checksum.

        :param checksum_type: a string specifying the type of checksum,
                              Eg. md5, sha256
        :param checksum_data: a string specifying the hexdigest of the checksum.
        """
        T = _checksum_aliases.get(checksum_type, checksum_type)
        if T not in self._objs:
            raise TypeError("Not a valid Checksum Type: %s" % T)

        known = self._objs[T]
        obj = known.get(checksum_data)
        if obj is None:
            obj = CASheFileObj(self.path, checksum_type, checksum_data)
            obj.link = self.link
            # The object normalizes (lower cases) the hexdigest, so it has
            # to be registered, and looked up, under the normalized value.
            obj = known.setdefault(obj.checksum_data, obj)
        return obj

    def rm(self, obj):
        """ Remove an object from the cache.

        :param obj: an object returned by .get()
        """
        obj.unlink()
        # Tolerate objects that aren't (or are no longer) registered.
        self._objs.get(obj.checksum_type, {}).pop(obj.checksum_data, None)

    def ls(self, checksum_type=None):
        """ Yield all objects stored in the cache.

        :param checksum_type: a string specifying the type of checksum, or None
                              for all checksums. Eg. md5, sha256.
        """
        checksum_type = _checksum_aliases.get(checksum_type, checksum_type)
        for T in sorted(self._objs):
            if checksum_type is not None and checksum_type != T:
                continue

            subdirname = "%s/%s" % (self.path, T)
            for subfilename in _listdir(subdirname):
                # Sub-directories are the first 4 hex digits of the data.
                if len(subfilename) != 4:
                    continue
                if not _valid_checksum_data(subfilename):
                    continue

                dirname = "%s/%s" % (subdirname, subfilename)
                for filename in _listdir(dirname):
                    # Skip anything that isn't a hexdigest for this dir.
                    if not filename.startswith(subfilename):
                        continue
                    if len(filename) != _checksum_d_len[T]:
                        continue
                    if not _valid_checksum_data(filename):
                        continue
                    yield self.get(T, filename)

    def _get_config_def(self):
        """ Return the default (lo, hi, age, sort_by) configuration: sizes
            in bytes, age in seconds. """
        lo = 500 * 1000 * 1000
        hi = 2 * 1000 * 1000 * 1000
        age = 8 * 60 * 60 * 24
        sort_by = "atime"
        return (lo, hi, age, sort_by)

    @staticmethod
    def _scaled_int(val, units):
        """ Parse "<number>[<unit>]" using the unit => multiplier dict.
            Returns an int, or None if the text isn't a usable number. """
        mul = 1
        if val and val[-1] in units:
            mul = units[val[-1]]
            val = val[:-1]
        try:
            return int(float(val) * mul)
        except (ValueError, OverflowError):
            # Not a number at all, or nan/inf which can't become an int.
            return None

    def _get_config(self):
        """ Return the (lo, hi, age, sort_by) configuration, from the
            "config" file in the root of the storage.

            The file has "key = value" lines, lines starting with # are
            comments and anything unparsable is ignored. Keys are: age
            (seconds, or with a w/d/h/m suffix); time (atime, ctime or
            mtime); older/lo/low and newer/hi/high (bytes, or with a
            k/m/g/t/p suffix in either case, as powers of 1000). Anything
            not configured has the value from _get_config_def(). """
        try:
            fo = open(self.path + "/config")
            try:
                lines = fo.readlines()
            finally:
                fo.close()
        except (IOError, OSError, UnicodeError):
            return self._get_config_def()

        age_units = {'w': 60*60*24*7, 'd': 60*60*24, 'h': 60*60, 'm': 60}
        size_units = {}
        for power, letter in enumerate("kmgtp"):
            size_units[letter] = 1000 ** (power + 1)
            size_units[letter.upper()] = 1000 ** (power + 1)

        lo, hi, age, sort_by = self._get_config_def()
        for line in lines:
            line = line.lstrip()
            if not line or line[0] == '#':
                continue
            vals = line.split('=')
            if len(vals) != 2:
                continue # ignore errors ftw
            key = vals[0].strip()
            val = vals[1].strip()

            if key == 'age':
                num = self._scaled_int(val, age_units)
                if num is not None:
                    age = num
            elif key == 'time':
                val = val.lower()
                if val in ("atime", "ctime", "mtime"):
                    sort_by = val
            elif key in ('older', 'lo', 'low'):
                num = self._scaled_int(val, size_units)
                if num is not None:
                    lo = num
            elif key in ('newer', 'hi', 'high'):
                num = self._scaled_int(val, size_units)
                if num is not None:
                    hi = num

        # The hi watermark can't be below the lo one.
        if hi < lo:
            hi = lo
        return (lo, hi, age, sort_by)

    @staticmethod
    def _is_new(obj, sort_by, age, now):
        """ Is the sort_by time of the object within age seconds of now. """
        if getattr(obj, sort_by) < (now - age):
            return False
        return True

    def cleanup(self):
        """ Remove objects from the cache to bring it within the configured
            limits (the "config" file at the root of the cashe).

            Objects with more than one link are in use and never removed.
            The rest are "old" or "new", depending on the configured age:
            nothing is removed while the total is under the lo watermark;
            old objects are removed, oldest first, to get down to it; if the
            new objects alone reach the lo watermark all old objects are
            removed, and new objects are removed, oldest first, only to get
            down to the hi watermark.

            Returns a tuple of (number of objects removed, bytes removed).
        """
        import time

        (lo, hi, age, sort_by) = self._get_config()

        # http://www.grantjenks.com/docs/sortedcontainers/ ??
        def _rm_objs(objs, size, cutoff):
            """ Remove the oldest objects until size is within cutoff. """
            deleted_num = 0
            deleted_size = 0
            # Newest first, so that .pop() takes the oldest.
            objs.sort(key=lambda x: getattr(x, sort_by), reverse=True)
            while objs and size > cutoff:
                obj = objs.pop()
                obj_size = obj.size
                size -= obj_size
                deleted_num += 1
                deleted_size += obj_size
                self.rm(obj)
            return (deleted_num, deleted_size)

        objs = []
        size = 0
        lo_objs = []
        lo_size = 0
        now = time.time()
        for obj in self.ls():
            if obj.nlink > 1:
                continue
            if not self._is_new(obj, sort_by, age, now):
                lo_objs.append(obj)
                lo_size += obj.size
            else:
                objs.append(obj)
                size += obj.size

        if (size + lo_size) < lo: # Under lo watermark, keep everything
            return 0, 0

        if size < lo: # Delete some, but not all of old objs
            return _rm_objs(lo_objs, lo_size, lo - size)

        # Over lo watermark with new objs, delete all old.  A cutoff below
        # zero makes sure that zero sized objects are removed as well.
        (deleted_num, deleted_size) = _rm_objs(lo_objs, lo_size, -1)

        if size < hi:
            return (deleted_num, deleted_size)

        # Now we need to cap the new objs to hi watermark
        (hdeleted_num, hdeleted_size) = _rm_objs(objs, size, hi)
        return (deleted_num + hdeleted_num,
                deleted_size + hdeleted_size)
def _main():
    """ CAShe test function, setup opts.

        Parses the command line, works out which command to run (aliases
        are mapped to their real names, no command means "summary") and
        passes it on to _main_cmds(). Unless --verbose is given any error
        from the command is reported as a single line on stderr instead of
        as a traceback. """
    import optparse

    global prog

    # Aliases => the command they run.
    remap_cmds = {'rsync2' : 'rsync-to',
                  'usage' : 'help',
                  'ls' : 'list',
                  'rm' : 'unlink',
                  'gc' : 'cleanup',
                  'conf' : 'config',
                  'ls-extras' : 'ls-extra',
                  'rm-extras' : 'rm-extra'}
    all_cmds = ("summary", "list", "info", "check",
                "load", "save", "save-fast", "merge", "unlink",
                "cleanup", "ls-extra", "rm-extra", "list-files", "recent",
                "rsync-from", "rsync-to",
                "config", "help")

    argp = optparse.OptionParser(
            description='Access CAShe storage from the command line',
            version="%prog-" + __version__)

    # optparse re-wraps the epilog text, so override the formatting to have
    # the list of commands come out one per line.
    epilog = "\n ".join(["\n\nCOMMANDS:"]+sorted(all_cmds)) + "\n"
    argp.format_epilog = lambda y: epilog

    argp.add_option('-v',
            '--verbose', default=False, action='store_true',
            help='verbose output from commands')
    argp.add_option(
            '--path', default="/var/cache/CAShe",
            help='path to the CAShe storage, defaults to the system cache')
    argp.add_option('-p',
            '--preserve', default=False, action='store_true',
            help='preserve filetimes when using rsync')
    argp.add_option(
            '--sort-by', default="filename",
            help='what to sort list/info command by')
    argp.add_option('--link', default=None, action='store_true',
            help='try to link in load/save operations (default)')
    argp.add_option('--copy-only', dest='link', action='store_false',
            help='never link, always copy in load/save operations')

    (opts, cmds) = argp.parse_args()

    if argp.prog is not None:
        prog = argp.prog
    elif sys.argv:
        prog = os.path.basename(sys.argv[0])
    else:
        prog = "CAShe"

    # Anything we can't sort by is quietly treated as the default.
    if opts.sort_by not in ("filename", "size",
                            "atime", "ctime", "mtime", "nlink", "time"):
        opts.sort_by = "filename"

    cmd = "summary"
    if len(cmds) >= 1:
        cmd = remap_cmds.get(cmds[0], cmds[0])
    if cmd not in all_cmds:
        argp.print_help()
        sys.exit(1)

    if cmd == "help":
        argp.print_help()
        return

    if opts.verbose:
        # Let errors through, so that the full traceback is shown.
        return _main_cmds(opts, cmds, cmd)

    try:
        return _main_cmds(opts, cmds, cmd)
    except Exception as e:
        sys.stderr.write("Error: %s\n" % str(e))
def _main_cmds(opts, cmds, cmd):
""" CAShe test function, run commands. """
import time
import stat
try:
import xattr
if not hasattr(xattr, 'get'):
xattr = None # This is a "newer" API.
except ImportError:
xattr = None
def _ui_origin_url(filename):
# See: http://www.freedesktop.org/wiki/CommonExtendedAttributes
try:
return xattr.get(filename, 'user.xdg.origin.url')
except IOError, e:
ok = False
if e.errno in (errno.ENODATA,
errno.EOPNOTSUPP, errno.E2BIG, errno.ERANGE):
ok = True
for me in ("ENOATTR", "ENOTSUPP"):
if hasattr(errno, me) and e.errno == getattr(errno, me):
ok = True
break
if not ok:
raise
return ''
def _ui_time(tm):
return time.strftime("%Y-%m-%d %H:%M", time.gmtime(tm))
def _ui_num(num):
num = str(num)
if len(num) == 4:
return " %s.%sK" % (num[0], num[1:3])
if len(num) == 5:
return " %s.%sK" % (num[0:2], num[2:4])
if len(num) == 6:
return "%s.%sK" % (num[0:3], num[3:5])
if len(num) == 7:
return " %s.%sM" % (num[0], num[1:3])
if len(num) == 8:
return " %s.%sM" % (num[0:2], num[2:4])
if len(num) == 9:
return "%s.%sM" % (num[0:3], num[3:5])
if len(num) == 10:
return " %s.%sG" % (num[0], num[1:3])
if len(num) == 11:
return " %s.%sG" % (num[0:2], num[2:4])
if len(num) == 12:
return "%s.%sG" % (num[0:3], num[3:5])
return num
def _ui_age(num):
ret = ""
weeks = num / (60 * 60 * 24 * 7)
num %= (60 * 60 * 24 * 7)
if weeks:
ret += "%u week(s)" % weeks
if num:
ret += " "
days = num / (60 * 60 * 24)
num %= (60 * 60 * 24)
if days:
ret += "%u day(s)" % days
if num:
ret += " "
if not num:
return ret
hours = num / (60 * 60)
num %= (60 * 60)
minutes = num / (60)
num %= (60)
ret += "%02u:%02u:%02u" % (hours, minutes, num)
return ret
def _get_T_D(cmds):
T = None
D = None
if len(cmds) >= 2:
cmds[1] = _checksum_aliases.get(cmds[1], cmds[1])
if cmds[1] in objs._objs:
T = cmds.pop(1)
if len(cmds) >= 2:
D = cmds[1]
return T, D
def _get_objs(objs, opts, T, D, osort_by=None):
if osort_by is None:
osort_by = opts.sort_by
for obj in sorted(objs.ls(checksum_type=T),
key=lambda x: getattr(x, osort_by)):
if D is not None and not obj.checksum_data.startswith(D):
continue
yield obj
# Actual start of main()
objs = CAShe(opts.path)
if opts.link is not None and not opts.link:
objs.link = False
(lo, hi, age, tsort_by) = objs._get_config()
if opts.sort_by == "time":
opts.sort_by = tsort_by
if cmd == "config":
(dlo, dhi, dage, dtsort_by) = objs._get_config_def()
def _dtxt(dconfig, config):
if dconfig == config:
return "def"
return "usr"
print " Storage(%s):" % _dtxt(dlo, lo), _ui_num(lo)
print "New Storage(%s):" % _dtxt(dhi, hi), _ui_num(hi)
print " Age(%s):" % _dtxt(dage, age), _ui_age(age)
print " Time(%s):" % _dtxt(dtsort_by, tsort_by), tsort_by
print " Path(%s):" %_dtxt("/var/cache/CAShe", objs.path),objs.path
if cmd == "summary":
T = None
D = None
if len(cmds) >= 2:
T = cmds[1]
if len(cmds) >= 3:
D = cmds[2]
now = time.time()
summary_data = {'used-objs' : 0,
'used-size' : 0,
'free-objs': 0,
'free-objs-old': 0,
'free-size': 0,
'free-size-old': 0,
}
Ts = {"." : summary_data.copy()}
for obj in _get_objs(objs, opts, T, D, osort_by="filename"):
if obj.checksum_type not in Ts:
Ts[obj.checksum_type] = summary_data.copy()
if obj.nlink > 1:
Ts[obj.checksum_type]['used-objs'] += 1
Ts[obj.checksum_type]['used-size'] += obj.size
Ts['.']['used-objs'] += 1
Ts['.']['used-size'] += obj.size
elif not objs._is_new(obj, tsort_by, age, now):
Ts[obj.checksum_type]['free-objs-old'] += 1
Ts[obj.checksum_type]['free-size-old'] += obj.size
Ts['.']['free-objs-old'] += 1
Ts['.']['free-size-old'] += obj.size
else:
Ts[obj.checksum_type]['free-objs'] += 1
Ts[obj.checksum_type]['free-size'] += obj.size
Ts['.']['free-objs'] += 1
Ts['.']['free-size'] += obj.size
def _prnt_summary(data):
if opts.verbose:
print " Used Objs:", _ui_num(data['used-objs'])
print " Used Size:", _ui_num(data['used-size'])
print " Free Objs:", _ui_num(data['free-objs'])
print " Free Size:", _ui_num(data['free-size'])
if opts.verbose or data['free-objs-old']:
print " OLD Objs:", _ui_num(data['free-objs-old'])
print " OLD Size:", _ui_num(data['free-size-old'])
objs = data['free-objs'] + data['used-objs']
size = data['free-size'] + data['used-size']
print " Objs:", _ui_num(objs)
print " Size:", _ui_num(size)
if True:
for T in sorted(Ts):
if T == '.': continue
print "Type:", T
_prnt_summary(Ts[T])
print "--All--:", len(Ts) - 1
_prnt_summary(Ts['.'])
def _prnt_list(obj):
if obj.nlink > 1:
prefix = " "
elif objs._is_new(obj, tsort_by, age, now):
prefix = "*"
else:
prefix = "!"
print "%s%-6s %-64s %s" % (prefix, obj.checksum_type,
obj.checksum_data, _ui_num(obj.size))
if opts.verbose:
xd = _ui_origin_url(obj.filename)
if xd:
print " \_", xd
if cmd == "list":
now = time.time()
T, D = _get_T_D(cmds)
for obj in _get_objs(objs, opts, T, D):
_prnt_list(obj)
if cmd == "check":
now = time.time()
T, D = _get_T_D(cmds)
for obj in _get_objs(objs, opts, T, D):
if obj.nlink > 1:
prefix = " "
elif objs._is_new(obj, tsort_by, age, now):
prefix = "*"
else:
prefix = "!"
print "%s%-6s %-64s %s" % (prefix, obj.checksum_type,
obj.checksum_data,
obj.checked_filename is not None)
if cmd == "info":
now = time.time()
T, D = _get_T_D(cmds)
done = False
for obj in _get_objs(objs, opts, T, D):
if done: print ''
done = True
print "Type:", obj.checksum_type
print "Data:", obj.checksum_data
suffix = ""
if not objs._is_new(obj, tsort_by, age, now):
suffix = "(old)"
print " Size:", _ui_num(obj.size), suffix
print " Links:", _ui_num(obj.nlink - 1)
print " M-Time:", _ui_time(obj.mtime)
print " A-Time:", _ui_time(obj.atime)
if opts.verbose or tsort_by == "ctime":