# coding: utf-8  #redundant on Python 3 (UTF-8 is the default), harmless on Python 2
from __future__ import print_function #for testing
import os, sys
from time import time, ctime, sleep
from collections import namedtuple
try:
    #Better subprocess for Python 2
    from subprocess32 import Popen, PIPE
except ImportError:
    from subprocess import Popen, PIPE
try:
    import _thread as thread
    from queue import Queue
except ImportError:
    import thread
    from Queue import Queue
#Scanning the filesystem
try:
    from os import scandir #built in on Python 3.5+
except ImportError:
    try:
        from scandir import scandir
    except ImportError:
        #Could fall back to a plain os.listdir walk as a last resort
        pass
#Setting up the dbm to be used
#NB: 'win' in sys.platform also matches 'darwin'; startswith('win') would be stricter
try:
    #Python 2
    import anydbm as dbm #With some luck we get bsddb/dbhash
except ImportError:
    #Python 3
    if 'win' not in sys.platform:
        import dbm #We can make do with gdbm/ndbm
    else:
        #Python 3 on Windows, trying to avoid dumbdbm
        try:
            import semidbm as dbm
        except ImportError:
            import dbm #All the whiz
finally:
    #For checking which built-in dbm backend is in use
    try:
        from dbm import whichdb
    except ImportError:
        from whichdb import whichdb
#GUI - optional Kivy GUI, original (fallback) Tkinter GUI
KIVY_GUI=False #modify when testing/ready to use Kivy
if KIVY_GUI:
    try:
        from kivy.app import App
        from kivy.lang import Builder
        from kivy.uix.widget import Widget
        #from kivy.core.window import Window, WindowBase
        from kivy.properties import ObjectProperty
        from kivy.clock import Clock
        from kivy.config import Config #to start maximized
        from kivy.properties import BooleanProperty
        from kivy.uix.behaviors import FocusBehavior
        from kivy.uix.label import Label
        from kivy.uix.boxlayout import BoxLayout
        from kivy.uix.textinput import TextInput
        from kivy.uix.recycleview import RecycleView
        from kivy.uix.recycleview.views import RecycleDataViewBehavior
        from kivy.uix.recycleview.layout import LayoutSelectionBehavior
        from kivy.uix.recycleboxlayout import RecycleBoxLayout
    except ImportError:
        KIVY_GUI=False
    else:
        #This import pops a Kivy window on its own, as does importing Window
        from kivy.core import window
else:
    # Eventually, I would like to allow switching between the GUIs, with
    # varying features on each one
    try:
        from tkinter import Entry, StringVar, Message, Frame, PhotoImage, mainloop, INSERT
        from tkinter.ttk import Treeview, Style
        from tkinter.font import Font
    except ImportError:
        from Tkinter import Entry, StringVar, Message, Frame, PhotoImage, mainloop, INSERT
        from ttk import Treeview, Style
        from tkFont import Font
#NTFS snooping, getting admin on Windows, DPI workarounds, Filetype icons
from ctypes import *
ENC='utf-8'
DB, CONF='Jiffy_DB ', 'Jiffy_Config'
##--Testing related-------------------------------------------------------------
class Checks():
    testing=1
    #Modify to 1 after 'else' to activate a check
    #--DriveIndex / GUI
    recursion_stats= 0 if not testing else 1
    findex_syncing= 0 if not testing else 1
    is_dpi_scale= 0 if not testing else 0
    is_query_passed= 0 if not testing else 0 # what did the search thread receive
    is_result_batch= 0 if not testing else 0 # results put on queue by search thread
    is_query_sent= 0 if not testing else 0 # was query sent by tk gui
    query_db_send= 0 if not testing else 0 # what was sent to the thread by db_send
    is_batch_recieved= 0 if not testing else 0 # is result batch received by tk gui
    is_result_parsed= 0 if not testing else 1
    is_tk_get_name = 0 if not testing else 1
    #--UpdateViaUsn
    is_entries_from_fsutil= 0 if not testing else 1
    #NB: duplicate name - this later definition overrides the one above
    findex_syncing= 0 if not testing else 0
    full_path_parsing= 0 if not testing else 0
    did_generate_entries= 0 if not testing else 0
if Checks.testing:
    import pdir, fire
    from pympler.asizeof import asizeof
    def size(obj):
        '''getting object sizes in MB (float division for Python 2)'''
        return asizeof(obj)/1024.0/1024.0
##--Some helper functions-------------------------------------------------------
def FloatPrecision(num, precision):
    '''If I opt for losing excess precision of floats in advance'''
    floater=int((num-int(num))*(10**precision))/(10**precision+0.0)
    return int(num)+floater
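#Illustrative doctest-style example (comment only, not executed):
#   >>> FloatPrecision(12.3456, 2)
#   12.34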
#-------------------------------------------------------------------------------
##--Classes---------------Classes for my precious-------------------------------
#-------------------------------------------------------------------------------
''' Platform(): plat, split_token
        start_func(), scale_display(), get_updates() - not implemented
    Drives(): drives
    _UpdateViaUsn(): Drives.drives, Drives.ntfs_drives, start_usns
        create_journals(), _get_next_max_usn(), get_changed()
    DriveIndex(): db, conf, drives, findex, query_queue, result_queue
        generate_db(), is_db_generated(), query_db(), get_result_batch()
    SearchBox(Frame): Tkinter GUI
    KivySearchBox(BoxLayout): Kivy GUI
'''
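#A minimal usage sketch (illustrative only; the GUI classes wire this up
#themselves, and query_db()/get_result_batch() wrap the queues shown here):
#   index=DriveIndex()                       #opens the dbm files, starts the search thread
#   index.generate_db()                      #background rescan of all drives
#   while not index.is_db_generated():
#       sleep(0.5)
#   index.query_queue.put('holiday jpg')     #what query_db() passes along
#   batch, is_new_query=index.result_queue.get() #keys of matching index entries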
class _UpdateViaUsn():
    '''On Windows systems with NTFS drives/partitions, it is possible to get
    info on changes to the filesystem from the USN journal. Admin privilege is
    required.
    The current implementation uses the fsutil tool that comes with Windows, as I
    didn't want to tinker with the API calls quite yet.
    '''
    def __init__(self, start_usns=None, ntfs_drives=None):
        self.Drives=Drives()
        self.ntfs_drives=ntfs_drives if ntfs_drives else self.Drives.ntfs_drives
        self.start_usns=start_usns if start_usns else 0
    def create_journals(self, max_size, delta):
        '''create usn journals to track changes for chosen drives if such don't
        already exist'''
        for drive in self.ntfs_drives:
            #check if a journal exists, else:
            #str() the numeric arguments - Popen requires string arguments
            Popen(('fsutil', 'usn', 'createjournal', str(max_size), str(delta), drive), stdout=PIPE).communicate()[0]
    #experiment
    def get_next_max_usn(self, drive):
        '''On Windows/NTFS this is 'next usn' - the usn index of the latest
        change made. Also returns max_usn, since enumdata requires an upper
        boundary. Not needed when using readjournal.
        fsutil usn queryjournal result:
        Usn Journal ID : 0x01d2a26e17dbc5e8
        First Usn : 0x0000000000000000
        Next Usn : 0x0000000000acddf0 <--- index #2
        Lowest Valid Usn : 0x0000000000000000
        Max Usn : 0x7fffffffffff0000 <--- index #4
        .
        . '''
        if 'win' in sys.platform and drive == '/':
            #using '/' on Windows works for scandir but not for fsutil
            drive='c:'
        else:
            #Removing trailing slashes
            drive=drive.split(':')[0]+':'
        journal_specs=Popen(('fsutil', 'usn', 'queryjournal', drive), stdout=PIPE).communicate()[0].split('\r\n')
        next_usn=journal_specs[2].split(': ')[1]
        max_usn=journal_specs[4].split(': ')[1]
        return next_usn, max_usn #int(next_usn, 16), int(max_usn, 16)
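    #Illustrative call (comment only, values from the sample output above):
    #   next_usn, max_usn=updater.get_next_max_usn('c:')
    #   next_usn -> '0x0000000000acddf0', max_usn -> '0x7fffffffffff0000'
    #   int(next_usn, 16) gives the numeric USN when arithmetic is needed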
    def get_changed(self, usn_dict): #needs to be in a thread or add wrapper
        '''Return a set of full paths to files/folders that were added, altered
        or deleted, and another set of renamed items. This includes pre-rename
        names and names of deleted entries.
        Retrieved via fsutil's usn enumdata and file queryfilenamebyid
        '''
        #self.drive_usns=self._get_usn_range()
        self.renamed=set() #not in use
        self.new_modified_deleted=set()
        #for moved entries, fsutil does not supply an old location record
        self.scan_and_remove=set()
        for drive in usn_dict:
            #current, max=self.drive_usns[drive].split() #retrieving usn range as strings
            #startusn='startusn='+str(current)
            startusn=str(usn_dict[drive])
            print (drive, startusn)
            #getting max_usn separately for easier switching/cooperation with readjournal
            nextusn, maxusn= self.get_next_max_usn(drive)
            #all fsutil arguments must be strings, hence '1' (the minor version)
            usn_entries=Popen(('fsutil', 'usn', 'enumdata', '1', startusn, maxusn, drive), stdout=PIPE).communicate()[0]
            #usn_entries=Popen(('fsutil', 'usn', 'readjournal', drive, 'startusn='+startusn), stdout=PIPE).communicate()[0]
            usn_entries=usn_entries.split('\r\n\r\n')
            if Checks.did_generate_entries:
                print(usn_entries)
            usn_entries.pop() #last one is an empty string
            #------------------------------------------------------------------
            #--Previous version using readjournal, not compatible with unupdated
            #  Win 7 machines--------------------------------------------------
            #readjournal returns the following format:
            '''
            USN Journal ID : 0x01d2a26e17dbc5e8
            First USN : 0
            Next USN : 11116672
            Start USN : 11116416
            Min major version : Supported=2, requested=2
            Max major version : Supported=4, requested=4
            Usn : 11116416
            File name : aa8
            File name length : 6
            Reason : 0x00001000: Rename: old name
            Time stamp : 16-May-17 08:12:49
            File attributes : 0x00000010: Directory
            File ID : 000000000000000000020000000013a1
            Parent file ID : 00000000000000000005000000000005
            Source info : 0x00000000: *NONE*
            Security ID : 0
            Major version : 3
            Minor version : 0
            Record length : 88
            .
            .
            '''
            # Getting the ID of the last change from readjournal's first entry.
            # Will be the next start_usn
            '''
            'USN Journal ID ',
            ' 0x01d2a26e17dbc5e8',
            'First USN ',
            ' 0',
            'Next USN ',
            ' 11116672',
            'Start USN ',
            ' 11116416','''
            #last_usn=usn_entries[0].replace('\r\n', ':').split(':')[5]
            #print('last usn: ', last_usn)
            #with split('\r\n') currently used
            '''['Usn : 11325792',
            'File name : aa', ---> index #1
            'File name length : 4',
            'Reason : 0x00001000: Rename: old name', ---> index #3
            'Time stamp : 23-May-17 07:47:07',
            'File attributes : 0x00000010: Directory',
            'File ID : 000000000000000000020000000013a1', ---> index #6
            'Parent file ID : 00000000000000000005000000000005',
            'Source info : 0x00000000: *NONE*',
            'Security ID : 0',
            'Major version : 3',
            'Minor version : 0',
            'Record length : 80'] '''
            #-------------------------------------------------------------------
            #--Using enumdata in the new version; no "reason" field, so checking
            #  for renames is on us---------------------------------------------
            for entry in usn_entries:
                entry_fields=entry.split('\r\n')
                #An entry after .split('\r\n')
                '''['File Ref# : 0x00000000000000000004000000001f57', <--index #0
                'ParentFile Ref# : 0x00000000000000000035000000002138',
                'Usn : 0x0000000000ae99a8',
                'SecurityId : 0x00000000',
                'Reason : 0x00000000',
                'Name (044) : doc.doc']'''
                #Extracting the ID from a listing such as above
                file_id=entry_fields[0].split(' : ')[1]
                '''Used by readjournal, currently not in use
                file_id='0x'+entry_fields[6].split(' : ')[1]
                reason=int(entry_fields[3].split(': ')[1], 16)'''
                try:
                    full_path=Popen(('fsutil', 'file', 'queryfilenamebyid', drive, file_id), stdout=PIPE).communicate()[0]
                except:
                    # Some events retrieved by fsutil's enumdata weren't file
                    # related.
                    print('poo')
                else:
                    ''' "A random link name to this file is \\?\E:\aa\bobo"
                    note no trailing slashes on folder names '''
                    #This gets us the full path
                    full_path=full_path.replace('?\\', '\r\n').split('\r\n')[1]
                    if Checks.full_path_parsing:
                        print (full_path)
                    '''separating entries into renamed and non-renamed; old-name
                    entries should be removed and newly named folders rescanned.'''
                    '''Used by readjournal, not committed, to be removed in a future commit
                    REN_OLD=0x00001000 #'Rename: old name'
                    REN_NEW=0x00002000 #'Rename: new name'
                    DEL_CLOSE=0x80000200 #'File delete | Close'
                    if reason == REN_OLD:
                        # Those entries hold no longer existing names
                        old_name=entry_items[1].split(' : ')[1]
                        path, new_name=full_path.rsplit('\\', 1)
                        if not old_name == new_name:
                            # if they are the same, this was a same-drive move;
                            # moving within a partition looks much like renaming
                            # as far as the usn journal goes, and fsutil gives
                            # no old path for moves
                            full_path=path+'\\'+old_name
                            self.renamed.add(full_path)
                    else:
                        self.new_modified_deleted.add(full_path)'''
                    self.new_modified_deleted.add(full_path)
            if Checks.is_entries_from_fsutil:
                print ('entries generated for', drive)
        ##Note
        '''Until I find a better way, every item is a suspect of being a moved
        item - need to compare keys to all items, stat() any findings and remove
        non-existing ones.'''
        #update config with last_usn
        return self.new_modified_deleted, self.renamed #empty for the current method
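    #Illustrative call (comment only; usn_dict maps drives to the start USNs
    #that were saved to the config dbm on the previous scan):
    #   changed, renamed=updater.get_changed({'c:': '0x0000000000acddf0'})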
class Platform():
    '''platform specific trinkets'''
    def __init__(self, platform=sys.platform):
        self.plat=platform
        self.split_token= '\\' if 'win' in self.plat else '/'
        self.Update=_UpdateViaUsn if 'win' in self.plat else None #Windows auto-update
    def start_func(self):
        '''Returns a platform based method to handle double-clicks/Enter presses
        for opening files with their default app, or opening folders in the
        default file tool. '''
        if 'win' in self.plat:
            func=os.startfile #An API call is also an option
        else:
            #Linux, FreeBSD, OpenBSD, MacOSX
            open_command='open' if 'darwin' in self.plat else 'xdg-open' #os.name=='posix'
            func=lambda filepath: Popen((open_command, filepath))
        return func
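    #e.g. (illustrative, comment only):
    #   opener=Platform().start_func()
    #   opener('/home/me/doc.pdf') #opens with the default application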
    def scale_display(self):
        '''Take care of display related issues. On Windows - announce the app
        as DPI aware'''
        if 'win' in self.plat:
            try:
                #try for DPI awareness
                windll.shcore.SetProcessDpiAwareness(1)
                if Checks.is_dpi_scale:
                    print('great DPI success')
                #windll.shcore.GetDpiForMonitor()
            except:
                #well, shit.
                if Checks.is_dpi_scale:
                    print ('no dpi scaling')
        else:
            pass
    def get_updates(self, start=None, end=None):
        '''use internal platform specific function aggregates to return
        a set of modified entries'''
        #save last update time/usn ranges in a config dbm, here or in the caller?
        pass
class Drives():
    '''To hold any drive related information'''
    def __init__(self):
        self.drive_list=self._get_drives()
        if 'win' in sys.platform:
            self.ntfs_drives=self._get_ntfs_drives_win()
    def _get_drives(self):
        '''Generate a list of drives for the database function. On Linux -
        just '/'. On Windows, '/' and 'C:/' represent the same drive, but
        trying to walk 'C:/' only traversed a few specific dirs in my attempts
        '''
        self.exists=os.path.exists #replace with some useful function that gets removable drive info as well?
        self.drives=[u'/']
        if 'win' in sys.platform:
            #ord('Z')+1 so that a Z: drive is included too
            self.drives.extend((chr(a)+ u':\\' for a in range(ord('A'), ord('Z')+1) if self.exists(chr(a)+':')))
            #Removing redundant 'C:' on Windows
            try:
                self.drives.remove(u'C:\\')
            except ValueError:
                pass
        return self.drives
    def _get_ntfs_drives_win(self):
        '''Return a list of NTFS drives using fsutil fsinfo's volumeinfo.
        Result after split('\r\n'):
        ['Volume Name : Le Shwa',
        'Volume Serial Number : 0xd4d56c89',
        'Max Component Length : 255',
        'File System Name : NTFS', --> index #3 --> split(' : ') --> index #1
        'Is ReadWrite',.... ]'''
        ntfs_drives=[]
        #win_drive_list=(chr(a)+ u':' for a in range(ord('A'), ord('Z')+1) if self.exists(chr(a)+':')) #unused leftover
        for drive in self.drives:
            volume_info=Popen(('fsutil', 'fsinfo', 'volumeInfo', drive), stdout=PIPE).communicate()[0]
            file_system=volume_info.split('\r\n')[3].split(' : ')[1]
            if file_system=='NTFS':
                ntfs_drives.append(drive)
        return ntfs_drives
class DriveIndex():
    '''functions for GUI use:
    - generate_db() - pop a thread for generating a new file index as a dictionary
    - update_locations(locations) - pop a thread for rescanning and updating
      specific directories
    - (file_index, unsearched_dirs) is_db_generated() - return True if the new index is ready
    - get_changes() - update findex with entries received from the platform
      specific update service via Update()
    - query_db(query) - pass a query to the search thread, empty current results
      from the result queue
    - *results* get_result_batch() - Returns a batch(es?) of results if available
    '''
    def __init__(self, db=DB, conf=CONF):
        self.Drives=Drives()
        self.Plat=Platform()
        self.split_token=self.Plat.split_token
        self.drives=self.Drives.drives
        if 'win' in sys.platform:
            self.ntfs_drives=self.Drives.ntfs_drives
            self.Update=self.Plat.Update()
        self.db=db
        self.conf=conf
        self.findex=dbm.open(self.db, 'c')
        self.config=dbm.open(self.conf, 'c')
        #NTS Let the GUI work it out
        '''if not self.config:
            #No config file, generating a default one
            self._default_config=dict.fromkeys(self.ntfs_drives)
            for key in self._default_config:
                key=key.encode(ENC)
                self.config[key]=self._default_config[key]
            self.config.sync()'''
        #To interact with _make_search()
        self.query_queue, self.result_queue= Queue(), Queue()
        thread.start_new_thread(self._make_search, ())
        '''
        if self.findex and self.config and self.ntfs_drives: #change to a thread
            for drive in self.ntfs_drives:
                try:
                    update_checkpoint=self.Update.get_next_max_usn(drive)[0]
                    modified, renamed = self.Update.get_changed(self.config[drive])
                except:
                    print('error updating or corrupt config file')
                else:
                    self._sync_changes(modified, renamed)
                    self.config[drive]=update_checkpoint'''
    def _pretty_size_date(self, size, date):
        '''Return formatted size and date strings'''
        Gb_Tb_PRECISION=2
        #Size
        for size_unit in u'BKMGT':
            if size < 1024:
                break
            size/=1024.
        if size_unit == u'G' or size_unit == u'T':
            #cut excess precision
            size=FloatPrecision(size, Gb_Tb_PRECISION)
        else:
            #M, K, B - round to the nearest integer
            size=int(size) if size-int(size)<0.5 else int(size+1)
        fsize= str(size)+' '+ size_unit + u''
        #Date
        '''ctime style: 'Mon Oct 26 16:33:26 2015'
        desired style: '26-Oct-15 | 16:33' '''
        try:
            # + works (~x10) faster than join() in my timeits
            fdate= date[2] + u'-' + date[1] + u'-' + date[4][2:] + u' | ' + date[3][:-3] +u'' #change to %
        except:
            # In case st_mtime returned a negative timestamp due to a rare
            # (I hope) oddity on Windows/Python3.5 (None passed)
            fdate='Error getting date'
        return fsize, fdate
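    #Illustrative example (comment only):
    #   size 3517678 bytes and date ['Mon','Oct','26','16:33:26','2015']
    #   (a ctime()-split list) return ('3 M', '26-Oct-15 | 16:33')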
    def _size_from_key(self, key):
        '''Getting the size of a deleted entry from its string representation in
        the index. Keys are of the form:
        'inode*F*filename*24 M*[date]' --> split('*') --> size at index #3
        '''
        location_in_key=3
        power={'B': 0, 'K': 1, 'M': 2, 'G': 3, 'T': 4}
        # '24 M' --> size='24', units='M'
        size, units=key.decode(ENC).split(u'*')[location_in_key].split()
        #int() suffices on both Pythons (auto-promotes to long on Python 2)
        long_size=1024**power[units] * int(size)
        return long_size
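    #Illustrative (comment only):
    #   _size_from_key(b'8017*F*doc.doc*24 M*23-May-17 | 07:47') -> 25165824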
    def _get_inode_via_scandir(self, filepath):
        '''stat() on Python<3.3 returns an empty st_ino, in which case we scandir
        the parent folder, as scandir does provide inodes'''
        parent_dir, name=filepath.rsplit(self.split_token, 1)
        parent_contents=scandir(parent_dir)
        for item in parent_contents:
            if item.name==name:
                return item.inode()
        #return [item for item in parent_contents if item.name==name].pop()
    def _recursive_create_dict(self, locations=None):
        '''Recursive implementation of generating the file dictionary. A stack
        based implementation is in stash.py. I tried to avoid function
        fragmentation for the speedup, so this one is a bit longer'''
        self.fdict={}
        self.fdict['**']='' # "Null key" referral, currently not in use
        self.unsearched=[]
        self.is_generated=False
        if not locations:
            self.locations=self.drives
            self.brand_new=True
        else:
            self.locations=locations
            self.brand_new=False
        self.generation_time=time() # for testing / displaying
        def recursive_add(top):
            '''Cheese - recursively scan a folder or drive ("location"),
            updating the dictionary and returning the total size of the location'''
            #Prepping dir contents
            try:
                contents=scandir(top) #scandir the path
            except OSError:
                self.unsearched.append(top)
                return 0 # no size for you.
            #Getting 'C:\' back as the prefix on Windows
            if 'win' in sys.platform and top[0]==u'/':
                #Get the boys back home
                top=u'C:\\'+top[1:]
            #Will be calculated recursively
            top_size=0
            #Iterating over dir contents
            for scandir_item in contents:
                if scandir_item.is_symlink():
                    #symlinks make recursion sad
                    continue #add symlink treatment later
                try:
                    date=ctime(scandir_item.stat().st_mtime).split()
                except:
                    #On Python 3.5, Win8.1, had an issue with a negative time on
                    #st_mtime. Substituted for an error notification when parsed
                    date=None
                if not scandir_item.is_dir():
                    #File specific treatment
                    fname='F'+ '*' + scandir_item.name
                    size=scandir_item.stat().st_size
                else:
                    #symlinks were skipped above, so plain recursion is safe
                    size=recursive_add(scandir_item.path) #Aww shit!
                    fname='D'+ '*' + '[' + scandir_item.name + ']'
                # Saving inodes to be able to find and remove old versions of
                # renamed/moved entries
                try:
                    finode=str(scandir_item.inode())
                except:
                    #error on Windows SystemData folder with scandir/py2.7
                    finode='0'
                finally:
                    # Attaching inodes increases db size significantly, but
                    # using fsutil I cannot track old paths of moved files,
                    # will implement a compressed dbm in the future
                    fname=finode + '*' + fname
                top_size+= size
                fsize, fdate=self._pretty_size_date(size, date)
                fpath=top + u''
                #+ worked faster than join() in my attempts
                key=fname + '*' +fsize + '*' +fdate + u''
                value= fpath
                if key in self.fdict:
                    '''contingency in case two files with the same name and size
                    were modified in the same minute'''
                    #self.key=self.fname + '[1]' + ' * ' +self.fsize + ' * ' +self.fdate + u''
                    pass
                self.fdict[key.encode(ENC)]=value.encode(ENC)
            return top_size
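        #Example of a stored pair (illustrative): a 24 MB file E:\aa\doc.doc
        #with inode 8017, modified 23-May-17 07:47, ends up as
        #   key  : b'8017*F*doc.doc*24 M*23-May-17 | 07:47'
        #   value: b'E:\\aa'
        #a directory gets 'D' and brackets around its name: b'...*D*[aa]*...'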
        #Taking up the scan
        for location in self.locations:
            #Recursing all drives/locations
            if location in self.drives:
                if 'win' in sys.platform and location in self.ntfs_drives + ['/']: #remove duplicity with 'c:' / '/'
                    '''On Windows, save the drive's "next usn" to the config file
                    before scanning. This will be this drive's starting point for
                    the next auto-update'''
                    self.config[location.encode(ENC)]=self.Update.get_next_max_usn(location)[0]
            self.gtime=time()
            recursive_add(location)
            if Checks.recursion_stats:
                self.gtime=time()-self.gtime
                print ("Done recursing on location ", location)
                print("Time: ", self.gtime)
        # _make_search() thread will close itself upon receiving True instead of
        # a query, freeing the database for syncing
        self.query_queue.put(True)
        #Deleting relevant parts of the dbm
        if self.brand_new:
            #All drives were rescanned, empty the dbm
            try:
                # some dbms don't have a clear() method. fu** me, right?
                self.findex.close()
                self.findex=dbm.open(self.db, 'n')
            except:
                # dumbdbm does not clear out the dbm when reopening with 'n'
                self.findex.clear()
        else:
            #Remove items internal to the location list from the dbm #boatshow
            #(materialize first - map() is lazy on Python 3, and we can't pop
            #while iterating; paths are stored encoded)
            to_delete=[key for key in self.findex for location in self.locations if location.encode(ENC) in self.findex[key]]
            for key in to_delete:
                del self.findex[key]
            '''for key in self.findex:
                for location in self.locations:
                    if location in self.findex[key]:
                        #delete entries whose path contains location
                        del dict[key]'''
        #Persisting the new index
        if whichdb(self.db)=='dbhash':
            '''For dumbdbm, this jams the app, as does manual updating.
            It's not dumb, it's simply not worthy'''
            self.findex.update(self.fdict)
        else:
            for key in self.fdict:
                self.findex[key]=self.fdict[key]
        if Checks.findex_syncing:
            print ('findex is updated')
        #Save new database
        self.findex.sync()
        if Checks.findex_syncing:
            print ('findex synced')
        #We can now resume searching
        thread.start_new_thread(self._make_search, ())
        #Inform of completion
        self.is_generated=True
        #Cleaning up
        self.fdict.clear()
        self.fdict=None
        self.generation_time=time()-self.generation_time
        if Checks.recursion_stats:
            print (self.generation_time)
    def _make_search(self):
        '''Attempt at circumventing StopIteration(), did not see a speed advantage'''
        self.results_per_batch=50
        if whichdb(self.db) in {'dbm.gnu', 'gdbm', 'dbm.ndbm', 'dbm'}:
            '''iteration is not implemented for gdbm and (n)dbm, forced to
            pop the keys out in advance for "for key in fdict:" '''
            self.keys=self.findex.keys()
        #Extracting keys to a set, make this optional as the new "scorch mode"
        if 'win' in sys.platform: #NTS Did not see a speed improvement on Linux
            try:
                self.keys=frozenset(self.findex)
            except:
                self.keys=frozenset(self.findex.keys())
        self.search_list=None
        self.separator='*'.encode(ENC) #Python 3, yaaay
        while True:
            self.query=None
            while not self.query_queue.empty():
                #more items may get in (or not?) while the condition is checked
                self.query=self.query_queue.get()
            try:
                self.search_list=self.query.lower().encode(ENC).split()
                if Checks.is_query_passed:
                    print ('is_query_passed: ', self.search_list)
            except:
                if self.query:
                    # True is passed when a new database has been generated.
                    # A new instance of _make_search will be opened
                    break
                else:
                    #No new queries
                    sleep(0.1)
                    continue
            else:
                self.is_new_query=True
                self.result_batch=[]
                name_pos=2 # inode * fof * name * size * date
                for key in self.keys:
                    filename=key.split(self.separator)[name_pos].lower()
                    '''_all=all
                    if _all(token in filename for token in search_list):
                        result_batch.append(key)'''
                    #Add key if matching
                    for token in self.search_list:
                        if token not in filename:
                            break
                    #If the loop hasn't ended abruptly
                    else:
                        '''Added an existence check via stat() after adding USN
                        auto updates, as I am yet to figure out how to get old
                        paths of moved files/folders. Alternatively, every
                        suspicious file or folder name can be looked up and
                        stat()ed on updating
                        '''
                        '''fof=key.split(self.separator)[0]
                        #'[', ']' take 1 byte in unicode representation
                        real_name=filename[1:-1] if 'D' in fof else filename
                        does_exist=self.findex[key] + self.split_token.encode(ENC) + filename
                        try:
                            os.stat(does_exist.decode(ENC))
                        except:
                            print ('stat failed on ', does_exist)
                            #print(key.decode(ENC))
                            #A dead key
                            #del self.findex[key]'''
                        #add 'else:' if all good. check with a move/rename when
                        #auto-update is up
                        self.result_batch.append(key)
                        #Time to send off a batch?
                        if len(self.result_batch)>=self.results_per_batch:
                            self.result_queue.put((self.result_batch, self.is_new_query))
                            if Checks.is_result_batch:
                                print(self.result_batch, len(self.result_batch))
                                print('is_result_batch: results on queue')
                            self.result_batch=[]
                            #print (len(self.result_batch))
                            self.is_new_query=False
                            sleep(0.1)
                    if not self.query_queue.empty():
                        break
                #If the for loop ended naturally, with some batch<50
                self.result_queue.put((self.result_batch, self.is_new_query))
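    #Consumer-side sketch (illustrative comment only; roughly what the GUIs'
    #polling does via get_result_batch()):
    #   while not index.result_queue.empty():
    #       batch, is_new_query=index.result_queue.get()
    #       if is_new_query:
    #           pass #clear the current result view first
    #       for key in batch:
    #           inode, fof, name, size, date=key.split(b'*') #display name, size, date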
    def _sync_changes(self, modified, renamed):
        '''* find if an older key with the same inode exists:
        - if so and the names are the same - remove.
        - if so, the names differ and it's a folder - remove the entry and all
          entries where this folder is in the path. scandir the new folder for
          updated entries
        - if so, the names differ and it's a file - remove the entry
        * if a file - get '''
        self.dir_size_counter={}
        isdir=os.path.isdir
        stat=os.stat #local alias - a bare class-attribute stat would not resolve here
        def remove_entry(key, path, entry=None): #does not consider size of non deleted (modified) item
            '''Removing an entry from the index, propagating the size change
            to the whole parent directory tree. Insert updated entry if available'''
            #Pulls out size as e.g. ('24', 'M') and returns an integer byte count
            deleted_size=self._size_from_key(key)
            while path and path not in self.drives:
                try:
                    #Assuming massive deleting will involve many files per folder
                    self.dir_size_counter[path]-=deleted_size
                except KeyError:
                    self.dir_size_counter[path]=-deleted_size
                path=path.rsplit(self.split_token, 1)[0]
            #Removing entry from index
            del self.findex[key]
        for entry in modified: #renamed_deleted
            # Extracting filename and path to parent dir from the full path
            path, name=entry.rsplit(self.split_token, 1)
            #name=entry.split(SPLIT_TOKEN)[-1]
            if isdir(entry):
                #False for no longer existing entries as well as non folders
                name='['+name+']'
            name=name.encode(ENC)
            path=path.encode(ENC)
            #path=entry[:-len(name)].encode(ENC)
            separator='*'.encode(ENC)
            try:
                entry_stats=stat(entry)
            except:
                '''Unable to retrieve stats for the file or folder, presumably
                because it was deleted or moved: WindowsError, no inode.
                Searching for an exact match to remove the entry and update the
                parent dir size'''
                for key in list(self.findex.keys()): #materialized - we delete while iterating
                    if name==key.split(separator)[2] and path==self.findex[key]: #inode, fof, name, size, date
                        #Saving size to update the parent folder's size value
                        deleted_size=self._size_from_key(key) #Pulls out size as e.g. ('24', 'M')
                        try:
                            #Assuming massive deleting will involve many files per folder
                            self.dir_size_counter[path]-=deleted_size
                        except KeyError:
                            self.dir_size_counter[path]=-deleted_size
                        finally:
                            #Removing entry from index
                            del self.findex[key]
                '''if name in deleted_entries:
                    deleted_entries[name].append(path)
                else:
                    deleted_entries[name]=[path]'''
            else:
                #We have entry_stats, this is an existing (modified) file or folder
                size, date, inode=entry_stats.st_size, entry_stats.st_mtime, entry_stats.st_ino
                #_pretty_size_date expects a ctime()-split list, as in _recursive_create_dict
                size, date=self._pretty_size_date(size, ctime(date).split())
                inode=inode if inode else self._get_inode_via_scandir(entry)
                inode=str(inode).encode(ENC)
                for key in self.findex:
                    if inode==key.split(separator)[0]: #inode, fof, name, size, date
                        if name==key.split(separator)[2]:
                            '''Name and inode are the same, this was a simple
                            modification as far as the file index is concerned'''
                        else:
                            '''Name and inode are not the same, the file/folder
                            was renamed or moved within the same drive'''
                            if isdir(entry):
                                #check if the action was renaming
                                pass
    def _get_changes(self):
        '''for drive in self.ntfs_drives:
            try:
                modified, renamed = self.Update.get_changed(self.config[drive])
            except:
                print('error updating or corrupt config file')
            else:
                self._sync_changes(modified, renamed) '''
        pass
    def generate_db(self):
        '''Pop a thread on _recursive_create_dict with a queue for the
        resulting db'''
        thread.start_new_thread(self._recursive_create_dict, ())
    def update_locations(self, locations):
        '''Rescan specific folders and update self.findex. Does not update
        the folder entry itself, only what's inside'''
        #note the comma - start_new_thread expects an argument tuple
        thread.start_new_thread(self._recursive_create_dict, (locations,))
    def is_db_generated(self):
        '''To be called by the GUI periodically after calling generate_db()'''
        try:
            self.is_generated #is this relevant when _recursive() is in a different thread?
        except AttributeError:
            #In case is_db_generated() is called before generate_db()
            pass
        else:
            if self.is_generated:
                self.is_generated=False
                return True
        # deprecated method using a queue, we be classy now
        '''if not self.dbgen_queue.empty():
            return self.dbgen_queue.get()
        else:
            return None'''