-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhashfs_fuse.py
executable file
·411 lines (329 loc) · 15 KB
/
hashfs_fuse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
#!/usr/bin/env python
import os, sys, shutil
import errno
import stat
import fcntl
from hashfs.hashfs_core import HashFS as HashFS_Core
# Bootstrap taken from the examples shipped with libfuse/python-fuse.
# _find_fuse_parts is an optional helper module; when it cannot be imported we
# fall through and rely on a regular installation of the fuse bindings.
try:
    import _find_fuse_parts
except ImportError:
    pass

import fuse
from fuse import Fuse

# Refuse to run against bindings that do not expose a version attribute.
if not hasattr(fuse, '__version__'):
    raise RuntimeError("your fuse-py doesn't know of fuse.__version__, probably it's too old.")

# Select the binding API version and assert the features this file relies on.
fuse.fuse_python_api = (0, 2)
fuse.feature_assert('stateful_files', 'has_init')
# Not implemented:
# - symlink
# - link
# - truncate
# - ioctl
# - fsinit
# - getxattr
# - setxattr
# - listxattr
# - removexattr
# - lock
# - create (not defined; new files are made by mknod, open only opens existing paths)
# - fgetattr
# - ftruncate
# - chmod (defined, but the requested mode is ignored)
# - chown
# - fsyncdir
# - releasedir
# - fsync
# - flush
# For all of these functions, you can return -errno.ENOENT if the path doesn't
# exist, or -errno.ENOTDIR if a path component is not a directory
class HashFS(Fuse):
    """FUSE front end that exposes a HashFS tree as a mounted file system.

    Nodes are resolved by walking from ``self.root`` (a checksum) with the
    HashFS core object ``self.fs``; file and directory contents are read from
    ``<local_cache_dir>/<checksum>``. Every mutating operation stores the new
    root checksum returned by the core in ``self.root`` and appends it to the
    root log.

    Callbacks report failures by returning a negative ``errno`` value.
    """

    def __init__(self, *args, **kw):
        """Initialise the FUSE base class and the default mount settings."""
        Fuse.__init__(self, *args, **kw)
        # Default values. The module-level main() passes this instance to the
        # option parser as its ``values`` object, so mount options can
        # overwrite any of them.
        self.root = '44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a'
        self.local_cache_dir = '/tmp/mkfs'
        self.port = '9999'
        self.host = 'localhost'
        self.hash_alg = 'sha256'
        self.local_run = False
        self.log_file = '/tmp/mkfs/root_log.txt'
        self.log_fh = None
        self.fs = None
        # Dictionary to keep track of opened files
        # key = path, value = OpenedNode
        self.opened_files = dict()

    class OpenedNode:
        """Bookkeeping record for one open file."""

        def __init__(self, fd, local_name, nodes_traversed, flags):
            # OS-level descriptor of the backing file in the local cache
            self.fd = fd
            # Path of the backing file (a temp copy for writable opens)
            self.local_name = local_name
            # Traversal returned by get_node_by_path; None for read-only opens
            self.nodes_traversed = nodes_traversed
            # Flags the file was opened with
            self.flags = flags

        def __str__(self):
            return "fd: {}, local_name: {}, flags: {}".format(self.fd, self.local_name, self.flags)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _split_path(path):
        """Return ``(parent_path, name)`` for an absolute path such as '/a/b'."""
        parent_path = '/' + '/'.join(path.strip('/').split('/')[:-1])
        return parent_path, path.split('/')[-1]

    def _cache_path(self, cksum):
        """Return the local cache file that holds the object named ``cksum``."""
        return "{}/{}".format(self.local_cache_dir, cksum)

    def _dir_nlink(self, cksum):
        """Return the link count of a directory: 2 plus its immediate subdirectories."""
        dir_info = self.fs.fetch_dir_info_from_cache(cksum)
        nlink = 2
        for _, child_info in dir_info.items():
            if child_info['type'] == 'directory':
                nlink += 1
        return nlink

    def update_log(self):
        """Append the current root checksum to the root log and flush it."""
        self.log_fh.write(self.root+'\n')
        self.log_fh.flush()

    # ------------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------------
    def getattr(self, path):
        """Return a ``fuse.Stat`` for ``path``, or ``-errno.ENOENT`` if it is missing.

        Owner and group are those of the serving process. Inode, device and
        timestamps are reported as 0. ``st_size`` is the size of the backing
        file in the local cache.
        """
        #TODO fill in missing stat fields
        # - st_ino: can probably be 0 for now, but should be chosen better.
        #       An easy solution is grabbing the inode number of the backing
        #       file in the cache, as someone else is managing it. Could also
        #       assign sequentially or randomly.
        out = fuse.Stat()
        out.st_uid = os.getuid()
        out.st_gid = os.getgid()
        out.st_ino = 0
        out.st_dev = 0
        out.st_atime = 0
        out.st_mtime = 0
        out.st_ctime = 0

        # Special case to handle root path /: it has no parent entry to read
        # metadata from, so it is described directly from self.root.
        if path == '/':
            # Directories need the owner search (x) bit to be traversable.
            out.st_mode = stat.S_IFDIR | 0o700
            out.st_nlink = self._dir_nlink(self.root)
            out.st_size = os.path.getsize(self._cache_path(self.root))
            return out

        # Metadata of a file/directory lives in its parent directory node.
        parent_path, name = self._split_path(path)
        _, parent_node = self.fs.get_node_by_path(self.root, parent_path)
        _, node = self.fs.get_node_by_path(self.root, path)
        if parent_node is None or node is None:
            return -errno.ENOENT

        parent_dir_info = self.fs.fetch_dir_info_from_cache(parent_node.node_cksum)
        node_metadata = parent_dir_info.get(name)
        if node_metadata is None:
            return -errno.ENOENT

        if node_metadata['type'] == 'file':
            # Kept at 0o700: chmod is a no-op here, so dropping the execute
            # bit would make it impossible to run anything from the mount.
            out.st_mode = stat.S_IFREG | 0o700
            out.st_nlink = 1
        elif node_metadata['type'] == 'directory':
            out.st_mode = stat.S_IFDIR | 0o700
            out.st_nlink = self._dir_nlink(node_metadata['cksum'])
        out.st_size = os.path.getsize(self._cache_path(node_metadata['cksum']))
        return out

    def readlink(self, path):
        """Symlinks are not supported: ENOENT for a missing path, EINVAL otherwise."""
        _, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            return -errno.ENOENT
        return -errno.EINVAL

    def utime(self, path, times):
        """Timestamps are not stored; silently ignore."""
        pass

    def utimens(self, path, ts_acc, ts_mod):
        """Timestamps are not stored; silently ignore."""
        pass

    def access(self, path, mode):
        """Check only for existence; permissions are not enforced."""
        _, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            return -errno.ENOENT

    def chmod(self, path, mode):
        """Accept the call but ignore the requested mode."""
        pass

    def statfs(self):
        """Return file system summary information (block and inode counts are all 0)."""
        out = fuse.StatVFS()
        # preferred size of file blocks, in bytes
        out.f_bsize = 4096
        # fundamental size of file blocks, in bytes
        out.f_frsize = 4096
        #TODO fill in file system summary info
        # total number of blocks in the filesystem
        out.f_blocks = 0
        # number of free blocks
        out.f_bfree = 0
        # total number of file inodes
        out.f_files = 0
        # number of free file inodes
        out.f_ffree = 0
        return out

    # ------------------------------------------------------------------
    # Namespace operations
    # ------------------------------------------------------------------
    def unlink(self, path):
        """Delete the file at ``path``.

        Returns ``-errno.ENOENT`` if the path does not exist and
        ``-errno.EISDIR`` if it is a directory.
        """
        nodes_traversed, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            return -errno.ENOENT
        if node.node_type == "directory":
            print("{} is a directory".format(path))
            return -errno.EISDIR
        filename = path.split('/')[-1]
        self.root = self.fs.delete_node_bubble_up(filename, node.node_cksum, nodes_traversed)
        self.update_log()

    def rmdir(self, path):
        """Remove the *empty* directory at ``path``.

        Returns ``-errno.ENOENT`` if the path does not exist,
        ``-errno.ENOTDIR`` if it is not a directory and ``-errno.ENOTEMPTY``
        if the directory still has entries.
        """
        nodes_traversed, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            print("The path doesn't exist")
            return -errno.ENOENT
        if node.node_type != "directory":
            print("{} is not a directory".format(path))
            return -errno.ENOTDIR
        dir_info = self.fs.fetch_dir_info_from_cache(node.node_cksum)
        # Refuse when the listing has entries. An unavailable listing (None)
        # is also refused, since emptiness cannot be confirmed.
        if dir_info is None or len(dir_info) > 0:
            print("{} is not an empty directory".format(path))
            return -errno.ENOTEMPTY
        dir_name = path.split('/')[-1]
        self.root = self.fs.delete_node_bubble_up(dir_name, node.node_cksum, nodes_traversed)
        self.update_log()

    def rename(self, src, dst):
        """Moving is not implemented: ENOENT for a missing source, ENOSYS otherwise."""
        #TODO move a file
        # there are some edge cases when moving a directory, but it's not
        # critical to get those right in a first pass
        _, node = self.fs.get_node_by_path(self.root, src)
        if node is None:
            return -errno.ENOENT
        return -errno.ENOSYS

    def mkdir(self, path, mode):
        """Create an empty directory at ``path`` (last component only; ``mode`` is ignored).

        Returns ``-errno.ENOENT`` if the parent directory is missing and
        ``-errno.EEXIST`` if the name is already taken.
        """
        # TODO: handle make_directory error
        parent_path, new_dir = self._split_path(path)
        nodes_traversed, parent_node = self.fs.get_node_by_path(self.root, parent_path)
        if parent_node is None:
            print("Can't find parent node")
            return -errno.ENOENT
        parent_dirinfo = self.fs.fetch_dir_info_from_cache(parent_node.node_cksum)
        if parent_dirinfo.get(new_dir) is not None:
            return -errno.EEXIST
        # The traversal stops short of the parent itself; add it before
        # handing the chain to the core.
        nodes_traversed.append((parent_node.node_name, parent_node.node_cksum))
        self.root = self.fs.make_directory(new_dir, nodes_traversed)
        self.update_log()

    def mknod(self, path, mode, dev):
        """Create an empty regular file at ``path`` (``mode`` and ``dev`` are ignored).

        Returns ``-errno.ENOENT`` if the parent directory is missing and
        ``-errno.EEXIST`` if the name is already taken.
        """
        parent_path, filename = self._split_path(path)
        nodes_traversed, parent_node = self.fs.get_node_by_path(self.root, parent_path)
        if parent_node is None:
            return -errno.ENOENT
        parent_dirinfo = self.fs.fetch_dir_info_from_cache(parent_node.node_cksum)
        # Without this check an existing entry would be replaced by an empty file.
        if parent_dirinfo.get(filename) is not None:
            return -errno.EEXIST
        nodes_traversed.append((parent_node.node_name, parent_node.node_cksum))
        # Put empty file into the parent directory to "create a file"
        self.root = self.fs.bubble_up_existing_dir(nodes_traversed, filename, self.fs.EMPTY_CKSUM, "file")
        self.update_log()
        return

    # ------------------------------------------------------------------
    # Directory listing
    # ------------------------------------------------------------------
    def opendir(self, path):
        """Check that ``path`` exists (else ENOENT) and is a directory (else ENOTDIR)."""
        _, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            print("The path doesn't exist")
            return -errno.ENOENT
        if node.node_type != "directory":
            print("{} is not a directory".format(path))
            return -errno.ENOTDIR

    def readdir(self, path, offset):
        """Yield a ``fuse.Direntry`` for '.', '..' and every entry of ``path``."""
        # If the path doesn't exist or is not a directory, opendir would
        # normally have failed already; yield nothing rather than crash.
        _, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            return
        dir_contents = self.fs.fetch_dir_info_from_cache(node.node_cksum)
        all_dirs = ['.', '..']
        all_dirs.extend(dir_contents.keys())
        for name in all_dirs:
            yield fuse.Direntry(str(name))

    # ------------------------------------------------------------------
    # File I/O
    # ------------------------------------------------------------------
    def open(self, path, flags):
        """Open ``path`` and remember its descriptor in ``self.opened_files``.

        Writable opens work on a temp copy of the cached object, which
        release() commits. Read-only opens use the cached object directly.
        Returns ``-errno.ENOENT`` if the path does not exist.
        """
        # TODO: NEED TO HANDLE MULTIPLE OPEN on the same file
        # NOTE(review): opened_files is keyed by path, so a second open of the
        # same path replaces the first record without closing its descriptor.
        nodes_traversed, node = self.fs.get_node_by_path(self.root, path)
        if node is None:
            return -errno.ENOENT
        cached = self._cache_path(node.node_cksum)
        if flags & (os.O_WRONLY | os.O_RDWR):
            # NOTE(review): '/' is mapped to '_', so '/a/b' and '/a_b' share a
            # temp file name.
            tmp = "{}/temp{}".format(self.local_cache_dir, path.replace('/', '_'))
            shutil.copyfile(cached, tmp)
            fd = os.open(tmp, flags)
            self.opened_files[path] = self.OpenedNode(fd, tmp, nodes_traversed, flags)
        else:
            fd = os.open(cached, flags)
            self.opened_files[path] = self.OpenedNode(fd, cached, None, flags)

    def read(self, path, length, offset):
        """Return up to ``length`` bytes of the open file, starting at ``offset``."""
        fh = self.opened_files[path].fd
        os.lseek(fh, offset, os.SEEK_SET)
        return os.read(fh, length)

    def write(self, path, buf, offset):
        """Write ``buf`` at ``offset`` of the open file; return the bytes written."""
        fh = self.opened_files[path].fd
        os.lseek(fh, offset, os.SEEK_SET)
        return os.write(fh, buf)

    def release(self, path, flags):
        """Close ``path``; if it was opened for writing, commit the temp copy first.

        Committing renames the temp file to its checksum, hands it to the
        core and updates the root. The descriptor and the bookkeeping entry
        are released even when the commit raises.
        """
        # TODO: NEED TO HANDLE MULTIPLE OPEN on the same file
        open_node = self.opened_files.get(path)
        if not open_node:
            return
        try:
            if open_node.flags & (os.O_WRONLY | os.O_RDWR):
                tmp = open_node.local_name
                cksum = self.fs.calculate_file_cksum(tmp)
                local_name = self._cache_path(cksum)
                os.rename(tmp, local_name)
                self.fs.put_file_to_parent([(cksum, local_name)])
                # NOTE(review): nodes_traversed was captured at open time;
                # confirm it is still valid if the tree changed in between.
                self.root = self.fs.bubble_up_existing_dir(open_node.nodes_traversed, path.split('/')[-1], cksum, "file")
                self.update_log()
        finally:
            os.close(open_node.fd)
            del self.opened_files[path]

    # ------------------------------------------------------------------
    # Start-up
    # ------------------------------------------------------------------
    def main(self, *a, **kw):
        """Prepare the cache directory, root log and HashFS core, then serve.

        Returns whatever ``Fuse.main`` returns. The root log is closed when
        the FUSE loop ends.
        """
        # Normalise first so the directory check and every later
        # "<dir>/<checksum>" path use the same spelling.
        self.local_cache_dir = self.local_cache_dir.rstrip('/') or '/'
        if not os.path.isdir(self.local_cache_dir):
            print("Creating local cache directory: {}".format(self.local_cache_dir))
            # makedirs also creates missing intermediate directories
            os.makedirs(self.local_cache_dir)
        self.log_fh = open(self.log_file, "a")
        try:
            parent = "{}:{}".format(self.host, self.port)
            self.fs = HashFS_Core(parent_node=parent, local_cache_dir=self.local_cache_dir, local_run=self.local_run, hash_alg=self.hash_alg)
            return Fuse.main(self, *a, **kw)
        finally:
            self.log_fh.close()
def main():
    """Create the HashFS server, register its mount options, parse and run."""
    server = HashFS(
        version="%prog " + fuse.__version__,
        usage="A FUSE implementation of HashFS." + Fuse.fusage,
        dash_s_do='setsingle',
    )

    # (mount option, metavar, default, help) for every option that takes a value;
    # registered in this order.
    valued_options = (
        ("root", 'HASH',
         '44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a',
         "Specify a root hash [default: %default]"),
        ("host", 'HOST', 'localhost',
         "Specify the address of the parent node [default: %default]"),
        ("port", 'PORT', '9999',
         "Specify the port to connect to [default: %default]"),
        ("hash_alg", 'HASH', 'sha256',
         "Specify the hashing algorithm to use [default: %default]"),
        ("local_cache_dir", 'DIR', '/tmp/mkfs',
         "Specify a local cache directory [default: %default]"),
        ("log_file", 'LOG', '/tmp/mkfs/root_log.txt',
         "Specify a path to log file [default: %default]"),
    )
    for option_name, meta, default_value, help_text in valued_options:
        server.parser.add_option(mountopt=option_name, metavar=meta,
                                 default=default_value, help=help_text)

    # Boolean switch: present means "do not contact the parent node".
    server.parser.add_option(mountopt="local_run", action="store_true",
                             help="Run locally, do not put nodes to parent [default: False]")

    # The server instance is the values object, so parsed options become
    # attributes on it.
    server.parse(values=server, errex=4)
    server.main()
# Start the file system only when this file is executed as a script.
if __name__ == "__main__":
    main()