Skip to content

Commit

Permalink
+trunc-filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
mk-fg committed Jun 6, 2024
1 parent b887cee commit 0a674c1
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 0 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Contents - links to doc section for each script here:
- [docker-ln](#hdr-docker-ln)
- [fast-disk-wipe](#hdr-fast-disk-wipe)
- [lsx](#hdr-lsx)
- [trunc-filenames](#hdr-trunc-filenames)

- [Various file-data processing tools](#hdr-various_file-data_processing_tools)

Expand Down Expand Up @@ -476,6 +477,26 @@ For example, to print `-a/--adjacent` files (w/ some ordering):

Simple python script with no extra dependencies.

<a name=hdr-trunc-filenames></a>
<a name=user-content-hdr-trunc-filenames></a>
##### [trunc-filenames](trunc-filenames)

Python script to recursively shorten (truncate) file/directory names
to fit under a specified byte-limit, respecting typical filename format,
suffixes and multibyte encodings.

Useful for transferring files from NTFS and similar filesystems
to POSIX/linux ones that have a strict 255-byte filename-length limit,
where non-English paths can quickly get very long in bytes.

Truncates names decoded to unicode characters to avoid splitting those,
has somewhat complicated rules for how to truncate filenames with dot-suffixes
and multiple dots in them, disambiguates rename destinations on conflicts,
always keeps longest filename possible under `-l/--max-len` limit,
inserts unicode-ellipsis (…) character to indicate where truncation was made.

Defaults to dry-run mode for safety, only printing all renames to be made.



<a name=hdr-various_file-data_processing_tools></a>
Expand Down
112 changes: 112 additions & 0 deletions trunc-filenames
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python

import os, sys, re, pathlib as pl, itertools as it

def p_err(*a, **kw):
    '''Print an "ERROR:"-prefixed message to stderr and flush it right away.

    All positional and keyword arguments are forwarded to print() as-is,
    except that "file" and "flush" are always set here.'''
    print('ERROR:', *a, **kw, file=sys.stderr, flush=True)


def fn_split(fn):
    '''Split a file name on dots into a (stem, [suffix, ...]) tuple.

    Accepts a plain string or any object with a "name" attribute
    (e.g. pathlib.Path), in which case that attribute is what gets split.
    Everything before the first dot is the stem, and every following
    dot-separated part is a separate suffix, returned without dots.
    A name without any dots is returned whole, with an empty suffix list.
    pathlib's own stem/suffixes attributes are intentionally not used here.'''
    name = getattr(fn, 'name', fn)
    stem, dot, rest = name.partition('.')
    return stem, (rest.split('.') if dot else [])

def fn_trunc(fn_stem, fn_suffixes=(), max_len=240, el='…'):
    '''Truncate a name given as stem + dot-suffixes, marking the cut with "el".

    Three strategies are tried in order, and the first one that works is used:
      1. Cut characters off the end of the stem, keeping all suffixes intact,
         which produces "<stem><el>.<suffixes>".
      2. Keep the stem whole and repeatedly cut the last character off
         whichever suffix is currently the longest - "<stem>.<el><suffixes>".
      3. Cut one character at a time off both the end of the stem and the
         start of the dot-joined suffixes - "<stem><el>.<suffixes>".
    Cuts are made on str characters while lengths are measured on encoded
    bytes, so multibyte characters are never split in the middle.
    Whitespace left on the edges of a part after a cut is stripped.

    Arguments:
      fn_stem: part of the name before the suffixes.
      fn_suffixes: sequence of suffix strings without dots. It is copied
        and never modified. With no suffixes, no dot is added to the result.
      max_len: limit in bytes for the result, NOT counting the inserted "el",
        so returned name can be up to len(el.encode()) bytes longer than it.
      el: marker string to insert at the place where truncation is made.

    Returns the new name, which always has "el" inserted,
      even if the name was short enough and nothing had to be cut.
    Raises ValueError if nothing is left of the name after truncation.'''
    # Work on a private copy - stage 2 below edits suffixes in-place,
    #  which must not leak into caller's list or into stage 3.
    suffixes = list(fn_suffixes)
    sep = '.' if suffixes else ''  # no dangling dot for suffix-less names
    suff_full = '.'.join(suffixes)
    stem_n, suff_n = len(fn_stem.encode()), len(suff_full.encode())

    # Stage 1: shorten the stem only
    stem = fn_stem
    while stem and len(stem.encode()) + suff_n + len(sep) > max_len:
        stem = stem[:-1].strip()
    if stem: return f'{stem}{el}{sep}{suff_full}'

    # Stage 2: suffixes alone are too long for any stem - shorten them instead
    suff, suff_list = suff_full, list(suffixes)
    while any(suff_list) and len(suff.encode()) + stem_n + 1 > max_len:
        # Longest suffix in bytes gets cut, last one of those on a tie
        _, sn = max((len(v.encode()), n) for n, v in enumerate(suff_list))
        suff_list[sn] = suff_list[sn][:-1]
        suff = '.'.join(suff_list).strip()
    # Loop above can also stop when only empty suffixes (dots) are left,
    #  so check that result actually fits before using it.
    if suff and len(suff.encode()) + stem_n + 1 <= max_len:
        return f'{fn_stem}.{el}{suff}'

    # Stage 3: shorten both original stem and original suffixes in lockstep
    stem, suff = fn_stem, suff_full
    while (stem or suff) and len(f'{stem}{sep}{suff}'.encode()) > max_len:
        stem, suff = stem and stem[:-1].strip(), suff and suff[1:].strip()
    if not stem and not suff: raise ValueError([fn_stem, suffixes])
    return f'{stem}{el}{sep}{suff}'

def run_renames(p_top, max_len=240, ignore_suff=False, dry_run=True, quiet=False):
max_len_src, max_len = max_len, max_len - len((el := '…').encode())
n, len_repr = 0, lambda fn: ( f'{len(fn):,d} B'
if len(fn) == len(fn.encode()) else f'{len(fn)}c/{len(fn.encode()):,d}B' )
fnt = lambda st, suff=list(), ml=max_len: fn_trunc(st, suff, max_len=ml, el=el)

for root, dirs, files in p_top.walk(top_down=False):
for name in it.chain(dirs, files):
if len(name.encode()) <= max_len: continue
name_src, p = name, root / name

if not ignore_suff:
st, suffs = fn_split(p)
if sum(map(len, suffs)) > len(st):
st, suffs = p.name.rsplit('.', 1); suffs = [suffs]
else: st, suffs = p.name, list()

name = fnt(st, suffs)
if name_src == name: raise ValueError(p)

if not quiet:
if n: print()
n += 1
print(
f'Rename #{n} [ {len_repr(name_src)} -> {len_repr(name)} ] in [ {p.parent} ]:'
f'\n {p.name!r}\n -> {name!r}')

m, dst = 0, root / name
while dst.exists() and m <= 99:
m += 1; name = fnt(st, suffs, ml=max_len - 4)
name += f'.{m:02d}'; dst = root / name
if m >= 99: raise ValueError(p)
if len(name.encode()) > max_len_src: raise ValueError(p)
if not dry_run: p.rename(dst)


def main(argv=None):
    '''Command-line entry point - parse arguments and process all specified paths.

    argv is the list of command-line arguments to parse,
    with sys.argv[1:] used if it is None.
    Exits via parser.error() if -l/--max-len is below 50, raises an error
    from Path.resolve(strict=True) if any of the specified paths is missing,
    and otherwise calls run_renames() for each path, in the order specified.'''
    import argparse, textwrap

    def dd(text):
        'Dedent and clean up a multiline help-text literal.'
        text = textwrap.dedent(text).strip('\n') + '\n'
        return re.sub(r' \t+', ' ', text).replace('\t', ' ')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=dd('''
            Truncate file/dir names recursively under a specified dir(s).
            Uses unicode names to the best of its ability,
            but should fallback to splitting bytes if name doesn't decode properly.
            Default operation mode is dry-run, just to be safe -
            use -x/--execute flag to actually make script rename stuff.
            Any error should stop the script with a traceback.'''))
    add_arg = parser.add_argument

    add_arg('paths', nargs='*', help=dd('''
        Directories to scan for long names to truncate.
        Both files and directories are renamed, in what should be a safe order.'''))

    add_arg('-l', '--max-len',
        metavar='bytes', type=int, default=240, help=dd('''
            Max length for all resulting file/directory names, in bytes.
            For sanity reasons, should be >50B. Default: %(default)s'''))
    add_arg('--ignore-suffixes', action='store_true', help=dd('''
        Do not try to truncate names preserving dot-separated suffixes/extensions.
        Default is to try truncating suffix-less "name" part first, then suffixes.'''))
    add_arg('-q', '--quiet', action='store_true', help=dd('''
        Do not print (potential) renames to stdout.'''))
    add_arg('-x', '--execute', action='store_true', help=dd('''
        Actually rename files/dirs that are printed without this and -q/--quiet flags.
        Script runs in dry-run mode only finding/printing long names by default.'''))

    cli_args = sys.argv[1:] if argv is None else argv
    opts = parser.parse_args(cli_args)
    if opts.max_len < 50: parser.error('-l/--max-len should be >50B')

    paths = [pl.Path(p) for p in opts.paths]
    # All paths are checked for existence first, before anything gets renamed
    for p in paths: p.resolve(strict=True)
    for p in paths:
        run_renames(
            p,
            ignore_suff=opts.ignore_suffixes,
            max_len=opts.max_len,
            quiet=opts.quiet,
            dry_run=not opts.execute)

if __name__ == '__main__':
    try:
        sys.exit(main())
    except BrokenPipeError:
        # Reader of the stdout pipe is gone (e.g. output piped to "head").
        # Point stdout fd to devnull before exiting with an error code,
        #  so that nothing more gets written into that closed pipe.
        devnull_fd = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull_fd, sys.stdout.fileno())
        sys.exit(1)

0 comments on commit 0a674c1

Please sign in to comment.