def remove_related_files(fname, indexer, out_dir):
    """Remove the index entries and chunk files derived from a source file.

    Queries the indexer for every chunk whose ``main_source`` metadata points
    at *fname*, deletes those entries from the index, then removes the
    corresponding chunk files from ``<out_dir>/Chunk``.

    Args:
        fname: path of the (now removed) source file the chunks came from.
        indexer: vector-store collection exposing ``get(where=...)`` and
            ``delete(ids)`` — presumably a ChromaDB-style collection; confirm
            against the caller.
        out_dir: output directory containing the ``Chunk`` subdirectory.
    """
    results = indexer.get(where={"main_source": {"$eq": fname}})
    print(
        f"Removing {len(results['ids'])} related files to {fname}: {' '.join(results['ids'])}",
        file=sys.stderr,
    )
    if len(results["ids"]) > 0:
        indexer.delete(results["ids"])
        for chunk_id in results["ids"]:
            # Best effort: a chunk file may already be gone (stale index
            # entry). The index rows were just deleted above, so a missing
            # file must not abort cleanup of the remaining chunk files.
            try:
                os.remove(os.path.join(out_dir, "Chunk", chunk_id))
            except FileNotFoundError:
                pass