Skip to content

Commit

Permalink
smart removal of hard linked (all in client)
Browse files Browse the repository at this point in the history
  • Loading branch information
SweetMnM committed Nov 11, 2022
1 parent 36a69e0 commit b597b31
Show file tree
Hide file tree
Showing 8 changed files with 246 additions and 8 deletions.
17 changes: 16 additions & 1 deletion cmd/clean.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ package cmd

import (
"encoding/json"
"time"

"github.com/dustin/go-humanize"
"github.com/spf13/cobra"

"github.com/l3uddz/tqm/client"
"github.com/l3uddz/tqm/config"
"github.com/l3uddz/tqm/expression"
"github.com/l3uddz/tqm/hardlinkfilemap"
"github.com/l3uddz/tqm/logger"
"github.com/l3uddz/tqm/torrentfilemap"
"github.com/l3uddz/tqm/tracker"
Expand Down Expand Up @@ -116,8 +118,21 @@ var cleanCmd = &cobra.Command{
tfm := torrentfilemap.New(torrents)
log.Infof("Mapped torrents to %d unique torrent files", tfm.Length())

// create map of paths associated to underlying file ids
clientDownloadPathMapping, err := getClientDownloadPathMapping(clientConfig)
if err != nil {
log.WithError(err).Fatal("Failed loading client download path mappings")
} else if clientDownloadPathMapping != nil {
log.Debugf("Loaded %d client download path mappings: %#v", len(clientDownloadPathMapping),
clientDownloadPathMapping)
}

start := time.Now()
hfm := hardlinkfilemap.New(torrents, clientDownloadPathMapping)
log.Infof("Mapped all torrent file paths to %d unique underlying file IDs in %s", hfm.Length(), time.Since(start))

// remove torrents that are not ignored and match remove criteria
if err := removeEligibleTorrents(log, c, torrents, tfm); err != nil {
if err := removeEligibleTorrents(log, c, torrents, tfm, hfm); err != nil {
log.WithError(err).Fatal("Failed removing eligible torrents...")
}
},
Expand Down
23 changes: 16 additions & 7 deletions cmd/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/l3uddz/tqm/client"
"github.com/l3uddz/tqm/config"
"github.com/l3uddz/tqm/hardlinkfilemap"
"github.com/l3uddz/tqm/torrentfilemap"
)

Expand Down Expand Up @@ -97,7 +98,7 @@ func relabelEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map
// torrent file is not unique, files are contained within another torrent
// so we cannot safely change the label in-case of auto move
nonUniqueTorrents++
log.Warnf("Skipping non unique torrent: %+v", t)
log.Warnf("Skipping non unique torrent | Name: %s / Label: %s / Tags: %s / Tracker: %s", t.Name, t.Label, strings.Join(t.Tags, ", "), t.TrackerName)
continue
}

Expand Down Expand Up @@ -153,7 +154,7 @@ func relabelEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map

// remove torrents that meet remove filters
func removeEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map[string]config.Torrent,
tfm *torrentfilemap.TorrentFileMap) error {
tfm *torrentfilemap.TorrentFileMap, hfm *hardlinkfilemap.HardlinkFileMap) error {
// vars
ignoredTorrents := 0
hardRemoveTorrents := 0
Expand Down Expand Up @@ -210,8 +211,9 @@ func removeEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map[
removedTorrentBytes += t.DownloadedBytes
hardRemoveTorrents++

// remove the torrent from the torrent file map
// remove the torrent from the torrent maps
tfm.Remove(*t)
hfm.RemoveByTorrent(*t)
delete(torrents, h)
}

Expand Down Expand Up @@ -247,11 +249,17 @@ func removeEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map[
}

// torrent meets the remove filters

// are the files unique and eligible for a hard deletion (remove data)
uniqueTorrent := tfm.IsUnique(t)
if !tfm.IsUnique(t) {
log.Warnf("Skipping non unique torrent | Name: %s / Label: %s / Tags: %s / Tracker: %s", t.Name, t.Label, strings.Join(t.Tags, ", "), t.TrackerName)
canidates[h] = t
continue
}

if !uniqueTorrent {
log.Tracef("%s not unique adding to canidates", t.Name)
// are the files not hardlinked to other torrents
if !hfm.IsTorrentUnique(t) {
log.Warnf("Skipping non unique torrent (hardlinked) | Name: %s / Label: %s / Tags: %s / Tracker: %s", t.Name, t.Label, strings.Join(t.Tags, ", "), t.TrackerName)
canidates[h] = t
continue
}
Expand All @@ -268,12 +276,13 @@ func removeEligibleTorrents(log *logrus.Entry, c client.Interface, torrents map[
// or can be safely removed
for _, t := range canidates {
tfm.Remove(t)
hfm.RemoveByTorrent(t)
}

// check again for unique torrents
removedCanidates := 0
for h, t := range canidates {
noInstances := tfm.NoInstances(t)
noInstances := tfm.NoInstances(t) && hfm.NoInstances(t)

if !noInstances {
log.Tracef("%s still not unique unique", t.Name)
Expand Down
19 changes: 19 additions & 0 deletions hardlinkfilemap/fileidentifier_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//go:build !windows && !plan9

package hardlinkfilemap

import (
"errors"
"os"
"strconv"
"syscall"
)

// FileIdentifier returns a string that identifies the file underlying fi.
//
// The identifier has the form "<device>|<inode>", built from the Dev and Ino
// fields of the *syscall.Stat_t carried by fi. Two paths that yield the same
// identifier refer to the same underlying file (for example hard links).
//
// An error is returned when fi does not carry a *syscall.Stat_t.
func FileIdentifier(fi os.FileInfo) (string, error) {
	sys, ok := fi.Sys().(*syscall.Stat_t)
	if !ok || sys == nil {
		return "", errors.New("failed to get file identifier")
	}

	// The concrete integer types of Dev and Ino differ between the platforms
	// this file is built for, so convert explicitly instead of relying on
	// them already being uint64.
	dev := strconv.FormatUint(uint64(sys.Dev), 10)
	ino := strconv.FormatUint(uint64(sys.Ino), 10)

	return dev + "|" + ino, nil
}
22 changes: 22 additions & 0 deletions hardlinkfilemap/fileidentifier_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package hardlinkfilemap

import (
	"errors"
	"os"
	"reflect"
	"strconv"
)

// FileIdentifier returns a string that identifies the file underlying fi.
//
// The identifier has the form "<vol>|<idxhi>|<idxlo>", read via reflection
// from the unexported fields of the same names on the value behind fi.
//
// An error is returned when os.SameFile rejects fi, or when the value behind
// fi is not a struct carrying those three unsigned integer fields.
func FileIdentifier(fi os.FileInfo) (string, error) {
	// This is a hack: comparing fi with itself is done purely to get the os
	// package to load the vol, idxhi and idxlo fields that are read below.
	if !os.SameFile(fi, fi) {
		return "", errors.New("error while getting file identifier")
	}

	v := reflect.Indirect(reflect.ValueOf(fi))
	if v.Kind() != reflect.Struct {
		return "", errors.New("error while getting file identifier: unexpected file info type")
	}

	// Validate each field before calling Uint, which would otherwise panic
	// on a missing field or a field of a non-unsigned kind.
	parts := make([]string, 0, 3)
	for _, name := range [...]string{"vol", "idxhi", "idxlo"} {
		f := v.FieldByName(name)
		switch f.Kind() {
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			parts = append(parts, strconv.FormatUint(f.Uint(), 10))
		default:
			return "", errors.New("error while getting file identifier: missing field " + name)
		}
	}

	return parts[0] + "|" + parts[1] + "|" + parts[2], nil
}
142 changes: 142 additions & 0 deletions hardlinkfilemap/hardlinkfilemap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package hardlinkfilemap

import (
"os"
"strings"

"github.com/l3uddz/tqm/config"
"github.com/l3uddz/tqm/logger"
"github.com/l3uddz/tqm/sliceutils"
)

// New builds a HardlinkFileMap from the given torrents.
//
// torrentPathMapping is stored on the map and applied to every torrent file
// path (see ConsiderPathMapping) before the file is looked up on disk. Each
// torrent is registered through AddByTorrent.
func New(torrents map[string]config.Torrent, torrentPathMapping map[string]string) *HardlinkFileMap {
	hfm := &HardlinkFileMap{
		hardlinkFileMap:    make(map[string][]string),
		log:                logger.GetLogger("hardlinkfilemap"),
		torrentPathMapping: torrentPathMapping,
	}

	// register the files of every known torrent
	for hash := range torrents {
		hfm.AddByTorrent(torrents[hash])
	}

	return hfm
}

// ConsiderPathMapping rewrites path according to the configured path
// mappings and returns the result.
//
// A mapping applies when its key is a prefix of path; the prefix is then
// replaced by the mapping's value. When several mappings apply, the one with
// the longest prefix wins, so the result does not depend on map iteration
// order. When no mapping applies, path is returned unchanged.
func (t *HardlinkFileMap) ConsiderPathMapping(path string) string {
	bestFrom, bestTo, found := "", "", false

	for mapFrom, mapTo := range t.torrentPathMapping {
		if !strings.HasPrefix(path, mapFrom) {
			continue
		}

		// prefer the most specific (longest) matching prefix
		if !found || len(mapFrom) > len(bestFrom) {
			bestFrom, bestTo, found = mapFrom, mapTo, true
		}
	}

	if !found {
		return path
	}

	// bestFrom is a prefix of path, so swapping it out is a simple splice
	return bestTo + strings.TrimPrefix(path, bestFrom)
}

// FileIdentifierByPath stats path and returns the identifier of the file it
// refers to, as produced by FileIdentifier.
//
// The boolean is false when the file could not be stat'ed or no identifier
// could be derived; in both cases a warning is logged and the returned
// identifier is empty.
func (t *HardlinkFileMap) FileIdentifierByPath(path string) (string, bool) {
	info, statErr := os.Stat(path)
	if statErr != nil {
		t.log.Warnf("Failed to stat file: %s - %s", path, statErr)
		return "", false
	}

	id, idErr := FileIdentifier(info)
	if idErr == nil {
		return id, true
	}

	t.log.Warnf("Failed to get file identifier: %s - %s", path, idErr)
	return "", false
}

// AddByTorrent registers every file of torrent under the identifier of its
// underlying file.
//
// Paths are passed through ConsiderPathMapping first. Files whose identifier
// cannot be determined are skipped. A path is appended each time it is
// added, so the same path may appear more than once under an identifier.
func (t *HardlinkFileMap) AddByTorrent(torrent config.Torrent) {
	for _, file := range torrent.Files {
		mapped := t.ConsiderPathMapping(file)

		id, ok := t.FileIdentifierByPath(mapped)
		if ok {
			// appending to a missing entry creates it, so first-seen and
			// already-known identifiers are handled the same way
			t.hardlinkFileMap[id] = append(t.hardlinkFileMap[id], mapped)
		}
	}
}

// RemoveByTorrent removes one occurrence of every file path of torrent from
// the map, dropping an identifier entry once it has no paths left.
//
// Paths are passed through ConsiderPathMapping first. The identifier is
// normally obtained by stat'ing the file. When that fails (for example
// because the file has already been deleted), the map is searched for the
// path instead, so that stale entries do not linger.
func (t *HardlinkFileMap) RemoveByTorrent(torrent config.Torrent) {
	for _, f := range torrent.Files {
		f = t.ConsiderPathMapping(f)

		if id, ok := t.FileIdentifierByPath(f); ok {
			t.removePath(id, f)
			continue
		}

		// the file could not be identified on disk: locate the path by
		// scanning the known entries (deleting while ranging is safe in Go)
		for id, paths := range t.hardlinkFileMap {
			if sliceutils.IndexOfString(paths, f) != -1 {
				t.removePath(id, f)
				break
			}
		}
	}
}

// removePath removes one occurrence of path from the entry for id and
// deletes the entry when no paths remain. Unknown ids are ignored.
func (t *HardlinkFileMap) removePath(id, path string) {
	paths, exists := t.hardlinkFileMap[id]
	if !exists {
		return
	}

	if i := sliceutils.IndexOfString(paths, path); i != -1 {
		paths = sliceutils.FastDelete(paths, i)
	}

	if len(paths) == 0 {
		delete(t.hardlinkFileMap, id)
		return
	}

	t.hardlinkFileMap[id] = paths
}

// IsTorrentUnique reports whether none of the files of torrent share their
// underlying file with another registered path.
//
// Paths are passed through ConsiderPathMapping first. It returns false as
// soon as a file's identifier cannot be determined, or when more than one
// path is registered under a file's identifier.
func (t *HardlinkFileMap) IsTorrentUnique(torrent config.Torrent) bool {
	for _, f := range torrent.Files {
		f = t.ConsiderPathMapping(f)

		id, ok := t.FileIdentifierByPath(f)
		if !ok {
			return false
		}

		paths := t.hardlinkFileMap[id]

		// per-file diagnostics; kept at trace level so normal runs are not
		// flooded with one pair of lines per torrent file
		t.log.Tracef("File: %s - ID: %s", f, id)
		t.log.Tracef("File ID Entry: %v", paths)

		if len(paths) > 1 {
			return false
		}
	}

	return true
}

// NoInstances reports whether no path at all is registered for any of the
// underlying files of torrent.
//
// Paths are passed through ConsiderPathMapping first. It returns false when
// a file's identifier cannot be determined, or when at least one path is
// still registered under a file's identifier.
func (t *HardlinkFileMap) NoInstances(torrent config.Torrent) bool {
	for _, file := range torrent.Files {
		mapped := t.ConsiderPathMapping(file)

		id, ok := t.FileIdentifierByPath(mapped)
		if !ok {
			return false
		}

		// a missing entry yields a nil slice, whose length is zero
		if len(t.hardlinkFileMap[id]) > 0 {
			return false
		}
	}

	return true
}

// Length returns the number of distinct file identifiers currently held in
// the map.
func (t *HardlinkFileMap) Length() int {
	count := len(t.hardlinkFileMap)
	return count
}
12 changes: 12 additions & 0 deletions hardlinkfilemap/struct.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package hardlinkfilemap

import (
"github.com/sirupsen/logrus"
)

// HardlinkFileMap tracks which torrent file paths refer to the same
// underlying file, so that hard-linked files can be detected.
type HardlinkFileMap struct {
// hardlinkFileMap maps a file identifier (as returned by FileIdentifier)
// to the paths that were registered for it via AddByTorrent.
hardlinkFileMap map[string][]string
// log is the logger used for warnings about files that cannot be
// stat'ed or identified.
log *logrus.Entry
// torrentPathMapping maps path prefixes to their replacements; it is
// applied to torrent file paths by ConsiderPathMapping.
torrentPathMapping map[string]string
}
8 changes: 8 additions & 0 deletions sliceutils/fastdelete.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package sliceutils

// FastDelete removes the element at index i from s without preserving the
// order of the remaining elements: the last element is moved into position i
// and the slice is shortened by one.
//
// The vacated tail slot of the backing array is reset to the zero value so
// the array does not keep a stale duplicate alive. s is modified in place and
// the shortened slice is returned.
//
// FastDelete panics if i is out of range for s.
func FastDelete[S ~[]E, E any](s S, i int) S {
	_ = s[i] // bounds check

	last := len(s) - 1
	s[i] = s[last]

	// clear the slot that is about to fall outside the returned slice
	var zero E
	s[last] = zero

	return s[:last]
}
11 changes: 11 additions & 0 deletions sliceutils/indexofstring.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package sliceutils

// IndexOfString returns the index of the first element of s equal to e, or
// -1 when s does not contain e.
func IndexOfString(s []string, e string) int {
	for idx := 0; idx < len(s); idx++ {
		if s[idx] == e {
			return idx
		}
	}

	return -1
}

0 comments on commit b597b31

Please sign in to comment.