From 6462876f045116b88de1c3a8a5362d03d70546cd Mon Sep 17 00:00:00 2001
From: Tom DNetto
Date: Thu, 23 Mar 2023 12:08:16 -0700
Subject: [PATCH] tka: implement filesystem chonk garbage collection

Signed-off-by: Tom DNetto
---
 ipn/ipnlocal/local.go |   4 ++
 tka/tailchonk.go      | 106 ++++++++++++++++++++++++++++++++++++++++--
 tka/tailchonk_test.go |  91 +++++++++++++++++++++++++++++++++++-
 3 files changed, 196 insertions(+), 5 deletions(-)

diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go
index b27509a22..7c0db4e64 100644
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@@ -4800,6 +4800,10 @@ func (b *LocalBackend) initTKALocked() error {
 	if err != nil {
 		return fmt.Errorf("opening tailchonk: %v", err)
 	}
+	// Actually delete data which was purged more than 7 days ago:
+	if err := storage.CollectGarbage(7 * 24 * time.Hour); err != nil {
+		b.logf("tka garbage collection failed: %v", err)
+	}
 	authority, err := tka.Open(storage)
 	if err != nil {
 		return fmt.Errorf("initializing tka: %v", err)
diff --git a/tka/tailchonk.go b/tka/tailchonk.go
index 90af2df7f..b667cb71f 100644
--- a/tka/tailchonk.go
+++ b/tka/tailchonk.go
@@ -201,10 +201,6 @@ func ChonkDir(dir string) (*FS, error) {
 		return nil, fmt.Errorf("chonk directory %q is a file", dir)
 	}
 
-	// TODO(tom): *FS marks AUMs as deleted but does not actually
-	// delete them, to avoid data loss in the event of a bug.
-	// Implement deletion after we are fairly sure in the implementation.
-
 	return &FS{base: dir}, nil
 }
 
@@ -218,6 +214,9 @@ func ChonkDir(dir string) (*FS, error) {
 // much smaller than JSON for AUMs. The 'keyasint' thing isn't essential
 // but again it saves a bunch of bytes.
 type fsHashInfo struct {
+	// diskHash specifies the AUMHash this structure describes.
+	diskHash AUMHash
+
 	Children    []AUMHash `cbor:"1,keyasint"`
 	AUM         *AUM      `cbor:"2,keyasint"`
 	CreatedUnix int64     `cbor:"3,keyasint,omitempty"`
@@ -344,6 +343,7 @@ func (c *FS) get(h AUMHash) (*fsHashInfo, error) {
 	if out.AUM != nil && out.AUM.Hash() != h {
 		return nil, fmt.Errorf("%s: AUM does not match file name hash %s", f.Name(), out.AUM.Hash())
 	}
+	out.diskHash = h
 	return &out, nil
 }
 
@@ -380,6 +380,104 @@ func (c *FS) AllAUMs() ([]AUMHash, error) {
 	return out, err
 }
 
+// CollectGarbage frees up disk space by removing purged AUMs
+// and files which contain no data.
+func (c *FS) CollectGarbage(maxAge time.Duration) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	// Collect the list of all stored hashes which are marked
+	// for deletion & old enough to delete.
+	var (
+		deletionCandidates = make(map[AUMHash]*fsHashInfo)
+		purgeBefore        = time.Now().Add(-maxAge)
+	)
+	err := c.scanHashes(func(info *fsHashInfo) {
+		// Mark for deletion all hashes which are explicitly purged, or
+		// hashes that store no data.
+		purged := info.PurgedUnix > 0 && time.Unix(info.PurgedUnix, 0).Before(purgeBefore)
+		if purged || (info.AUM == nil && len(info.Children) == 0) {
+			deletionCandidates[info.diskHash] = info
+		}
+	})
+	if err != nil {
+		return err
+	}
+
+	// TODO: consistency check that no deletion candidate, and no parent
+	// of a deletion candidate, is the last active ancestor.
+
+	for h, info := range deletionCandidates {
+
+		// First, if we store the parent, remove the reference to this
+		// hash as a child.
+		if info.AUM != nil {
+			if parent, haveParent := info.AUM.Parent(); haveParent {
+				dir, base := c.aumDir(parent)
+				_, err := os.Stat(filepath.Join(dir, base))
+				parentExists := err == nil
+
+				if parentExists {
+					err := c.commit(parent, func(info *fsHashInfo) {
+						newChildren := make([]AUMHash, 0, len(info.Children))
+						for _, child := range info.Children {
+							if child != h {
+								newChildren = append(newChildren, child)
+							}
+						}
+						info.Children = newChildren
+					})
+					if err != nil {
+						return fmt.Errorf("mutating parent %x of %x: %v", parent, h, err)
+					}
+				}
+			}
+		}
+
+		dir, base := c.aumDir(h)
+		path := filepath.Join(dir, base)
+		if len(info.Children) == 0 {
+			// This hash has no dependencies.
+			//
+			// Technically, info.Children could be stale: if this hash was
+			// someone's parent, that someone would have removed its hash from
+			// the list. Because that's only ever a deletion, this is still
+			// safe: staleness just means we don't delete this file now, and
+			// it will be deleted next time.
+			if err := os.Remove(path); err != nil {
+				return fmt.Errorf("removing dead entry: %w", err)
+			}
+			continue
+		}
+
+		// This hash has children it needs to keep track of, so it might
+		// not be deletable outright.
+		var deleteFile bool
+		err := c.commit(h, func(info *fsHashInfo) {
+			info.AUM = nil // in all cases this hash shouldn't store its own AUM
+			newChildren := make([]AUMHash, 0, len(info.Children))
+			for _, child := range info.Children {
+				if _, deleted := deletionCandidates[child]; !deleted {
+					newChildren = append(newChildren, child)
+				}
+			}
+			info.Children = newChildren
+			deleteFile = len(newChildren) == 0
+		})
+		if err != nil {
+			return fmt.Errorf("mutating entry %x: %v", h, err)
+		}
+
+		if deleteFile {
+			if err := os.Remove(path); err != nil {
+				return fmt.Errorf("removing empty entry: %w", err)
+			}
+		}
+	}
+
+	return nil
+}
+
 func (c *FS) scanHashes(eachHashInfo func(*fsHashInfo)) error {
 	prefixDirs, err := os.ReadDir(c.base)
 	if err != nil {
diff --git a/tka/tailchonk_test.go b/tka/tailchonk_test.go
index 0ba263b7b..f908007b4 100644
--- a/tka/tailchonk_test.go
+++ b/tka/tailchonk_test.go
@@ -630,6 +630,7 @@ func TestCompact(t *testing.T) {
 	// OLD is deleted because it does not match retention criteria, and
 	// though it is a descendant of the new lastActiveAncestor (C), it is not a
 	// descendant of a retained AUM.
+	// O is deleted because it is orphaned.
 	// G, & H are retained as recent (MinChain=2) ancestors of HEAD.
 	// E & F are retained because they are between retained AUMs (G+) and
 	// their newest checkpoint ancestor.
@@ -648,6 +649,9 @@ func TestCompact(t *testing.T) {
           | -> F1 -> F2
           | -> G2
           | -> OLD
+        // Orphaned AUM
+        O
+
         // make {A,B,C,D} compaction candidates
         A.template = checkpoint
         B.template = checkpoint
@@ -658,13 +662,14 @@ func TestCompact(t *testing.T) {
         F1.hashSeed = 1
         OLD.hashSeed = 2
         G2.hashSeed = 3
+        O.hashSeed = 4
     `, optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState}))
 
 	storage := &compactingChonkFake{
 		Mem:        (*c.Chonk().(*Mem)),
 		aumAge:     map[AUMHash]time.Time{(c.AUMHashes["F1"]): time.Now()},
 		t:          t,
-		wantDelete: []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["OLD"]},
+		wantDelete: []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["O"], c.AUMHashes["OLD"]},
 	}
 
 	lastActiveAncestor, err := Compact(storage, c.AUMHashes["H"], CompactionOptions{MinChain: 2, MinAge: time.Hour})
@@ -681,3 +686,87 @@ func TestCompact(t *testing.T) {
 		}
 	}
 }
+
+func TestCollectGarbage(t *testing.T) {
+	fakeState := &State{
+		Keys:               []Key{{Kind: Key25519, Votes: 1}},
+		DisablementSecrets: [][]byte{bytes.Repeat([]byte{1}, 32)},
+	}
+
+	c := newTestchain(t, `
+        A -> B -> C -> C2 -> D -> E -> F -> G -> H
+          | -> OLD             | -> G2
+
+        // make {A,B,C,D} compaction candidates
+        A.template = checkpoint
+        B.template = checkpoint
+        C.template = checkpoint
+        D.template = checkpoint
+
+        // tweak seeds of forks so hashes aren't identical
+        OLD.hashSeed = 2
+        G2.hashSeed = 3
+    `, optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState}))
+
+	// Populate a *FS chonk.
+	storage, err := ChonkDir(t.TempDir())
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, update := range c.AUMs {
+		if err := storage.CommitVerifiedAUMs([]AUM{update}); err != nil {
+			t.Fatal(err)
+		}
+	}
+	if err := storage.SetLastActiveAncestor(c.AUMHashes["A"]); err != nil {
+		t.Fatal(err)
+	}
+
+	// Run compaction.
+	lastActiveAncestor, err := Compact(storage, c.AUMHashes["H"], CompactionOptions{MinChain: 2, MinAge: 1})
+	if err != nil {
+		t.Errorf("Compact() failed: %v", err)
+	}
+	if lastActiveAncestor != c.AUMHashes["D"] {
+		t.Errorf("last active ancestor = %v, want %v", lastActiveAncestor, c.AUMHashes["D"])
+	}
+
+	deletedAUMs := []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["C"], c.AUMHashes["C2"], c.AUMHashes["OLD"]}
+
+	// Make sure deleted AUMs are unreadable.
+	for _, h := range deletedAUMs {
+		if _, err := storage.AUM(h); err != os.ErrNotExist {
+			t.Errorf("storage.AUM(%v).err = %v, want ErrNotExist", h, err)
+		}
+	}
+
+	if err := storage.CollectGarbage(0); err != nil {
+		t.Fatal(err)
+	}
+
+	// Make sure files for deleted AUMs are gone.
+	for _, h := range deletedAUMs {
+		dir, base := storage.aumDir(h)
+		path := filepath.Join(dir, base)
+		// C2 is excluded, because its child D still exists and C2's
+		// file records that parent->child relationship.
+		if _, err := os.Stat(path); err == nil && h != c.AUMHashes["C2"] {
+			t.Errorf("file for deleted AUM %v exists", h)
+		}
+	}
+
+	if t.Failed() {
+		for name, hash := range c.AUMHashes {
+			t.Logf("AUM[%q] = %v", name, hash)
+		}
+	}
+
+	// Lastly, let's make sure an authority can start from the garbage-collected state.
+	a, err := Open(storage)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if a.Head() != c.AUMHashes["H"] {
+		t.Errorf("head = %v, want %v", a.Head(), c.AUMHashes["H"])
+	}
+}
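--
A review sketch, not part of the applied patch: the heart of CollectGarbage
is a two-case sweep over the deletion candidates — remove an entry outright
once none of its children survive, otherwise keep it as a bare
parent->child record with its payload dropped. The program below models
that logic over a simplified in-memory map instead of the on-disk
fsHashInfo files; the names entry and pruneCandidates are illustrative
only, and parents are found by scanning rather than via AUM.Parent() as the
patch does.

package main

import "fmt"

// entry models a single on-disk fsHashInfo record, reduced to the only
// field the sweep cares about: which children reference this hash.
type entry struct {
	children []string
}

// pruneCandidates applies the two deletion cases from (*FS).CollectGarbage.
func pruneCandidates(store map[string]*entry, candidates map[string]bool) {
	// Phase 1: strip candidates out of the child lists of live parents.
	for h, e := range store {
		if candidates[h] {
			continue
		}
		kept := e.children[:0]
		for _, c := range e.children {
			if !candidates[c] {
				kept = append(kept, c)
			}
		}
		e.children = kept
	}
	// Phase 2: delete or truncate the candidates themselves.
	for h := range candidates {
		e, ok := store[h]
		if !ok {
			continue
		}
		kept := make([]string, 0, len(e.children))
		for _, c := range e.children {
			if !candidates[c] {
				kept = append(kept, c)
			}
		}
		if len(kept) == 0 {
			delete(store, h) // analogous to os.Remove of the entry file
		} else {
			e.children = kept // survives purely as a parent->child index
		}
	}
}

func main() {
	// A -> B -> C -> D, where A, B, and C were purged but D is retained.
	store := map[string]*entry{
		"A": {children: []string{"B"}},
		"B": {children: []string{"C"}},
		"C": {children: []string{"D"}},
		"D": {},
	}
	pruneCandidates(store, map[string]bool{"A": true, "B": true, "C": true})
	_, haveA := store["A"]
	fmt.Println(haveA)               // false: A (and B) are gone entirely
	fmt.Println(store["C"].children) // [D]: C survives as a bare record

}

This mirrors why TestCollectGarbage excludes C2 from the file-is-gone
check: C2's child D is retained, so C2's file must survive to record the
parent->child relationship. Staleness in a child list is tolerated for the
same reason the patch's comment gives: it only ever defers a file's
deletion to the next collection.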
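Separately, the 'keyasint' tags on fsHashInfo (noted in the context lines
as a byte-saver) belong to the fxamacker/cbor encoder and make it write
integer map keys in place of field-name strings. A standalone comparison
of the two encodings, with rec as a hypothetical stand-in for fsHashInfo:

package main

import (
	"fmt"

	"github.com/fxamacker/cbor/v2"
)

// rec stands in for fsHashInfo: keyasint stores the fields under the
// integer keys 1 and 3 rather than under their names.
type rec struct {
	Children    []string `cbor:"1,keyasint"`
	CreatedUnix int64    `cbor:"3,keyasint,omitempty"`
}

func main() {
	compact, err := cbor.Marshal(rec{Children: []string{"x"}, CreatedUnix: 42})
	if err != nil {
		panic(err)
	}
	// The same data keyed by field-name strings, for comparison.
	verbose, err := cbor.Marshal(map[string]any{
		"Children":    []string{"x"},
		"CreatedUnix": int64(42),
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("keyasint:    %d bytes (% x)\n", len(compact), compact)
	fmt.Printf("string keys: %d bytes (% x)\n", len(verbose), verbose)
}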