tka: implement filesystem chonk garbage collection

Signed-off-by: Tom DNetto <tom@tailscale.com>
tom/tka6
Tom DNetto 2023-03-23 12:08:16 -07:00
parent 6f9aed1656
commit 6462876f04
3 changed files with 196 additions and 5 deletions


@@ -4800,6 +4800,10 @@ func (b *LocalBackend) initTKALocked() error {
if err != nil {
return fmt.Errorf("opening tailchonk: %v", err)
}
// Actually delete data which has been marked purged for at least 7 days:
if err := storage.CollectGarbage(7 * 24 * time.Hour); err != nil {
b.logf("tka garbage collection failed: %v", err)
}
authority, err := tka.Open(storage)
if err != nil {
return fmt.Errorf("initializing tka: %v", err)


@@ -201,10 +201,6 @@ func ChonkDir(dir string) (*FS, error) {
return nil, fmt.Errorf("chonk directory %q is a file", dir)
}
// TODO(tom): *FS marks AUMs as deleted but does not actually
// delete them, to avoid data loss in the event of a bug.
// Implement deletion once we are fairly confident in the implementation.
return &FS{base: dir}, nil
}
@@ -218,6 +214,9 @@ func ChonkDir(dir string) (*FS, error) {
// much smaller than JSON for AUMs. The 'keyasint' thing isn't essential
// but again it saves a bunch of bytes.
type fsHashInfo struct {
// diskHash is the AUMHash of the on-disk file this structure was
// read from. It is populated at read time and is never serialized.
diskHash AUMHash
Children []AUMHash `cbor:"1,keyasint"`
AUM *AUM `cbor:"2,keyasint"`
CreatedUnix int64 `cbor:"3,keyasint,omitempty"`
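To illustrate the 'keyasint' savings: with integer keys the CBOR map key for each field costs a single byte, versus the full field name plus a length byte for string keys. A standalone sketch (assuming github.com/fxamacker/cbor/v2, which supports the keyasint tag option; the types here are illustrative, not part of tka, and exact byte counts vary with values):

package main

import (
	"fmt"

	"github.com/fxamacker/cbor/v2"
)

type intKeyed struct {
	Children []string `cbor:"1,keyasint"`
}

type stringKeyed struct {
	Children []string `cbor:"Children"`
}

func main() {
	a, _ := cbor.Marshal(intKeyed{Children: []string{"x"}})
	b, _ := cbor.Marshal(stringKeyed{Children: []string{"x"}})
	// The int key encodes as a single byte, versus len("Children")+1
	// bytes for the string key, so a is several bytes smaller than b.
	fmt.Println(len(a), len(b))
}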
@@ -344,6 +343,7 @@ func (c *FS) get(h AUMHash) (*fsHashInfo, error) {
if out.AUM != nil && out.AUM.Hash() != h {
return nil, fmt.Errorf("%s: AUM does not match file name hash %s", f.Name(), out.AUM.Hash())
}
out.diskHash = h
return &out, nil
}
@@ -380,6 +380,104 @@ func (c *FS) AllAUMs() ([]AUMHash, error) {
return out, err
}
// CollectGarbage frees up disk space by deleting AUMs which were
// marked as purged at least maxAge ago, as well as files which
// contain no data.
func (c *FS) CollectGarbage(maxAge time.Duration) error {
c.mu.Lock()
defer c.mu.Unlock()
// Collect the list of all stored hashes which are marked
// for deletion & old enough to delete.
var (
deletionCandidates = make(map[AUMHash]*fsHashInfo)
purgeBefore = time.Now().Add(-maxAge)
)
err := c.scanHashes(func(info *fsHashInfo) {
// Mark for deletion all hashes which are explicitly purged, or
// hashes that store no data.
purged := info.PurgedUnix > 0 && time.Unix(info.PurgedUnix, 0).Before(purgeBefore)
if purged || (info.AUM == nil && len(info.Children) == 0) {
deletionCandidates[info.diskHash] = info
}
})
if err != nil {
return err
}
// TODO: consistency check that neither a deletion candidate nor the
// parent of one is the last active ancestor.
for h, info := range deletionCandidates {
// First, if we store the parent, remove the reference to this
// hash as a child.
if info.AUM != nil {
if parent, haveParent := info.AUM.Parent(); haveParent {
dir, base := c.aumDir(parent)
_, err := os.Stat(filepath.Join(dir, base))
parentExists := err == nil
if parentExists {
err := c.commit(parent, func(info *fsHashInfo) {
newChildren := make([]AUMHash, 0, len(info.Children))
for _, child := range info.Children {
if child != h {
newChildren = append(newChildren, child)
}
}
info.Children = newChildren
})
if err != nil {
return fmt.Errorf("mutating parent %x of %x: %v", parent, h, err)
}
}
}
}
dir, base := c.aumDir(h)
path := filepath.Join(dir, base)
if len(info.Children) == 0 {
// This hash has no dependencies.
//
// Technically, info.Children could be stale: if this hash was
// someone's parent, that child would have removed its own hash from
// this list when it was deleted. Because that's only ever a deletion
// though, this is still safe: staleness just means we skip deleting
// this file now, and it gets deleted next time.
if err := os.Remove(path); err != nil {
return fmt.Errorf("removing dead entry: %w", err)
}
continue
}
// This hash has children it needs to keep track of, so it might
// not be deletable outright.
var deleteFile bool
err := c.commit(h, func(info *fsHashInfo) {
info.AUM = nil // in all cases this hash shouldn't store its own AUM info
newChildren := make([]AUMHash, 0, len(info.Children))
for _, child := range info.Children {
if _, deleted := deletionCandidates[child]; !deleted {
newChildren = append(newChildren, child)
}
}
info.Children = newChildren
deleteFile = len(newChildren) == 0
})
if err != nil {
return fmt.Errorf("mutating entry %x: %v", h, err)
}
if deleteFile {
if err := os.Remove(path); err != nil {
return fmt.Errorf("removing empty entry: %w", err)
}
}
}
return nil
}
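// scanHashes calls eachHashInfo for every fsHashInfo stored on disk,
// with diskHash populated so callers can map each entry to its file.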
func (c *FS) scanHashes(eachHashInfo func(*fsHashInfo)) error {
prefixDirs, err := os.ReadDir(c.base)
if err != nil {


@@ -630,6 +630,7 @@ func TestCompact(t *testing.T) {
// OLD is deleted because it does not match retention criteria, and
// though it is a descendant of the new lastActiveAncestor (C), it is not a
// descendant of a retained AUM.
// O is deleted because it is orphaned.
// G, & H are retained as recent (MinChain=2) ancestors of HEAD.
// E & F are retained because they are between retained AUMs (G+) and
// their newest checkpoint ancestor.
@@ -648,6 +649,9 @@ func TestCompact(t *testing.T) {
| -> F1 -> F2 | -> G2
| -> OLD
// Orphaned AUM
O
// make {A,B,C,D} compaction candidates
A.template = checkpoint
B.template = checkpoint
@@ -658,13 +662,14 @@ func TestCompact(t *testing.T) {
F1.hashSeed = 1
OLD.hashSeed = 2
G2.hashSeed = 3
O.hashSeed = 4
`, optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState}))
storage := &compactingChonkFake{
Mem: (*c.Chonk().(*Mem)),
aumAge: map[AUMHash]time.Time{(c.AUMHashes["F1"]): time.Now()},
t: t,
wantDelete: []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["OLD"]},
wantDelete: []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["O"], c.AUMHashes["OLD"]},
}
lastActiveAncestor, err := Compact(storage, c.AUMHashes["H"], CompactionOptions{MinChain: 2, MinAge: time.Hour})
@@ -681,3 +686,87 @@ func TestCompact(t *testing.T) {
}
}
}
func TestCollectGarbage(t *testing.T) {
fakeState := &State{
Keys: []Key{{Kind: Key25519, Votes: 1}},
DisablementSecrets: [][]byte{bytes.Repeat([]byte{1}, 32)},
}
c := newTestchain(t, `
A -> B -> C -> C2 -> D -> E -> F -> G -> H
| -> OLD | -> G2
// make {A,B,C,D} compaction candidates
A.template = checkpoint
B.template = checkpoint
C.template = checkpoint
D.template = checkpoint
// tweak seeds of forks so hashes aren't identical
OLD.hashSeed = 2
G2.hashSeed = 3
`, optTemplate("checkpoint", AUM{MessageKind: AUMCheckpoint, State: fakeState}))
// Populate a *FS chonk.
storage, err := ChonkDir(t.TempDir())
if err != nil {
t.Fatal(err)
}
for _, update := range c.AUMs {
if err := storage.CommitVerifiedAUMs([]AUM{update}); err != nil {
t.Fatal(err)
}
}
if err := storage.SetLastActiveAncestor(c.AUMHashes["A"]); err != nil {
t.Fatal(err)
}
// Run compaction.
lastActiveAncestor, err := Compact(storage, c.AUMHashes["H"], CompactionOptions{MinChain: 2, MinAge: 1})
if err != nil {
t.Errorf("Compact() failed: %v", err)
}
if lastActiveAncestor != c.AUMHashes["D"] {
t.Errorf("last active ancestor = %v, want %v", lastActiveAncestor, c.AUMHashes["D"])
}
deletedAUMs := []AUMHash{c.AUMHashes["A"], c.AUMHashes["B"], c.AUMHashes["C"], c.AUMHashes["C2"], c.AUMHashes["OLD"]}
// Make sure deleted AUMs are unreadable.
for _, h := range deletedAUMs {
if _, err := storage.AUM(h); err != os.ErrNotExist {
t.Errorf("storage.AUM(%v).err = %v, want ErrNotExist", h, err)
}
}
if err := storage.CollectGarbage(0); err != nil {
t.Fatal(err)
}
// Make sure files for deleted AUMs are gone.
for _, h := range deletedAUMs {
dir, base := storage.aumDir(h)
path := filepath.Join(dir, base)
// C2 is excluded, because its child D exists and the file
// stores the parent->child relationship.
if _, err := os.Stat(path); err == nil && h != c.AUMHashes["C2"] {
t.Errorf("file for deleted AUM %v exists", h)
}
}
if t.Failed() {
for name, hash := range c.AUMHashes {
t.Logf("AUM[%q] = %v", name, hash)
}
}
// Lastly, let's make sure an authority can start from the garbage-collected state.
a, err := Open(storage)
if err != nil {
t.Fatal(err)
}
if a.Head() != c.AUMHashes["H"] {
t.Errorf("head = %v, want %v", a.Head(), c.AUMHashes["H"])
}
}
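Taken together, the test demonstrates the intended lifecycle: Compact marks stale AUMs as purged (setting PurgedUnix) but leaves their files on disk, and a later CollectGarbage pass deletes files whose purge mark has aged out. A minimal sketch of a caller, assuming the caller persists the advanced ancestor the way the test seeds it; the path, head, and options here are illustrative:

storage, err := tka.ChonkDir("/path/to/tka-state")
if err != nil {
	// handle error
}
head := authority.Head() // authority is an open *tka.Authority
lastActiveAncestor, err := tka.Compact(storage, head, tka.CompactionOptions{MinChain: 2, MinAge: time.Hour})
if err != nil {
	// handle error
}
// Assumption: the caller persists the advanced ancestor, as the test
// does via SetLastActiveAncestor before compacting.
if err := storage.SetLastActiveAncestor(lastActiveAncestor); err != nil {
	// handle error
}
// Later (initTKALocked uses a 7-day window), reclaim the disk space.
if err := storage.CollectGarbage(7 * 24 * time.Hour); err != nil {
	// non-fatal in initTKALocked; the error is only logged
}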