diff options
| author | Pablo M. Bermudo Garay <pablombg@gmail.com> | 2022-02-09 19:58:30 +0100 |
|---|---|---|
| committer | Pablo M. Bermudo Garay <pablombg@gmail.com> | 2022-02-10 13:14:19 +0100 |
| commit | 53667583c094252818f203b9233a7726d0a4e8a0 (patch) | |
| tree | 7bcc7cbaf784f149113cba85a837515d70ae1c87 /objectstore | |
| parent | c488ed1f80fd54846170c0ab04ce59a69f52a9e5 (diff) | |
Add proactive de-duplication to the memory backend
Diffstat (limited to 'objectstore')
| -rw-r--r-- | objectstore/memory_backend.go | 42 | ||||
| -rw-r--r-- | objectstore/memory_backend_test.go | 51 | ||||
| -rw-r--r-- | objectstore/objecstore.go | 20 |
3 files changed, 106 insertions, 7 deletions
diff --git a/objectstore/memory_backend.go b/objectstore/memory_backend.go index 7dcfe4a..36ee7b6 100644 --- a/objectstore/memory_backend.go +++ b/objectstore/memory_backend.go @@ -5,12 +5,22 @@ package objectstore import ( + "crypto/sha256" + "encoding/hex" "fmt" "sync" ) +type memObject struct { + payload string + hash string +} + type memBucket struct { - objects map[string]string + objects map[string]memObject + // Map used to check if the hash already exists, also + // storing the objectId to identify de duplicate + hashes map[string]string } type MemObjectStore struct { @@ -24,18 +34,37 @@ func NewMemBackend() *MemObjectStore { return os } -func (os *MemObjectStore) CreateObject(bucketId string, objectId string, object string) { +func (os *MemObjectStore) CreateObject(bucketId string, objectId string, payload string) error { os.Lock() defer os.Unlock() + // Create bucket if it doesn't exist bucket, ok := os.buckets[bucketId] if !ok { bucket = memBucket{} - bucket.objects = make(map[string]string) + bucket.objects = make(map[string]memObject) + bucket.hashes = make(map[string]string) os.buckets[bucketId] = bucket } + // Hash the object content + hasher := sha256.New() + hasher.Write([]byte(payload)) + hash := hex.EncodeToString(hasher.Sum(nil)) + + // Check for duplicates + if objectId, dup := bucket.hashes[hash]; dup { + return NewDuplicateError(objectId) + } + + // Store the object + object := memObject{ + payload: payload, + hash: hash, + } bucket.objects[objectId] = object + bucket.hashes[hash] = objectId + return nil } func (os *MemObjectStore) GetObject(bucketId string, objectId string) (string, error) { @@ -52,7 +81,7 @@ func (os *MemObjectStore) GetObject(bucketId string, objectId string) (string, e return "", fmt.Errorf("Object not found") } - return object, nil + return object.payload, nil } func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error { @@ -64,12 +93,15 @@ func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error { return fmt.Errorf("Bucket not found") } - _, ok = bucket.objects[objectId] + object, ok := bucket.objects[objectId] if !ok { return fmt.Errorf("Object not found") } + // Delete both, object and hash delete(bucket.objects, objectId) + delete(bucket.hashes, object.hash) + // Delete the bucket if it's empty if len(bucket.objects) == 0 { delete(os.buckets, bucketId) diff --git a/objectstore/memory_backend_test.go b/objectstore/memory_backend_test.go index 3ac347c..3fbe7a5 100644 --- a/objectstore/memory_backend_test.go +++ b/objectstore/memory_backend_test.go @@ -4,7 +4,10 @@ // https://creativecommons.org/publicdomain/zero/1.0/legalcode package objectstore -import "testing" +import ( + "errors" + "testing" +) func TestCreateAndGet(t *testing.T) { objects := NewMemBackend() @@ -67,3 +70,49 @@ func TestCreateAndDelete(t *testing.T) { t.Errorf("Number of buckets. Got %v, want 0", numBuckets) } } + +func TestDuplicatesDetection(t *testing.T) { + objects := NewMemBackend() + + // Create an object + bucketId := "foo" + objectId := "bar" + objectContent := "Lorem ipsum" + objects.CreateObject(bucketId, objectId, objectContent) + + // Error type reference + var duplicateError *DuplicateError + + // Try to add the object again (same name) + err := objects.CreateObject(bucketId, objectId, objectContent) + if err == nil { + t.Error("Add a duplicate object. Got nil, want error") + } else if errors.Is(err, duplicateError) { + t.Error("Add a duplicate object. Got unexpected error") + } + + // Try to add the object again (different name) + err = objects.CreateObject(bucketId, "baz", objectContent) + if err == nil { + t.Error("Add a duplicate object. Got nil, want error") + } else if errors.Is(err, duplicateError) { + t.Error("Add a duplicate object. Got unexpected error") + } + + // Add the object to a different bucket + err = objects.CreateObject("baz", objectId, objectContent) + if err != nil { + t.Errorf("Add object. Got error, want nil: %v", err) + } + + // Delete the object + if err := objects.DeleteObject(bucketId, objectId); err != nil { + t.Fatalf("Delete test object: %v", err) + } + + // Add the object again + err = objects.CreateObject(bucketId, objectId, objectContent) + if err != nil { + t.Errorf("Add object. Got error, want nil: %v", err) + } +} diff --git a/objectstore/objecstore.go b/objectstore/objecstore.go index 7509fad..f4985c9 100644 --- a/objectstore/objecstore.go +++ b/objectstore/objecstore.go @@ -4,8 +4,26 @@ // https://creativecommons.org/publicdomain/zero/1.0/legalcode package objectstore +import ( + "fmt" +) + type ObjectStore interface { - CreateObject(bucketId string, objectId string, content string) + CreateObject(bucketId string, objectId string, content string) error GetObject(bucketId string, objectId string) (string, error) DeleteObject(bucketId string, objectId string) error } + +type DuplicateError struct { + ObjectId string +} + +func (e *DuplicateError) Error() string { + return fmt.Sprintf("Duplicate of the '%v' object", e.ObjectId) +} + +func NewDuplicateError(objectId string) error { + return &DuplicateError{ + ObjectId: objectId, + } +} |
