summaryrefslogtreecommitdiff
path: root/objectstore
diff options
context:
space:
mode:
author Pablo M. Bermudo Garay <pablombg@gmail.com> 2022-02-09 19:58:30 +0100
committer Pablo M. Bermudo Garay <pablombg@gmail.com> 2022-02-10 13:14:19 +0100
commit 53667583c094252818f203b9233a7726d0a4e8a0 (patch)
tree 7bcc7cbaf784f149113cba85a837515d70ae1c87 /objectstore
parent c488ed1f80fd54846170c0ab04ce59a69f52a9e5 (diff)
Add proactive de-duplication to the memory backend
Diffstat (limited to 'objectstore')
-rw-r--r-- objectstore/memory_backend.go 42
-rw-r--r-- objectstore/memory_backend_test.go 51
-rw-r--r-- objectstore/objecstore.go 20
3 files changed, 106 insertions, 7 deletions
diff --git a/objectstore/memory_backend.go b/objectstore/memory_backend.go
index 7dcfe4a..36ee7b6 100644
--- a/objectstore/memory_backend.go
+++ b/objectstore/memory_backend.go
@@ -5,12 +5,22 @@
package objectstore
import (
+ "crypto/sha256"
+ "encoding/hex"
"fmt"
"sync"
)
+type memObject struct { // one stored object together with the hash of its content
+ payload string // object content exactly as passed to CreateObject
+ hash string // hex-encoded SHA-256 of payload, computed in CreateObject
+}
+
type memBucket struct {
- objects map[string]string
+ objects map[string]memObject
+ // Index from content hash to the objectId that holds that content; used to
+ // check whether a hash already exists and to identify the duplicated object
+ hashes map[string]string
}
type MemObjectStore struct {
@@ -24,18 +34,37 @@ func NewMemBackend() *MemObjectStore {
return os
}
-func (os *MemObjectStore) CreateObject(bucketId string, objectId string, object string) {
+// CreateObject stores payload under objectId inside bucketId, creating the
+// bucket on first use. Content is de-duplicated per bucket: if an object with
+// the same SHA-256 hash is already stored there, nothing is written and a
+// *DuplicateError naming the existing object is returned. Re-using an
+// objectId with different content replaces the stored object.
+func (os *MemObjectStore) CreateObject(bucketId string, objectId string, payload string) error {
os.Lock()
defer os.Unlock()
+ // Create bucket if it doesn't exist
bucket, ok := os.buckets[bucketId]
if !ok {
bucket = memBucket{}
- bucket.objects = make(map[string]string)
+ bucket.objects = make(map[string]memObject)
+ bucket.hashes = make(map[string]string)
os.buckets[bucketId] = bucket
}
+ // Hash the object content
+ sum := sha256.Sum256([]byte(payload))
+ hash := hex.EncodeToString(sum[:])
+
+ // Check for duplicates
+ if existingId, dup := bucket.hashes[hash]; dup {
+ return NewDuplicateError(existingId)
+ }
+
+ // When objectId is being overwritten with new content, forget the hash
+ // of the old payload; otherwise it would keep reporting duplicates of
+ // content that is no longer stored
+ if previous, exists := bucket.objects[objectId]; exists {
+ delete(bucket.hashes, previous.hash)
+ }
+
+ // Store the object
+ object := memObject{
+ payload: payload,
+ hash: hash,
+ }
bucket.objects[objectId] = object
+ bucket.hashes[hash] = objectId
+ return nil
}
func (os *MemObjectStore) GetObject(bucketId string, objectId string) (string, error) {
@@ -52,7 +81,7 @@ func (os *MemObjectStore) GetObject(bucketId string, objectId string) (string, e
return "", fmt.Errorf("Object not found")
}
- return object, nil
+ return object.payload, nil
}
func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error {
@@ -64,12 +93,15 @@ func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error {
return fmt.Errorf("Bucket not found")
}
- _, ok = bucket.objects[objectId]
+ object, ok := bucket.objects[objectId]
if !ok {
return fmt.Errorf("Object not found")
}
+ // Delete both, object and hash
delete(bucket.objects, objectId)
+ delete(bucket.hashes, object.hash)
+
// Delete the bucket if it's empty
if len(bucket.objects) == 0 {
delete(os.buckets, bucketId)
diff --git a/objectstore/memory_backend_test.go b/objectstore/memory_backend_test.go
index 3ac347c..3fbe7a5 100644
--- a/objectstore/memory_backend_test.go
+++ b/objectstore/memory_backend_test.go
@@ -4,7 +4,10 @@
// https://creativecommons.org/publicdomain/zero/1.0/legalcode
package objectstore
-import "testing"
+import (
+ "errors"
+ "testing"
+)
func TestCreateAndGet(t *testing.T) {
objects := NewMemBackend()
@@ -67,3 +70,49 @@ func TestCreateAndDelete(t *testing.T) {
t.Errorf("Number of buckets. Got %v, want 0", numBuckets)
}
}
+
+// TestDuplicatesDetection checks that storing the same content twice in one
+// bucket is rejected with a *DuplicateError (whatever the object name), that
+// the same content is accepted in a different bucket, and that it is accepted
+// again once the original object has been deleted.
+func TestDuplicatesDetection(t *testing.T) {
+ objects := NewMemBackend()
+
+ // Create an object
+ bucketId := "foo"
+ objectId := "bar"
+ objectContent := "Lorem ipsum"
+ if err := objects.CreateObject(bucketId, objectId, objectContent); err != nil {
+ t.Fatalf("Add object. Got error, want nil: %v", err)
+ }
+
+ // Target for errors.As; filled in when the error is a *DuplicateError
+ var duplicateError *DuplicateError
+
+ // Try to add the object again (same name)
+ err := objects.CreateObject(bucketId, objectId, objectContent)
+ if err == nil {
+ t.Error("Add a duplicate object. Got nil, want error")
+ } else if !errors.As(err, &duplicateError) {
+ t.Errorf("Add a duplicate object. Got unexpected error: %v", err)
+ } else if duplicateError.ObjectId != objectId {
+ t.Errorf("Duplicate object id. Got %v, want %v", duplicateError.ObjectId, objectId)
+ }
+
+ // Try to add the object again (different name)
+ err = objects.CreateObject(bucketId, "baz", objectContent)
+ if err == nil {
+ t.Error("Add a duplicate object. Got nil, want error")
+ } else if !errors.As(err, &duplicateError) {
+ t.Errorf("Add a duplicate object. Got unexpected error: %v", err)
+ } else if duplicateError.ObjectId != objectId {
+ t.Errorf("Duplicate object id. Got %v, want %v", duplicateError.ObjectId, objectId)
+ }
+
+ // Add the object to a different bucket
+ err = objects.CreateObject("baz", objectId, objectContent)
+ if err != nil {
+ t.Errorf("Add object. Got error, want nil: %v", err)
+ }
+
+ // Delete the object
+ if err := objects.DeleteObject(bucketId, objectId); err != nil {
+ t.Fatalf("Delete test object: %v", err)
+ }
+
+ // Add the object again
+ err = objects.CreateObject(bucketId, objectId, objectContent)
+ if err != nil {
+ t.Errorf("Add object. Got error, want nil: %v", err)
+ }
+}
diff --git a/objectstore/objecstore.go b/objectstore/objecstore.go
index 7509fad..f4985c9 100644
--- a/objectstore/objecstore.go
+++ b/objectstore/objecstore.go
@@ -4,8 +4,26 @@
// https://creativecommons.org/publicdomain/zero/1.0/legalcode
package objectstore
+import (
+ "fmt"
+)
+
type ObjectStore interface {
- CreateObject(bucketId string, objectId string, content string)
+ CreateObject(bucketId string, objectId string, content string) error // error return introduced by this change; the memory backend uses it to report duplicated content
GetObject(bucketId string, objectId string) (string, error)
DeleteObject(bucketId string, objectId string) error
}
+
+type DuplicateError struct { // error value describing an attempt to store content that already exists
+ ObjectId string // id of the object that already holds the content
+}
+
+func (e *DuplicateError) Error() string { // lets *DuplicateError be used as an error value
+ return fmt.Sprintf("Duplicate of the '%v' object", e.ObjectId) // message names the already-stored object
+}
+
+func NewDuplicateError(objectId string) error { // builds a *DuplicateError for objectId, returned as a plain error
+ return &DuplicateError{
+ ObjectId: objectId,
+ }
+}