From 53667583c094252818f203b9233a7726d0a4e8a0 Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 9 Feb 2022 19:58:30 +0100 Subject: Add proactive de-duplication to the memory backend --- objectstore/memory_backend.go | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) (limited to 'objectstore/memory_backend.go') diff --git a/objectstore/memory_backend.go b/objectstore/memory_backend.go index 7dcfe4a..36ee7b6 100644 --- a/objectstore/memory_backend.go +++ b/objectstore/memory_backend.go @@ -5,12 +5,22 @@ package objectstore import ( + "crypto/sha256" + "encoding/hex" "fmt" "sync" ) +type memObject struct { + payload string + hash string +} + type memBucket struct { - objects map[string]string + objects map[string]memObject + // Map used to check if the hash already exists, also + // storing the objectId to identify the duplicate + hashes map[string]string } type MemObjectStore struct { @@ -24,18 +34,37 @@ func NewMemBackend() *MemObjectStore { return os } -func (os *MemObjectStore) CreateObject(bucketId string, objectId string, object string) { +func (os *MemObjectStore) CreateObject(bucketId string, objectId string, payload string) error { os.Lock() defer os.Unlock() + // Create bucket if it doesn't exist bucket, ok := os.buckets[bucketId] if !ok { bucket = memBucket{} - bucket.objects = make(map[string]string) + bucket.objects = make(map[string]memObject) + bucket.hashes = make(map[string]string) os.buckets[bucketId] = bucket } + // Hash the object content + hasher := sha256.New() + hasher.Write([]byte(payload)) + hash := hex.EncodeToString(hasher.Sum(nil)) + + // Check for duplicates + if objectId, dup := bucket.hashes[hash]; dup { + return NewDuplicateError(objectId) + } + + // Store the object + object := memObject{ + payload: payload, + hash: hash, + } bucket.objects[objectId] = object + bucket.hashes[hash] = objectId + return nil } func (os *MemObjectStore) GetObject(bucketId string, objectId string) 
(string, error) { @@ -52,7 +81,7 @@ func (os *MemObjectStore) GetObject(bucketId string, objectId string) (string, e return "", fmt.Errorf("Object not found") } - return object, nil + return object.payload, nil } func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error { @@ -64,12 +93,15 @@ func (os *MemObjectStore) DeleteObject(bucketId string, objectId string) error { return fmt.Errorf("Bucket not found") } - _, ok = bucket.objects[objectId] + object, ok := bucket.objects[objectId] if !ok { return fmt.Errorf("Object not found") } + // Delete both the object and its hash delete(bucket.objects, objectId) + delete(bucket.hashes, object.hash) + // Delete the bucket if it's empty if len(bucket.objects) == 0 { delete(os.buckets, bucketId) -- cgit v1.2.3-70-g09d2