API: Rig up File { sha }

2021-09-21 11:43:10 +02:00 · 2021-09-21 11:43:10 +02:00 · ec9bcf7fae
parent dd5dbf3240
commit ec9bcf7fae
4 changed files with 281 additions and 1 deletions
--- a/api/graph/model/blob.go
+++ b/api/graph/model/blob.go
@ -0,0 +1,7 @@
+package model
+
+// Blob is not rigged up with GraphQL, and is only used internally.
+type Blob struct {
+	ID  int    // filled from the blob table's id column by the blob loader
+	SHA string // filled from the blob table's sha column by the blob loader
+}
--- a/api/graph/schema.resolvers.go
+++ b/api/graph/schema.resolvers.go
@ -19,7 +19,11 @@ import (
 )

 func (r *fileResolver) Sha(ctx context.Context, obj *model.File) (string, error) {
-	panic(fmt.Errorf("not implemented"))
+	// Resolve the SHA through the request-scoped BlobsByID loader so that
+	// lookups for several files can share one batched query.
+	blob, err := loaders.ForContext(ctx).BlobsByID.Load(obj.BlobID)
+	if err != nil {
+		// Report the failure through the error return instead of panicking.
+		return "", err
+	}
+	if blob == nil {
+		// The batch fetch leaves a nil entry for IDs with no matching row;
+		// dereferencing it would crash the resolver.
+		return "", fmt.Errorf("blob %d not found", obj.BlobID)
+	}
+	return blob.SHA, nil
 }

 func (r *fileResolver) Contents(ctx context.Context, obj *model.File) (*model.URL, error) {
--- a/api/loaders/blobsbyidloader_gen.go
+++ b/api/loaders/blobsbyidloader_gen.go
@ -0,0 +1,224 @@
+// Code generated by github.com/vektah/dataloaden, DO NOT EDIT.
+
+package loaders
+
+import (
+	"sync"
+	"time"
+
+	"git.sr.ht/~sircmpwn/paste.sr.ht/api/graph/model"
+)
+
+// BlobsByIDLoaderConfig captures the config to create a new BlobsByIDLoader
+type BlobsByIDLoaderConfig struct {
+	// Fetch is a method that provides the data for the loader
+	Fetch func(keys []int) ([]*model.Blob, []error)
+
+	// Wait is how long to wait before sending a batch
+	Wait time.Duration
+
+	// MaxBatch will limit the maximum number of keys to send in one batch, 0 = no limit
+	MaxBatch int
+}
+
+// NewBlobsByIDLoader builds a BlobsByIDLoader from the fetch function, wait
+// duration, and maximum batch size held in config.
+func NewBlobsByIDLoader(config BlobsByIDLoaderConfig) *BlobsByIDLoader {
+	loader := new(BlobsByIDLoader)
+	loader.fetch = config.Fetch
+	loader.wait = config.Wait
+	loader.maxBatch = config.MaxBatch
+	return loader
+}
+
+// BlobsByIDLoader batches and caches requests
+type BlobsByIDLoader struct {
+	// this method provides the data for the loader
+	fetch func(keys []int) ([]*model.Blob, []error)
+
+	// how long to wait before sending a batch
+	wait time.Duration
+
+	// this will limit the maximum number of keys to send in one batch, 0 = no limit
+	maxBatch int
+
+	// INTERNAL
+
+	// lazily created cache
+	cache map[int]*model.Blob
+
+	// the current batch. keys will continue to be collected until timeout is hit,
+	// then everything will be sent to the fetch method and out to the listeners
+	batch *blobsByIDLoaderBatch
+
+	// mutex to prevent races
+	mu sync.Mutex
+}
+
+// blobsByIDLoaderBatch is one group of keys that is fetched together.
+type blobsByIDLoaderBatch struct {
+	keys    []int         // keys collected for this batch, in insertion order
+	data    []*model.Blob // first result of fetch, set by end
+	error   []error       // second result of fetch, set by end
+	closing bool          // set once the batch hit maxBatch and is being finalized
+	done    chan struct{} // closed by end after fetch has returned
+}
+
+// Load returns the Blob for key. It obtains a thunk from LoadThunk and calls
+// it right away, so batching and caching are applied automatically.
+func (l *BlobsByIDLoader) Load(key int) (*model.Blob, error) {
+	thunk := l.LoadThunk(key)
+	return thunk()
+}
+
+// LoadThunk returns a function that when called will block waiting for a Blob.
+// This method should be used if you want one goroutine to make requests to many
+// different data loaders without blocking until the thunk is called.
+func (l *BlobsByIDLoader) LoadThunk(key int) func() (*model.Blob, error) {
+	l.mu.Lock()
+	// cache hit: hand back a thunk that yields the cached value without waiting
+	if it, ok := l.cache[key]; ok {
+		l.mu.Unlock()
+		return func() (*model.Blob, error) {
+			return it, nil
+		}
+	}
+	// join the currently open batch, opening a new one if there is none
+	if l.batch == nil {
+		l.batch = &blobsByIDLoaderBatch{done: make(chan struct{})}
+	}
+	batch := l.batch
+	// pos is this key's index into the batch's keys, and so into its results
+	pos := batch.keyIndex(l, key)
+	l.mu.Unlock()
+
+	return func() (*model.Blob, error) {
+		// block until end has run fetch for this batch and closed done
+		<-batch.done
+
+		var data *model.Blob
+		if pos < len(batch.data) {
+			data = batch.data[pos]
+		}
+
+		var err error
+		// it's convenient to be able to return a single error for everything
+		// NOTE(review): batch.error[pos] is not bounds-checked the way
+		// batch.data is; a fetch returning a shorter error slice would panic.
+		if len(batch.error) == 1 {
+			err = batch.error[0]
+		} else if batch.error != nil {
+			err = batch.error[pos]
+		}
+
+		// only error-free results are cached; a nil Blob returned without an
+		// error is cached as well
+		if err == nil {
+			l.mu.Lock()
+			l.unsafeSet(key, data)
+			l.mu.Unlock()
+		}
+
+		return data, err
+	}
+}
+
+// LoadAll requests every key up front via LoadThunk and then waits for each
+// result in turn. The returned slices are parallel to keys. Splitting into
+// sub batches follows the loader's configuration.
+func (l *BlobsByIDLoader) LoadAll(keys []int) ([]*model.Blob, []error) {
+	thunks := make([]func() (*model.Blob, error), 0, len(keys))
+	for _, key := range keys {
+		thunks = append(thunks, l.LoadThunk(key))
+	}
+
+	blobs := make([]*model.Blob, len(thunks))
+	errs := make([]error, len(thunks))
+	for i := range thunks {
+		blobs[i], errs[i] = thunks[i]()
+	}
+	return blobs, errs
+}
+
+// LoadAllThunk registers every key with the loader immediately and returns a
+// function that, when called, blocks until all of the Blobs are available.
+// Use it when one goroutine wants to issue requests to many different data
+// loaders without blocking until the thunk is called.
+func (l *BlobsByIDLoader) LoadAllThunk(keys []int) func() ([]*model.Blob, []error) {
+	thunks := make([]func() (*model.Blob, error), 0, len(keys))
+	for _, key := range keys {
+		thunks = append(thunks, l.LoadThunk(key))
+	}
+	return func() ([]*model.Blob, []error) {
+		n := len(thunks)
+		blobs, errs := make([]*model.Blob, n), make([]error, n)
+		for i := range thunks {
+			blobs[i], errs[i] = thunks[i]()
+		}
+		return blobs, errs
+	}
+}
+
+// Prime the cache with the provided key and value. If the key already exists, no change is made
+// and false is returned.
+// (To forcefully prime the cache, clear the key first with loader.clear(key).prime(key, value).)
+func (l *BlobsByIDLoader) Prime(key int, value *model.Blob) bool {
+	l.mu.Lock()
+	var found bool
+	if _, found = l.cache[key]; !found {
+		// make a copy when writing to the cache, its easy to pass a pointer in from a loop var
+		// and end up with the whole cache pointing to the same value.
+		cpy := *value
+		l.unsafeSet(key, &cpy)
+	}
+	l.mu.Unlock()
+	return !found
+}
+
+// Clear removes the cached value for key, if there is one.
+func (l *BlobsByIDLoader) Clear(key int) {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	delete(l.cache, key)
+}
+
+// unsafeSet stores value under key, creating the cache map on first use.
+// It takes no lock itself; the visible callers hold l.mu around the call.
+func (l *BlobsByIDLoader) unsafeSet(key int, value *model.Blob) {
+	if l.cache != nil {
+		l.cache[key] = value
+		return
+	}
+	l.cache = map[int]*model.Blob{key: value}
+}
+
+// keyIndex will return the location of the key in the batch, if it's not found
+// it will add the key to the batch.
+// It reads and writes l.batch without locking; the visible caller, LoadThunk,
+// invokes it while holding l.mu.
+func (b *blobsByIDLoaderBatch) keyIndex(l *BlobsByIDLoader, key int) int {
+	// a key that is already part of this batch reuses its existing slot
+	for i, existingKey := range b.keys {
+		if key == existingKey {
+			return i
+		}
+	}
+
+	pos := len(b.keys)
+	b.keys = append(b.keys, key)
+	// the first key of a batch starts the wait timer that will flush it
+	if pos == 0 {
+		go b.startTimer(l)
+	}
+
+	// batch is full: detach it from the loader so later keys open a new batch,
+	// and run the fetch in the background without waiting for the timer
+	if l.maxBatch != 0 && pos >= l.maxBatch-1 {
+		if !b.closing {
+			b.closing = true
+			l.batch = nil
+			go b.end(l)
+		}
+	}
+
+	return pos
+}
+
+// startTimer sleeps for the loader's wait duration and then finalizes the
+// batch, unless keyIndex already began finalizing it because it became full.
+func (b *blobsByIDLoaderBatch) startTimer(l *BlobsByIDLoader) {
+	time.Sleep(l.wait)
+	l.mu.Lock()
+
+	// we must have hit a batch limit and are already finalizing this batch
+	if b.closing {
+		l.mu.Unlock()
+		return
+	}
+
+	// detach the batch so that later keys start a new one, then run the fetch
+	// after the lock has been released
+	l.batch = nil
+	l.mu.Unlock()
+
+	b.end(l)
+}
+
+// end runs the loader's fetch for every key collected in the batch, stores
+// the results on the batch, and closes done to release the waiting thunks.
+func (b *blobsByIDLoaderBatch) end(l *BlobsByIDLoader) {
+	b.data, b.error = l.fetch(b.keys)
+	close(b.done)
+}
--- a/api/loaders/middleware.go
+++ b/api/loaders/middleware.go
@ -3,6 +3,7 @@ package loaders
 //go:generate ./gen UsersByIDLoader int api/graph/model.User
 //go:generate ./gen UsersByNameLoader string api/graph/model.User
 //go:generate ./gen PastesBySHALoader string api/graph/model.Paste
+//go:generate ./gen BlobsByIDLoader int api/graph/model.Blob

 import (
 	"context"
@ -26,11 +27,50 @@ type contextKey struct {
 }

+// Loaders holds the data loaders that Middleware stores in the request context.
 type Loaders struct {
+	BlobsByID   BlobsByIDLoader
 	PastesBySHA PastesBySHALoader
 	UsersByID   UsersByIDLoader
 	UsersByName UsersByNameLoader
 }

+func fetchBlobsByID(ctx context.Context) func(ids []int) ([]*model.Blob, []error) {
+	return func(ids []int) ([]*model.Blob, []error) {
+		blobs := make([]*model.Blob, len(ids))
+		if err := database.WithTx(ctx, &sql.TxOptions{
+			Isolation: 0,
+			ReadOnly: true,
+		}, func (tx *sql.Tx) error {
+			rows, err := tx.QueryContext(ctx, `
+				SELECT id, sha FROM blob WHERE id = ANY($1);
+			`, pq.Array(ids))
+			if err != nil {
+				return err
+			}
+			defer rows.Close()
+
+			blobsByID := map[int]*model.Blob{}
+			for rows.Next() {
+				var blob model.Blob
+				if err := rows.Scan(&blob.ID, &blob.SHA); err != nil {
+					panic(err)
+				}
+				blobsByID[blob.ID] = &blob
+			}
+			if err = rows.Err(); err != nil {
+				panic(err)
+			}
+
+			for i, id := range ids {
+				blobs[i] = blobsByID[id]
+			}
+			return nil
+		}); err != nil {
+			panic(err)
+		}
+		return blobs, nil
+	}
+}
+
 func fetchPastesBySHA(ctx context.Context) func(shas []string) ([]*model.Paste, []error) {
 	return func(shas []string) ([]*model.Paste, []error) {
 		pastes := make([]*model.Paste, len(shas))
@ -170,6 +210,11 @@ func fetchUsersByName(ctx context.Context) func(names []string) ([]*model.User,
 func Middleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		ctx := context.WithValue(r.Context(), loadersCtxKey, &Loaders{
+			BlobsByID: BlobsByIDLoader{
+				maxBatch: 100,
+				wait:     1 * time.Millisecond,
+				fetch:    fetchBlobsByID(r.Context()),
+			},
 			PastesBySHA: PastesBySHALoader{
 				maxBatch: 100,
 				wait:     1 * time.Millisecond,