diff --git a/s2/index.go b/s2/index.go
index 18a4f7acd..4229957b9 100644
--- a/s2/index.go
+++ b/s2/index.go
@@ -17,6 +17,8 @@ const (
 	S2IndexHeader   = "s2idx\x00"
 	S2IndexTrailer  = "\x00xdi2s"
 	maxIndexEntries = 1 << 16
+	// minIndexDist is the minimum uncompressed distance between index entries; closer entries are not added.
+	minIndexDist = 1 << 20
 )
 
 // Index represents an S2/Snappy index.
@@ -72,6 +74,10 @@ func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
 		if latest.compressedOffset > compressedOffset {
 			return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
 		}
+		if latest.uncompressedOffset+minIndexDist > uncompressedOffset {
+			// Only add entry if distance is large enough.
+			return nil
+		}
 	}
 	i.info = append(i.info, struct {
 		compressedOffset   int64
@@ -122,7 +128,7 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er
 
 // reduce to stay below maxIndexEntries
 func (i *Index) reduce() {
-	if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
+	if len(i.info) < maxIndexEntries && i.estBlockUncomp >= minIndexDist {
 		return
 	}
 
@@ -132,7 +138,7 @@ func (i *Index) reduce() {
 	j := 0
 
 	// Each block should be at least 1MB, but don't reduce below 1000 entries.
-	for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
+	for i.estBlockUncomp*(int64(removeN)+1) < minIndexDist && len(i.info)/(removeN+1) > 1000 {
 		removeN++
 	}
 	for idx := 0; idx < len(src); idx++ {
diff --git a/s2/s2.go b/s2/s2.go
index 72bcb4945..cbd1ed64d 100644
--- a/s2/s2.go
+++ b/s2/s2.go
@@ -109,7 +109,11 @@ const (
 	chunkTypeStreamIdentifier = 0xff
 )
 
-var crcTable = crc32.MakeTable(crc32.Castagnoli)
+var (
+	crcTable              = crc32.MakeTable(crc32.Castagnoli)
+	magicChunkSnappyBytes = []byte(magicChunkSnappy) // Converted once so call sites where the slice escapes need no per-call []byte conversion.
+	magicChunkBytes       = []byte(magicChunk)       // Converted once so call sites where the slice escapes need no per-call []byte conversion.
+)
 
 // crc implements the checksum specified in section 3 of
 // https://github.com/google/snappy/blob/master/framing_format.txt
diff --git a/s2/writer.go b/s2/writer.go
index 637c93147..0a46f2b98 100644
--- a/s2/writer.go
+++ b/s2/writer.go
@@ -239,6 +239,9 @@ func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
 			}
 		}
 		if n2 == 0 {
+			if cap(inbuf) >= w.obufLen {
+				w.buffers.Put(inbuf)
+			}
 			break
 		}
 		n += int64(n2)
@@ -314,9 +317,9 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
 		hWriter := make(chan result)
 		w.output <- hWriter
 		if w.snappy {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
 		} else {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
 		}
 	}
 
@@ -370,9 +373,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
 		hWriter := make(chan result)
 		w.output <- hWriter
 		if w.snappy {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
 		} else {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
 		}
 	}
 
@@ -478,9 +481,9 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) {
 			hWriter := make(chan result)
 			w.output <- hWriter
 			if w.snappy {
-				hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
+				hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
 			} else {
-				hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
+				hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
 			}
 		}
 
@@ -560,6 +563,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) {
 
 	if w.concurrency == 1 {
 		_, err := w.writeSync(inbuf[obufHeaderLen:])
+		if cap(inbuf) >= w.obufLen {
+			w.buffers.Put(inbuf)
+		}
 		return err
 	}
 
@@ -569,9 +575,9 @@ func (w *Writer) writeFull(inbuf []byte) (errRet error) {
 		hWriter := make(chan result)
 		w.output <- hWriter
 		if w.snappy {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkSnappyBytes}
 		} else {
-			hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
+			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
 		}
 	}
 
@@ -637,9 +643,9 @@ func (w *Writer) writeSync(p []byte) (nRet int, errRet error) {
 		var n int
 		var err error
 		if w.snappy {
-			n, err = w.writer.Write([]byte(magicChunkSnappy))
+			n, err = w.writer.Write(magicChunkSnappyBytes)
 		} else {
-			n, err = w.writer.Write([]byte(magicChunk))
+			n, err = w.writer.Write(magicChunkBytes)
 		}
 		if err != nil {
 			return 0, w.err(err)
diff --git a/s2/writer_test.go b/s2/writer_test.go
index 92f5066f8..470abbb80 100644
--- a/s2/writer_test.go
+++ b/s2/writer_test.go
@@ -603,6 +603,44 @@ func BenchmarkWriterRandom(b *testing.B) {
 	}
 }
 
+// BenchmarkReadFromRandom measures Writer.ReadFrom on 8 MiB of random input,
+// running one sub-benchmark per entry returned by testOptions.
+func BenchmarkReadFromRandom(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	// Make max window so we never get matches.
+	data := make([]byte, 8<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+
+	for name, opts := range testOptions(b) {
+		w := NewWriter(io.Discard, opts...)
+		in := bytes.NewReader(data)
+		// Run one pass before the sub-benchmark; presumably a warm-up for the writer.
+		if _, err := w.ReadFrom(in); err != nil {
+			b.Fatal(err)
+		}
+		b.Run(name, func(b *testing.B) {
+			b.ReportAllocs()
+			b.SetBytes(int64(len(data)))
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				in.Reset(data)
+				if _, err := w.ReadFrom(in); err != nil {
+					b.Fatal(err)
+				}
+			}
+			// Flush output; this runs while the timer is still active.
+			if err := w.Flush(); err != nil {
+				b.Fatal(err)
+			}
+		})
+		if err := w.Close(); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
 func BenchmarkIndexFind(b *testing.B) {
 	fatalErr := func(t testing.TB, err error) {
 		if err != nil {