Skip to content

Commit

Permalink
Add Image Diff for SVG files (#14867)
Browse files Browse the repository at this point in the history
* Added type sniffer.

* Switched content detection from base to typesniffer.

* Added GuessContentType to Blob.

* Moved image info logic to client.
Added support for SVG images in diff.

* Restore old blocked svg behaviour.

* Added missing image formats.

* Execute image diff only when container is visible.

* add margin to spinner

* improve BIN tag on image diffs

* Default to render view.

* Show image diff on incomplete diff.

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Lauris BH <lauris@nix.lv>
  • Loading branch information
4 people authored Jun 5, 2021
1 parent 7979c36 commit 8e26210
Show file tree
Hide file tree
Showing 19 changed files with 444 additions and 436 deletions.
5 changes: 3 additions & 2 deletions modules/avatar/avatar.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ import (
"image"
"image/color/palette"

// Enable PNG support:
_ "image/png"
_ "image/gif" // for processing gif images
_ "image/jpeg" // for processing jpeg images
_ "image/png" // for processing png images

"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
Expand Down
68 changes: 0 additions & 68 deletions modules/base/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ import (
"encoding/hex"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
Expand All @@ -30,15 +28,6 @@ import (
"github.com/dustin/go-humanize"
)

// Use at most this many bytes to determine Content Type.
const sniffLen = 512

// SVGMimeType MIME type of SVG images.
const SVGMimeType = "image/svg+xml"

var svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
var svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)

// EncodeMD5 encodes string to md5 hex value.
func EncodeMD5(str string) string {
m := md5.New()
Expand Down Expand Up @@ -276,63 +265,6 @@ func IsLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}

// DetectContentType extends http.DetectContentType with more content types.
func DetectContentType(data []byte) string {
ct := http.DetectContentType(data)

if len(data) > sniffLen {
data = data[:sniffLen]
}

if setting.UI.SVG.Enabled &&
((strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data)) {

// SVG is unsupported. https://github.com/golang/go/issues/15888
return SVGMimeType
}
return ct
}

// IsRepresentableAsText returns true if file content can be represented as
// plain text or is empty.
func IsRepresentableAsText(data []byte) bool {
return IsTextFile(data) || IsSVGImageFile(data)
}

// IsTextFile returns true if file content format is plain text or empty.
func IsTextFile(data []byte) bool {
if len(data) == 0 {
return true
}
return strings.Contains(DetectContentType(data), "text/")
}

// IsImageFile detects if data is an image format
func IsImageFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "image/")
}

// IsSVGImageFile detects if data is an SVG image format
func IsSVGImageFile(data []byte) bool {
return strings.Contains(DetectContentType(data), SVGMimeType)
}

// IsPDFFile detects if data is a pdf format
func IsPDFFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "application/pdf")
}

// IsVideoFile detects if data is an video format
func IsVideoFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "video/")
}

// IsAudioFile detects if data is an video format
func IsAudioFile(data []byte) bool {
return strings.Contains(DetectContentType(data), "audio/")
}

// EntryIcon returns the octicon class for displaying files/directories
func EntryIcon(entry *git.TreeEntry) string {
switch {
Expand Down
92 changes: 0 additions & 92 deletions modules/base/tool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package base

import (
"encoding/base64"
"os"
"testing"
"time"
Expand Down Expand Up @@ -246,97 +245,6 @@ func TestIsLetter(t *testing.T) {
assert.False(t, IsLetter(0x93))
}

func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
// Pre-condition: Shorter than sniffLen detects SVG.
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)))
// Longer than sniffLen detects something else.
assert.Equal(t, "text/plain; charset=utf-8", DetectContentType([]byte(`<!--
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
Comment Comment Comment --><svg></svg>`)))
}

// IsRepresentableAsText

func TestIsTextFile(t *testing.T) {
assert.True(t, IsTextFile([]byte{}))
assert.True(t, IsTextFile([]byte("lorem ipsum")))
}

func TestIsImageFile(t *testing.T) {
png, _ := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAG0lEQVQYlWN4+vTpf3SMDTAMBYXYBLFpHgoKAeiOf0SGE9kbAAAAAElFTkSuQmCC")
assert.True(t, IsImageFile(png))
assert.False(t, IsImageFile([]byte("plain text")))
}

func TestIsSVGImageFile(t *testing.T) {
assert.True(t, IsSVGImageFile([]byte("<svg></svg>")))
assert.True(t, IsSVGImageFile([]byte(" <svg></svg>")))
assert.True(t, IsSVGImageFile([]byte(`<svg width="100"></svg>`)))
assert.True(t, IsSVGImageFile([]byte("<svg/>")))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiline
Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multiple -->
<!-- Comments -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- Multline
Comment -->
<svg></svg>`)))
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Multline
Comment -->
<svg></svg>`)))
assert.False(t, IsSVGImageFile([]byte{}))
assert.False(t, IsSVGImageFile([]byte("svg")))
assert.False(t, IsSVGImageFile([]byte("<svgfoo></svgfoo>")))
assert.False(t, IsSVGImageFile([]byte("text<svg></svg>")))
assert.False(t, IsSVGImageFile([]byte("<html><body><svg></svg></body></html>")))
assert.False(t, IsSVGImageFile([]byte(`<script>"<svg></svg>"</script>`)))
assert.False(t, IsSVGImageFile([]byte(`<!-- <svg></svg> inside comment -->
<foo></foo>`)))
assert.False(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<!-- <svg></svg> inside comment -->
<foo></foo>`)))
}

func TestIsPDFFile(t *testing.T) {
pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
assert.True(t, IsPDFFile(pdf))
assert.False(t, IsPDFFile([]byte("plain text")))
}

func TestIsVideoFile(t *testing.T) {
mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
assert.True(t, IsVideoFile(mp4))
assert.False(t, IsVideoFile([]byte("plain text")))
}

func TestIsAudioFile(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
assert.True(t, IsAudioFile(mp3))
assert.False(t, IsAudioFile([]byte("plain text")))
}

// TODO: Test EntryIcon

func TestSetupGiteaRoot(t *testing.T) {
Expand Down
13 changes: 13 additions & 0 deletions modules/git/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"encoding/base64"
"io"
"io/ioutil"

"code.gitea.io/gitea/modules/typesniffer"
)

// This file contains common functions between the gogit and !gogit variants for git Blobs
Expand Down Expand Up @@ -82,3 +84,14 @@ func (b *Blob) GetBlobContentBase64() (string, error) {
}
return string(out), nil
}

// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
r, err := b.DataAsync()
if err != nil {
return typesniffer.SniffedType{}, err
}
defer r.Close()

return typesniffer.DetectContentTypeFromReader(r)
}
70 changes: 0 additions & 70 deletions modules/git/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@ import (
"container/list"
"errors"
"fmt"
"image"
"image/color"
_ "image/gif" // for processing gif images
_ "image/jpeg" // for processing jpeg images
_ "image/png" // for processing png images
"io"
"net/http"
"os/exec"
"strconv"
"strings"
Expand Down Expand Up @@ -81,70 +75,6 @@ func (c *Commit) ParentCount() int {
return len(c.Parents)
}

func isImageFile(data []byte) (string, bool) {
contentType := http.DetectContentType(data)
if strings.Contains(contentType, "image/") {
return contentType, true
}
return contentType, false
}

// IsImageFile is a file image type
func (c *Commit) IsImageFile(name string) bool {
blob, err := c.GetBlobByPath(name)
if err != nil {
return false
}

dataRc, err := blob.DataAsync()
if err != nil {
return false
}
defer dataRc.Close()
buf := make([]byte, 1024)
n, _ := dataRc.Read(buf)
buf = buf[:n]
_, isImage := isImageFile(buf)
return isImage
}

// ImageMetaData represents metadata of an image file
type ImageMetaData struct {
ColorModel color.Model
Width int
Height int
ByteSize int64
}

// ImageInfo returns information about the dimensions of an image
func (c *Commit) ImageInfo(name string) (*ImageMetaData, error) {
if !c.IsImageFile(name) {
return nil, nil
}

blob, err := c.GetBlobByPath(name)
if err != nil {
return nil, err
}
reader, err := blob.DataAsync()
if err != nil {
return nil, err
}
defer reader.Close()
config, _, err := image.DecodeConfig(reader)
if err != nil {
return nil, err
}

metadata := ImageMetaData{
ColorModel: config.ColorModel,
Width: config.Width,
Height: config.Height,
ByteSize: blob.Size(),
}
return &metadata, nil
}

// GetCommitByPath return the commit of relative path object.
func (c *Commit) GetCommitByPath(relpath string) (*Commit, error) {
return c.repo.getCommitByPathWithID(c.ID, relpath)
Expand Down
4 changes: 2 additions & 2 deletions modules/indexer/code/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"

"github.com/blevesearch/bleve/v2"
Expand Down Expand Up @@ -211,7 +211,7 @@ func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
return nil
}
Expand Down
4 changes: 2 additions & 2 deletions modules/indexer/code/elastic_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"

"github.com/go-enry/go-enry/v2"
jsoniter "github.com/json-iterator/go"
Expand Down Expand Up @@ -210,7 +210,7 @@ func (b *ElasticSearchIndexer) addUpdate(batchWriter git.WriteCloserError, batch
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
if err != nil {
return nil, err
} else if !base.IsTextFile(fileContents) {
} else if !typesniffer.DetectContentType(fileContents).IsText() {
// FIXME: UTF-16 files will probably fail here
return nil, nil
}
Expand Down
Loading

0 comments on commit 8e26210

Please sign in to comment.