Skip to content

Commit

Permalink
refactor image preprocessing (still draft)
Browse files Browse the repository at this point in the history
  • Loading branch information
wagoodman committed Dec 2, 2018
1 parent 53b11f4 commit 7018cb3
Show file tree
Hide file tree
Showing 13 changed files with 522 additions and 402 deletions.
28 changes: 24 additions & 4 deletions cmd/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import (
"github.com/wagoodman/dive/utils"
)

// analyze takes a docker image tag, digest, or id and displays the
// doAnalyzeCmd takes a docker image tag, digest, or id and displays the
// image analysis to the screen
func analyze(cmd *cobra.Command, args []string) {
func doAnalyzeCmd(cmd *cobra.Command, args []string) {
defer utils.Cleanup()
if len(args) == 0 {
printVersionFlag, err := cmd.PersistentFlags().GetBool("version")
Expand All @@ -33,6 +33,26 @@ func analyze(cmd *cobra.Command, args []string) {
utils.Exit(1)
}
color.New(color.Bold).Println("Analyzing Image")
manifest, refTrees, efficiency, inefficiencies := image.InitializeData(userImage)
ui.Run(manifest, refTrees, efficiency, inefficiencies)

ui.Run(fetchAndAnalyze(userImage))
}


// fetchAndAnalyze obtains an analyzer for imageID, asks it to parse the image
// and then to analyze it, printing a progress line before each step.
//
// If either step returns an error, the error is printed to stdout and
// utils.Exit(1) is called. On success the analysis result is returned.
func fetchAndAnalyze(imageID string) *image.AnalysisResult {
	analyzer := image.GetAnalyzer(imageID)

	fmt.Println(" Fetching image...")
	if err := analyzer.Parse(imageID); err != nil {
		fmt.Printf("cannot fetch image: %v\n", err)
		utils.Exit(1)
	}

	fmt.Println(" Analyzing image...")
	result, err := analyzer.Analyze()
	if err != nil {
		// The message names the action that failed, not the command function.
		fmt.Printf("cannot analyze image: %v\n", err)
		utils.Exit(1)
	}
	return result
}
11 changes: 5 additions & 6 deletions cmd/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"github.com/fatih/color"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/wagoodman/dive/image"
"github.com/wagoodman/dive/ui"
"github.com/wagoodman/dive/utils"
"io/ioutil"
Expand All @@ -16,15 +15,15 @@ var buildCmd = &cobra.Command{
Use: "build [any valid `docker build` arguments]",
Short: "Builds and analyzes a docker image from a Dockerfile (this is a thin wrapper for the `docker build` command).",
DisableFlagParsing: true,
Run: doBuild,
Run: doBuildCmd,
}

// init registers buildCmd as a subcommand of rootCmd.
func init() {
rootCmd.AddCommand(buildCmd)
}

// doBuild implements the steps taken for the build command
func doBuild(cmd *cobra.Command, args []string) {
// doBuildCmd implements the steps taken for the build command
func doBuildCmd(cmd *cobra.Command, args []string) {
defer utils.Cleanup()
iidfile, err := ioutil.TempFile("/tmp", "dive.*.iid")
if err != nil {
Expand All @@ -47,6 +46,6 @@ func doBuild(cmd *cobra.Command, args []string) {
}

color.New(color.Bold).Println("Analyzing Image")
manifest, refTrees, efficiency, inefficiencies := image.InitializeData(string(imageId))
ui.Run(manifest, refTrees, efficiency, inefficiencies)

ui.Run(fetchAndAnalyze(string(imageId)))
}
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var rootCmd = &cobra.Command{
Long: `This tool provides a way to discover and explore the contents of a docker image. Additionally the tool estimates
the amount of wasted space and identifies the offending files from the image.`,
Args: cobra.MaximumNArgs(1),
Run: analyze,
Run: doAnalyzeCmd,
}

// Execute adds all child commands to the root command and sets flags appropriately.
Expand Down
266 changes: 266 additions & 0 deletions image/docker_image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
package image

import (
"archive/tar"
"encoding/json"
"fmt"
"github.com/docker/docker/client"
"github.com/sirupsen/logrus"
"io"
"strings"
"github.com/wagoodman/dive/filetree"
"github.com/wagoodman/dive/utils"
"golang.org/x/net/context"
)

var dockerVersion string

// check panics with e when e is non-nil and does nothing otherwise.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}

// newDockerImageAnalyzer returns a pointer to a zero-valued
// dockerImageAnalyzer, exposed through the Analyzer interface.
func newDockerImageAnalyzer() Analyzer {
	return new(dockerImageAnalyzer)
}

// newDockerImageManifest decodes manifestBytes as a JSON array of manifests
// and returns the first element.
//
// It panics (via logrus.Panic) if the bytes are not valid JSON for that shape
// or if the decoded array is empty, since there is then no manifest to return.
func newDockerImageManifest(manifestBytes []byte) dockerImageManifest {
	var manifests []dockerImageManifest
	if err := json.Unmarshal(manifestBytes, &manifests); err != nil {
		logrus.Panic(err)
	}
	// Guard the index below: an empty array (or JSON null) would otherwise
	// surface as an uninformative index-out-of-range runtime panic.
	if len(manifests) == 0 {
		logrus.Panic("image manifest contains no entries")
	}
	return manifests[0]
}

// newDockerImageConfig decodes configBytes into a dockerImageConfig and fills
// in the ID of every history entry: entries flagged EmptyLayer get the
// placeholder "<missing>", every other entry takes the next unused value from
// RootFs.DiffIds, in order.
//
// It panics (via logrus.Panic) when the bytes cannot be decoded.
func newDockerImageConfig(configBytes []byte) dockerImageConfig {
	var cfg dockerImageConfig
	if err := json.Unmarshal(configBytes, &cfg); err != nil {
		logrus.Panic(err)
	}

	// nextDiffID only advances for history entries that carry layer content.
	nextDiffID := 0
	for i := range cfg.History {
		if cfg.History[i].EmptyLayer {
			cfg.History[i].ID = "<missing>"
			continue
		}
		cfg.History[i].ID = cfg.RootFs.DiffIds[nextDiffID]
		nextDiffID++
	}

	return cfg
}


// Parse records imageID on the analyzer, resets the json-file and layer maps,
// creates the docker client, and reads the saved image archive through
// image.read. When inspecting the image fails, a `docker pull` is attempted
// through the CLI before the archive is requested.
//
// It returns the first error from creating the client, opening the archive,
// or reading it.
func (image *dockerImageAnalyzer) Parse(imageID string) error {
	image.id = imageID
	// discovered json files are kept in a map so the image can be read in one pass
	image.jsonFiles = make(map[string][]byte)
	image.layerMap = make(map[string]*filetree.FileTree)

	var err error
	if image.client, err = client.NewClientWithOpts(client.WithVersion(dockerVersion), client.FromEnv); err != nil {
		return err
	}

	// pull the image if it does not exist
	if _, _, err = image.client.ImageInspectWithRaw(context.Background(), imageID); err != nil {
		// don't use the API, the CLI has more informative output
		fmt.Println("Image not available locally. Trying to pull '" + imageID + "'...")
		utils.RunDockerCmd("pull", imageID)
	}

	archive, _, err := image.getReader(imageID)
	if err != nil {
		return err
	}
	defer archive.Close()

	return image.read(archive)
}


// read walks the entries of the image archive in tarFile in a single pass.
//
// Regular-file and symlink entries whose name ends in "layer.tar" are handed
// to processLayerTar as a nested tar stream; entries whose name ends in
// ".json" are read in full and stored in image.jsonFiles under their name.
// All other entries are ignored.
//
// A failure to advance the outer tar reader is printed and followed by
// utils.Exit(1). A failure to read the complete contents of a json entry is
// returned as an error.
//
// todo: it is bad that this is printing out to the screen
func (image *dockerImageAnalyzer) read(tarFile io.ReadCloser) error {
	tarReader := tar.NewReader(tarFile)

	var currentLayer uint
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			fmt.Println(" ╧")
			break
		}
		if err != nil {
			fmt.Println(err)
			utils.Exit(1)
		}

		// some layer tars can be relative layer symlinks to other layer tars
		if header.Typeflag != tar.TypeSymlink && header.Typeflag != tar.TypeReg {
			continue
		}

		name := header.Name
		switch {
		case strings.HasSuffix(name, "layer.tar"):
			// The label uses the counter value from before the increment.
			layerProgress := fmt.Sprintf("[layer: %2d]", currentLayer)
			currentLayer++

			message := fmt.Sprintf(" ├─ %s %s ", layerProgress, "working...")
			fmt.Printf("\r%s", message)

			image.processLayerTar(name, tar.NewReader(tarReader), layerProgress)

		case strings.HasSuffix(name, ".json"):
			// A single Read may return fewer bytes than requested, so use
			// io.ReadFull and report a short read instead of silently
			// returning success with the archive only partly processed.
			fileBuffer := make([]byte, header.Size)
			if _, err := io.ReadFull(tarReader, fileBuffer); err != nil {
				return fmt.Errorf("reading %q from image archive: %v", name, err)
			}
			image.jsonFiles[name] = fileBuffer
		}
	}

	return nil
}

// Analyze builds the per-layer file trees and layer metadata from the data
// collected in image.jsonFiles and image.layerMap, then computes efficiency
// figures with filetree.Efficiency.
//
// The returned AnalysisResult carries the layers, the trees (as RefTrees),
// the efficiency score and the list of inefficiencies. The error result is
// always nil in this implementation; note that the manifest/config decoding
// helpers called here panic on malformed JSON rather than returning an error.
func (image *dockerImageAnalyzer) Analyze() (*AnalysisResult, error) {
image.trees = make([]*filetree.FileTree, 0)

// the manifest is looked up under the fixed name "manifest.json"; the config
// is looked up under the path the manifest names
manifest := newDockerImageManifest(image.jsonFiles["manifest.json"])
config := newDockerImageConfig(image.jsonFiles[manifest.ConfigPath])

// build the content tree
// (trees end up in the same order as manifest.LayerTarPaths; a path missing
// from layerMap yields a nil tree)
for _, treeName := range manifest.LayerTarPaths {
image.trees = append(image.trees, image.layerMap[treeName])
}

// build the layers array
image.layers = make([]*dockerLayer, len(image.trees))

// note that the image config stores images in reverse chronological order, so iterate backwards through layers
// as you iterate chronologically through history (ignoring history items that have no layer contents)
// NOTE(review): layerIdx is decremented once per non-empty history entry with
// no lower bound check; this presumably relies on the config having exactly
// len(image.trees) non-empty history entries — confirm. Fewer entries would
// leave nil slots in image.layers, more would index out of range.
layerIdx := len(image.trees) - 1
tarPathIdx := 0
for idx := 0; idx < len(config.History); idx++ {
// ignore empty layers, we are only observing layers with content
if config.History[idx].EmptyLayer {
continue
}

// the size recorded on the history entry comes from the tree at the
// mirrored position (counting from the start of image.trees)
tree := image.trees[(len(image.trees)-1)-layerIdx]
config.History[idx].Size = uint64(tree.FileSize)

// NOTE(review): the tree stored on the layer is image.trees[layerIdx], which
// is a different element from the one used for Size above (unless the two
// indices coincide), while tarPath advances from the front — verify that
// this pairing is intended.
image.layers[layerIdx] = &dockerLayer{
history: config.History[idx],
index: layerIdx,
tree: image.trees[layerIdx],
tarPath: manifest.LayerTarPaths[tarPathIdx],
}

layerIdx--
tarPathIdx++
}


efficiency, inefficiencies := filetree.Efficiency(image.trees)

// copy the concrete layers into a slice of the Layer interface type
layers := make([]Layer, len(image.layers))
for i, v := range image.layers {
layers[i] = v
}

return &AnalysisResult{
Layers: layers,
RefTrees: image.trees,
Efficiency: efficiency,
Inefficiencies: inefficiencies,
}, nil
}


// getReader inspects imageID to learn its size and then asks the docker
// client to save the image, returning the resulting archive stream together
// with the reported size.
//
// On any failure it returns a nil reader, a size of -1 and the error. The
// caller is responsible for closing the returned reader.
func (image *dockerImageAnalyzer) getReader(imageID string) (io.ReadCloser, int64, error) {
	ctx := context.Background()

	result, _, err := image.client.ImageInspectWithRaw(ctx, imageID)
	if err != nil {
		return nil, -1, err
	}
	totalSize := result.Size

	// Report a failed save the same way as a failed inspect instead of
	// panicking: this function already has an error result for that purpose.
	readCloser, err := image.client.ImageSave(ctx, []string{imageID})
	if err != nil {
		return nil, -1, err
	}

	return readCloser, totalSize, nil
}

// processLayerTar builds a file tree for one layer archive and stores it in
// image.layerMap under name.
//
// name is the archive entry name of the layer tar, reader is positioned at the
// start of that nested tar, and layerProgress is the label prefix used on the
// progress line. Every file listed by getFileList is added to the tree and its
// size is accumulated into tree.FileSize; a progress bar is redrawn on stdout
// as files are added.
//
// todo: it is bad that this is printing out to the screen
func (image *dockerImageAnalyzer) processLayerTar(name string, reader *tar.Reader, layerProgress string) {
	tree := filetree.NewFileTree()
	tree.Name = name

	fileInfos := image.getFileList(reader)

	// Show at most the first 15 bytes of the name; clamp so that short names
	// (e.g. a top-level "layer.tar") do not slice out of range.
	shortName := name
	if len(shortName) > 15 {
		shortName = shortName[:15]
	}

	pb := utils.NewProgressBar(int64(len(fileInfos)), 30)
	for idx, element := range fileInfos {
		tree.FileSize += uint64(element.TarHeader.FileInfo().Size())
		tree.AddPath(element.Path, element)

		// only redraw when the progress bar reports a visible change
		if pb.Update(int64(idx)) {
			message := fmt.Sprintf(" ├─ %s %s : %s", layerProgress, shortName, pb.String())
			fmt.Printf("\r%s", message)
		}
	}
	pb.Done()
	message := fmt.Sprintf(" ├─ %s %s : %s", layerProgress, shortName, pb.String())
	fmt.Printf("\r%s\n", message)

	image.layerMap[tree.Name] = tree
}

// getFileList drains tarReader and returns one filetree.FileInfo per entry,
// in archive order. Entries typed as PAX global or PAX extended headers are
// not collected; a diagnostic line is printed for each instead. A read error
// other than end-of-archive is printed and followed by utils.Exit(1).
//
// todo: it is bad that this is printing out to the screen
func (image *dockerImageAnalyzer) getFileList(tarReader *tar.Reader) []filetree.FileInfo {
	var collected []filetree.FileInfo

	for hdr, err := tarReader.Next(); err != io.EOF; hdr, err = tarReader.Next() {
		if err != nil {
			fmt.Println(err)
			utils.Exit(1)
		}

		entryName := hdr.Name

		if hdr.Typeflag == tar.TypeXGlobalHeader {
			fmt.Printf("ERRG: XGlobalHeader: %v: %s\n", hdr.Typeflag, entryName)
		} else if hdr.Typeflag == tar.TypeXHeader {
			fmt.Printf("ERRG: XHeader: %v: %s\n", hdr.Typeflag, entryName)
		} else {
			collected = append(collected, filetree.NewFileInfo(tarReader, hdr, entryName))
		}
	}

	return collected
}
Loading

0 comments on commit 7018cb3

Please sign in to comment.