Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor image preprocessing #121

Merged
merged 4 commits into from
Dec 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions cmd/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import (
"github.com/wagoodman/dive/utils"
)

// analyze takes a docker image tag, digest, or id and displays the
// doAnalyzeCmd takes a docker image tag, digest, or id and displays the
// image analysis to the screen
func analyze(cmd *cobra.Command, args []string) {
func doAnalyzeCmd(cmd *cobra.Command, args []string) {
defer utils.Cleanup()
if len(args) == 0 {
printVersionFlag, err := cmd.PersistentFlags().GetBool("version")
Expand All @@ -33,6 +33,25 @@ func analyze(cmd *cobra.Command, args []string) {
utils.Exit(1)
}
color.New(color.Bold).Println("Analyzing Image")
manifest, refTrees, efficiency, inefficiencies := image.InitializeData(userImage)
ui.Run(manifest, refTrees, efficiency, inefficiencies)

ui.Run(fetchAndAnalyze(userImage))
}

func fetchAndAnalyze(imageID string) *image.AnalysisResult {
analyzer := image.GetAnalyzer(imageID)

fmt.Println(" Fetching image...")
err := analyzer.Parse(imageID)
if err != nil {
fmt.Printf("cannot fetch image: %v\n", err)
utils.Exit(1)
}

fmt.Println(" Analyzing image...")
result, err := analyzer.Analyze()
if err != nil {
fmt.Printf("cannot doAnalyzeCmd image: %v\n", err)
utils.Exit(1)
}
return result
}
11 changes: 5 additions & 6 deletions cmd/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"github.com/fatih/color"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/wagoodman/dive/image"
"github.com/wagoodman/dive/ui"
"github.com/wagoodman/dive/utils"
"io/ioutil"
Expand All @@ -16,15 +15,15 @@ var buildCmd = &cobra.Command{
Use: "build [any valid `docker build` arguments]",
Short: "Builds and analyzes a docker image from a Dockerfile (this is a thin wrapper for the `docker build` command).",
DisableFlagParsing: true,
Run: doBuild,
Run: doBuildCmd,
}

func init() {
rootCmd.AddCommand(buildCmd)
}

// doBuild implements the steps taken for the build command
func doBuild(cmd *cobra.Command, args []string) {
// doBuildCmd implements the steps taken for the build command
func doBuildCmd(cmd *cobra.Command, args []string) {
defer utils.Cleanup()
iidfile, err := ioutil.TempFile("/tmp", "dive.*.iid")
if err != nil {
Expand All @@ -47,6 +46,6 @@ func doBuild(cmd *cobra.Command, args []string) {
}

color.New(color.Bold).Println("Analyzing Image")
manifest, refTrees, efficiency, inefficiencies := image.InitializeData(string(imageId))
ui.Run(manifest, refTrees, efficiency, inefficiencies)

ui.Run(fetchAndAnalyze(string(imageId)))
}
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var rootCmd = &cobra.Command{
Long: `This tool provides a way to discover and explore the contents of a docker image. Additionally the tool estimates
the amount of wasted space and identifies the offending files from the image.`,
Args: cobra.MaximumNArgs(1),
Run: analyze,
Run: doAnalyzeCmd,
}

// Execute adds all child commands to the root command and sets flags appropriately.
Expand Down
263 changes: 263 additions & 0 deletions image/docker_image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
package image

import (
"archive/tar"
"encoding/json"
"fmt"
"github.com/docker/docker/client"
"github.com/sirupsen/logrus"
"github.com/wagoodman/dive/filetree"
"github.com/wagoodman/dive/utils"
"golang.org/x/net/context"
"io"
"io/ioutil"
"strings"
)

var dockerVersion string

func newDockerImageAnalyzer() Analyzer {
return &dockerImageAnalyzer{}
}

func newDockerImageManifest(manifestBytes []byte) dockerImageManifest {
var manifest []dockerImageManifest
err := json.Unmarshal(manifestBytes, &manifest)
if err != nil {
logrus.Panic(err)
}
return manifest[0]
}

func newDockerImageConfig(configBytes []byte) dockerImageConfig {
var imageConfig dockerImageConfig
err := json.Unmarshal(configBytes, &imageConfig)
if err != nil {
logrus.Panic(err)
}

layerIdx := 0
for idx := range imageConfig.History {
if imageConfig.History[idx].EmptyLayer {
imageConfig.History[idx].ID = "<missing>"
} else {
imageConfig.History[idx].ID = imageConfig.RootFs.DiffIds[layerIdx]
layerIdx++
}
}

return imageConfig
}

func (image *dockerImageAnalyzer) Parse(imageID string) error {
var err error
image.id = imageID
// store discovered json files in a map so we can read the image in one pass
image.jsonFiles = make(map[string][]byte)
image.layerMap = make(map[string]*filetree.FileTree)

// pull the image if it does not exist
ctx := context.Background()
image.client, err = client.NewClientWithOpts(client.WithVersion(dockerVersion), client.FromEnv)
if err != nil {
return err
}
_, _, err = image.client.ImageInspectWithRaw(ctx, imageID)
if err != nil {
// don't use the API, the CLI has more informative output
fmt.Println("Image not available locally. Trying to pull '" + imageID + "'...")
utils.RunDockerCmd("pull", imageID)
}

tarFile, _, err := image.getReader(imageID)
if err != nil {
return err
}
defer tarFile.Close()

err = image.read(tarFile)
if err != nil {
return err
}
return nil
}

func (image *dockerImageAnalyzer) read(tarFile io.ReadCloser) error {
tarReader := tar.NewReader(tarFile)

var currentLayer uint
for {
header, err := tarReader.Next()

if err == io.EOF {
fmt.Println(" ╧")
break
}

if err != nil {
fmt.Println(err)
utils.Exit(1)
}

name := header.Name

// some layer tars can be relative layer symlinks to other layer tars
if header.Typeflag == tar.TypeSymlink || header.Typeflag == tar.TypeReg {

if strings.HasSuffix(name, "layer.tar") {
currentLayer++
if err != nil {
return err
}
layerReader := tar.NewReader(tarReader)
err := image.processLayerTar(name, currentLayer, layerReader)
if err != nil {
return err
}
} else if strings.HasSuffix(name, ".json") {
fileBuffer, err := ioutil.ReadAll(tarReader)
if err != nil {
return err
}
image.jsonFiles[name] = fileBuffer
}
}
}

return nil
}

func (image *dockerImageAnalyzer) Analyze() (*AnalysisResult, error) {
image.trees = make([]*filetree.FileTree, 0)

manifest := newDockerImageManifest(image.jsonFiles["manifest.json"])
config := newDockerImageConfig(image.jsonFiles[manifest.ConfigPath])

// build the content tree
for _, treeName := range manifest.LayerTarPaths {
image.trees = append(image.trees, image.layerMap[treeName])
}

// build the layers array
image.layers = make([]*dockerLayer, len(image.trees))

// note that the image config stores images in reverse chronological order, so iterate backwards through layers
// as you iterate chronologically through history (ignoring history items that have no layer contents)
layerIdx := len(image.trees) - 1
tarPathIdx := 0
for idx := 0; idx < len(config.History); idx++ {
// ignore empty layers, we are only observing layers with content
if config.History[idx].EmptyLayer {
continue
}

tree := image.trees[(len(image.trees)-1)-layerIdx]
config.History[idx].Size = uint64(tree.FileSize)

image.layers[layerIdx] = &dockerLayer{
history: config.History[idx],
index: layerIdx,
tree: image.trees[layerIdx],
tarPath: manifest.LayerTarPaths[tarPathIdx],
}

layerIdx--
tarPathIdx++
}

efficiency, inefficiencies := filetree.Efficiency(image.trees)

layers := make([]Layer, len(image.layers))
for i, v := range image.layers {
layers[i] = v
}

return &AnalysisResult{
Layers: layers,
RefTrees: image.trees,
Efficiency: efficiency,
Inefficiencies: inefficiencies,
}, nil
}

func (image *dockerImageAnalyzer) getReader(imageID string) (io.ReadCloser, int64, error) {

ctx := context.Background()
result, _, err := image.client.ImageInspectWithRaw(ctx, imageID)
if err != nil {
return nil, -1, err
}
totalSize := result.Size

readCloser, err := image.client.ImageSave(ctx, []string{imageID})
if err != nil {
return nil, -1, err
}

return readCloser, totalSize, nil
}

// todo: it is bad that this is printing out to the screen. As the interface gets more flushed out, an event update mechanism should be built in (so the caller can format and print updates)
func (image *dockerImageAnalyzer) processLayerTar(name string, layerIdx uint, reader *tar.Reader) error {
tree := filetree.NewFileTree()
tree.Name = name

title := fmt.Sprintf("[layer: %2d]", layerIdx)
message := fmt.Sprintf(" ├─ %s %s ", title, "working...")
fmt.Printf("\r%s", message)

fileInfos, err := image.getFileList(reader)
if err != nil {
return err
}

shortName := name[:15]
pb := utils.NewProgressBar(int64(len(fileInfos)), 30)
for idx, element := range fileInfos {
tree.FileSize += uint64(element.TarHeader.FileInfo().Size())
_, err := tree.AddPath(element.Path, element)
if err != nil {
return err
}

if pb.Update(int64(idx)) {
message = fmt.Sprintf(" ├─ %s %s : %s", title, shortName, pb.String())
fmt.Printf("\r%s", message)
}
}
pb.Done()
message = fmt.Sprintf(" ├─ %s %s : %s", title, shortName, pb.String())
fmt.Printf("\r%s\n", message)

image.layerMap[tree.Name] = tree
return nil
}

func (image *dockerImageAnalyzer) getFileList(tarReader *tar.Reader) ([]filetree.FileInfo, error) {
var files []filetree.FileInfo

for {
header, err := tarReader.Next()

if err == io.EOF {
break
}

if err != nil {
fmt.Println(err)
utils.Exit(1)
}

name := header.Name

switch header.Typeflag {
case tar.TypeXGlobalHeader:
return nil, fmt.Errorf("unexptected tar file: (XGlobalHeader): type=%v name=%s", header.Typeflag, name)
case tar.TypeXHeader:
return nil, fmt.Errorf("unexptected tar file (XHeader): type=%v name=%s", header.Typeflag, name)
default:
files = append(files, filetree.NewFileInfo(tarReader, header, name))
}
}
return files, nil
}
Loading