diff --git a/cmd/new-ui/v1beta1/main.go b/cmd/new-ui/v1beta1/main.go index 68d2f1dcd2b..7ad6e5f6001 100644 --- a/cmd/new-ui/v1beta1/main.go +++ b/cmd/new-ui/v1beta1/main.go @@ -67,6 +67,7 @@ func main() { http.HandleFunc("/katib/edit_template/", kuh.EditTemplate) http.HandleFunc("/katib/delete_template/", kuh.DeleteTemplate) http.HandleFunc("/katib/fetch_namespaces", kuh.FetchNamespaces) + http.HandleFunc("/katib/fetch_trial_logs/", kuh.FetchTrialLogs) log.Printf("Serving at %s:%s", *host, *port) if err := http.ListenAndServe(fmt.Sprintf("%s:%s", *host, *port), nil); err != nil { diff --git a/manifests/v1beta1/components/ui/rbac.yaml b/manifests/v1beta1/components/ui/rbac.yaml index c549bf351d3..9a47971a006 100644 --- a/manifests/v1beta1/components/ui/rbac.yaml +++ b/manifests/v1beta1/components/ui/rbac.yaml @@ -19,6 +19,14 @@ rules: - suggestions verbs: - "*" + - apiGroups: + - "" + resources: + - pods + - pods/log + verbs: + - list + - get --- apiVersion: v1 kind: ServiceAccount diff --git a/pkg/new-ui/v1beta1/backend.go b/pkg/new-ui/v1beta1/backend.go index 80c5aff83b4..b8afa2b74bd 100644 --- a/pkg/new-ui/v1beta1/backend.go +++ b/pkg/new-ui/v1beta1/backend.go @@ -17,7 +17,10 @@ limitations under the License. package v1beta1 import ( + "bytes" + "context" "encoding/json" + "io" "log" "net/http" "path/filepath" @@ -27,8 +30,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" experimentv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1beta1" + trialsv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/trials/v1beta1" api_pb_v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1" "github.com/kubeflow/katib/pkg/util/v1beta1/katibclient" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + corev1 "k8s.io/client-go/kubernetes/typed/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client/config" ) func NewKatibUIHandler(dbManagerAddr string) *KatibUIHandler { @@ -421,3 +431,87 @@ func (k *KatibUIHandler) FetchTrial(w http.ResponseWriter, r *http.Request) { return } } + +// FetchTrialLogs fetches logs for a trial in specific namespace. +func (k *KatibUIHandler) FetchTrialLogs(w http.ResponseWriter, r *http.Request) { + log.Printf("Requesting logs") + + trialName := r.URL.Query()["trialName"][0] + namespace := r.URL.Query()["namespace"][0] + log.Printf("Requesting logs") + + logs, err := getTrialLogs(k, trialName, namespace) + if err != nil { + log.Printf("GetLogs failed: %v", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + response, err := json.Marshal(logs) + if err != nil { + log.Printf("Marshal logs failed: %v", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.Write(response) +} + +// GetTrialLogs returns logs of a master Pod for the given job name and namespace +func getTrialLogs(k *KatibUIHandler, trialName string, namespace string) (string, error) { + cfg, err := config.GetConfig() + if err != nil { + return "", err + } + + clientset, err := corev1.NewForConfig(cfg) + if err != nil { + return "", err + } + + trial := &trialsv1beta1.Trial{} + if err := k.katibClient.GetClient().Get(context.TODO(), types.NamespacedName{Name: trialName, Namespace: namespace}, trial); err != nil { + return "", err + } + + selectionLabel := "training.kubeflow.org/job-name=" + trialName + ",training.kubeflow.org/job-role=master" + if trial.Spec.RunSpec.GetKind() == "Job" { + selectionLabel = "job-name=" + trialName + } + + podList, err := clientset.Pods(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: selectionLabel}) + if err != nil { + return "", err + } + + if len(podList.Items) == 0 { + message := `Logs for the trial could not be found. +Was 'retain: true' specified in the Experiment definition? +An example can be found here: https://github.com/kubeflow/katib/blob/master/examples/v1beta1/argo/argo-workflow.yaml#L33` + + return message, nil + } + + podLogOpts := apiv1.PodLogOptions{} + podLogOpts.Container = trial.Spec.PrimaryContainerName + for container := range podList.Items[0].Spec.Containers { + if podList.Items[0].Spec.Containers[container].Name == "metrics-logger-and-collector" { + podLogOpts.Container = "metrics-logger-and-collector" + break + } + } + + req := clientset.Pods(namespace).GetLogs(podList.Items[0].Name, &podLogOpts) + podLogs, err := req.Stream(context.Background()) + if err != nil { + return "", err + } + defer podLogs.Close() + + buf := new(bytes.Buffer) + _, err = io.Copy(buf, podLogs) + if err != nil { + return "", err + } + str := buf.String() + + return str, nil +}