Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
61b2fc8
feat(tree): add WorkloadTree builder
lavianalon May 4, 2026
7c2ecf9
feat(cli): add karta workload CLI with tree and list
lavianalon May 4, 2026
06a2ff7
feat(cli): colorize tree and list output with --color flag
lavianalon May 4, 2026
7c6ce5a
fix(tree): non-leaf components only own pods their descendants claim
lavianalon May 4, 2026
5dc9405
feat(tree): support multi-instance components for Dynamo
lavianalon May 4, 2026
a46ed4c
feat(tree): replicaSelector splits per-replica subtrees
lavianalon May 4, 2026
927383b
fix(render): per-replica desired counts in replica wrappers
lavianalon May 4, 2026
0b86d9f
feat(cli): per-sibling-group column alignment in tree output
lavianalon May 6, 2026
229f088
refactor(cli): tighter tree indent (kubectl-tree style)
lavianalon May 6, 2026
d8a4309
fix(cli): align gpu and nodes columns across component and pod rows
lavianalon May 6, 2026
6d8bc55
fix(cli): consistent nodes label and color across component and pod rows
lavianalon May 6, 2026
03997a4
refactor(cli): drop nodes label, dim hostnames in trailing column
lavianalon May 6, 2026
c8a6d85
fix(cli): align list columns with ANSI-aware padding
lavianalon May 19, 2026
4b8d2e0
feat(cli): add --all-namespaces / -A flag to workload list
lavianalon May 19, 2026
457833f
fix(examples): correct malformed mpijob.yaml status mappings
lavianalon May 19, 2026
89bd383
feat(cli): bundle all Karta definitions and add sync target
lavianalon May 19, 2026
006a9e3
feat(cli): publish CLI to GitHub Releases via goreleaser
lavianalon May 19, 2026
89c9e3f
Merge remote-tracking branch 'origin/main' into feat/cli-poc
lavianalon May 19, 2026
a7bc745
refactor(tree): drop controller-runtime; use resource.KubernetesObject
lavianalon May 19, 2026
f475c5e
refactor(cli): move karta CLI to its own go submodule
lavianalon May 19, 2026
98d2d8a
chore: sync bundled pytorch.yaml and regenerate licenses
lavianalon May 20, 2026
a85b34b
chore: pin Go 1.25 and k8s deps to v0.35.1 to match main
lavianalon May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions .github/workflows/release-cli.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2026 NVIDIA Corporation
#
# Builds and publishes the karta CLI to GitHub Releases.
# Triggered by tags matching kartacli-v* — kept namespace-separate from the
# existing push-artifacts.yaml workflow which fires on v[0-9].[0-9].[0-9].
#
# The kartacli- prefix is stripped before goreleaser runs (via
# GORELEASER_CURRENT_TAG) because goreleaser's monorepo feature is Pro-only.
# Goreleaser builds artifacts under dist/ with --skip=publish; the GitHub
# Release for the original kartacli- tag is then created via the gh CLI.

name: Release karta CLI

on:
  push:
    tags:
      - 'kartacli-v*'

# The publish step creates a GitHub Release, which needs write access to
# repository contents.
permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      # Refresh the CLI's bundled definitions before building.
      - name: Sync bundled Karta definitions
        run: make sync-cli-definitions

      # Exposes two outputs:
      #   tag     - the pushed tag as-is (e.g. kartacli-v1.2.3)
      #   version - the tag with the kartacli- prefix removed
      - name: Compute version
        id: ver
        run: |
          TAG="${GITHUB_REF#refs/tags/}"
          VERSION="${TAG#kartacli-}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
          echo "version=${VERSION}" >> "$GITHUB_OUTPUT"

      # Build only (--skip=publish); the release itself is created below.
      - name: Build artifacts with goreleaser
        uses: goreleaser/goreleaser-action@v6
        with:
          workdir: cmd/karta
          distribution: goreleaser
          version: '~> v2'
          args: release --clean --skip=publish
        env:
          GORELEASER_CURRENT_TAG: ${{ steps.ver.outputs.version }}

      # The tag is handed to the script through an environment variable
      # rather than being expanded into the script text: a tag name is
      # pusher-controlled and may contain shell metacharacters, so inlining
      # the expression would allow script injection.
      - name: Publish GitHub Release
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          RELEASE_TAG: ${{ steps.ver.outputs.tag }}
        run: |
          gh release create "${RELEASE_TAG}" \
            cmd/karta/dist/*.tar.gz cmd/karta/dist/*.zip cmd/karta/dist/*_checksums.txt \
            --prerelease \
            --latest=false \
            --title "karta CLI ${RELEASE_TAG}" \
            --generate-notes
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ config/crd/_.yaml
*~
.vscode/
.idea
.dori/
13 changes: 12 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ $(LOCALBIN):
# Directory of the last-parsed makefile (this file, assuming no include
# precedes this line); the paths below are derived from it.
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
# Helm chart directory and its CRD subdirectory.
KARTA_CHART_DIR := $(PROJECT_DIR)/charts/karta
KARTA_CRDS_DIR := $(KARTA_CHART_DIR)/crds
# Source directory of the Karta definition YAMLs copied by sync-cli-definitions.
KARTA_DEFINITIONS_DIR := $(PROJECT_DIR)/docs/examples
# Destination directory inside the CLI tree that sync-cli-definitions refreshes.
CLI_DEFINITIONS_DIR := $(PROJECT_DIR)/cmd/karta/internal/definitions/community

HELM_CHART_VERSION ?= 0.0.1

Expand All @@ -32,6 +34,11 @@ PATH := $(abspath $(LOCALBIN)):$(PATH)
manifests: controller-gen ## Generate CRD manifests
$(CONTROLLER_GEN) crd paths="./pkg/..." output:crd:artifacts:config=$(KARTA_CRDS_DIR)

# Replace the YAML files in $(CLI_DEFINITIONS_DIR) with a fresh copy of the
# ones in $(KARTA_DEFINITIONS_DIR). Existing *.yaml files in the destination
# are removed first so definitions deleted upstream do not linger.
#
# The first recipe line aborts if CLI_DEFINITIONS_DIR was overridden to an
# empty value, which would otherwise turn the rm into `rm -f /*.yaml`.
# mkdir -p makes the target work when the destination does not exist yet.
.PHONY: sync-cli-definitions
sync-cli-definitions: ## Sync canonical Karta definitions into the CLI's embedded bundle
	@$(if $(strip $(CLI_DEFINITIONS_DIR)),,$(error CLI_DEFINITIONS_DIR is empty))
	mkdir -p $(CLI_DEFINITIONS_DIR)
	rm -f $(CLI_DEFINITIONS_DIR)/*.yaml
	cp $(KARTA_DEFINITIONS_DIR)/*.yaml $(CLI_DEFINITIONS_DIR)/

.PHONY: generate
generate: controller-gen ## Generate DeepCopy methods
$(CONTROLLER_GEN) object paths="./..."
Expand Down Expand Up @@ -61,7 +68,7 @@ lint: fmt-go vet-go lint-go
.PHONY: lint

.PHONY: validate
validate: generate manifests generate-mocks generate-licenses
validate: generate manifests generate-mocks generate-licenses sync-cli-definitions
@git diff --exit-code

.PHONY: install-crd
Expand Down Expand Up @@ -128,6 +135,10 @@ check: download-dependencies validate test lint

##@ Helm

# Runs goreleaser from cmd/karta in snapshot mode with publishing skipped.
# The cd and the goreleaser call are chained in one shell invocation because
# each recipe line runs in its own shell.
.PHONY: release-snapshot
release-snapshot: ## Build a local snapshot release with goreleaser (no GH publish)
	cd cmd/karta && \
		goreleaser release --snapshot --clean --skip=publish

.PHONY: helm-build
helm-build: ## Build the helm chart
helm package $(KARTA_CHART_DIR) --version $(HELM_CHART_VERSION) --app-version $(HELM_CHART_VERSION)
Expand Down
8 changes: 4 additions & 4 deletions THIRD_PARTY_LICENSES
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,13 @@ For complete license texts, please refer to the source repositories or the LICEN

### k8s.io/klog/v2
- Name: k8s.io/klog/v2
- Version: v2.130.1
- License: [Apache-2.0](https://github.com/kubernetes/klog/blob/v2.130.1/LICENSE)
- Version: v2.140.0
- License: [Apache-2.0](https://github.com/kubernetes/klog/blob/v2.140.0/LICENSE)

### k8s.io/kube-openapi/pkg/util
- Name: k8s.io/kube-openapi/pkg/util
- Version: v0.0.0-20260127142750-a19766b6e2d4
- License: [Apache-2.0](https://github.com/kubernetes/kube-openapi/blob/a19766b6e2d4/LICENSE)
- Version: v0.0.0-20260317180543-43fb72c5454a
- License: [Apache-2.0](https://github.com/kubernetes/kube-openapi/blob/43fb72c5454a/LICENSE)

### k8s.io/utils
- Name: k8s.io/utils
Expand Down
6 changes: 6 additions & 0 deletions cmd/karta/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Staged at release time by .goreleaser.yaml's before-hook
LICENSE
# goreleaser output
dist/
# local builds of the CLI binary
karta
62 changes: 62 additions & 0 deletions cmd/karta/.goreleaser.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Goreleaser config for the karta CLI.
# Triggered by tags matching kartacli-v* (see .github/workflows/release-cli.yaml).
# The kartacli- prefix isolates these releases from the existing
# v[0-9].[0-9].[0-9] tags used for the Karta helm chart / Go module.
version: 2

project_name: karta-cli

# Tag prefix: release tags carry a kartacli- prefix, which the GitHub workflow
# (.github/workflows/release-cli.yaml) removes via GORELEASER_CURRENT_TAG
# before goreleaser runs, because the goreleaser monorepo feature is Pro-only.

before:
  hooks:
    - go mod tidy
    # Stage the repository LICENSE next to this config so the archives can
    # include it.
    - sh -c 'cp ../../LICENSE LICENSE'

builds:
  - id: karta
    main: .
    binary: karta
    env:
      - CGO_ENABLED=0
    goos:
      - darwin
      - linux
      - windows
    goarch:
      - amd64
      - arm64
    # windows/arm64 is the one OS/arch pair that is not built.
    ignore:
      - goos: windows
        goarch: arm64
    # -X sets the version, commit and date variables of the cmd package.
    ldflags:
      - -s -w
      - -X github.com/run-ai/karta/cmd/karta/cmd.version={{.Version}}
      - -X github.com/run-ai/karta/cmd/karta/cmd.commit={{.ShortCommit}}
      - -X github.com/run-ai/karta/cmd/karta/cmd.date={{.Date}}

archives:
  - id: karta
    ids:
      - karta
    name_template: 'karta_{{.Version}}_{{.Os}}_{{.Arch}}'
    formats:
      - tar.gz
    # Windows archives are zip instead of tar.gz.
    format_overrides:
      - goos: windows
        formats:
          - zip
    files:
      - LICENSE
      - README.md

checksum:
  name_template: "karta_{{.Version}}_checksums.txt"

release:
  prerelease: auto # auto-marks pre-release when the tag has -alpha/-beta/-rc
  name_template: 'karta CLI {{.Tag}}'

changelog:
  use: github
  sort: asc
  filters:
    exclude:
      - '^docs:'
      - '^test:'
      - '^chore:'
      - '^ci:'
64 changes: 64 additions & 0 deletions cmd/karta/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
karta — workload-aware visibility for Kubernetes AI workloads
=============================================================

> **Alpha.** The CRD schema, CLI flags, and output format may change
> without notice between releases. Don't build automation against
> karta output yet.

Install
-------

Linux / macOS:

tar -xzf karta_*.tar.gz
sudo mv karta /usr/local/bin/karta

Windows: extract `karta.exe` from the `.zip` and place it on `PATH`.

karta uses your existing kubeconfig — no separate setup needed.

Usage
-----

karta workload list # current namespace
karta workload list -A # all namespaces
karta workload list -n my-namespace # specific namespace
karta workload tree <name> # hierarchical view of one workload
karta --context my-cluster workload list

karta accepts the same kubeconfig flags as kubectl — for example
`--kubeconfig`, `--context`, and `-n` — plus `--color {auto,always,never}`.

See `karta --help` and `karta workload --help` for the full surface.

Supported workload kinds
------------------------

The CLI ships with built-in Karta definitions for:

* PyTorchJob (kubeflow.org)
* JobSet (jobset.x-k8s.io)
* RayCluster, RayJob (ray.io)
* MPIJob (kubeflow.org)
* LeaderWorkerSet (leaderworkerset.x-k8s.io)
* InferenceService (serving.kserve.io)
* Service (serving.knative.dev)
* DynamoGraphDeployment (nvidia.com)
* NIMService (apps.nvidia.com)
* Milvus

To add a new workload kind, write a Karta definition and contribute
it under `docs/examples/`. Running `make sync-cli-definitions` copies the
definitions from there into the CLI's embedded bundle.

Version & bugs
--------------

karta version

Project: https://github.com/run-ai/karta
Issues: https://github.com/run-ai/karta/issues

License
-------

Apache-2.0. See `LICENSE` in this archive.
57 changes: 57 additions & 0 deletions cmd/karta/cmd/root.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 NVIDIA Corporation

package cmd

import (
"io"

"github.com/spf13/cobra"
"k8s.io/cli-runtime/pkg/genericclioptions"

"github.com/run-ai/karta/cmd/karta/internal/render"
)

// styleFor returns the render.Style to use when writing to w, as selected
// by the --color flag: "always" yields render.ForceStyle, "never" yields
// render.PlainStyle, and every other value (including the default "auto")
// defers to render.AutoStyle, which decides based on the writer w.
func (o *rootOptions) styleFor(w io.Writer) render.Style {
	if o.colorMode == "always" {
		return render.ForceStyle()
	}
	if o.colorMode == "never" {
		return render.PlainStyle()
	}
	// Unrecognized values are treated the same as "auto".
	return render.AutoStyle(w)
}

// rootOptions holds the state bound to the root command's persistent flags.
// A single instance is created in NewRootCmd and passed to the workload
// subcommand.
type rootOptions struct {
	// configFlags holds the flags registered on the root command through
	// genericclioptions.ConfigFlags.AddFlags.
	configFlags *genericclioptions.ConfigFlags
	// colorMode is the raw --color value. styleFor recognizes "always" and
	// "never"; any other value is handled as automatic detection.
	colorMode string
}

// NewRootCmd builds the top-level "karta" command. It registers the
// kubeconfig flags and the --color flag as persistent flags, then attaches
// the "workload" and "version" subcommands.
func NewRootCmd() *cobra.Command {
	options := &rootOptions{configFlags: genericclioptions.NewConfigFlags(true)}

	cmd := &cobra.Command{
		Use:   "karta",
		Short: "Karta CLI — workload-aware visibility for any Kubernetes AI workload",
		Long: `Karta is a CLI that reads Karta workload definitions and renders a unified view
of any Kubernetes AI workload — components, roles, scaling, status, GPU allocation —
across PyTorchJob, RayCluster, JobSet, KServe, and any custom CRD with a Karta definition.

Same output shape regardless of the underlying CRD.`,
		// Do not print usage text when a command returns an error; the
		// error itself is still printed.
		SilenceUsage:  true,
		SilenceErrors: false,
	}

	// Persistent flags are available on every subcommand.
	persistent := cmd.PersistentFlags()
	options.configFlags.AddFlags(persistent)
	persistent.StringVar(&options.colorMode, "color", "auto", "Colorize output: auto, always, never")

	cmd.AddCommand(
		newWorkloadCmd(options),
		newVersionCmd(),
	)

	return cmd
}
29 changes: 29 additions & 0 deletions cmd/karta/cmd/version.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 NVIDIA Corporation

package cmd

import (
"fmt"

"github.com/spf13/cobra"
)

// Build metadata injected via ldflags at release time. Defaults make sense
// for `go build` developer builds.
//
// The goreleaser config sets these with -X flags targeting this package's
// version, commit, and date variables; the version command prints them.
var (
	// version is the release version string; "dev" when not injected.
	version = "dev"
	// commit is the source commit identifier; "none" when not injected.
	commit = "none"
	// date is the build date; "unknown" when not injected.
	date = "unknown"
)

// newVersionCmd builds the "version" subcommand. It accepts no positional
// arguments and writes one line containing the version, commit, and build
// date to the command's output stream.
func newVersionCmd() *cobra.Command {
	printVersion := func(cmd *cobra.Command, _ []string) {
		fmt.Fprintf(cmd.OutOrStdout(), "karta %s (commit %s, built %s)\n", version, commit, date)
	}

	versionCmd := &cobra.Command{
		Use:   "version",
		Short: "Print karta version, commit, and build date",
		Args:  cobra.NoArgs,
		Run:   printVersion,
	}
	return versionCmd
}
20 changes: 20 additions & 0 deletions cmd/karta/cmd/workload.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (c) 2026 NVIDIA Corporation

package cmd

import (
"github.com/spf13/cobra"
)

// newWorkloadCmd builds the "workload" parent command and attaches its list
// and tree subcommands, passing the shared root options to each of them.
func newWorkloadCmd(opts *rootOptions) *cobra.Command {
	workload := &cobra.Command{
		Use:   "workload",
		Short: "Operate on workloads in the cluster",
	}

	workload.AddCommand(
		newWorkloadListCmd(opts),
		newWorkloadTreeCmd(opts),
	)

	return workload
}
Loading
Loading