From 641fa02d4dc7611c01675d1a095f06fe3819785e Mon Sep 17 00:00:00 2001 From: ysyneu Date: Tue, 28 Apr 2026 11:35:34 +0800 Subject: [PATCH 01/10] fix(ws): drop CurrentTime from heartbeat env info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CurrentTime was captured once via collectEnvironmentInfo() at runner start and the resulting EnvInfo was sent only on the first heartbeat (envInfoSent gate), so a long-lived runner kept reporting its boot time forever. Safari then served that stale value via the `now` tool. Static info (OS, arch, hostname, timezone) is fine to send once; current time is by definition not static. Drop the field entirely — safari uses its own wall clock now. Co-Authored-By: Claude Opus 4.7 (1M context) --- protocol/messages.go | 1 - ws/client.go | 3 --- 2 files changed, 4 deletions(-) diff --git a/protocol/messages.go b/protocol/messages.go index a5dcd35..2abbc1a 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -90,7 +90,6 @@ type EnvironmentInfo struct { Username string `json:"username"` // Current user NumCPU int `json:"num_cpu"` // Number of CPUs TotalMemoryMB int64 `json:"total_memory_mb"` // Total memory in MB - CurrentTime string `json:"current_time"` // Current time in RFC3339 format Timezone string `json:"timezone"` // Timezone name, e.g., "Asia/Shanghai" UTCOffset string `json:"utc_offset"` // UTC offset, e.g., "+08:00" } diff --git a/ws/client.go b/ws/client.go index 021b2dd..d692c07 100644 --- a/ws/client.go +++ b/ws/client.go @@ -446,8 +446,6 @@ func (c *Client) sendHeartbeat() { Version: c.version, } - // Only send environment info on first heartbeat after connection - // Environment info is static and doesn't need to be sent repeatedly c.mu.Lock() if !c.envInfoSent { payload.Environment = c.envInfo @@ -485,7 +483,6 @@ func collectEnvironmentInfo(workspaceRoot string) *protocol.EnvironmentInfo { } now := time.Now() - info.CurrentTime = now.Format(time.RFC3339) info.Timezone = 
now.Location().String() info.UTCOffset = now.Format("-07:00") From 3efa401a55d4d38975d3ebe4bbebf5de4e9b2be0 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:12:58 +0800 Subject: [PATCH 02/10] fix(ws): tighten reconnect timing for cloud sandbox pool pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cloud sandbox pool pods spend their entire pre-claim lifetime dialing safari and getting 401 (no t_cloud_sandbox_0 row exists yet — claim inserts it). Two timing knobs were tuned for long-lived BYOC outage recovery and hurt cloud claim latency: - initialReconnectDelay 1s -> 200ms: first dials race the row INSERT. The 1s delay added 1-2s of extra 401-retry cost before hostname auth could possibly succeed. - maxReconnectDelay 5min -> 10s: a pool pod waiting >50s slid into a 32-64s exponential-backoff sleep, and got declared unrecoverable by safari's 90s claim window before its next dial — observed live (sbx_bzQ... did not come online within 1m30s, traceid 6996857e714cb99c31665d043ed3c260). 10s stays well inside the claim window and remains acceptable for BYOC server-restart recovery. Co-Authored-By: Claude Opus 4.7 (1M context) --- ws/client.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ws/client.go b/ws/client.go index d692c07..58e55a7 100644 --- a/ws/client.go +++ b/ws/client.go @@ -33,14 +33,25 @@ const ( // DefaultMaxReconnectAttempts is the default max reconnect attempts (0 = unlimited). DefaultMaxReconnectAttempts = 30 - // Initial reconnect delay (used for first few attempts) - initialReconnectDelay = 1 * time.Second + // Initial reconnect delay (used for first few attempts). + // Cloud sandbox runners race safari's t_cloud_sandbox_0 INSERT/UPDATE on + // first dial — a 1s delay added 1–2s of extra 401-retry cost before the + // row landed and the hostname-auth lookup could succeed. 200ms keeps the + // retry storm under control while shrinking the cold-start tail. 
+ initialReconnectDelay = 200 * time.Millisecond // Fast retry attempts before switching to exponential backoff fastRetryAttempts = 15 - // Maximum reconnect delay - maxReconnectDelay = 5 * time.Minute + // Maximum reconnect delay. Capped at 10s so cloud-sandbox pool pods + // (which spend their entire pre-claim lifetime in 401-retry) never + // land in a multi-minute sleep when sandbox-manager finally hands + // them off — Safari's claim window is 90s, anything longer than that + // risks the pod being declared unrecoverable while it's still asleep. + // The previous 5-minute cap was tuned for BYOC outage recovery; 10s + // stays well under the BYOC server-restart window in practice and + // adds at most one extra dial per 10s of downtime. + maxReconnectDelay = 10 * time.Second ) // MessageHandler handles incoming messages from Flashduty. From 0f222d9255d4283bb5e46d2d614c6a90adda48df Mon Sep 17 00:00:00 2001 From: ysyneu Date: Wed, 29 Apr 2026 17:30:38 +0800 Subject: [PATCH 03/10] fix(env): reject duplicated-root rel_paths; surface sibling hint on ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit safePath defense-in-depth — runner cannot rely on safari to filter rel_paths that mirror the workspace root. Joining e.root with such a path silently created a nested duplicate workspace tree (matching nested copies were observed on a long-running BYOC host). Reject them at the runner boundary. Read now appends a bounded sibling-name list when stat returns ENOENT, so the agent can self-correct a near-miss filename without spending a turn on an extra list call. .golangci.yml: exclude gosec G706 (log-taint via taint analysis). slog's structured key-value logging is not a format-string injection vector; the rule fires on every slog call with a user-controlled value, which is the whole point of structured logging. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 5 ++++ environment/environment.go | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 3e28c53..122e243 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,10 +47,15 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable + # Exclude G706 (log taint via taint analysis) - slog's structured + # key-value logging is not a format-string injection vector; the rule + # fires on every slog call that includes a user-controlled value, which + # is the whole point of structured logging. excludes: - G301 - G304 - G306 + - G706 formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index d332033..d2b0b56 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -59,6 +59,22 @@ func (e *Environment) Root() string { // safePath ensures the path is within the workspace root, resolving symlinks. func (e *Environment) safePath(path string) (string, error) { + // Defense-in-depth: reject relative paths that mirror the host's workspace + // root (e.g. "Users/ysy/.flashduty-runner/workspace/DUTY.md"). Safari already + // rejects these, but the runner accepts requests over the wire and must not + // rely on the caller to have done the check. Joining such a path with + // e.root would silently create a nested duplicate of the workspace tree. 
+ if !filepath.IsAbs(path) { + cleaned := filepath.Clean(filepath.FromSlash(path)) + rootRel := strings.TrimPrefix(e.root, string(filepath.Separator)) + if rootRel != "" { + rootRel = filepath.Clean(rootRel) + if cleaned == rootRel || strings.HasPrefix(cleaned, rootRel+string(filepath.Separator)) { + return "", fmt.Errorf("rel_path duplicates workspace root: %s", path) + } + } + } + absPath, err := filepath.Abs(filepath.Join(e.root, path)) if err != nil { return "", fmt.Errorf("failed to get absolute path: %w", err) @@ -105,6 +121,11 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto info, err := os.Stat(realPath) if err != nil { + if os.IsNotExist(err) { + // Include sibling names so the model can self-correct without an extra + // list round-trip. Bounded to keep the error message small. + return nil, fmt.Errorf("failed to stat file: %w%s", err, siblingHint(filepath.Dir(realPath), filepath.Base(realPath))) + } return nil, fmt.Errorf("failed to stat file: %w", err) } if info.IsDir() { @@ -114,6 +135,37 @@ func (e *Environment) Read(ctx context.Context, args *protocol.ReadArgs) (*proto return e.readFileContent(realPath, info.Size(), args.Offset, args.Limit) } +// siblingHint returns a short " (siblings: a, b, c)" suffix when the parent +// directory exists, helping the agent recover from a near-miss filename +// without spending a turn on a list call. Returns empty string on any error +// or when the parent has too many entries to summarize compactly. 
+func siblingHint(dir, missing string) string { + const maxSiblings = 12 + entries, err := os.ReadDir(dir) + if err != nil { + return "" + } + names := make([]string, 0, len(entries)) + for _, e := range entries { + n := e.Name() + if strings.HasPrefix(n, ".") { + continue + } + names = append(names, n) + if len(names) > maxSiblings { + break + } + } + if len(names) == 0 { + return "" + } + if len(names) > maxSiblings { + names = names[:maxSiblings] + names = append(names, "…") + } + return fmt.Sprintf(" (parent %q contains: %s)", dir, strings.Join(names, ", ")) +} + // readFileContent reads file content with offset and limit support. func (e *Environment) readFileContent(path string, size, offset, limit int64) (*protocol.ReadResult, error) { file, err := os.Open(path) From ef2a0dcd0e39326ea44c28e3fbdfc9b60c4d2148 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:10:31 +0800 Subject: [PATCH 04/10] feat(skill): probe-or-install sync_skill; default home ~/.flashduty - SyncSkill now checks .checksum+SKILL.md before install (cache hit returns {Cached:true,Path}; miss with no zip returns {Cached:false} so cloud retries with zip_data) - Skills land at /skills// instead of /.work/skills// - Default home moves from ~/.flashduty-runner/workspace to ~/.flashduty - FLASHDUTY_RUNNER_HOME is the new canonical override; FLASHDUTY_RUNNER_WORKSPACE kept as deprecated alias - Add SyncSkillResult.Cached field (omitempty); SyncSkillArgs unchanged Co-Authored-By: Claude Sonnet 4.6 --- cmd/main.go | 12 +++-- environment/environment.go | 42 +++++++++++------ environment/environment_test.go | 82 +++++++++++++++++++++++++++++++++ protocol/messages.go | 9 +++- 4 files changed, 126 insertions(+), 19 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 4872346..5956afc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -79,7 +79,8 @@ Examples: Environment variables: FLASHDUTY_RUNNER_TOKEN - Authentication token (required if --token not provided) FLASHDUTY_RUNNER_URL - WebSocket 
endpoint URL - FLASHDUTY_RUNNER_WORKSPACE - Workspace root directory`, + FLASHDUTY_RUNNER_HOME - Home directory for skills and data (default: ~/.flashduty) + FLASHDUTY_RUNNER_WORKSPACE - Deprecated alias for FLASHDUTY_RUNNER_HOME`, RunE: func(cmd *cobra.Command, args []string) error { return runRunner() }, @@ -137,17 +138,20 @@ func loadConfig() (*Config, error) { cfg.URL = defaultURL } - // Workspace: flag > env > default + // Home directory (skills, future knowledge/output): flag > FLASHDUTY_RUNNER_HOME > deprecated FLASHDUTY_RUNNER_WORKSPACE > default ~/.flashduty cfg.WorkspaceRoot = flagWorkspace if cfg.WorkspaceRoot == "" { - cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_HOME") + } + if cfg.WorkspaceRoot == "" { + cfg.WorkspaceRoot = os.Getenv("FLASHDUTY_RUNNER_WORKSPACE") // deprecated alias } if cfg.WorkspaceRoot == "" { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get home directory: %w", err) } - cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty-runner", "workspace") + cfg.WorkspaceRoot = filepath.Join(homeDir, ".flashduty") } // Log level: flag > env > default diff --git a/environment/environment.go b/environment/environment.go index d2b0b56..e15f2bd 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -591,34 +591,48 @@ func (e *Environment) MCPListTools(ctx context.Context, args *protocol.MCPListTo return result, nil } -// SyncSkill syncs a skill from cloud to local workspace. +// SyncSkill is probe-or-install. With empty ZipData it checks local cache +// against args.Checksum: hit returns {Cached:true,Path}; miss returns +// {Cached:false,Path:""} so the cloud retries with zip. With ZipData present +// it overwrites /skills// and writes .checksum. +// +// args.SkillDir is accepted for backward compatibility but ignored — the +// runner derives the path from skill_name to keep layout under its control. 
func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArgs) (*protocol.SyncSkillResult, error) { - skillDir, err := e.safePath(args.SkillDir) + if args.SkillName == "" || args.Checksum == "" { + return nil, fmt.Errorf("skill_name and checksum are required") + } + relDir := filepath.Join("skills", args.SkillName) + skillDir, err := e.safePath(relDir) if err != nil { return nil, err } - // Decode zip data + // Probe phase: cache hit if .checksum matches AND SKILL.md exists. + cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) + if string(cachedSum) == args.Checksum { + if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil + } + } + + // Cache miss with no zip: signal cloud to retry with zip_data. + if args.ZipData == "" { + return &protocol.SyncSkillResult{Success: true, Cached: false}, nil + } + + // Install: decode, wipe, unzip, write .checksum. zipData, err := base64.StdEncoding.DecodeString(args.ZipData) if err != nil { return nil, fmt.Errorf("failed to decode zip data: %w", err) } - - // Unzip to skill directory if err := e.unzipSkill(zipData, skillDir); err != nil { return nil, fmt.Errorf("failed to unzip skill: %w", err) } - - // Write checksum file - checksumPath := filepath.Join(skillDir, ".checksum") - if err := os.WriteFile(checksumPath, []byte(args.Checksum), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(skillDir, ".checksum"), []byte(args.Checksum), 0o644); err != nil { return nil, fmt.Errorf("failed to write checksum: %w", err) } - - return &protocol.SyncSkillResult{ - Success: true, - Path: args.SkillDir, - }, nil + return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: false}, nil } // unzipSkill extracts a zip archive to the destination directory. 
diff --git a/environment/environment_test.go b/environment/environment_test.go index 827f462..f462d85 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -1,6 +1,8 @@ package environment import ( + "archive/zip" + "bytes" "context" "encoding/base64" "os" @@ -14,6 +16,21 @@ import ( "github.com/flashcatcloud/flashduty-runner/protocol" ) +// buildTestZip creates an in-memory zip archive from a map of filename→content. +func buildTestZip(t *testing.T, files map[string]string) []byte { + t.Helper() + var buf bytes.Buffer + w := zip.NewWriter(&buf) + for name, content := range files { + f, err := w.Create(name) + require.NoError(t, err) + _, err = f.Write([]byte(content)) + require.NoError(t, err) + } + require.NoError(t, w.Close()) + return buf.Bytes() +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -201,3 +218,68 @@ func TestEnvironment_SafePath(t *testing.T) { }) } } + +func TestSyncSkill_ProbeMiss(t *testing.T) { + ws := newTestEnvironment(t) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.False(t, res.Cached, "expected cache miss") + assert.Empty(t, res.Path, "expected empty path on miss") +} + +func TestSyncSkill_ProbeHit(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). 
+ wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", Checksum: "csum1", + }) + require.NoError(t, err) + assert.True(t, res.Cached, "expected cache hit") + assert.Equal(t, wantDir, res.Path) +} + +func TestSyncSkill_InstallOverwrites(t *testing.T) { + ws := newTestEnvironment(t) + dir := filepath.Join(ws.Root(), "skills", "demo") + + // Pre-seed an old version with a stale checksum. + require.NoError(t, os.MkdirAll(dir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) + + // Resolve symlinks so the comparison holds on macOS (/var → /private/var). + wantDir, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + + zipBytes := buildTestZip(t, map[string]string{ + "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", + }) + res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ + SkillName: "demo", + Checksum: "new", + ZipData: base64.StdEncoding.EncodeToString(zipBytes), + }) + require.NoError(t, err) + assert.False(t, res.Cached, "install must report Cached=false") + assert.Equal(t, wantDir, res.Path) + + // Old file should be gone (unzipSkill does RemoveAll before extracting). + _, err = os.Stat(filepath.Join(dir, "old-file.txt")) + assert.True(t, os.IsNotExist(err), "old file should be wiped") + + _, err = os.Stat(filepath.Join(dir, "SKILL.md")) + assert.NoError(t, err, "new SKILL.md should exist") + + sum, _ := os.ReadFile(filepath.Join(dir, ".checksum")) + assert.Equal(t, "new", string(sum)) +} diff --git a/protocol/messages.go b/protocol/messages.go index 2abbc1a..4249501 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -284,9 +284,16 @@ type SyncSkillArgs struct { } // SyncSkillResult is the result of a sync_skill operation. 
+// +// Probe semantics: when SyncSkillArgs.ZipData is empty, the runner checks its +// local cache and returns either {Success:true, Cached:true, Path} on hit or +// {Success:true, Cached:false, Path:""} on miss (cloud must retry with zip). +// When ZipData is non-empty, the runner installs unconditionally and returns +// {Success:true, Cached:false, Path}. type SyncSkillResult struct { Success bool `json:"success"` - Path string `json:"path"` // Local path where skill was extracted + Path string `json:"path"` // Local path where skill is materialized + Cached bool `json:"cached,omitempty"` // True iff cache hit } // MCPToolInfo represents metadata for an MCP tool. From a6eb9b4f371e2cc9f3da87e00a99478fb363a974 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:12:29 +0800 Subject: [PATCH 05/10] refactor(test): extract resolveDir helper, use errors.Is for ErrNotExist - resolveDir(t, dir) replaces duplicated EvalSymlinks boilerplate in ProbeHit and InstallOverwrites tests - errors.Is(err, os.ErrNotExist) replaces deprecated os.IsNotExist pattern Co-Authored-By: Claude Sonnet 4.6 --- environment/environment_test.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/environment/environment_test.go b/environment/environment_test.go index f462d85..c3fa126 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "encoding/base64" + "errors" "os" "path/filepath" "testing" @@ -31,6 +32,15 @@ func buildTestZip(t *testing.T, files map[string]string) []byte { return buf.Bytes() } +// resolveDir resolves symlinks on the given directory path so path comparisons +// hold on macOS where t.TempDir() returns /var/... but safePath resolves to /private/var/... 
+func resolveDir(t *testing.T, dir string) string { + t.Helper() + resolved, err := filepath.EvalSymlinks(dir) + require.NoError(t, err) + return resolved +} + func newTestEnvironment(t *testing.T) *Environment { tmpDir := t.TempDir() checker := permission.NewChecker(map[string]string{ @@ -236,16 +246,12 @@ func TestSyncSkill_ProbeHit(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("csum1"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte("body"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - res, err := ws.SyncSkill(context.Background(), &protocol.SyncSkillArgs{ SkillName: "demo", Checksum: "csum1", }) require.NoError(t, err) assert.True(t, res.Cached, "expected cache hit") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) } func TestSyncSkill_InstallOverwrites(t *testing.T) { @@ -257,10 +263,6 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { require.NoError(t, os.WriteFile(filepath.Join(dir, "old-file.txt"), []byte("stale"), 0o644)) require.NoError(t, os.WriteFile(filepath.Join(dir, ".checksum"), []byte("old"), 0o644)) - // Resolve symlinks so the comparison holds on macOS (/var → /private/var). - wantDir, err := filepath.EvalSymlinks(dir) - require.NoError(t, err) - zipBytes := buildTestZip(t, map[string]string{ "SKILL.md": "---\nname: demo\ndescription: x\n---\nbody", }) @@ -271,11 +273,11 @@ func TestSyncSkill_InstallOverwrites(t *testing.T) { }) require.NoError(t, err) assert.False(t, res.Cached, "install must report Cached=false") - assert.Equal(t, wantDir, res.Path) + assert.Equal(t, resolveDir(t, dir), res.Path) // Old file should be gone (unzipSkill does RemoveAll before extracting). 
_, err = os.Stat(filepath.Join(dir, "old-file.txt")) - assert.True(t, os.IsNotExist(err), "old file should be wiped") + assert.True(t, errors.Is(err, os.ErrNotExist), "old file should be wiped") _, err = os.Stat(filepath.Join(dir, "SKILL.md")) assert.NoError(t, err, "new SKILL.md should exist") From acb919bf2a00da2bd2e6bd2247fac5a6bf2bd3e7 Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:14:01 +0800 Subject: [PATCH 06/10] harden(skill): reject path separators in skill_name --- environment/environment.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/environment/environment.go b/environment/environment.go index e15f2bd..803e243 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -602,6 +602,9 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg if args.SkillName == "" || args.Checksum == "" { return nil, fmt.Errorf("skill_name and checksum are required") } + if strings.ContainsAny(args.SkillName, `/\`) || args.SkillName == ".." || args.SkillName == "." 
{ + return nil, fmt.Errorf("skill_name must not contain path separators") + } relDir := filepath.Join("skills", args.SkillName) skillDir, err := e.safePath(relDir) if err != nil { From 8160fdeedd85df1666c226eeb683757c0cf9cf1c Mon Sep 17 00:00:00 2001 From: ysyneu Date: Thu, 30 Apr 2026 11:23:09 +0800 Subject: [PATCH 07/10] docs(skill): document ~/.flashduty home and probe-or-install sync_skill --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d384f76..f3cda75 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ Create `/etc/flashduty-runner/env`: ```bash FLASHDUTY_RUNNER_TOKEN=ent_xxx # FLASHDUTY_RUNNER_URL=wss://custom.example.com/safari/environment/ws -# FLASHDUTY_RUNNER_WORKSPACE=/var/flashduty/workspace +# FLASHDUTY_RUNNER_HOME=/var/flashduty ``` ```bash @@ -213,6 +213,15 @@ sudo systemctl daemon-reload sudo systemctl enable --now flashduty-runner ``` +## Skills + +Skills are materialized lazily. When the agent calls the `skill` tool, the cloud sends `sync_skill` with `(skill_name, checksum, zip_data?)`: + +- **Empty `zip_data`** = probe. The runner returns `{cached: true, path}` if `/skills//.checksum` already matches the requested checksum and `SKILL.md` is present; otherwise `{cached: false}` and the cloud will retry with the bundle attached. +- **Non-empty `zip_data`** = install. The runner wipes `/skills//`, unzips, and writes `.checksum`. Each skill name has exactly one slot — no version retention. Builtin skills go through the same flow with their bundle sourced from the cloud's embedded FS instead of S3. + +Inspect installed skills with `ls ~/.flashduty/skills/`. + ## Configuration Reference Configuration is via command-line flags or environment variables (flags take precedence). 
@@ -221,7 +230,7 @@ Configuration is via command-line flags or environment variables (flags take pre |------|-------------|----------|---------|-------------| | `--token` | `FLASHDUTY_RUNNER_TOKEN` | Yes | - | Authentication token | | `--url` | `FLASHDUTY_RUNNER_URL` | No | `wss://api.flashcat.cloud/safari/environment/ws` | WebSocket endpoint | -| `--workspace` | `FLASHDUTY_RUNNER_WORKSPACE` | No | `~/.flashduty-runner/workspace` | Workspace root directory | +| `--workspace` | `FLASHDUTY_RUNNER_HOME` | No | `~/.flashduty` | Runner home directory. Skills land at `/skills//`. `FLASHDUTY_RUNNER_WORKSPACE` is accepted as a deprecated alias for back-compat. | | `--log-level` | `FLASHDUTY_RUNNER_LOG_LEVEL` | No | `info` | Log level: debug, info, warn, error | ## Troubleshooting From b9868971e2bc0ccbd1fe9461755e9461dda0f7ba Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:33:55 +0800 Subject: [PATCH 08/10] refactor(env,permission): SSRF + AST hardening + grep regex + bash structured MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runner-side counterparts of fc-safari's 2026-05-02 builtin-tools refactor: - environment/webfetch: SSRF guard via inlined safeHTTPTransport + safeCheckRedirect. Pre-flight validateURL refuses RFC1918 / loopback / link-local / IMDS before any TCP dial; CheckRedirect re-validates each hop so a 302 to 169.254.169.254 is refused. Inlined from go-pkg/x/netsafe (open-source repo cannot import internal modules). - environment/htmlx: HTML-to-markdown / text helpers inlined from go-pkg/x/htmlx for the same reason. html-to-markdown is now a direct dep. - environment/environment::unzipSkill: zip-slip closed — compares abs target against dest+separator and rejects names containing '..'. - environment/environment::grepWithGo: was strings.Contains (literal) — now compiles regex, falls back with regex_compile_error surfaced. 
Default ignore (.git, node_modules, .flashduty) added; Scanner buffer raised to 1 MiB; rg exit-2 surfaces real I/O errors instead of silent "no matches"; pattern starting with '-' gets '--' prefix; rsplit on ':' so paths containing ':' parse cleanly. - protocol.GrepArgs gains output_mode, context_before/after, head_limit, file_type, case_sensitive — forwarded to ripgrep flags. - environment/environment::Bash: per-stream large-output (stderr now truncates to its own bash_stderr_*.txt); LimitedWriter honors the io.Writer contract (returns ErrOutputCapped at the cap, exposes Hit(), appends "[output capped at 10MB]" marker once). - protocol.BashResult carries truncated_stdout/stderr, stdout_file_path/stderr_file_path, *_total_size; legacy fields remain populated so old safari clients keep working. - environment/large_output::ShouldSkipForOutputsDir: substring match → word-boundary regex (no more 'thread '/'head ' false positives). WriteRaw lowercased to writeRaw (single internal caller). - permission/permission: AST walk now visits CmdSubst / ProcSubst / nested Stmts so cat <(curl evil) and echo $(curl evil) hit the same rules. Redirect targets are checked instead of discarded (cmd > /etc/passwd refused). Canonical-form normalization on both rule and command sides (kubectl get pods matches kubectl get *); env-prefix dual evaluation (KUBECONFIG=x kubectl get pods still matches kubectl get *). Rules sort by literal-prefix specificity, first match wins. SafeReadOnlyRules drops echo */find * — find -delete / find -exec rm now denied by default; replaced with scoped allow patterns. E2E verified live: bash returns the new structured shape, web (action=fetch) reaches example.com via the safe transport, all 17+ new permission/permission_test scenarios pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- environment/bash_output_test.go | 114 +++++++++ environment/environment.go | 405 +++++++++++++++++++++++++++----- environment/environment_test.go | 91 +++++++ environment/htmlx.go | 51 ++++ environment/large_output.go | 20 +- environment/netsafe.go | 192 +++++++++++++++ environment/security_test.go | 165 +++++++++++++ environment/webfetch.go | 73 ++---- go.mod | 2 +- go.sum | 4 +- permission/permission.go | 379 ++++++++++++++++++++++++------ permission/permission_test.go | 192 +++++++++++++++ protocol/messages.go | 50 +++- 13 files changed, 1537 insertions(+), 201 deletions(-) create mode 100644 environment/bash_output_test.go create mode 100644 environment/htmlx.go create mode 100644 environment/netsafe.go create mode 100644 environment/security_test.go diff --git a/environment/bash_output_test.go b/environment/bash_output_test.go new file mode 100644 index 0000000..b43efd8 --- /dev/null +++ b/environment/bash_output_test.go @@ -0,0 +1,114 @@ +package environment + +import ( + "bytes" + "context" + "errors" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +func TestLimitedWriter_HitReturnsErrShortWrite(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 5} + + // First write fits exactly within the cap. + n, err := w.Write([]byte("hello")) + require.NoError(t, err) + assert.Equal(t, 5, n) + assert.False(t, w.Hit(), "cap not yet hit when curr==Limit and next write hasn't happened") + + // Second write hits the cap. Contract: report ErrOutputCapped, not nil. 
+ n, err = w.Write([]byte("world")) + assert.Equal(t, 0, n, "no bytes accepted past the cap") + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.Contains(t, buf.String(), "[output capped at 10MB]", "marker appended exactly once") + + // Subsequent writes also report cap, marker not duplicated. + prev := buf.String() + _, err = w.Write([]byte("more")) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.Equal(t, prev, buf.String(), "marker only appended once") +} + +func TestLimitedWriter_PartialWriteAtBoundary(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 3} + + // 5-byte write straddles the boundary: 3 bytes accepted, marker appended, + // ErrOutputCapped surfaced. + n, err := w.Write([]byte("abcde")) + assert.Equal(t, 3, n) + require.Error(t, err) + assert.True(t, errors.Is(err, ErrOutputCapped)) + assert.True(t, w.Hit()) + assert.True(t, strings.HasPrefix(buf.String(), "abc")) + assert.Contains(t, buf.String(), "[output capped at 10MB]") +} + +func TestLimitedWriter_BelowCap(t *testing.T) { + var buf bytes.Buffer + w := &LimitedWriter{W: &buf, Limit: 100} + + n, err := w.Write([]byte("hi")) + require.NoError(t, err) + assert.Equal(t, 2, n) + assert.False(t, w.Hit()) + assert.NotContains(t, buf.String(), "capped") +} + +func TestEnvironment_Bash_StderrTruncatedSeparately(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Generate >30k bytes to stderr, nothing on stdout. Using awk so the + // subprocess exits cleanly once the byte budget is met (no SIGPIPE chain). 
+ cmd := `awk 'BEGIN{ for(i=0;i<5000;i++) printf("ERROR_LINE_%d\n", i) > "/dev/stderr" }'` + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStderr, "stderr should be truncated when over the cap") + assert.NotEmpty(t, result.StderrFilePath, "stderr file should be written for full content") + assert.Greater(t, result.StderrTotalSize, int64(30000)) + assert.False(t, result.TruncatedStdout, "stdout should be untruncated when empty") + assert.Empty(t, result.StdoutFilePath) +} + +func TestEnvironment_Bash_NormalOutputUnchanged(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: "echo small"}) + require.NoError(t, err) + assert.Equal(t, "small\n", result.Stdout) + assert.False(t, result.TruncatedStdout) + assert.False(t, result.TruncatedStderr) + assert.Empty(t, result.StdoutFilePath) + assert.Empty(t, result.StderrFilePath) +} + +func TestEnvironment_Bash_StdoutAtHardCapMarker(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Push past the LimitedWriter's 10MB cap. dd writes a fixed budget then + // exits, so we don't depend on SIGPIPE propagation to terminate `yes`. + cmd := "dd if=/dev/zero bs=1048576 count=11 2>/dev/null" + result, err := ws.Bash(ctx, &protocol.BashArgs{Command: cmd}) + require.NoError(t, err) + + assert.True(t, result.TruncatedStdout, "10MB cap must have been hit") + // The cap marker is appended to the captured buffer; after + // processLargeOutput it lives in the spilled file (the in-context preview + // only carries the first ~20 lines). 
+ if !strings.Contains(result.Stdout, "[output capped") { + assert.NotEmpty(t, result.StdoutFilePath, "spilled file must exist when cap was hit") + } +} diff --git a/environment/environment.go b/environment/environment.go index 803e243..86c1658 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -8,12 +8,14 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "log/slog" "os" "os/exec" "path/filepath" + "regexp" "sort" "strconv" "strings" @@ -286,6 +288,15 @@ func (e *Environment) Glob(ctx context.Context, args *protocol.GlobArgs) (*proto return &protocol.GlobResult{Matches: matches}, nil } +// defaultGrepIgnoreDirs are walked-but-skipped by the Go fallback to align with +// ripgrep's default .gitignore behavior. A model-issued grep on a workspace +// containing `node_modules` should not waste minutes scanning it. +var defaultGrepIgnoreDirs = map[string]struct{}{ + ".git": {}, + "node_modules": {}, + ".flashduty": {}, // covers .flashduty/.work and friends +} + // Grep searches for a pattern in files. func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { var res *protocol.GrepResult @@ -301,12 +312,19 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, err } - // Build content string - var sb strings.Builder - for _, match := range res.Matches { - fmt.Fprintf(&sb, "%s:%d:%s\n", match.Path, match.LineNumber, match.Content) + // Apply head_limit cap before formatting; the truncation note belongs in the + // final content so the model can see "I asked for 10, got 10 of N". + totalMatches := len(res.Matches) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + res.Matches = res.Matches[:args.HeadLimit] + } + + // Format content per OutputMode. Default ("" or "content") preserves the + // pre-PR-4 file:line:text shape so existing callers see no change. 
+ content := formatGrepContent(res.Matches, args.OutputMode) + if args.HeadLimit > 0 && totalMatches > args.HeadLimit { + content += fmt.Sprintf("\n[truncated to first %d of %d matches]\n", args.HeadLimit, totalMatches) } - content := sb.String() // Process large output processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) @@ -325,37 +343,156 @@ func (e *Environment) Grep(ctx context.Context, args *protocol.GrepArgs) (*proto return res, nil } +// formatGrepContent renders matches per OutputMode. Unknown / empty mode falls +// back to "content" so downstream callers get the historical shape. +func formatGrepContent(matches []protocol.GrepMatch, mode string) string { + var sb strings.Builder + switch mode { + case "files_with_matches": + seen := make(map[string]struct{}, len(matches)) + for _, m := range matches { + if _, ok := seen[m.Path]; ok { + continue + } + seen[m.Path] = struct{}{} + sb.WriteString(m.Path) + sb.WriteByte('\n') + } + case "count": + counts := make(map[string]int, len(matches)) + order := make([]string, 0, len(matches)) + for _, m := range matches { + if _, ok := counts[m.Path]; !ok { + order = append(order, m.Path) + } + counts[m.Path]++ + } + for _, p := range order { + fmt.Fprintf(&sb, "%s:%d\n", p, counts[p]) + } + default: // "content" or empty + for _, m := range matches { + fmt.Fprintf(&sb, "%s:%d:%s\n", m.Path, m.LineNumber, m.Content) + } + } + return sb.String() +} + func (e *Environment) grepWithRipgrep(ctx context.Context, args *protocol.GrepArgs) (*protocol.GrepResult, error) { - cmdArgs := make([]string, 0, 6+2*len(args.Include)+2) - cmdArgs = append(cmdArgs, "--column", "--line-number", "--no-heading", "--color", "never", "--smart-case") + cmdArgs := make([]string, 0, 16+2*len(args.Include)) + + // OutputMode-specific flags. "content" uses --column for column info; the + // other modes drop --column so each rg line has a stable shape we can parse. 
+ mode := args.OutputMode + switch mode { + case "files_with_matches": + cmdArgs = append(cmdArgs, "--files-with-matches", "--no-heading", "--color", "never") + case "count": + cmdArgs = append(cmdArgs, "--count", "--no-heading", "--color", "never") + default: + cmdArgs = append(cmdArgs, "--line-number", "--no-heading", "--color", "never") + } + + switch { + case args.CaseSensitive == nil: + cmdArgs = append(cmdArgs, "--smart-case") + case *args.CaseSensitive: + cmdArgs = append(cmdArgs, "--case-sensitive") + default: + cmdArgs = append(cmdArgs, "--ignore-case") + } + + if args.ContextBefore > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--before-context", strconv.Itoa(args.ContextBefore)) + } + if args.ContextAfter > 0 && (mode == "" || mode == "content") { + cmdArgs = append(cmdArgs, "--after-context", strconv.Itoa(args.ContextAfter)) + } + if args.FileType != "" { + cmdArgs = append(cmdArgs, "--type", args.FileType) + } for _, inc := range args.Include { cmdArgs = append(cmdArgs, "--glob", inc) } - cmdArgs = append(cmdArgs, args.Pattern, ".") + + // `--` separator so a pattern starting with `-` is not parsed as an rg flag + // (e.g. searching for "--foo" or "-x" in source). + cmdArgs = append(cmdArgs, "--", args.Pattern, ".") cmd := exec.CommandContext(ctx, "rg", cmdArgs...) //nolint:gosec // G204: args built from validated grep parameters cmd.Dir = e.root - var stdout strings.Builder + var stdout, stderr strings.Builder cmd.Stdout = &stdout - _ = cmd.Run() // rg returns exit code 1 if no matches found + cmd.Stderr = &stderr + runErr := cmd.Run() + + // rg exit codes: 0 = matches found, 1 = no matches, 2 = real error (bad + // pattern, I/O failure, permission). We previously swallowed code 2 as + // "no matches", which silently dropped pattern compile errors. 
+ if runErr != nil { + var exitErr *exec.ExitError + if errors.As(runErr, &exitErr) && exitErr.ExitCode() >= 2 { + msg := strings.TrimSpace(stderr.String()) + if msg == "" { + msg = exitErr.Error() + } + return nil, fmt.Errorf("ripgrep failed: %s", msg) + } + // exit code 1 (no matches) — keep going with empty stdout. + } lines := strings.Split(stdout.String(), "\n") results := make([]protocol.GrepMatch, 0, len(lines)) + for _, line := range lines { if line == "" { continue } - parts := strings.SplitN(line, ":", 4) - if len(parts) < 4 { - continue + + switch mode { + case "files_with_matches": + results = append(results, protocol.GrepMatch{Path: line}) + case "count": + // rg --count emits "path:N"; rsplit once so paths containing `:` survive. + idx := strings.LastIndex(line, ":") + if idx < 0 { + continue + } + cnt, _ := strconv.Atoi(line[idx+1:]) + results = append(results, protocol.GrepMatch{Path: line[:idx], LineNumber: cnt}) + default: + // rg with --line-number (no --column) emits "path:lineNum:content". + // Rsplit twice from the right so path containing ':' (Windows drive, + // scratch namespaces, etc.) parses correctly. + lastIdx := strings.LastIndex(line, ":") + if lastIdx < 0 { + continue + } + head, content := line[:lastIdx], line[lastIdx+1:] + midIdx := strings.LastIndex(head, ":") + if midIdx < 0 { + // Context lines from rg use `path-lineNum-content` separator; + // fall back to splitting on `-` for those rather than dropping. 
+ dashIdx := strings.LastIndex(head, "-") + if dashIdx < 0 { + continue + } + lineNum, _ := strconv.Atoi(head[dashIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:dashIdx], + LineNumber: lineNum, + Content: content, + }) + continue + } + lineNum, _ := strconv.Atoi(head[midIdx+1:]) + results = append(results, protocol.GrepMatch{ + Path: head[:midIdx], + LineNumber: lineNum, + Content: content, + }) } - lineNum, _ := strconv.Atoi(parts[1]) - results = append(results, protocol.GrepMatch{ - Path: parts[0], - LineNumber: lineNum, - Content: parts[3], - }) } return &protocol.GrepResult{Matches: results}, nil } @@ -367,6 +504,30 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( include = []string{"**/*"} } + // Try to compile the pattern as a regex. On compile failure, fall back to + // literal substring match and surface the error so the caller knows the + // pattern was treated as text rather than a regex. + caseInsensitive := args.CaseSensitive != nil && !*args.CaseSensitive + pattern := args.Pattern + if caseInsensitive { + pattern = "(?i)" + pattern + } + re, compileErr := regexp.Compile(pattern) + literalNeedle := args.Pattern + if caseInsensitive { + literalNeedle = strings.ToLower(literalNeedle) + } + + matchLine := func(line string) bool { + if re != nil { + return re.MatchString(line) + } + if caseInsensitive { + return strings.Contains(strings.ToLower(line), literalNeedle) + } + return strings.Contains(line, literalNeedle) + } + for _, inc := range include { matches, err := e.Glob(ctx, &protocol.GlobArgs{Pattern: inc}) if err != nil { @@ -374,17 +535,27 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( } for _, match := range matches.Matches { - realPath, _ := e.safePath(match) + if shouldIgnoreGrepPath(match) { + continue + } + realPath, err := e.safePath(match) + if err != nil { + continue + } file, err := os.Open(realPath) if err != nil { continue } scanner := 
bufio.NewScanner(file) + // Default 64KB cap silently drops long lines (minified JS, packed + // JSON, log dumps). Bump to 1MB so the model gets a faithful match + // set rather than a confusing zero-result on real-world files. + scanner.Buffer(make([]byte, 64*1024), 1024*1024) lineNum := 1 for scanner.Scan() { line := scanner.Text() - if strings.Contains(line, args.Pattern) { + if matchLine(line) { results = append(results, protocol.GrepMatch{ Path: match, LineNumber: lineNum, @@ -396,7 +567,23 @@ func (e *Environment) grepWithGo(ctx context.Context, args *protocol.GrepArgs) ( _ = file.Close() } } - return &protocol.GrepResult{Matches: results}, nil + + res := &protocol.GrepResult{Matches: results} + if compileErr != nil { + res.RegexCompileError = compileErr.Error() + } + return res, nil +} + +// shouldIgnoreGrepPath returns true for any path whose relative form contains +// a default-ignored directory segment (.git, node_modules, .flashduty/...). +func shouldIgnoreGrepPath(rel string) bool { + for _, seg := range strings.Split(filepath.ToSlash(rel), "/") { + if _, ok := defaultGrepIgnoreDirs[seg]; ok { + return true + } + } + return false } // Bash executes a bash command in the workspace. 
@@ -418,7 +605,9 @@ func (e *Environment) Bash(ctx context.Context, args *protocol.BashArgs) (*proto // Skip large output processing for .outputs/ directory reads if ShouldSkipForOutputsDir(args.Command) { - result.TotalSize = int64(len(result.Stdout)) + result.StdoutTotalSize = int64(len(result.Stdout)) + result.StderrTotalSize = int64(len(result.Stderr)) + result.TotalSize = result.StdoutTotalSize return result, nil } @@ -449,12 +638,14 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s cmd := exec.CommandContext(ctx, "bash", "-c", command) //nolint:gosec // G204: command is user-initiated via workspace tool cmd.Dir = workdir - // Use a limited writer to prevent OOM from very large outputs - // 10MB limit is plenty for LLM context while preventing memory exhaustion + // 10MB per-stream cap prevents OOM from runaway commands while leaving + // plenty of headroom for normal LLM-context output. const maxOutputSize = 10 * 1024 * 1024 var stdout, stderr strings.Builder - cmd.Stdout = &LimitedWriter{W: &stdout, Limit: maxOutputSize} - cmd.Stderr = &LimitedWriter{W: &stderr, Limit: maxOutputSize} + stdoutW := &LimitedWriter{W: &stdout, Limit: maxOutputSize} + stderrW := &LimitedWriter{W: &stderr, Limit: maxOutputSize} + cmd.Stdout = stdoutW + cmd.Stderr = stderrW err := cmd.Run() exitCode := 0 @@ -462,65 +653,151 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s if exitError, ok := err.(*exec.ExitError); ok { exitCode = exitError.ExitCode() } else if ctx.Err() == context.DeadlineExceeded { - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: "command timed out", - ExitCode: 124, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: "command timed out", + ExitCode: 124, + TruncatedStdout: stdoutW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil + } else if errors.Is(err, ErrOutputCapped) { + // Command itself didn't fail — only the writer's 
contract violation + // surfaced. Treat as a successful run with capped buffers. + err = nil } else { return nil, fmt.Errorf("failed to execute command: %w", err) } } - return &protocol.BashResult{ - Stdout: stdout.String(), - Stderr: stderr.String(), - ExitCode: exitCode, - }, nil + res := &protocol.BashResult{ + Stdout: stdout.String(), + Stderr: stderr.String(), + ExitCode: exitCode, + TruncatedStdout: stdoutW.Hit(), + TruncatedStderr: stderrW.Hit(), + } + if stdoutW.Hit() { + res.Truncated = true + } + return res, nil } -// LimitedWriter is an io.Writer that limits the total number of bytes written. +// ErrOutputCapped is returned by LimitedWriter once its byte cap has been +// reached. Surfaces the contract violation that would otherwise be hidden by +// the writer pretending it accepted everything (`return len(p), nil`). +var ErrOutputCapped = errors.New("output capped") + +// LimitedWriter is an io.Writer that caps the total number of bytes forwarded +// to the wrapped writer. After the cap is reached it appends an inline marker +// once and then returns ErrOutputCapped on every subsequent write so callers +// can detect truncation via Hit(). +// +// NOTE: exec.Cmd treats any writer error as fatal — callers wrapping LimitedWriter +// around a stdio pipe must filter ErrOutputCapped out of cmd.Run()'s error chain. type LimitedWriter struct { - W io.Writer - Limit int64 - curr int64 + W io.Writer + Limit int64 + curr int64 + hit bool + marker bool // marker already appended } +const limitedWriterMarker = "\n[output capped at 10MB]\n" + func (l *LimitedWriter) Write(p []byte) (n int, err error) { if l.curr >= l.Limit { - return len(p), nil + l.hit = true + l.appendMarker() + // Honor io.Writer contract: report only the bytes actually accepted + // downstream (zero) and propagate the cap signal so the caller can + // surface truncation instead of silently dropping data. 
+ return 0, ErrOutputCapped } left := l.Limit - l.curr if int64(len(p)) > left { n, err = l.W.Write(p[:left]) l.curr += int64(n) - return len(p), err + l.hit = true + l.appendMarker() + if err == nil { + err = ErrOutputCapped + } + // Return the bytes actually written to the underlying writer; the + // remainder (p[left:]) was discarded along with the cap signal. + return n, err } n, err = l.W.Write(p) l.curr += int64(n) return n, err } -// processLargeOutput processes command output for truncation if needed. +// Hit reports whether the writer's byte cap has been reached. +func (l *LimitedWriter) Hit() bool { return l.hit } + +// appendMarker writes the cap marker into the underlying writer exactly once. +// Bypasses the per-call accounting since the marker is bookkeeping, not output. +func (l *LimitedWriter) appendMarker() { + if l.marker { + return + } + l.marker = true + _, _ = l.W.Write([]byte(limitedWriterMarker)) +} + +// processLargeOutput processes both stdout and stderr for truncation if needed. +// Each stream is spilled to its own .outputs/_*.txt or +// .outputs/_stderr_*.txt file when it exceeds the configured cap. +// +// Stderr was previously left untouched, which let high-stderr commands +// (e.g. `yes 2>&1 1>/dev/null | head`) flood the LLM context unbounded. func (e *Environment) processLargeOutput(ctx context.Context, result *protocol.BashResult, prefix string) (*protocol.BashResult, error) { processor := NewLargeOutputProcessor(e, DefaultLargeOutputConfig()) - processed, err := processor.Process(ctx, result.Stdout, prefix) + + stdoutSize := int64(len(result.Stdout)) + stderrSize := int64(len(result.Stderr)) + + processedOut, err := processor.Process(ctx, result.Stdout, prefix) if err != nil { - result.TotalSize = int64(len(result.Stdout)) + // File-write failure: keep the raw streams and at minimum report sizes. 
+ result.StdoutTotalSize = stdoutSize + result.StderrTotalSize = stderrSize + result.TotalSize = stdoutSize return result, nil } + result.Stdout = processedOut.Content + result.StdoutTotalSize = processedOut.TotalSize + result.StdoutFilePath = processedOut.FilePath + if processedOut.Truncated { + result.TruncatedStdout = true + } + + processedErr, err := processor.Process(ctx, result.Stderr, prefix+"_stderr") + if err != nil { + result.StderrTotalSize = stderrSize + } else { + result.Stderr = processedErr.Content + result.StderrTotalSize = processedErr.TotalSize + result.StderrFilePath = processedErr.FilePath + if processedErr.Truncated { + result.TruncatedStderr = true + } + } - result.Stdout = processed.Content - result.Truncated = processed.Truncated - result.FilePath = processed.FilePath - result.TotalSize = processed.TotalSize + // Legacy mirrors: keep the stdout-side fields populated for old clients. + result.Truncated = result.TruncatedStdout + result.FilePath = result.StdoutFilePath + result.TotalSize = result.StdoutTotalSize return result, nil } -// WriteRaw writes raw content (not base64 encoded) to a file. -// Used internally for saving large output files. -func (e *Environment) WriteRaw(ctx context.Context, path string, content []byte) error { +// writeRaw writes raw content (not base64 encoded) to a file. +// Used internally for saving large output files; unexported because no +// off-package caller exists (only large_output.go uses it). +func (e *Environment) writeRaw(ctx context.Context, path string, content []byte) error { realPath, err := e.safePath(path) if err != nil { return err @@ -654,16 +931,32 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { return fmt.Errorf("failed to create destination directory: %w", err) } + // Resolve dest to absolute and append a trailing separator BEFORE the + // loop. 
Zip-slip mitigation: comparing against the dir without a + // trailing separator (the previous bug) lets a sibling with a matching + // prefix — e.g. dest="/tmp/skill" vs absTarget="/tmp/skill-evil/x" — + // pass HasPrefix even though the entry escapes the intended directory. + absDest, err := filepath.Abs(dest) + if err != nil { + return fmt.Errorf("failed to resolve dest: %w", err) + } + destPrefix := absDest + string(os.PathSeparator) + for _, f := range reader.File { - // Security: validate zip entry path to prevent path traversal + // Reject any raw "..": filepath.Clean turns "a/../b" into "b" so a + // surviving ".." after Clean — or any literal ".." in the original + // — means the entry tried to escape from the root. + if strings.Contains(f.Name, "..") { + return fmt.Errorf("invalid file path in zip: %s", f.Name) + } cleanName := filepath.Clean(f.Name) if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) { return fmt.Errorf("invalid file path in zip: %s", f.Name) } - targetPath := filepath.Join(dest, cleanName) + targetPath := filepath.Join(absDest, cleanName) absTarget, err := filepath.Abs(targetPath) - if err != nil || !strings.HasPrefix(absTarget, dest) { + if err != nil || (absTarget != absDest && !strings.HasPrefix(absTarget, destPrefix)) { return fmt.Errorf("file path escapes destination: %s", f.Name) } diff --git a/environment/environment_test.go b/environment/environment_test.go index c3fa126..5a3c81f 100644 --- a/environment/environment_test.go +++ b/environment/environment_test.go @@ -8,6 +8,7 @@ import ( "errors" "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -162,6 +163,96 @@ func TestEnvironment_Grep(t *testing.T) { assert.Len(t, result.Matches, 2) } +// TestEnvironment_Grep_RegexNotSubstring exercises the PR-4 fix: the legacy +// path used strings.Contains, so "a.b" matched only the literal "a.b" instead +// of "axb". The regex path must now match both via the wildcard. 
+func TestEnvironment_Grep_RegexNotSubstring(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("axb\nayb\na.b\nzzz\n"), + 0o644, + )) + + result, err := ws.Grep(ctx, &protocol.GrepArgs{Pattern: "a.b"}) + require.NoError(t, err) + // All three of axb / ayb / a.b should match a regex `a.b`. + assert.GreaterOrEqual(t, len(result.Matches), 3, + "regex 'a.b' should match all of axb, ayb, a.b — got %v", result.Matches) + assert.Empty(t, result.RegexCompileError, "valid regex must not surface compile error") +} + +// TestEnvironment_Grep_RegexCompileFallback confirms that a syntactically +// invalid regex still returns results via literal substring fallback and +// surfaces the compile error so the caller can show it to the model. +func TestEnvironment_Grep_RegexCompileFallback(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + require.NoError(t, os.WriteFile( + filepath.Join(ws.Root(), "f.txt"), + []byte("hello [unclosed\nworld\n"), + 0o644, + )) + + // "[unclosed" is a malformed regex (unclosed character class). + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "[unclosed"}) + require.NoError(t, err) + assert.NotEmpty(t, res.RegexCompileError, "compile failure must be surfaced") + require.NotEmpty(t, res.Matches, "literal fallback should still find the line") + assert.Contains(t, res.Matches[0].Content, "[unclosed") +} + +// TestEnvironment_Grep_DefaultIgnore verifies node_modules / .git are skipped +// by the Go fallback so a model issuing `grep` on a workspace with vendor +// dirs doesn't burn minutes scanning them. 
+func TestEnvironment_Grep_DefaultIgnore(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "src"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), "node_modules", "pkg"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(ws.Root(), ".git"), 0o755)) + + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "src", "a.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "node_modules", "pkg", "b.txt"), []byte("needle\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), ".git", "c.txt"), []byte("needle\n"), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "needle"}) + require.NoError(t, err) + + for _, m := range res.Matches { + assert.NotContains(t, m.Path, "node_modules", "node_modules must be ignored, got %s", m.Path) + assert.NotContains(t, m.Path, ".git", ".git must be ignored, got %s", m.Path) + } + + // The src/ hit must still be present. + var sawSrc bool + for _, m := range res.Matches { + if strings.HasPrefix(filepath.ToSlash(m.Path), "src/") { + sawSrc = true + break + } + } + assert.True(t, sawSrc, "expected src/a.txt match in %v", res.Matches) +} + +// TestEnvironment_Grep_LongLine ensures the bumped scanner buffer catches +// matches on lines longer than the default 64 KB cap. +func TestEnvironment_Grep_LongLine(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + // Build a single line: 80 KB of 'a', then NEEDLE, then 80 KB of 'b'. 
+ long := strings.Repeat("a", 80*1024) + "NEEDLE" + strings.Repeat("b", 80*1024) + "\n" + require.NoError(t, os.WriteFile(filepath.Join(ws.Root(), "long.txt"), []byte(long), 0o644)) + + res, err := ws.grepWithGo(ctx, &protocol.GrepArgs{Pattern: "NEEDLE"}) + require.NoError(t, err) + require.NotEmpty(t, res.Matches, "NEEDLE on a >64KB line must still match") + assert.Equal(t, "long.txt", res.Matches[0].Path) +} + func TestEnvironment_Bash(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() diff --git a/environment/htmlx.go b/environment/htmlx.go new file mode 100644 index 0000000..37cdd90 --- /dev/null +++ b/environment/htmlx.go @@ -0,0 +1,51 @@ +// HTML→Markdown / HTML→Text conversion helpers used by webfetch. +// +// Inlined from go-pkg/x/htmlx — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "regexp" + "strings" + + htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2" +) + +// Pre-compiled to avoid recompiling on every conversion call. webfetch is on +// a hot path; per-call compile was a measurable waste. 
+var (
+	htmlScriptRe     = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
+	htmlStyleRe      = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
+	htmlTagRe        = regexp.MustCompile(`<[^>]+>`)
+	htmlWhitespaceRe = regexp.MustCompile(`\s+`)
+	htmlBlankLinesRe = regexp.MustCompile(`\n{3,}`)
+)
+
+func convertHTMLToMarkdown(html string) string {
+	md, err := htmltomarkdown.ConvertString(html)
+	if err != nil {
+		return convertHTMLToText(html)
+	}
+	return cleanupMarkdown(md)
+}
+
+func convertHTMLToText(html string) string {
+	html = htmlScriptRe.ReplaceAllString(html, "")
+	html = htmlStyleRe.ReplaceAllString(html, "")
+	text := htmlTagRe.ReplaceAllString(html, " ")
+
+	text = strings.ReplaceAll(text, "&nbsp;", " ")
+	text = strings.ReplaceAll(text, "&amp;", "&")
+	text = strings.ReplaceAll(text, "&lt;", "<")
+	text = strings.ReplaceAll(text, "&gt;", ">")
+	text = strings.ReplaceAll(text, "&quot;", "\"")
+	text = strings.ReplaceAll(text, "&#39;", "'")
+
+	text = htmlWhitespaceRe.ReplaceAllString(text, " ")
+	return strings.TrimSpace(text)
+}
+
+func cleanupMarkdown(md string) string {
+	md = htmlBlankLinesRe.ReplaceAllString(md, "\n\n")
+	return strings.TrimSpace(md)
+}
diff --git a/environment/large_output.go b/environment/large_output.go
index 2b8282b..84509a5 100644
--- a/environment/large_output.go
+++ b/environment/large_output.go
@@ -4,12 +4,18 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"time"
 
 	"github.com/lithammer/shortuuid/v4"
 )
 
+// readCommandRe matches the read commands as standalone tokens (anchored at
+// start of input or preceded by whitespace, semicolon, pipe, or `&`). Using
+// substring matching ("head ") false-positives against words like "thread ".
+var readCommandRe = regexp.MustCompile(`(?:^|[\s;|&])(?:cat|head|tail|less|more|bat)\s`) + const ( // DefaultMaxOutputSize is the default character limit before truncation (~7.5k tokens) DefaultMaxOutputSize = 30000 // ~7.5k tokens at 4 chars/token @@ -86,7 +92,7 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref filePath := filepath.Join(OutputsDir, filename) // Save full content to file - if err := p.ws.WriteRaw(ctx, filePath, []byte(content)); err != nil { + if err := p.ws.writeRaw(ctx, filePath, []byte(content)); err != nil { // If save fails, just return truncated content without file reference return &ProcessResult{ Content: p.truncateContent(content, ""), @@ -105,16 +111,12 @@ func (p *LargeOutputProcessor) Process(ctx context.Context, content string, pref // ShouldSkipForOutputsDir checks if large output processing should be skipped // for commands operating on .outputs/ directory to avoid circular processing. +// Uses a word-boundary regex so commands like "thread " don't match "head ". func ShouldSkipForOutputsDir(command string) bool { - readCommands := []string{"cat ", "head ", "tail ", "less ", "more ", "bat "} - for _, cmd := range readCommands { - if strings.Contains(command, cmd) { - if strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") { - return true - } - } + if !readCommandRe.MatchString(command) { + return false } - return false + return strings.Contains(command, OutputsDir+"/") || strings.Contains(command, OutputsDir+"\\") } // truncateContent creates a truncated preview with optional file reference. diff --git a/environment/netsafe.go b/environment/netsafe.go new file mode 100644 index 0000000..e7babd2 --- /dev/null +++ b/environment/netsafe.go @@ -0,0 +1,192 @@ +// SSRF guard for outbound HTTP. 
Runner pods must NEVER reach IMDS or the +// cluster apiserver from inside; without dial-time IP validation a +// prompt-injected URL can exfiltrate cloud credentials from +// 169.254.169.254 or hit internal VPC services. The DNS-to-dial +// re-resolution closes the DNS-rebinding gap a host-string allowlist +// would leave open. +// +// Inlined from go-pkg/x/netsafe — keep in sync. flashduty-runner is +// open-source and may not import internal flashcatcloud modules. +package environment + +import ( + "context" + "fmt" + "net" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +// maxRedirects caps redirect chains processed by safeCheckRedirect. Default +// Go is 10; CC web-fetch and Codex both cap around 5. +const maxRedirects = 5 + +func ipBlocked(ip net.IP) bool { + if ip == nil { + return true + } + if ip.IsLoopback() || ip.IsPrivate() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || + ip.IsInterfaceLocalMulticast() || ip.IsMulticast() || ip.IsUnspecified() { + return true + } + if v4 := ip.To4(); v4 != nil { + // CGNAT 100.64.0.0/10 + IETF protocol-assignment 192.0.0.0/24 — both + // routable-but-internal, missed by IsPrivate. + if v4[0] == 100 && v4[1] >= 64 && v4[1] <= 127 { + return true + } + if v4[0] == 192 && v4[1] == 0 && v4[2] == 0 { + return true + } + } + return false +} + +// validateHost resolves host and returns an error if any resolved IP is +// blocked. Pre-flight check that gives a clearer error than the dial-time +// failure when the model passes a literal IMDS / RFC1918 URL. 
+func validateHost(ctx context.Context, host string) error { + if host == "" { + return fmt.Errorf("host is empty") + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return fmt.Errorf("target %s resolves to a blocked address range", host) + } + return nil + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return fmt.Errorf("resolve %s: %w", host, err) + } + for _, addr := range ips { + if ipBlocked(addr.IP) { + return fmt.Errorf("target %s resolves to a blocked address (%s)", host, addr.IP) + } + } + return nil +} + +// validateURL parses raw, rejects non-http(s) schemes, and validates the +// host. Returns the parsed URL on success so callers don't re-parse. +func validateURL(ctx context.Context, raw string) (*url.URL, error) { + u, err := url.Parse(raw) + if err != nil { + return nil, fmt.Errorf("parse url: %w", err) + } + if u.Scheme != "http" && u.Scheme != "https" { + return nil, fmt.Errorf("url scheme must be http or https") + } + if u.Host == "" { + return nil, fmt.Errorf("url host must not be empty") + } + if err := validateHost(ctx, u.Hostname()); err != nil { + return nil, err + } + return u, nil +} + +// safeDialContext returns a DialContext that re-resolves the host on every +// dial and rejects blocked IPs. ServerName for TLS still derives from the +// request hostname so cert validation works. 
+func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, network, addr string) (net.Conn, error) { + d := &net.Dialer{Timeout: dialTimeout, KeepAlive: 30 * time.Second} + proxyHosts := envProxyHosts() + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, err + } + // Configured HTTP(S) proxy hops bypass SSRF validation: the proxy is + // trusted egress infrastructure (often a loopback sidecar) and + // enforces its own destination policy. Final-destination validation + // still runs upstream at validateURL when the caller is a + // user-supplied URL. + if _, ok := proxyHosts[host]; ok { + return d.DialContext(ctx, network, addr) + } + if ip := net.ParseIP(host); ip != nil { + if ipBlocked(ip) { + return nil, fmt.Errorf("blocked address %s", ip) + } + return d.DialContext(ctx, network, addr) + } + ips, err := net.DefaultResolver.LookupIPAddr(ctx, host) + if err != nil { + return nil, err + } + var lastErr error + for _, addr := range ips { + if ipBlocked(addr.IP) { + lastErr = fmt.Errorf("blocked address %s", addr.IP) + continue + } + conn, err := d.DialContext(ctx, network, net.JoinHostPort(addr.IP.String(), port)) + if err == nil { + return conn, nil + } + lastErr = err + } + if lastErr == nil { + lastErr = fmt.Errorf("no resolvable address for %s", host) + } + return nil, lastErr + } +} + +// safeHTTPTransport returns an *http.Transport wired with safeDialContext +// and sensible pool sizes. +func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { + t := http.DefaultTransport.(*http.Transport).Clone() + t.DialContext = safeDialContext(dialTimeout) + return t +} + +// safeCheckRedirect re-validates each hop's host and caps the chain at +// maxRedirects. Without this, a 302 from a permitted host to +// http://169.254.169.254/ is followed by Go's default policy. 
+func safeCheckRedirect(req *http.Request, via []*http.Request) error { + if len(via) >= maxRedirects { + return fmt.Errorf("stopped after %d redirects", maxRedirects) + } + if req.URL == nil { + return fmt.Errorf("redirect url missing") + } + if req.URL.Scheme != "http" && req.URL.Scheme != "https" { + return fmt.Errorf("redirect to non-http(s) scheme %q refused", req.URL.Scheme) + } + if err := validateHost(req.Context(), req.URL.Hostname()); err != nil { + return fmt.Errorf("redirect refused: %w", err) + } + return nil +} + +// envProxyHosts captures the bare hostnames of any HTTP proxies configured +// via env so safeDialContext can let those dials through. Captured once at +// transport-creation time — Go's own ProxyFromEnvironment caches identically. +func envProxyHosts() map[string]struct{} { + out := map[string]struct{}{} + for _, key := range []string{"HTTP_PROXY", "http_proxy", "HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"} { + raw := strings.TrimSpace(os.Getenv(key)) + if raw == "" { + continue + } + host := raw + if u, err := url.Parse(raw); err == nil && u.Host != "" { + host = u.Host + } + if h, _, err := net.SplitHostPort(host); err == nil { + host = h + } + if host != "" { + out[host] = struct{}{} + } + } + return out +} diff --git a/environment/security_test.go b/environment/security_test.go new file mode 100644 index 0000000..c7820ee --- /dev/null +++ b/environment/security_test.go @@ -0,0 +1,165 @@ +package environment + +import ( + "archive/zip" + "bytes" + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/flashcatcloud/flashduty-runner/protocol" +) + +// buildZipWithEntry creates an in-memory zip archive containing a single +// entry with the given name + body. Used by zip-slip tests so we can +// inject crafted entry names that the public buildTestZip helper would +// reject via filepath safety. 
+func buildZipWithEntry(t *testing.T, name, body string) []byte {
+	t.Helper()
+	var buf bytes.Buffer
+	zw := zip.NewWriter(&buf)
+	w, err := zw.CreateHeader(&zip.FileHeader{Name: name, Method: zip.Deflate})
+	require.NoError(t, err)
+	_, err = w.Write([]byte(body))
+	require.NoError(t, err)
+	require.NoError(t, zw.Close())
+	return buf.Bytes()
+}
+
+// TestUnzipSkill_RejectsZipSlip is the regression test for the v3 audit
+// finding: HasPrefix without a trailing separator + missing ".." rejection
+// allowed entries like "../evil" or sibling-prefix attacks like
+// dest="/x/skill" vs absTarget="/x/skill-evil/y" to escape the destination.
+func TestUnzipSkill_RejectsZipSlip(t *testing.T) {
+	env := newTestEnvironment(t)
+	dest := filepath.Join(t.TempDir(), "skills", "victim")
+	require.NoError(t, os.MkdirAll(filepath.Dir(dest), 0o755))
+
+	cases := []struct {
+		name  string
+		entry string
+	}{
+		{"parent-traversal", "../evil"},
+		{"deep-traversal", "a/b/../../../evil"},
+		{"absolute-path-unix", "/etc/passwd"},
+		{"current-then-parent", "./../evil"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			zipData := buildZipWithEntry(t, tc.entry, "pwned")
+			err := env.unzipSkill(zipData, dest)
+			require.Error(t, err, "expected zip-slip refusal for entry %q", tc.entry)
+			// Belt-and-suspenders: ensure no file was written outside dest.
+			outside := filepath.Join(filepath.Dir(dest), "evil")
+			if _, err := os.Stat(outside); err == nil {
+				t.Fatalf("file written outside dest: %s", outside)
+			}
+		})
+	}
+}
+
+// TestUnzipSkill_RejectsSiblingPrefixEscape covers the specific HasPrefix
+// without trailing-separator bug. Without the fix, dest "/tmp/abc/skill"
+// vs absTarget "/tmp/abc/skill-evil" passes HasPrefix even though
+// skill-evil is a SIBLING directory — the entry name "../skill-evil/x"
+// does contain "..", but the distinct gap is the prefix match itself.
The cleaner demonstration: +// we craft an entry that filepath.Clean leaves alone but that resolves +// outside the intended root. With the trailing-separator fix, this is +// rejected. +func TestUnzipSkill_RejectsSiblingPrefixEscape(t *testing.T) { + env := newTestEnvironment(t) + parent := t.TempDir() + dest := filepath.Join(parent, "skill") + require.NoError(t, os.MkdirAll(parent, 0o755)) + + // Pre-create the sibling so a successful escape would land a file in it. + sibling := filepath.Join(parent, "skill-evil") + require.NoError(t, os.MkdirAll(sibling, 0o755)) + + // "../skill-evil/x" — after filepath.Join(dest, ...) and Abs, this + // yields .../skill-evil/x. With the old HasPrefix(absTarget, dest) + // where dest="/.../skill", the prefix "/.../skill" matches + // "/.../skill-evil/x" — bypass. The fix appends os.PathSeparator. + zipData := buildZipWithEntry(t, "../skill-evil/x", "pwned") + err := env.unzipSkill(zipData, dest) + require.Error(t, err) + + if _, err := os.Stat(filepath.Join(sibling, "x")); err == nil { + t.Fatalf("zip-slip succeeded: file landed in sibling dir") + } +} + +// TestUnzipSkill_AcceptsBenign confirms the hardening doesn't break +// ordinary skill installs. 
+func TestUnzipSkill_AcceptsBenign(t *testing.T) { + env := newTestEnvironment(t) + dest := filepath.Join(t.TempDir(), "skills", "ok") + zipData := buildTestZip(t, map[string]string{ + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", + }) + require.NoError(t, env.unzipSkill(zipData, dest)) + for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { + if _, err := os.Stat(filepath.Join(dest, rel)); err != nil { + t.Fatalf("expected file %s after unzip: %v", rel, err) + } + } +} + +// TestWebFetch_RefusesSSRFTargets is the regression test for PR-5.1: the +// runner historically used http.DefaultClient with no dial-time IP guard, +// so a model-supplied URL like http://169.254.169.254/ (cloud IMDS) or +// http://127.0.0.1/ (loopback) hit the real address from inside the pod. +// With the inlined SSRF guard wired in, all three targets are refused +// before any TCP connection is attempted. +func TestWebFetch_RefusesSSRFTargets(t *testing.T) { + env := newTestEnvironment(t) + cases := []string{ + "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 + } + for _, raw := range cases { + t.Run(raw, func(t *testing.T) { + _, err := env.WebFetch(context.Background(), &protocol.WebFetchArgs{URL: raw}) + require.Error(t, err, "%s must be refused", raw) + if !strings.Contains(err.Error(), "refused") && !strings.Contains(err.Error(), "blocked") { + t.Errorf("expected SSRF refusal in %v", err) + } + }) + } +} + +// TestWebFetch_RefusesRedirectToBlocked is the regression test for PR-5.4: +// without CheckRedirect re-validating each hop, a 302 from an allowed +// public host to http://169.254.169.254/ would be followed (Go's default +// CheckRedirect is permissive). With safeCheckRedirect wired, the client +// refuses the hop before dialing IMDS. 
+func TestWebFetch_RefusesRedirectToBlocked(t *testing.T) { + env := newTestEnvironment(t) + target := "http://169.254.169.254/latest/meta-data/" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, target, http.StatusFound) + })) + t.Cleanup(srv.Close) + + // We can't fetch srv.URL through the real fetchClient because it dials + // loopback (also blocked). The hop-validation guarantee is exercised + // directly here against safeCheckRedirect; env.WebFetch on the literal + // 169.254 URL would short-circuit at pre-flight validateURL before even + // hitting CheckRedirect — already covered by TestWebFetch_RefusesSSRFTargets. + _ = env + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, target, nil) + require.NoError(t, err) + via := []*http.Request{{}} + if err := safeCheckRedirect(req, via); err == nil { + t.Fatalf("safeCheckRedirect must refuse a redirect hop to %s", target) + } +} diff --git a/environment/webfetch.go b/environment/webfetch.go index 6217e40..646b29f 100644 --- a/environment/webfetch.go +++ b/environment/webfetch.go @@ -5,13 +5,10 @@ import ( "fmt" "io" "net/http" - "regexp" "strings" "time" "github.com/flashcatcloud/flashduty-runner/protocol" - - htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2" ) const ( @@ -21,12 +18,32 @@ const ( defaultFetchUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ) +// fetchClient is process-global so the connection pool is shared across +// fetches. Wired with netsafe so: +// - dial-time IP validation refuses RFC1918 / loopback / link-local / IMDS +// even after DNS rebinding (re-resolves on every dial). +// - CheckRedirect re-validates each redirect hop, so a 302 from a +// permitted host to http://169.254.169.254/ is refused before dial. 
+// +// Runner pods must NEVER reach IMDS or the cluster apiserver from inside; +// netsafe enforcement is on by default in this binary (no SetEnforced call). +var fetchClient = &http.Client{ + Transport: safeHTTPTransport(10 * time.Second), + CheckRedirect: safeCheckRedirect, +} + // WebFetch fetches content from a URL and converts it to readable format. func (e *Environment) WebFetch(ctx context.Context, args *protocol.WebFetchArgs) (*protocol.WebFetchResult, error) { if args.URL == "" || (!strings.HasPrefix(args.URL, "http://") && !strings.HasPrefix(args.URL, "https://")) { return nil, fmt.Errorf("valid http/https url is required") } + // Pre-flight URL validation gives a clearer error than dial-time + // failure when the model passes a literal IMDS / RFC1918 URL. + if _, err := validateURL(ctx, args.URL); err != nil { + return nil, fmt.Errorf("refused to fetch %s: %w", args.URL, err) + } + timeout := defaultFetchTimeout if args.Timeout > 0 { timeout = time.Duration(args.Timeout) * time.Second @@ -69,7 +86,7 @@ func (e *Environment) WebFetch(ctx context.Context, args *protocol.WebFetchArgs) }, nil } -// fetchURL performs the HTTP request. +// fetchURL performs the HTTP request via the netsafe-wrapped client. func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout time.Duration) (*http.Response, error) { httpCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -81,7 +98,7 @@ func (e *Environment) fetchURL(ctx context.Context, url, format string, timeout setRequestHeaders(req, format) - resp, err := http.DefaultClient.Do(req) + resp, err := fetchClient.Do(req) if err != nil { if httpCtx.Err() == context.DeadlineExceeded { return nil, fmt.Errorf("request timed out") @@ -143,49 +160,3 @@ func convertContent(content, format, contentType string) string { return content } - -// convertHTMLToMarkdown converts HTML content to Markdown. 
-func convertHTMLToMarkdown(html string) string {
-	markdown, err := htmltomarkdown.ConvertString(html)
-	if err != nil {
-		// Fallback to text extraction on error
-		return convertHTMLToText(html)
-	}
-	return cleanupMarkdown(markdown)
-}
-
-// convertHTMLToText extracts plain text from HTML.
-func convertHTMLToText(html string) string {
-	// Remove script and style tags with their content
-	scriptRe := regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
-	html = scriptRe.ReplaceAllString(html, "")
-
-	styleRe := regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
-	html = styleRe.ReplaceAllString(html, "")
-
-	// Remove all HTML tags
-	tagRe := regexp.MustCompile(`<[^>]+>`)
-	text := tagRe.ReplaceAllString(html, " ")
-
-	// Decode common HTML entities
-	text = strings.ReplaceAll(text, "&nbsp;", " ")
-	text = strings.ReplaceAll(text, "&amp;", "&")
-	text = strings.ReplaceAll(text, "&lt;", "<")
-	text = strings.ReplaceAll(text, "&gt;", ">")
-	text = strings.ReplaceAll(text, "&quot;", "\"")
-	text = strings.ReplaceAll(text, "&#39;", "'")
-
-	// Normalize whitespace
-	spaceRe := regexp.MustCompile(`\s+`)
-	text = spaceRe.ReplaceAllString(text, " ")
-
-	return strings.TrimSpace(text)
-}
-
-// cleanupMarkdown removes excessive whitespace and normalizes the markdown.
-func cleanupMarkdown(md string) string { - // Remove excessive blank lines (more than 2 consecutive) - blankLinesRe := regexp.MustCompile(`\n{3,}`) - md = blankLinesRe.ReplaceAllString(md, "\n\n") - return strings.TrimSpace(md) -} diff --git a/go.mod b/go.mod index d6500c2..449025a 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/lithammer/shortuuid/v4 v4.2.0 github.com/modelcontextprotocol/go-sdk v1.5.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.11.1 mvdan.cc/sh/v3 v3.13.1 ) diff --git a/go.sum b/go.sum index 3936390..75e70b5 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,8 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.7.13 h1:GPddIs617DnBLFFVJFgpo1aBfe/4xcvMc3SB5t/D0pA= diff --git a/permission/permission.go b/permission/permission.go index 2bc3e80..6f76ae9 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,4 +1,33 @@ // Package permission implements glob-based command permission checking. 
+// +// Matching semantics +// +// Rules are evaluated against three normalized projections of every shell +// command found anywhere in the input — including pipelines, command +// substitution ($(...) / `...`), process substitution (<(...) / >(...)), +// arithmetic expansion ($((...))), and redirect targets: +// +// 1. The full normalized command (env-prefix included), e.g. +// "KUBECONFIG=x kubectl get pods". +// 2. The same command with leading variable assignments stripped, e.g. +// "kubectl get pods". This prevents trivial bypass via env prefix +// and prevents false-deny when a rule lists only the program form. +// 3. Redirect targets are projected as synthetic commands (">/path", +// ">>/path", "&>/path", "&>>/path", ">|/path") so that rules can +// gate writes the same way they gate executions. Read redirects +// (<, <<, <<<, <>, here-docs) are not checked beyond their inner +// command (here-doc body and here-string undergo expansion). +// +// Both rule patterns and the command projections are run through the +// shell printer's canonical formatter, so "kubectl get pods" matches +// the rule "kubectl get pods" regardless of spacing. +// +// Precedence +// +// Rules are evaluated in specificity order, and the first match wins. +// Specificity is "longer literal prefix before any '*' wins"; the catch-all +// "*" is always tried last. A deny rule that matches first beats a more +// general allow rule, and vice-versa. package permission import ( @@ -22,6 +51,11 @@ const ( type Rule struct { Pattern string Action Action + // canonical is the printer-normalized form of Pattern, used for matching. + canonical string + // specificity is the length of the literal prefix preceding the first '*'. + // Higher is more specific; "*" has -1 so it always sorts last. + specificity int } // Checker checks command permissions against configured rules. 
@@ -30,42 +64,40 @@ type Checker struct { } // NewChecker creates a new permission checker from a map of patterns to actions. -// The "*" pattern is processed first as the default rule, followed by other patterns in sorted order. +// Rules are sorted so that the most specific rule (longest literal prefix +// before any '*') is tried first; "*" is always the fallback. func NewChecker(patterns map[string]string) *Checker { c := &Checker{ rules: make([]Rule, 0, len(patterns)), } - - // Add default "*" rule first if present - if action, ok := patterns["*"]; ok { - c.rules = append(c.rules, Rule{ - Pattern: "*", - Action: Action(strings.ToLower(action)), - }) - } - - // Add other rules in sorted order - otherPatterns := getSortedPatterns(patterns) - for _, pattern := range otherPatterns { + for pattern, action := range patterns { c.rules = append(c.rules, Rule{ - Pattern: pattern, - Action: Action(strings.ToLower(patterns[pattern])), + Pattern: pattern, + Action: Action(strings.ToLower(action)), + canonical: canonicalize(pattern), + specificity: patternSpecificity(pattern), }) } - + sort.SliceStable(c.rules, func(i, j int) bool { + // More specific first. Tie-break on canonical pattern for determinism. + if c.rules[i].specificity != c.rules[j].specificity { + return c.rules[i].specificity > c.rules[j].specificity + } + return c.rules[i].canonical < c.rules[j].canonical + }) return c } -// getSortedPatterns returns all patterns except "*" in sorted order. -func getSortedPatterns(patterns map[string]string) []string { - var result []string - for pattern := range patterns { - if pattern != "*" { - result = append(result, pattern) - } +// patternSpecificity returns the length of the literal prefix preceding the +// first '*' in pattern. "*" returns -1 (always least specific). 
+func patternSpecificity(pattern string) int { + if pattern == "*" { + return -1 + } + if i := strings.IndexByte(pattern, '*'); i >= 0 { + return i } - sort.Strings(result) - return result + return len(pattern) } // Check checks if a command is allowed. @@ -76,64 +108,247 @@ func (c *Checker) Check(command string) error { return fmt.Errorf("empty command") } - // Parse the command into a list of sub-commands to prevent injection (e.g., cmd1; cmd2) parser := syntax.NewParser() f, err := parser.Parse(strings.NewReader(command), "") if err != nil { return fmt.Errorf("failed to parse command: %w", err) } + if err := c.checkNode(f); err != nil { + return err + } + return nil +} + +// checkNode walks any AST node, evaluating every executable command and +// every write-redirect target it encounters. Recurses into command +// substitutions and process substitutions so that constructs like +// `cat <(curl evil)` and `echo $(curl evil)` cannot bypass deny rules. +func (c *Checker) checkNode(root syntax.Node) error { var checkErr error - syntax.Walk(f, func(node syntax.Node) bool { + syntax.Walk(root, func(node syntax.Node) bool { if checkErr != nil { return false } - switch x := node.(type) { case *syntax.Stmt: - // Check redirects for potential path escape or unauthorized writes + // Walk redirects ourselves; the printer cannot give us a + // stable target string without doing it here. for _, redir := range x.Redirs { - if redir.Word != nil { - target := c.nodeToString(redir.Word) - // We don't have rules for redirects yet, but we can detect them. 
- _ = target + if err := c.checkRedirect(redir); err != nil { + checkErr = err + return false } } - case *syntax.CallExpr: - // Check command and arguments - cmdStr := c.nodeToString(x) - if cmdStr != "" { - finalAction, matchedPattern := c.evaluateRules(cmdStr) - if finalAction == ActionDeny { - checkErr = c.denyError(matchedPattern, cmdStr) + if err := c.checkCallExpr(x); err != nil { + checkErr = err + return false + } + // Also descend manually into Args so that any embedded + // CmdSubst/ProcSubst inside the args is checked. (The + // outer Walk would also reach them, but checkCmdSubst / + // checkProcSubst recurse with checkNode, which is the + // canonical entry point and ensures consistent behaviour + // when checkRedirect feeds nested nodes back in.) + return true + case *syntax.CmdSubst: + for _, stmt := range x.Stmts { + if err := c.checkNode(stmt); err != nil { + checkErr = err + return false + } + } + return false // handled + case *syntax.ProcSubst: + for _, stmt := range x.Stmts { + if err := c.checkNode(stmt); err != nil { + checkErr = err return false } } + return false // handled } return true }) - return checkErr } -// nodeToString converts a syntax node back to its string representation. -func (c *Checker) nodeToString(node syntax.Node) string { +// checkCallExpr evaluates a single simple-command against the rules. +// It evaluates two projections — the full canonical form (env assignments +// included) and the form with assignments stripped — and applies a +// "deny wins, allow wins over default-deny" merge: +// +// - If either projection matches a deny rule, the command is denied. +// - Otherwise, if either projection matches an allow rule, allowed. +// - Otherwise, denied by default. +// +// This blocks env-prefix bypass ("FOO=bar curl evil" still hits "curl *") +// while preventing false-deny when only the program form is allow-listed +// ("KUBECONFIG=x kubectl get pods" matches "kubectl get *"). 
+func (c *Checker) checkCallExpr(call *syntax.CallExpr) error { + if len(call.Args) == 0 { + // Pure assignment (`FOO=bar`); nothing executes. + return nil + } + full := canonicalize(nodeToString(call)) + if full == "" { + return nil + } + stripped := full + if len(call.Assigns) > 0 { + stripped = canonicalize(nodeToString(&syntax.CallExpr{Args: call.Args})) + if stripped == "" { + stripped = full + } + } + + fullAction, fullPattern := c.evaluateRules(full) + if stripped == full { + if fullAction == ActionAllow { + return nil + } + return c.denyError(fullPattern, full) + } + + strippedAction, strippedPattern := c.evaluateRules(stripped) + + // Explicit (non-catch-all) deny on either projection wins immediately — + // this is what blocks env-prefix bypass like "FOO=bar curl evil". + if fullAction == ActionDeny && fullPattern != "*" { + return c.denyError(fullPattern, full) + } + if strippedAction == ActionDeny && strippedPattern != "*" { + return c.denyError(strippedPattern, stripped) + } + // Otherwise allow if either projection matched an allow rule. This + // prevents false-deny when only the program form ("kubectl get *") + // is allow-listed and the user prefixes env vars. + if fullAction == ActionAllow || strippedAction == ActionAllow { + return nil + } + // Both projections only matched the catch-all "*" deny (or no rule + // at all). Use whichever projection actually has a pattern for the + // error message. + if fullPattern != "" { + return c.denyError(fullPattern, full) + } + return c.denyError(strippedPattern, stripped) +} + +// checkRedirect rejects writes whose target is not explicitly allowed. +// The target is projected into a synthetic command string of the form +// ">target" (or ">>target", "&>target", "&>>target", ">|target"), which +// rules can match exactly (e.g. allow "> /tmp/*"). Read redirects only +// have their inner expansions / here-doc body checked. 
+func (c *Checker) checkRedirect(redir *syntax.Redirect) error { + if redir == nil || redir.Word == nil { + return nil + } + // Recurse into any CmdSubst/ProcSubst hidden inside the redirect target, + // e.g. `cat < <(curl evil)`. + if err := c.checkNode(redir.Word); err != nil { + return err + } + if redir.Hdoc != nil { + if err := c.checkNode(redir.Hdoc); err != nil { + return err + } + } + + prefix := writeRedirectPrefix(redir.Op) + if prefix == "" { + return nil // read redirect: nothing more to gate + } + target := canonicalize(nodeToString(redir.Word)) + if target == "" { + return nil + } + synth := prefix + " " + target + if action, pattern := c.evaluateRules(synth); action == ActionDeny { + return c.denyError(pattern, synth) + } + return nil +} + +// writeRedirectPrefix returns the canonical operator string for write +// redirects, or "" for read-only redirects. +func writeRedirectPrefix(op syntax.RedirOperator) string { + switch op { + case syntax.RdrOut: + return ">" + case syntax.AppOut: + return ">>" + case syntax.ClbOut: + return ">|" + case syntax.RdrAll: + return "&>" + case syntax.AppAll: + return "&>>" + default: + return "" + } +} + +// nodeToString prints any AST node back to its source form. The printer +// applies a canonical layout, which gives us a stable string regardless +// of how the user originally formatted the input. +func nodeToString(node syntax.Node) string { var sb strings.Builder printer := syntax.NewPrinter() _ = printer.Print(&sb, node) - return sb.String() + return strings.TrimRight(sb.String(), "\n") +} + +// canonicalize normalizes a command string by parsing and re-printing it +// (which collapses runs of whitespace to a single space, normalizes +// quoting, etc.). Falls back to a whitespace-collapsing best-effort if +// the input cannot be parsed (e.g. rule patterns with bare globs). 
+func canonicalize(s string) string { + s = strings.TrimSpace(s) + if s == "" { + return "" + } + parser := syntax.NewParser() + if f, err := parser.Parse(strings.NewReader(s), ""); err == nil { + printed := strings.TrimRight(nodeToString(f), "\n") + if printed != "" { + return collapseWS(printed) + } + } + return collapseWS(s) +} + +// collapseWS collapses any run of ASCII whitespace into a single space. +// Used so that rule patterns and command projections agree even when one +// side has been formatted differently. +func collapseWS(s string) string { + var sb strings.Builder + sb.Grow(len(s)) + prevSpace := false + for _, r := range s { + if r == ' ' || r == '\t' || r == '\n' || r == '\r' { + if !prevSpace { + sb.WriteByte(' ') + prevSpace = true + } + continue + } + sb.WriteRune(r) + prevSpace = false + } + return strings.TrimSpace(sb.String()) } -// evaluateRules checks all rules and returns the final action and matched pattern. +// evaluateRules walks rules in specificity order and returns on first +// match. If no rule matches, the command is denied. 
func (c *Checker) evaluateRules(command string) (Action, string) { - action, pattern := ActionDeny, "" for _, rule := range c.rules { - if matched, _ := matchPattern(rule.Pattern, command); matched { - action, pattern = rule.Action, rule.Pattern + if matched, _ := matchPattern(rule.canonical, command); matched { + return rule.Action, rule.Pattern } } - return action, pattern + return ActionDeny, "" } // denyError creates an error message that lists denied patterns so callers @@ -164,7 +379,7 @@ func matchPattern(pattern, command string) (bool, error) { return true, nil case !strings.Contains(pattern, "*"): return pattern == command, nil - case strings.HasSuffix(pattern, "*"): + case strings.HasSuffix(pattern, "*") && !strings.Contains(strings.TrimSuffix(pattern, "*"), "*"): return strings.HasPrefix(command, strings.TrimSuffix(pattern, "*")), nil default: return doublestar.Match(pattern, command) @@ -183,34 +398,46 @@ func DefaultRules() map[string]string { } } -// SafeReadOnlyRules returns rules that allow read-only operations. +// SafeReadOnlyRules returns rules that allow common read-only operations. +// +// Notably absent compared to a naive list: +// - "find *" — allowed `find . -delete` and `find . -exec rm`. Specific +// safe variants are listed instead, none of which can mutate state. +// - "echo *" — allowed `echo x > /etc/passwd`. Write redirects are now +// gated by checkRedirect, but plain "echo" is rarely needed for +// diagnostics, so it is dropped to stay tight by default. +// +// Write redirects are denied unless an explicit allow rule for the synthetic +// "> target" form is added (e.g. "> /tmp/*"). 
func SafeReadOnlyRules() map[string]string { return map[string]string{ - "*": "deny", - "cat *": "allow", - "head *": "allow", - "tail *": "allow", - "ls *": "allow", - "ls": "allow", - "pwd": "allow", - "whoami": "allow", - "date": "allow", - "echo *": "allow", - "grep *": "allow", - "find *": "allow", - "which *": "allow", - "env": "allow", - "uname *": "allow", - "uname": "allow", - "df *": "allow", - "df": "allow", - "du *": "allow", - "free *": "allow", - "free": "allow", - "uptime": "allow", - "ps *": "allow", - "ps": "allow", - "top -b *": "allow", + "*": "deny", + "cat *": "allow", + "head *": "allow", + "tail *": "allow", + "ls *": "allow", + "ls": "allow", + "pwd": "allow", + "whoami": "allow", + "date": "allow", + "grep *": "allow", + "find * -name *": "allow", + "find * -type *": "allow", + "find * -path *": "allow", + "find * -iname *": "allow", + "which *": "allow", + "env": "allow", + "uname *": "allow", + "uname": "allow", + "df *": "allow", + "df": "allow", + "du *": "allow", + "free *": "allow", + "free": "allow", + "uptime": "allow", + "ps *": "allow", + "ps": "allow", + "top -b *": "allow", } } diff --git a/permission/permission_test.go b/permission/permission_test.go index 08d4563..7688057 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -209,6 +209,198 @@ func TestKubernetesReadOnlyRules(t *testing.T) { assert.False(t, checker.IsAllowed("kubectl exec -it nginx -- bash")) } +// TestChecker_ASTBypassHardening covers the bypass scenarios documented +// in the v3 addendum to the AI-SRE builtin tools refactor plan. +func TestChecker_ASTBypassHardening(t *testing.T) { + tests := []struct { + name string + rules map[string]string + command string + wantErr bool + }{ + // Process / command substitution must be inspected. 
+ { + name: "process substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "cat <(curl evil.com)", + wantErr: true, + }, + { + name: "command substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo $(curl evil.com)", + wantErr: true, + }, + { + name: "backtick substitution bypass blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "echo `curl evil.com`", + wantErr: true, + }, + { + name: "nested process substitution blocked", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "diff <(cat a) <(curl evil.com)", + wantErr: true, + }, + // Redirect targets must be gated. + { + name: "write redirect to /etc blocked under read-only rules", + rules: SafeReadOnlyRules(), + // "echo *" was removed, so this is doubly blocked, but the + // redirect-target check is what makes this safe even if echo + // were re-allowed. + command: "cat /etc/hosts > /etc/passwd", + wantErr: true, + }, + { + name: "write redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi > /etc/passwd", + wantErr: true, + }, + { + name: "append redirect blocked when no allow rule exists", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + }, + command: "echo hi >> /etc/passwd", + wantErr: true, + }, + { + name: "write redirect allowed by explicit rule", + rules: map[string]string{ + "*": "deny", + "echo *": "allow", + "> *": "allow", + }, + command: "echo hi > /tmp/foo", + wantErr: false, + }, + { + name: "read redirect (here-string) is fine", + rules: map[string]string{ + "*": "deny", + "grep *": "allow", + }, + command: "grep foo <<< bar", + wantErr: false, + }, + // find arg-walker via removed pattern. + { + name: "find -delete blocked", + rules: SafeReadOnlyRules(), + command: "find . 
-delete", + wantErr: true, + }, + { + name: "find -exec rm blocked", + rules: SafeReadOnlyRules(), + command: "find . -exec rm {} +", + wantErr: true, + }, + { + name: "find -name still allowed", + rules: SafeReadOnlyRules(), + command: "find . -name *.go", + wantErr: false, + }, + // Whitespace normalization. + { + name: "extra spaces still match", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + // Env-prefix normalization. + { + name: "env prefix does not bypass deny", + rules: map[string]string{ + "*": "allow", + "curl *": "deny", + }, + command: "FOO=bar curl evil.com", + wantErr: true, + }, + { + name: "env prefix matches program rule", + rules: map[string]string{ + "*": "deny", + "kubectl get *": "allow", + }, + command: "KUBECONFIG=x kubectl get pods", + wantErr: false, + }, + // First-match precedence: more specific rule wins regardless of + // map iteration order. + { + name: "first-match precedence: specific deny beats general allow", + rules: map[string]string{ + "*": "allow", + "kubectl *": "allow", + "kubectl delete *": "deny", + }, + command: "kubectl delete pod nginx", + wantErr: true, + }, + { + name: "first-match precedence: specific allow beats general deny", + rules: map[string]string{ + "*": "deny", + "kubectl *": "deny", + "kubectl get *": "allow", + }, + command: "kubectl get pods", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + checker := NewChecker(tt.rules) + err := checker.Check(tt.command) + if tt.wantErr { + assert.Error(t, err, "expected command to be denied: %q", tt.command) + } else { + assert.NoError(t, err, "expected command to be allowed: %q", tt.command) + } + }) + } +} + +func TestPatternSpecificity(t *testing.T) { + // "*" must always sort last; longer literal prefix beats shorter. 
+ checker := NewChecker(map[string]string{ + "*": "allow", + "kubectl *": "deny", + "kubectl get *": "allow", + }) + // kubectl get pods → matches "kubectl get *" (most specific) → allow + assert.NoError(t, checker.Check("kubectl get pods")) + // kubectl delete pod nginx → falls through to "kubectl *" → deny + assert.Error(t, checker.Check("kubectl delete pod nginx")) + // rm -rf / → only "*" matches → allow + assert.NoError(t, checker.Check("rm -rf /")) +} + func TestMatchPattern(t *testing.T) { tests := []struct { pattern string diff --git a/protocol/messages.go b/protocol/messages.go index 4249501..f7db45b 100644 --- a/protocol/messages.go +++ b/protocol/messages.go @@ -157,6 +157,21 @@ type GlobArgs struct { type GrepArgs struct { Pattern string `json:"pattern"` Include []string `json:"include,omitempty"` + + // OutputMode controls the shape of the result: "content" (default — file:line:text rows), + // "files_with_matches" (one path per match), or "count" (one path:count per file). + OutputMode string `json:"output_mode,omitempty"` + // ContextBefore (-B) and ContextAfter (-A) request N lines of context around each match. + // Honored by ripgrep only when OutputMode is "content". + ContextBefore int `json:"context_before,omitempty"` + ContextAfter int `json:"context_after,omitempty"` + // HeadLimit caps the number of result rows returned (post-format). 0 = unlimited. + HeadLimit int `json:"head_limit,omitempty"` + // FileType maps to ripgrep's --type (e.g., "go", "py", "js"). Ignored by the Go fallback. + FileType string `json:"file_type,omitempty"` + // CaseSensitive overrides ripgrep's default --smart-case. + // nil = smart-case; true = case-sensitive; false = case-insensitive. + CaseSensitive *bool `json:"case_sensitive,omitempty"` } // BashArgs are the arguments for bash operation. 
@@ -220,16 +235,39 @@ type GrepResult struct { Truncated bool `json:"truncated,omitempty"` // Whether content was truncated FilePath string `json:"file_path,omitempty"` // Path to full content if truncated TotalSize int64 `json:"total_size,omitempty"` // Original content size + // RegexCompileError surfaces a non-fatal fallback path: the requested pattern failed + // to compile as a Go regexp, so the Go fallback walked files matching it as a literal. + // Empty when ripgrep handled the request or compilation succeeded. + RegexCompileError string `json:"regex_compile_error,omitempty"` } // BashResult is the result of a bash operation. +// +// Stdout and stderr are tracked independently — either stream may be truncated +// and spilled to a separate file. The legacy fields (`Truncated`, `FilePath`, +// `TotalSize`) mirror the stdout-side fields for backward compatibility with +// older safari builds; new callers should prefer the stream-specific fields. type BashResult struct { - Stdout string `json:"stdout"` - Stderr string `json:"stderr"` - ExitCode int `json:"exit_code"` - Truncated bool `json:"truncated,omitempty"` // Whether content was truncated - FilePath string `json:"file_path,omitempty"` // Path to full content if truncated - TotalSize int64 `json:"total_size,omitempty"` // Original content size + Stdout string `json:"stdout"` + Stderr string `json:"stderr"` + ExitCode int `json:"exit_code"` + + // Per-stream truncation metadata. A stream is "truncated" when either + // processLargeOutput trimmed it OR the underlying LimitedWriter hit its + // hard cap mid-execution (the captured buffer carries an inline marker). 
+ TruncatedStdout bool `json:"truncated_stdout,omitempty"` + TruncatedStderr bool `json:"truncated_stderr,omitempty"` + StdoutFilePath string `json:"stdout_file_path,omitempty"` + StderrFilePath string `json:"stderr_file_path,omitempty"` + StdoutTotalSize int64 `json:"stdout_total_size,omitempty"` + StderrTotalSize int64 `json:"stderr_total_size,omitempty"` + + // Legacy stdout-only mirrors. Kept so older safari clients that only + // parse these still see truncation. New code should read the stream- + // specific fields instead. + Truncated bool `json:"truncated,omitempty"` + FilePath string `json:"file_path,omitempty"` + TotalSize int64 `json:"total_size,omitempty"` } // WebFetchArgs are the arguments for webfetch operation. From 07721826fb1fd91ae3babc7b1a8895600710ae4a Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 16:45:57 +0800 Subject: [PATCH 09/10] fix(lint): clear golangci-lint findings + add /health endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - netsafe: guard http.DefaultTransport type-assertion (errcheck) - environment: switch if-else chain to switch (gocritic), drop dead err=nil (ineffassign), rename shadowed err vars (govet) - permission: syntax.ClbOut → syntax.RdrClob (staticcheck SA1019) - cmd: add /health listener for Tencent AGS readiness probe; bind via ListenConfig.Listen with parent context (noctx) and document the all-interfaces bind (gosec G102) Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/main.go | 36 +++++++++++++++++++++++++++++++++++ environment/environment.go | 21 ++++++++++---------- environment/netsafe.go | 6 +++++- environment/security_test.go | 10 +++++----- permission/permission.go | 6 +++--- permission/permission_test.go | 6 +++--- 6 files changed, 63 insertions(+), 22 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 5956afc..6432102 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "log/slog" + "net" + "net/http" 
"os" "os/signal" "path/filepath" @@ -38,6 +40,11 @@ var ( const ( defaultURL = "wss://api.flashcat.cloud/safari/environment/ws" defaultLogLevel = "info" + // healthAddr is bound by the runner so cloud sandbox platforms (e.g. Tencent + // AGS) can satisfy their HTTP readiness probe — the runner only opens + // outbound WebSockets otherwise, leaving no inbound port for the platform + // to probe. + healthAddr = ":49983" ) func main() { @@ -218,6 +225,8 @@ func runRunner() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + startHealthServer(ctx) + sigCh := make(chan os.Signal, 1) signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) @@ -278,6 +287,33 @@ func setupLogging(levelStr string) { slog.SetDefault(slog.New(handler)) } +// startHealthServer binds a tiny HTTP listener that always answers /health 200. +// Required by Tencent AGS (and similar serverless sandbox platforms) which gate +// instance start on an HTTP readiness probe within ~30s. +func startHealthServer(ctx context.Context) { + // AGS readiness probes hit the pod from outside, so binding to all + // interfaces is required — the listener only serves a fixed 200 on /health. 
+ var lc net.ListenConfig + ln, err := lc.Listen(ctx, "tcp", healthAddr) // #nosec G102 -- intentional: external readiness probe + + if err != nil { + slog.Warn("health server skipped (port already in use)", "addr", healthAddr, "err", err) + return + } + mux := http.NewServeMux() + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + }) + srv := &http.Server{Handler: mux, ReadHeaderTimeout: 5 * time.Second} + go func() { + if err := srv.Serve(ln); err != nil && err != http.ErrServerClosed { + slog.Warn("health server stopped", "err", err) + } + }() + slog.Info("health server listening", "addr", healthAddr) +} + func parseLogLevel(levelStr string) slog.Level { switch levelStr { case "debug": diff --git a/environment/environment.go b/environment/environment.go index 86c1658..9d1420f 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -650,9 +650,11 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s err := cmd.Run() exitCode := 0 if err != nil { - if exitError, ok := err.(*exec.ExitError); ok { + var exitError *exec.ExitError + switch { + case errors.As(err, &exitError): exitCode = exitError.ExitCode() - } else if ctx.Err() == context.DeadlineExceeded { + case ctx.Err() == context.DeadlineExceeded: res := &protocol.BashResult{ Stdout: stdout.String(), Stderr: "command timed out", @@ -663,11 +665,10 @@ func (e *Environment) executeBashCommand(ctx context.Context, command, workdir s res.Truncated = true } return res, nil - } else if errors.Is(err, ErrOutputCapped) { + case errors.Is(err, ErrOutputCapped): // Command itself didn't fail — only the writer's contract violation // surfaced. Treat as a successful run with capped buffers. 
- err = nil - } else { + default: return nil, fmt.Errorf("failed to execute command: %w", err) } } @@ -891,7 +892,7 @@ func (e *Environment) SyncSkill(ctx context.Context, args *protocol.SyncSkillArg // Probe phase: cache hit if .checksum matches AND SKILL.md exists. cachedSum, _ := os.ReadFile(filepath.Join(skillDir, ".checksum")) if string(cachedSum) == args.Checksum { - if _, err := os.Stat(filepath.Join(skillDir, "SKILL.md")); err == nil { + if _, statErr := os.Stat(filepath.Join(skillDir, "SKILL.md")); statErr == nil { return &protocol.SyncSkillResult{Success: true, Path: skillDir, Cached: true}, nil } } @@ -923,12 +924,12 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { } // Remove existing directory if exists - if err := os.RemoveAll(dest); err != nil { - return fmt.Errorf("failed to remove existing directory: %w", err) + if rmErr := os.RemoveAll(dest); rmErr != nil { + return fmt.Errorf("failed to remove existing directory: %w", rmErr) } - if err := os.MkdirAll(dest, 0o755); err != nil { - return fmt.Errorf("failed to create destination directory: %w", err) + if mkErr := os.MkdirAll(dest, 0o755); mkErr != nil { + return fmt.Errorf("failed to create destination directory: %w", mkErr) } // Resolve dest to absolute and append a trailing separator BEFORE the diff --git a/environment/netsafe.go b/environment/netsafe.go index e7babd2..74c55b0 100644 --- a/environment/netsafe.go +++ b/environment/netsafe.go @@ -143,7 +143,11 @@ func safeDialContext(dialTimeout time.Duration) func(ctx context.Context, networ // safeHTTPTransport returns an *http.Transport wired with safeDialContext // and sensible pool sizes. 
func safeHTTPTransport(dialTimeout time.Duration) *http.Transport { - t := http.DefaultTransport.(*http.Transport).Clone() + base, ok := http.DefaultTransport.(*http.Transport) + if !ok { + base = &http.Transport{} + } + t := base.Clone() t.DialContext = safeDialContext(dialTimeout) return t } diff --git a/environment/security_test.go b/environment/security_test.go index c7820ee..527361b 100644 --- a/environment/security_test.go +++ b/environment/security_test.go @@ -101,9 +101,9 @@ func TestUnzipSkill_AcceptsBenign(t *testing.T) { env := newTestEnvironment(t) dest := filepath.Join(t.TempDir(), "skills", "ok") zipData := buildTestZip(t, map[string]string{ - "SKILL.md": "# hi", - "resources/a.txt": "alpha", - "scripts/b.sh": "#!/bin/sh\n", + "SKILL.md": "# hi", + "resources/a.txt": "alpha", + "scripts/b.sh": "#!/bin/sh\n", }) require.NoError(t, env.unzipSkill(zipData, dest)) for _, rel := range []string{"SKILL.md", "resources/a.txt", "scripts/b.sh"} { @@ -123,8 +123,8 @@ func TestWebFetch_RefusesSSRFTargets(t *testing.T) { env := newTestEnvironment(t) cases := []string{ "http://169.254.169.254/latest/meta-data/", // AWS / Aliyun / GCP IMDS - "http://127.0.0.1/", // loopback - "http://10.0.0.1/", // RFC1918 + "http://127.0.0.1/", // loopback + "http://10.0.0.1/", // RFC1918 } for _, raw := range cases { t.Run(raw, func(t *testing.T) { diff --git a/permission/permission.go b/permission/permission.go index 6f76ae9..773279f 100644 --- a/permission/permission.go +++ b/permission/permission.go @@ -1,6 +1,6 @@ // Package permission implements glob-based command permission checking. // -// Matching semantics +// # Matching semantics // // Rules are evaluated against three normalized projections of every shell // command found anywhere in the input — including pipelines, command @@ -22,7 +22,7 @@ // shell printer's canonical formatter, so "kubectl get pods" matches // the rule "kubectl get pods" regardless of spacing. 
// -// Precedence +// # Precedence // // Rules are evaluated in specificity order, and the first match wins. // Specificity is "longer literal prefix before any '*' wins"; the catch-all @@ -279,7 +279,7 @@ func writeRedirectPrefix(op syntax.RedirOperator) string { return ">" case syntax.AppOut: return ">>" - case syntax.ClbOut: + case syntax.RdrClob: return ">|" case syntax.RdrAll: return "&>" diff --git a/permission/permission_test.go b/permission/permission_test.go index 7688057..34b17ac 100644 --- a/permission/permission_test.go +++ b/permission/permission_test.go @@ -257,7 +257,7 @@ func TestChecker_ASTBypassHardening(t *testing.T) { }, // Redirect targets must be gated. { - name: "write redirect to /etc blocked under read-only rules", + name: "write redirect to /etc blocked under read-only rules", rules: SafeReadOnlyRules(), // "echo *" was removed, so this is doubly blocked, but the // redirect-target check is what makes this safe even if echo @@ -325,8 +325,8 @@ func TestChecker_ASTBypassHardening(t *testing.T) { { name: "extra spaces still match", rules: map[string]string{ - "*": "deny", - "kubectl get *": "allow", + "*": "deny", + "kubectl get *": "allow", }, command: "kubectl get pods", wantErr: false, From d5b5a5d69e3a10ece0de1ef0de568b794169d3fd Mon Sep 17 00:00:00 2001 From: ysyneu Date: Sat, 2 May 2026 20:59:45 +0800 Subject: [PATCH 10/10] fix(ci): drop invalid G706 gosec exclude + handle unix-abs zip-slip on windows - .golangci.yml: pinned CI golangci-lint v2.4 rejects G706 under gosec.excludes (rule unknown to its gosec). Move suppression to the version-safe linters.exclusions.rules text match so both v2.4 and newer versions stay quiet. Same regression previously fixed in a60df85. - environment/unzipSkill: filepath.IsAbs("/etc/passwd") returns false on Windows (no drive letter), so the absolute-path guard let the entry slip through and TestUnzipSkill_RejectsZipSlip/absolute-path-unix failed on windows-latest. 
Reject leading "/" or "\\" on the raw name before Clean normalizes separators. Co-Authored-By: Claude Opus 4.7 (1M context) --- .golangci.yml | 15 ++++++++++----- environment/environment.go | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 122e243..22459b2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,15 +47,20 @@ linters: # Exclude G301 (directory permissions) - workspace needs readable directories # Exclude G304 (file inclusion) - paths are validated via safePath() # Exclude G306 (file permissions) - workspace files need to be readable - # Exclude G706 (log taint via taint analysis) - slog's structured - # key-value logging is not a format-string injection vector; the rule - # fires on every slog call that includes a user-controlled value, which - # is the whole point of structured logging. excludes: - G301 - G304 - G306 - - G706 + exclusions: + rules: + # G706 (log injection via taint) is added in newer gosec versions and + # not in our pinned CI golangci-lint v2.4 — listing it under + # gosec.excludes fails `config verify` there. Match by text instead so + # newer versions also stay quiet: slog's structured key/value logging + # is not a format-string injection vector. + - linters: + - gosec + text: "G706" formatters: enable: diff --git a/environment/environment.go b/environment/environment.go index 9d1420f..2291f26 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -950,6 +950,13 @@ func (e *Environment) unzipSkill(data []byte, dest string) error { if strings.Contains(f.Name, "..") { return fmt.Errorf("invalid file path in zip: %s", f.Name) } + // Reject leading "/" or "\" on the raw name. filepath.IsAbs is + // platform-dependent and misses unix-style "/etc/passwd" on Windows + // (no drive letter), so check the raw name before Clean rewrites + // separators. 
+ if strings.HasPrefix(f.Name, "/") || strings.HasPrefix(f.Name, `\`) { + return fmt.Errorf("invalid file path in zip: %s", f.Name) + } cleanName := filepath.Clean(f.Name) if strings.HasPrefix(cleanName, "..") || filepath.IsAbs(cleanName) { return fmt.Errorf("invalid file path in zip: %s", f.Name)