Skip to content

Commit bcc1682

Browse files
craig[bot] and healthy-pod committed
Merge #87071
87071: dev,roachprod-stress: support running roachprod-stress using dev r=[rickystewart,jlinder] a=healthy-pod This patch adds a new command to `dev` to run roachprod stress using `dev roachprod-stress`. Release justification: Non-production code changes Release note: None Closes #75902 Co-authored-by: healthy-pod <[email protected]>
2 parents 2372698 + ecc37d8 commit bcc1682

File tree

5 files changed

+236
-68
lines changed

5 files changed

+236
-68
lines changed

dev

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ fi
88
set -euo pipefail
99

1010
# Bump this counter to force rebuilding `dev` on all machines.
11-
DEV_VERSION=55
11+
DEV_VERSION=56
1212

1313
THIS_DIR=$(cd "$(dirname "$0")" && pwd)
1414
BINARY_DIR=$THIS_DIR/bin/dev-versions

pkg/cmd/dev/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ go_library(
1818
"lint.go",
1919
"main.go",
2020
"merge_test_xmls.go",
21+
"roachprod_stress.go",
2122
"test.go",
2223
"testlogic.go",
2324
"ui.go",

pkg/cmd/dev/dev.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ Typical usage:
131131
makeLintCmd(ret.lint),
132132
makeTestCmd(ret.test),
133133
makeUICmd(&ret),
134+
makeRoachprodStressCmd(ret.roachprodStress),
134135
)
135136

136137
// Add all the shared flags.

pkg/cmd/dev/roachprod_stress.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright 2022 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.txt.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0, included in the file
9+
// licenses/APL.txt.
10+
11+
package main
12+
13+
import (
14+
"fmt"
15+
"path/filepath"
16+
"strings"
17+
18+
"github.com/alessio/shellescape"
19+
"github.com/spf13/cobra"
20+
)
21+
22+
const (
23+
clusterFlag = "cluster"
24+
)
25+
26+
func makeRoachprodStressCmd(runE func(cmd *cobra.Command, args []string) error) *cobra.Command {
27+
roachprodStressCmd := &cobra.Command{
28+
Use: "roachprod-stress <pkg>",
29+
Short: "stress the given tests on the given roachprod cluster",
30+
Long: "stress the given tests on the given roachprod cluster.",
31+
Example: `dev roachprod-stress ./pkg/sql/importer --cluster my_cluster --stress-args '-arg1 -arg2' -- -test.run="TestMultiNodeExportStmt"`,
32+
Args: cobra.MinimumNArgs(1),
33+
RunE: runE,
34+
}
35+
roachprodStressCmd.Flags().String(stressArgsFlag, "", "additional arguments to pass to stress")
36+
roachprodStressCmd.Flags().String(volumeFlag, "bzlhome", "the Docker volume to use as the container home directory (only used for cross builds)")
37+
roachprodStressCmd.Flags().String(clusterFlag, "", "the name of the cluster (must be set)")
38+
roachprodStressCmd.Flags().Bool(raceFlag, false, "run tests using race builds")
39+
return roachprodStressCmd
40+
}
41+
42+
func (d *dev) roachprodStress(cmd *cobra.Command, commandLine []string) error {
43+
ctx := cmd.Context()
44+
var (
45+
cluster = mustGetFlagString(cmd, clusterFlag)
46+
volume = mustGetFlagString(cmd, volumeFlag)
47+
race = mustGetFlagBool(cmd, raceFlag)
48+
stressCmdArgs = mustGetFlagString(cmd, stressArgsFlag)
49+
)
50+
if cluster == "" {
51+
return fmt.Errorf("must provide --cluster (you can create one via: `roachprod create $USER-stress -n 20 --gce-machine-type=n1-standard-8 --local-ssd=false`)")
52+
}
53+
pkgs, testArgs := splitArgsAtDash(cmd, commandLine)
54+
if len(pkgs) != 1 {
55+
return fmt.Errorf("must provide exactly one test target like ./pkg/cmd/dev")
56+
}
57+
// Find the target we need to build.
58+
pkg := pkgs[0]
59+
pkg = strings.TrimPrefix(pkg, "//")
60+
pkg = strings.TrimPrefix(pkg, "./")
61+
pkg = strings.TrimRight(pkg, "/")
62+
if !strings.HasPrefix(pkg, "pkg/") {
63+
return fmt.Errorf("malformed package %q, expecting pkg/{...}", pkg)
64+
}
65+
66+
var testTarget string
67+
if strings.Contains(pkg, ":") {
68+
testTarget = pkg
69+
} else {
70+
queryArgs := []string{"query", fmt.Sprintf("kind(go_test, //%s:all)", pkg), "--output=label_kind"}
71+
labelKind, queryErr := d.exec.CommandContextSilent(ctx, "bazel", queryArgs...)
72+
if queryErr != nil {
73+
return fmt.Errorf("could not run `bazel %s` (%w)", shellescape.QuoteCommand(queryArgs), queryErr)
74+
}
75+
for _, line := range strings.Split(strings.TrimSpace(string(labelKind)), "\n") {
76+
fields := strings.Fields(line)
77+
if testTarget != "" {
78+
return fmt.Errorf("expected a single test target; got both %s and %s. Please specify in your command, like `dev roachprod-stress %s", testTarget, fields[len(fields)-1], testTarget)
79+
}
80+
testTarget = fields[len(fields)-1]
81+
if fields[0] != "go_test" {
82+
return fmt.Errorf("target %s is of target type %s; expected go_test", testTarget, fields[0])
83+
}
84+
}
85+
}
86+
87+
// List of targets we need to cross-build.
88+
crossTargets := []string{testTarget, stressTarget}
89+
// Check whether this target depends on libgeos.
90+
queryArgs := []string{"query", fmt.Sprintf("somepath(%s, //c-deps:libgeos)", testTarget)}
91+
queryOutput, err := d.exec.CommandContextSilent(ctx, "bazel", queryArgs...)
92+
if err != nil {
93+
return fmt.Errorf("could not run `bazel %s` (%w)", shellescape.QuoteCommand(queryArgs), err)
94+
}
95+
if strings.TrimSpace(string(queryOutput)) != "" {
96+
// If the test depends on geos we additionally want to cross-build it.
97+
crossTargets = append(crossTargets, "//c-deps:libgeos")
98+
}
99+
100+
crossArgs, targets, err := d.getBasicBuildArgs(ctx, crossTargets)
101+
if err != nil {
102+
return err
103+
}
104+
if race {
105+
crossArgs = append(crossArgs, "--config=race")
106+
}
107+
err = d.crossBuild(ctx, crossArgs, targets, "crosslinux", volume)
108+
if err != nil {
109+
return err
110+
}
111+
112+
testTargetBasename := strings.Split(targets[0].fullName, ":")[1]
113+
// Build roachprod-stress and roachprod.
114+
args, buildTargets, err := d.getBasicBuildArgs(ctx, []string{"//pkg/cmd/roachprod-stress"})
115+
if err != nil {
116+
return err
117+
}
118+
if _, err := d.exec.CommandContextSilent(ctx, "bazel", args...); err != nil {
119+
return err
120+
}
121+
if err := d.stageArtifacts(ctx, buildTargets); err != nil {
122+
return err
123+
}
124+
workspace, err := d.getWorkspace(ctx)
125+
if err != nil {
126+
return err
127+
}
128+
// Run roachprod-stress.
129+
roachprodStressArgs := []string{cluster, fmt.Sprintf("./%s", pkg), "-testbin", filepath.Join(workspace, "artifacts", testTargetBasename), "-stressbin", filepath.Join(workspace, "artifacts", "stress"), "-libdir", filepath.Join(workspace, "artifacts", "libgeos", "lib")}
130+
roachprodStressArgs = append(roachprodStressArgs, strings.Fields(stressCmdArgs)...)
131+
roachprodStressArgs = append(roachprodStressArgs, "--")
132+
roachprodStressArgs = append(roachprodStressArgs, testArgs...)
133+
return d.exec.CommandContextInheritingStdStreams(ctx, filepath.Join(workspace, "bin", "roachprod-stress"), roachprodStressArgs...)
134+
}

pkg/cmd/roachprod-stress/main.go

Lines changed: 99 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,20 @@ import (
3737
)
3838

3939
var (
40-
l *logger.Logger
41-
flags = flag.NewFlagSet(os.Args[0], flag.ContinueOnError)
42-
flagP = flags.Int("p", runtime.GOMAXPROCS(0), "run `N` processes in parallel")
43-
flagTimeout = flags.Duration("timeout", 0, "timeout each process after `duration`")
44-
_ = flags.Bool("kill", true, "kill timed out processes if true, otherwise just print pid (to attach with gdb)")
45-
flagFailure = flags.String("failure", "", "fail only if output matches `regexp`")
46-
flagIgnore = flags.String("ignore", "", "ignore failure if output matches `regexp`")
47-
flagMaxTime = flags.Duration("maxtime", 0, "maximum time to run")
48-
flagMaxRuns = flags.Int("maxruns", 0, "maximum number of runs")
49-
_ = flags.Int("maxfails", 1, "maximum number of failures")
50-
flagStderr = flags.Bool("stderr", true, "output failures to STDERR instead of to a temp file")
40+
l *logger.Logger
41+
flags = flag.NewFlagSet(os.Args[0], flag.ContinueOnError)
42+
flagP = flags.Int("p", runtime.GOMAXPROCS(0), "run `N` processes in parallel")
43+
flagTimeout = flags.Duration("timeout", 0, "timeout each process after `duration`")
44+
flagFailure = flags.String("failure", "", "fail only if output matches `regexp`")
45+
flagIgnore = flags.String("ignore", "", "ignore failure if output matches `regexp`")
46+
flagMaxTime = flags.Duration("maxtime", 0, "maximum time to run")
47+
flagMaxRuns = flags.Int("maxruns", 0, "maximum number of runs")
48+
flagStderr = flags.Bool("stderr", true, "output failures to STDERR instead of to a temp file")
49+
flagTestBin = flags.String("testbin", "", "location of the test binary")
50+
flagStressBin = flags.String("stressbin", "bin.docker_amd64/stress", "location of the stress binary")
51+
flagLibDir = flags.String("libdir", "lib.docker_amd64", "location of the directory containing the built geos directories")
52+
_ = flags.Bool("kill", true, "kill timed out processes if true, otherwise just print pid (to attach with gdb)")
53+
_ = flags.Int("maxfails", 1, "maximum number of failures")
5154
)
5255

5356
func init() {
@@ -65,6 +68,70 @@ func init() {
6568
}
6669
}
6770

71+
func verifySourcesAndArtifactsExist(pkg, localTestBin string) error {
72+
// Verify that the given package exists.
73+
fi, err := os.Stat(pkg)
74+
if err != nil {
75+
return errors.Wrapf(err, "the pkg flag %q is not a directory relative to the current working directory", pkg)
76+
}
77+
if !fi.Mode().IsDir() {
78+
return fmt.Errorf("the pkg flag %q is not a directory relative to the current working directory", pkg)
79+
}
80+
81+
// Verify that the test binary exists.
82+
fi, err = os.Stat(localTestBin)
83+
if err != nil {
84+
return errors.Wrapf(err, "test binary %q does not exist", localTestBin)
85+
}
86+
if !fi.Mode().IsRegular() {
87+
return fmt.Errorf("test binary %q is not a file", localTestBin)
88+
}
89+
90+
return nil
91+
}
92+
93+
func verifyFlags() error {
94+
if *flagP <= 0 || *flagTimeout < 0 || len(flags.Args()) == 0 {
95+
var b bytes.Buffer
96+
flags.SetOutput(&b)
97+
flags.Usage()
98+
return errors.Newf("%s", b.String())
99+
}
100+
if *flagFailure != "" {
101+
if _, err := regexp.Compile(*flagFailure); err != nil {
102+
return errors.Wrap(err, "bad failure regexp")
103+
}
104+
}
105+
if *flagIgnore != "" {
106+
if _, err := regexp.Compile(*flagIgnore); err != nil {
107+
return errors.Wrap(err, "bad ignore regexp")
108+
}
109+
}
110+
return nil
111+
}
112+
113+
func getStressSpecificArgs() (ret []string) {
114+
flags.Visit(func(f *flag.Flag) {
115+
if f.Name != "testbin" && f.Name != "stressbin" && f.Name != "libdir" {
116+
ret = append(ret, fmt.Sprintf("-%s=%s", f.Name, f.Value))
117+
}
118+
})
119+
return ret
120+
}
121+
122+
// getTestArgs returns the arguments that follow the first "--" separator
// found in os.Args[3:]. It returns nil when os.Args has three or fewer
// elements or when no separator is present.
func getTestArgs() (ret []string) {
	const firstFlagIdx = 3
	if len(os.Args) <= firstFlagIdx {
		return nil
	}
	rest := os.Args[firstFlagIdx:]
	for idx := range rest {
		if rest[idx] == "--" {
			// Everything after the separator belongs to the test binary.
			return rest[idx+1:]
		}
	}
	return nil
}
134+
68135
// roundToSeconds rounds d to the nearest whole second, with halfway values
// rounded away from zero. It uses Duration.Round rather than float arithmetic
// so that negative durations round correctly and large durations do not lose
// precision through a float64 conversion.
func roundToSeconds(d time.Duration) time.Duration {
	return d.Round(time.Second)
}
@@ -79,84 +146,48 @@ func run() error {
79146
flags.PrintDefaults()
80147
}
81148

82-
if len(os.Args) < 2 {
149+
if len(os.Args) < 3 {
83150
var b bytes.Buffer
84151
flags.SetOutput(&b)
85152
flags.Usage()
86153
return errors.Newf("%s", b.String())
87154
}
88155

89-
cluster := os.Args[1]
90-
if err := flags.Parse(os.Args[2:]); err != nil {
156+
if err := flags.Parse(os.Args[3:]); err != nil {
91157
return err
92158
}
93159

160+
cluster := os.Args[1]
94161
if !*flagStderr {
95162
return errors.New("-stderr=false is unsupported, please tee to a file (or implement the feature)")
96163
}
97164

98165
pkg := os.Args[2]
99166
localTestBin := filepath.Base(pkg) + ".test"
100-
{
101-
fi, err := os.Stat(pkg)
102-
if err != nil {
103-
return errors.Wrapf(err, "the pkg flag %q is not a directory relative to the current working directory", pkg)
104-
}
105-
if !fi.Mode().IsDir() {
106-
return fmt.Errorf("the pkg flag %q is not a directory relative to the current working directory", pkg)
107-
}
108-
109-
// Verify that the test binary exists.
110-
fi, err = os.Stat(localTestBin)
111-
if err != nil {
112-
return errors.Wrapf(err, "test binary %q does not exist", localTestBin)
113-
}
114-
if !fi.Mode().IsRegular() {
115-
return fmt.Errorf("test binary %q is not a file", localTestBin)
116-
}
117-
}
118-
flagsAndArgs := os.Args[3:]
119-
stressArgs := flagsAndArgs
120-
var testArgs []string
121-
for i, arg := range flagsAndArgs {
122-
if arg == "--" {
123-
stressArgs = flagsAndArgs[:i]
124-
testArgs = flagsAndArgs[i+1:]
125-
break
126-
}
167+
if *flagTestBin != "" {
168+
localTestBin = *flagTestBin
127169
}
128170

129-
if *flagP <= 0 || *flagTimeout < 0 || len(flags.Args()) == 0 {
130-
var b bytes.Buffer
131-
flags.SetOutput(&b)
132-
flags.Usage()
133-
return errors.Newf("%s", b.String())
134-
}
135-
if *flagFailure != "" {
136-
if _, err := regexp.Compile(*flagFailure); err != nil {
137-
return errors.Wrap(err, "bad failure regexp")
138-
}
171+
if err := verifySourcesAndArtifactsExist(pkg, localTestBin); err != nil {
172+
return err
139173
}
140-
if *flagIgnore != "" {
141-
if _, err := regexp.Compile(*flagIgnore); err != nil {
142-
return errors.Wrap(err, "bad ignore regexp")
143-
}
174+
175+
if err := verifyFlags(); err != nil {
176+
return err
144177
}
145178

146179
statuses, err := roachprod.Status(context.Background(), l, cluster, "")
147180
if err != nil {
148181
return err
149182
}
150-
nodes := len(statuses)
183+
numNodes := len(statuses)
151184

152-
const stressBin = "bin.docker_amd64/stress"
153-
if err := roachprod.Put(context.Background(), l, cluster, stressBin, "stress", true); err != nil {
185+
if err := roachprod.Put(context.Background(), l, cluster, *flagStressBin, "stress", true); err != nil {
154186
return err
155187
}
156188

157-
const localLibDir = "lib.docker_amd64/"
158-
if fi, err := os.Stat(localLibDir); err == nil && fi.IsDir() {
159-
if err := roachprod.Put(context.Background(), l, cluster, localLibDir, "lib", true); err != nil {
189+
if fi, err := os.Stat(*flagLibDir); err == nil && fi.IsDir() {
190+
if err := roachprod.Put(context.Background(), l, cluster, *flagLibDir, "lib", true); err != nil {
160191
return err
161192
}
162193
}
@@ -174,7 +205,7 @@ func run() error {
174205
return errors.Wrap(err, "failed to copy testdata")
175206
}
176207
}
177-
testBin := filepath.Join(pkg, localTestBin)
208+
testBin := filepath.Join(pkg, filepath.Base(localTestBin))
178209
if err := roachprod.Put(context.Background(), l, cluster, localTestBin, testBin, true); err != nil {
179210
return errors.Wrap(err, "failed to copy testdata")
180211
}
@@ -220,8 +251,8 @@ func run() error {
220251

221252
statusRE := regexp.MustCompile(`(\d+) runs (so far|completed), (\d+) failures, over .*`)
222253

223-
wg.Add(nodes)
224-
for i := 1; i <= nodes; i++ {
254+
wg.Add(numNodes)
255+
for i := 1; i <= numNodes; i++ {
225256
go func(i int) {
226257
stdoutR, stdoutW := io.Pipe()
227258
defer func() {
@@ -266,11 +297,12 @@ func run() error {
266297
}()
267298

268299
cmdArray := []string{
269-
fmt.Sprintf("cd %s; GOTRACEBACK=all ~/stress %s ./%s %s",
300+
fmt.Sprintf("cd %s; GOTRACEBACK=all ~/%s %s ./%s %s",
270301
pkg,
271-
strings.Join(stressArgs, " "),
302+
filepath.Base(*flagStressBin),
303+
strings.Join(getStressSpecificArgs(), " "),
272304
filepath.Base(testBin),
273-
strings.Join(testArgs, " ")),
305+
strings.Join(getTestArgs(), " ")),
274306
}
275307
if err := roachprodRun(fmt.Sprintf("%s:%d", cluster, i), cmdArray); err != nil {
276308
error(err.Error())

0 commit comments

Comments
 (0)