Skip to content

Commit 6f7b47f

Browse files
authored
Agent: Deduplicate file uploads using sha1 digest (#1264)
1 parent 5e3d09f commit 6f7b47f

File tree

5 files changed

+175
-20
lines changed

5 files changed

+175
-20
lines changed

agent/Cargo.lock

Lines changed: 63 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

agent/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ clap = "2.33"
1212
structopt = "0.3"
1313
rand = "0.8"
1414
url = "2.2"
15+
sha-1 = "0.9"
1516

1617
[dev-dependencies]
1718
assert_cmd = "2.0"

agent/src/artifact/upload.rs

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use anyhow::{anyhow, Context, Result};
22
use serde_json::Value;
3+
use sha1::{Digest, Sha1};
34
use std::{
5+
collections::HashSet,
46
env,
57
fs::{self, File},
6-
io::{BufRead, BufReader, Seek, SeekFrom},
8+
io::{BufRead, BufReader, Read, Seek, SeekFrom},
79
path::{Path, PathBuf, MAIN_SEPARATOR},
810
process,
911
thread::sleep,
@@ -60,6 +62,7 @@ fn watch_bep_json_file(
6062
let max_retries = 5;
6163
let mut retries = max_retries;
6264
let mut last_offset = 0;
65+
let mut uploader = Uploader::new();
6366

6467
'parse_loop: loop {
6568
match parser.parse() {
@@ -76,9 +79,13 @@ fn watch_bep_json_file(
7679
.filter(|test_result| status.contains(&test_result.overall_status.as_str()))
7780
{
7881
for failed_test in test_summary.failed.iter() {
79-
if let Err(error) =
80-
upload_test_log(dry, local_exec_root, &failed_test.uri, mode)
81-
{
82+
if let Err(error) = upload_test_log(
83+
&mut uploader,
84+
dry,
85+
local_exec_root,
86+
&failed_test.uri,
87+
mode,
88+
) {
8289
error!("{:?}", error);
8390
}
8491
}
@@ -107,16 +114,21 @@ fn watch_bep_json_file(
107114
let should_upload_bep_json_file =
108115
debug || (monitor_flaky_tests && parser.has_overall_test_status("FLAKY"));
109116
if should_upload_bep_json_file {
110-
if let Err(error) = upload_bep_json_file(dry, build_event_json_file, mode) {
117+
if let Err(error) = upload_bep_json_file(&mut uploader, dry, build_event_json_file, mode) {
111118
error!("{:?}", error);
112119
}
113120
}
114121

115122
Ok(())
116123
}
117124

118-
fn upload_bep_json_file(dry: bool, build_event_json_file: &Path, mode: Mode) -> Result<()> {
119-
upload_artifact(dry, None, build_event_json_file, mode)
125+
fn upload_bep_json_file(
126+
uploader: &mut Uploader,
127+
dry: bool,
128+
build_event_json_file: &Path,
129+
mode: Mode,
130+
) -> Result<()> {
131+
uploader.upload_artifact(dry, None, build_event_json_file, mode)
120132
}
121133

122134
fn execute_command(dry: bool, cwd: Option<&Path>, program: &str, args: &[&str]) -> Result<()> {
@@ -148,19 +160,75 @@ fn execute_command(dry: bool, cwd: Option<&Path>, program: &str, args: &[&str])
148160
Ok(())
149161
}
150162

151-
fn upload_artifact_buildkite(dry: bool, cwd: Option<&Path>, artifact: &Path) -> Result<()> {
152-
let artifact = artifact.display().to_string();
153-
execute_command(
154-
dry,
155-
cwd,
156-
"buildkite-agent",
157-
&["artifact", "upload", artifact.as_str()],
158-
)
163+
type Sha1Digest = [u8; 20];
164+
165+
/// Reads the whole file at `path` into memory.
///
/// Delegates to [`fs::read`], which opens the file and pre-sizes the buffer
/// from the file's metadata instead of growing an empty `Vec` while reading.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read.
fn read_entire_file(path: &Path) -> Result<Vec<u8>> {
    Ok(fs::read(path)?)
}
171+
172+
/// Computes the SHA-1 digest used to identify the file at `path`.
///
/// The digest is taken over the file's contents. If the file cannot be read,
/// the textual form of the path is hashed instead, so a digest is always
/// produced and this function never fails.
fn sha1_digest(path: &Path) -> Sha1Digest {
    // Best effort: an unreadable file is identified by its path rather than its contents.
    let bytes =
        read_entire_file(path).unwrap_or_else(|_| path.display().to_string().into_bytes());

    let mut sha1 = Sha1::new();
    sha1.update(bytes);
    sha1.finalize().into()
}
183+
184+
struct Uploader {
185+
uploaded_digests: HashSet<Sha1Digest>,
159186
}
160187

161-
fn upload_artifact(dry: bool, cwd: Option<&Path>, artifact: &Path, mode: Mode) -> Result<()> {
162-
match mode {
163-
Mode::Buildkite => upload_artifact_buildkite(dry, cwd, artifact),
188+
impl Uploader {
189+
pub fn new() -> Self {
190+
Self {
191+
uploaded_digests: HashSet::new(),
192+
}
193+
}
194+
195+
pub fn upload_artifact(
196+
&mut self,
197+
dry: bool,
198+
cwd: Option<&Path>,
199+
artifact: &Path,
200+
mode: Mode,
201+
) -> Result<()> {
202+
{
203+
let file = match cwd {
204+
Some(cwd) => cwd.join(artifact),
205+
None => PathBuf::from(artifact),
206+
};
207+
let digest = sha1_digest(&file);
208+
if self.uploaded_digests.contains(&digest) {
209+
return Ok(());
210+
}
211+
self.uploaded_digests.insert(digest);
212+
}
213+
214+
match mode {
215+
Mode::Buildkite => self.upload_artifact_buildkite(dry, cwd, artifact),
216+
}
217+
}
218+
219+
fn upload_artifact_buildkite(
220+
&mut self,
221+
dry: bool,
222+
cwd: Option<&Path>,
223+
artifact: &Path,
224+
) -> Result<()> {
225+
let artifact = artifact.display().to_string();
226+
execute_command(
227+
dry,
228+
cwd,
229+
"buildkite-agent",
230+
&["artifact", "upload", artifact.as_str()],
231+
)
164232
}
165233
}
166234

@@ -213,6 +281,7 @@ fn uri_to_file_path(uri: &str) -> Result<PathBuf> {
213281
}
214282

215283
fn upload_test_log(
284+
uploader: &mut Uploader,
216285
dry: bool,
217286
local_exec_root: Option<&Path>,
218287
test_log: &str,
@@ -221,7 +290,7 @@ fn upload_test_log(
221290
let path = uri_to_file_path(test_log)?;
222291

223292
if let Some((first, second)) = split_path_inclusive(&path, "testlogs") {
224-
return upload_artifact(dry, Some(&first), &second, mode);
293+
return uploader.upload_artifact(dry, Some(&first), &second, mode);
225294
}
226295

227296
let artifact = if let Some(local_exec_root) = local_exec_root {
@@ -234,7 +303,7 @@ fn upload_test_log(
234303
&path
235304
};
236305

237-
upload_artifact(dry, local_exec_root, &artifact, mode)
306+
uploader.upload_artifact(dry, local_exec_root, &artifact, mode)
238307
}
239308

240309
#[derive(Debug)]

agent/tests/artifact/upload.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,21 @@ fn truncate_build_event_json_file_recover_from_middle() -> Result<()> {
125125
Ok(())
126126
})
127127
}
128+
129+
/// The fixture lists the same failed test log twice; with `--dry` the agent
/// must print the upload command for that log exactly once.
#[cfg(not(target_os = "windows"))]
#[test]
fn test_logs_deduplicated() -> Result<()> {
    let expected_upload = "buildkite-agent artifact upload src/test/shell/bazel/starlark_repository_test/shard_4_of_6/test_attempts/attempt_1.log";
    let printed_exactly_once = predicates::str::contains(expected_upload).count(1);

    let mut agent = Command::cargo_bin("bazelci-agent")?;
    agent
        .arg("artifact")
        .arg("upload")
        .arg("--dry")
        .arg("--mode=buildkite")
        .arg("--build_event_json_file=tests/data/test_bep_duplicated.json");

    agent.assert().success().stdout(printed_exactly_once);

    Ok(())
}
agent/tests/data/test_bep_duplicated.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"id":{"workspace":{}},"workspaceInfo":{"localExecRoot":"/private/var/tmp/_bazel_buildkite/78a0792bf9bb0133b1a4a7d083181fcb/execroot/io_bazel"}}
2+
{"id":{"testSummary":{"label":"//src/test/shell/bazel:starlark_repository_test","configuration":{"id":"7479eaa1eeb472e5c3fdd9f0b604289ffbe45a36edb8a7f474df0c95501b4d00"}}},"testSummary":{"totalRunCount":7,"failed":[{"uri":"file:///private/var/tmp/_bazel_buildkite/78a0792bf9bb0133b1a4a7d083181fcb/execroot/io_bazel/bazel-out/darwin-fastbuild/testlogs/src/test/shell/bazel/starlark_repository_test/shard_4_of_6/test_attempts/attempt_1.log"}],"overallStatus":"FLAKY","firstStartTimeMillis":"1630444947193","lastStopTimeMillis":"1630445154997","totalRunDurationMillis":"338280","runCount":1,"shardCount":6}}
3+
{"id":{"testSummary":{"label":"//src/test/shell/bazel:starlark_repository_test","configuration":{"id":"7479eaa1eeb472e5c3fdd9f0b604289ffbe45a36edb8a7f474df0c95501b4d00"}}},"testSummary":{"totalRunCount":7,"failed":[{"uri":"file:///private/var/tmp/_bazel_buildkite/78a0792bf9bb0133b1a4a7d083181fcb/execroot/io_bazel/bazel-out/darwin-fastbuild/testlogs/src/test/shell/bazel/starlark_repository_test/shard_4_of_6/test_attempts/attempt_1.log"}],"overallStatus":"FLAKY","firstStartTimeMillis":"1630444947193","lastStopTimeMillis":"1630445154997","totalRunDurationMillis":"338280","runCount":1,"shardCount":6}}
4+
{"id":{"progress":{}},"progress":{},"lastMessage":true}

0 commit comments

Comments
 (0)