Skip to content

Commit b9f7af9

Browse files
committed
wip
1 parent ef28f0c commit b9f7af9

File tree

2 files changed

+230
-0
lines changed

2 files changed

+230
-0
lines changed

.gitlab-ci.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,19 @@ WorkerExecutor:
889889
RUNNER: aws/fedora-41-x86_64
890890
IAM_INSTANCE_PROFILE: worker-executor
891891

892+
# Runs schutzbot/deploy.sh followed by the worker-executor-crash.sh test case
# on an aws/fedora-41-x86_64 runner with the worker-executor instance profile.
WorkerExecutorFailure:
893+
stage: test
894+
extends: .terraform
895+
rules:
896+
- !reference [.upstream_rules_all, rules]
897+
- !reference [.ga_rules_all, rules]
898+
script:
899+
- schutzbot/deploy.sh
900+
- /usr/libexec/tests/osbuild-composer/worker-executor-crash.sh
901+
variables:
902+
RUNNER: aws/fedora-41-x86_64
903+
IAM_INSTANCE_PROFILE: worker-executor
904+
892905
finish:
893906
stage: finish
894907
dependencies: []
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
#!/bin/bash
2+
3+
set -euo pipefail
4+
5+
source /usr/libexec/osbuild-composer-test/set-env-variables.sh
6+
source /usr/libexec/tests/osbuild-composer/shared_lib.sh
7+
8+
9+
# Container image used for cloud provider CLI tools
10+
CONTAINER_IMAGE_CLOUD_TOOLS="quay.io/osbuild/cloud-tools:latest"
11+
12+
# Provision the software under test.
13+
/usr/libexec/osbuild-composer-test/provision.sh none
14+
15+
TEMPDIR=$(mktemp -d)
16+
BLUEPRINT_FILE=${TEMPDIR}/blueprint.toml
17+
COMPOSE_START=${TEMPDIR}/compose-start.json
18+
COMPOSE_INFO=${TEMPDIR}/compose-info.json
19+
DESCR_INST=${TEMPDIR}/descr-inst.json
20+
AUTH_SG=${TEMPDIR}/auth-sgrule.json
21+
DESCR_SGRULE=${TEMPDIR}/descr-sgrule.json
22+
KEYPAIR=${TEMPDIR}/keypair.pem
23+
INSTANCE_ID=$(curl -Ls http://169.254.169.254/latest/meta-data/instance-id)
24+
WORKER_HOST=$(curl -Ls http://169.254.169.254/latest/meta-data/local-ipv4)
25+
26+
# Check available container runtime
27+
if type -p podman 2>/dev/null >&2; then
28+
CONTAINER_RUNTIME=podman
29+
elif type -p docker 2>/dev/null >&2; then
30+
CONTAINER_RUNTIME=docker
31+
else
32+
echo No container runtime found, install podman or docker.
33+
exit 2
34+
fi
35+
36+
if ! hash aws; then
37+
echo "Using 'awscli' from a container"
38+
sudo "${CONTAINER_RUNTIME}" pull ${CONTAINER_IMAGE_CLOUD_TOOLS}
39+
40+
AWS_CMD="sudo ${CONTAINER_RUNTIME} run --rm \
41+
-v ${TEMPDIR}:${TEMPDIR}:Z \
42+
${CONTAINER_IMAGE_CLOUD_TOOLS} aws --region $AWS_REGION --output json --color on"
43+
else
44+
echo "Using pre-installed 'aws' from the system"
45+
AWS_CMD="aws --region $AWS_REGION --output json --color on"
46+
fi
47+
$AWS_CMD --version
48+
49+
subprocessPIDs=()
50+
# Best-effort teardown, installed as the EXIT trap.
#
# Deletes the temporary EC2 key pair, stops every background job recorded in
# subprocessPIDs and removes the scratch directory. No step may abort the
# function: the script runs with `set -e`, and an early exit here would leave
# the remaining resources behind.
#
# Globals read: AWS_CMD, KEYPAIR, INSTANCE_ID, TEMPDIR, subprocessPIDs
function cleanup() {
    # since this function can be called at any time, ensure that we don't expand unbound variables
    AWS_CMD="${AWS_CMD:-}"
    local keypair="${KEYPAIR:-}"
    local p

    if [ -n "$AWS_CMD" ] && [ -n "$keypair" ] && [ -f "$keypair" ]; then
        # A failed deletion must not skip the process and tempdir cleanup below.
        $AWS_CMD ec2 delete-key-pair --key-name "key-for-$INSTANCE_ID-executor" \
            || echo "WARNING: unable to delete key pair key-for-$INSTANCE_ID-executor" >&2
    fi

    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on older bash.
    for p in ${subprocessPIDs[@]+"${subprocessPIDs[@]}"}; do
        # Children first (e.g. journalctl running below sudo) ...
        sudo pkill -P "$p" || true
        # ... then the job itself: a plain backgrounded ssh has no children,
        # so `pkill -P` alone would leave it running.
        kill "$p" 2>/dev/null || true
    done

    # The scratch directory contains the private key; do not leave it on disk.
    if [ -n "${TEMPDIR:-}" ]; then
        rm -rf -- "$TEMPDIR" || true
    fi
}
62+
63+
trap cleanup EXIT
64+
65+
$AWS_CMD ec2 create-key-pair --key-name "key-for-$INSTANCE_ID-executor" --query 'KeyMaterial' --output text > "$KEYPAIR"
66+
chmod 400 "$KEYPAIR"
67+
$AWS_CMD ec2 describe-key-pairs --key-names "key-for-$INSTANCE_ID-executor"
68+
69+
sudo tee "/etc/osbuild-worker/osbuild-worker.toml" <<EOF
70+
[osbuild_executor]
71+
type = "aws.ec2"
72+
key_name = "key-for-$INSTANCE_ID-executor"
73+
EOF
74+
75+
# Restart the worker so it picks up the [osbuild_executor] config written above.
# NOTE(review): unit name reconstructed from an e-mail-obfuscated scrape
# ("[email protected]"); confirm it is osbuild-worker@1.service.
sudo systemctl restart osbuild-worker@1.service
76+
77+
# Write a basic blueprint for our image.
78+
tee "$BLUEPRINT_FILE" > /dev/null << EOF
79+
name = "bash"
80+
description = "A base system"
81+
version = "0.0.1"
82+
83+
[customizations]
84+
[customizations.services]
85+
enabled = ["blergh"]
86+
EOF
87+
88+
sudo composer-cli blueprints push "$BLUEPRINT_FILE"
89+
90+
WORKER_UNIT=$(sudo systemctl list-units | grep -o -E "osbuild.*worker.*\.service")
91+
sudo journalctl -af -n 1 -u "${WORKER_UNIT}" &
92+
subprocessPIDs+=( $! )
93+
94+
sudo composer-cli --json compose start bash container | tee "$COMPOSE_START"
95+
COMPOSE_ID=$(get_build_info ".build_id" "$COMPOSE_START")
96+
97+
EXECUTOR_IP=0
98+
for _ in {1..60}; do
99+
$AWS_CMD ec2 describe-instances --filter "Name=tag:parent,Values=$INSTANCE_ID" > "$DESCR_INST"
100+
RESERVATIONS=$(jq -r '.Reservations | length' "$DESCR_INST")
101+
if [ "$RESERVATIONS" -gt 0 ]; then
102+
EXECUTOR_IP=$(jq -r .Reservations[0].Instances[0].PrivateIpAddress "$DESCR_INST")
103+
break
104+
fi
105+
106+
echo "Reservation not ready yet, waiting..."
107+
sleep 60
108+
done
109+
110+
if [ "$EXECUTOR_IP" = 0 ]; then
111+
redprint "Unable to find executor host"
112+
exit 1
113+
fi
114+
115+
RDY=0
116+
for _ in {0..60}; do
117+
if ssh-keyscan "$EXECUTOR_IP" > /dev/null 2>&1; then
118+
RDY=1
119+
break
120+
fi
121+
sleep 10
122+
done
123+
124+
if [ "$RDY" = 0 ]; then
125+
redprint "Unable to reach executor host $EXECUTOR_IP"
126+
exit 1
127+
fi
128+
129+
greenprint "Setting up executor"
130+
# the executor should be created with exactly one egress rule (allowing traffic to the worker host)
131+
SGID=$(jq -r .Reservations[0].Instances[0].SecurityGroups[0].GroupId "$DESCR_INST")
132+
$AWS_CMD ec2 describe-security-group-rules --filters "Name=group-id,Values=$SGID" > "$DESCR_SGRULE"
133+
134+
EGRESS_TARGET=$(jq -r '.SecurityGroupRules[] | select(.IsEgress).CidrIpv4' "$DESCR_SGRULE")
135+
if [ "$EGRESS_TARGET" != "$WORKER_HOST/32" ]; then
136+
echo executors "$EGRESS_TARGET" is not the expected "$WORKER_HOST/32"
137+
exit 1
138+
fi
139+
140+
# allow the executor to access the internet for the setup:
141+
$AWS_CMD ec2 authorize-security-group-egress --group-id "$SGID" --protocol tcp --cidr 0.0.0.0/0 --port 1-65535 > "$AUTH_SG"
142+
SGRULEID=$(jq -r .SecurityGroupRules[0].SecurityGroupRuleId "$AUTH_SG")
143+
144+
GIT_COMMIT="${GIT_COMMIT:-${CI_COMMIT_SHA}}"
145+
# Look up the pinned osbuild commit for this distro; the key is passed via --arg
# instead of being spliced into the jq program text.
OSBUILD_GIT_COMMIT=$(jq -r --arg distro "${ID}-${VERSION_ID}" '.[$distro].dependencies.osbuild.commit' Schutzfile)
146+
# shellcheck disable=SC2087
147+
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo tee "/etc/yum.repos.d/osbuild.repo" <<EOF
148+
[osbuild-composer]
149+
name=osbuild-composer
150+
baseurl=http://osbuild-composer-repos.s3-website.us-east-2.amazonaws.com/osbuild-composer/${ID}-${VERSION_ID}/${ARCH}/${GIT_COMMIT}
151+
enabled=1
152+
gpgcheck=0
153+
priority=10
154+
[osbuild]
155+
name=osbuild
156+
baseurl=http://osbuild-composer-repos.s3-website.us-east-2.amazonaws.com/osbuild/${ID}-${VERSION_ID}/${ARCH}/${OSBUILD_GIT_COMMIT}
157+
enabled=1
158+
gpgcheck=0
159+
priority=10
160+
EOF
161+
162+
# Stream the executor's journal into this job's log while the build runs.
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo journalctl -f &
163+
subprocessPIDs+=( $! )
164+
165+
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo dnf install -y osbuild-composer osbuild
166+
167+
# revoke internet access again during the build
168+
$AWS_CMD ec2 revoke-security-group-egress --group-id "$SGID" --security-group-rule-ids "$SGRULEID"
169+
$AWS_CMD ec2 describe-security-group-rules --filters "Name=group-id,Values=$SGID" > "$DESCR_SGRULE"
170+
171+
SGRULES_LENGTH=$(jq -r '.SecurityGroupRules | length' "$DESCR_SGRULE")
172+
if [ "$SGRULES_LENGTH" != 2 ]; then
173+
echo "Expected exactly 2 security group rules (got $SGRULES_LENGTH)"
174+
exit 1
175+
fi
176+
177+
greenprint "🔥 opening worker-executor port on firewall"
178+
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo firewall-cmd --zone=public --add-port=8001/tcp --permanent || true
179+
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo firewall-cmd --reload || true
180+
181+
greenprint "🚀 Starting worker executor"
182+
ssh -oStrictHostKeyChecking=no -i "$KEYPAIR" "fedora@$EXECUTOR_IP" sudo /usr/libexec/osbuild-composer/osbuild-worker-executor -host 0.0.0.0 &
183+
subprocessPIDs+=( $! )
184+
185+
# wait for compose to complete
186+
greenprint "⏱ Waiting for compose to finish: ${COMPOSE_ID}"
187+
while true; do
188+
sudo composer-cli --json compose info "${COMPOSE_ID}" | tee "$COMPOSE_INFO" > /dev/null
189+
COMPOSE_STATUS=$(get_build_info ".queue_status" "$COMPOSE_INFO")
190+
# Is the compose finished?
191+
if [[ $COMPOSE_STATUS != RUNNING ]] && [[ $COMPOSE_STATUS != WAITING ]]; then
192+
break
193+
fi
194+
sleep 30
195+
done
196+
197+
198+
echo "COMPOSES"
199+
sudo curl --silent --show-error --unix-socket /run/cloudapi/api.socket http:///localhost/api/image-builder-composer/v2/composes/
200+
201+
STATUS=$(sudo curl --silent --show-error --unix-socket /run/cloudapi/api.socket http:///localhost/api/image-builder-composer/v2/composes/ | jq -r .items[0])
202+
COMPOSE_STATUS=$(echo "$STATUS" | jq -r '.image_status.status')
203+
COMPOSE_ERROR=$(echo "$STATUS" | jq -r '.image_status.error.reason')
204+
COMPOSE_ERROR_DETAILS=$(echo "$STATUS" | jq -r '.image_status.error.details')
205+
206+
if [ "$COMPOSE_STATUS" != "failure" ]; then
207+
echo "expected build failure, got $STATUS"
208+
exit 1
209+
fi
210+
211+
# The failed compose must carry the reason the worker reports for a failed osbuild run.
if [ "$COMPOSE_ERROR" != "osbuild build failed" ]; then
    echo "unexpected build error reason '$COMPOSE_ERROR', got $STATUS"
    exit 1
fi
215+
216+
echo "todo add check for:"
217+
echo "$COMPOSE_ERROR_DETAILS"

0 commit comments

Comments
 (0)