diff --git a/projects/goblin/directed_target/config.toml b/projects/goblin/directed_target/config.toml index 5c28eb733..7267786f8 100644 --- a/projects/goblin/directed_target/config.toml +++ b/projects/goblin/directed_target/config.toml @@ -26,6 +26,38 @@ line = 164 file = "/goblin/src/archive/mod.rs" line = 342 +[[target]] +file = "/goblin/src/elf/mod.rs" +line = 163 + +[[target]] +file = "/goblin/src/elf/mod.rs" +line = 269 + +[[target]] +file = "/goblin/src/elf/mod.rs" +line = 365 + +[[target]] +file = "/goblin/src/mach/mod.rs" +line = 157 + +[[target]] +file = "/goblin/src/mach/mod.rs" +line = 186 + +[[target]] +file = "/goblin/src/mach/mod.rs" +line = 245 + +[[target]] +file = "/goblin/src/pe/mod.rs" +line = 100 + +[[target]] +file = "/goblin/src/pe/mod.rs" +line = 171 + [[target]] file = "/goblin/src/pe/debug.rs" line = 172 diff --git a/projects/goblin/parse-afl++.toml b/projects/goblin/parse-afl++.toml index 149102c93..daa5f92c3 100644 --- a/projects/goblin/parse-afl++.toml +++ b/projects/goblin/parse-afl++.toml @@ -20,7 +20,7 @@ args = "-s 90 --wait-jobs -j2" [aflplusplus] target = "/goblin/fuzz-afl/target/release/afl_parse" -args = "-t 2000+ -i /corpus" +args = "-t 2000 -i /corpus" [cov] target = "/cov_parse @@" diff --git a/projects/goblin/parse-libafl.toml b/projects/goblin/parse-libafl.toml index ef1d5eac1..155e055bd 100644 --- a/projects/goblin/parse-libafl.toml +++ b/projects/goblin/parse-libafl.toml @@ -24,7 +24,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/parse_libafl_target @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /corpus -e /ets_parse.toml" +args = "-j4 --sync-limit 200 --sync-jobs 2 --panic-analysis rust -l64 -i /corpus -e /ets_parse.toml" casr_bin = "/casr_parse" [cov] diff --git a/projects/goblin/parse_elf-libafl.toml b/projects/goblin/parse_elf-libafl.toml index c51837914..3f11cbd0c 100644 --- a/projects/goblin/parse_elf-libafl.toml +++ b/projects/goblin/parse_elf-libafl.toml @@ -24,7 
+24,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/parse_elf_libafl_target @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /corpus -e /ets_parse_elf.toml" +args = "-j4 --sync-limit 200 --sync-jobs 2 --panic-analysis rust -l64 -i /corpus -e /ets_parse_elf.toml" casr_bin = "/casr_parse_elf" [cov] diff --git a/projects/image-go/gif-libafl.toml b/projects/image-go/gif-libafl.toml index 729f3e4f8..fafaaf77a 100644 --- a/projects/image-go/gif-libafl.toml +++ b/projects/image-go/gif-libafl.toml @@ -22,7 +22,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/difuzz_target_image_gif @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/gif/corpus -e /ets_gif.toml" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/gif/corpus -e /ets_gif.toml" casr_bin = "/sydr_image_gif" [cov] diff --git a/projects/image-go/jpeg-libafl.toml b/projects/image-go/jpeg-libafl.toml index aca4ec2ff..e823058f8 100644 --- a/projects/image-go/jpeg-libafl.toml +++ b/projects/image-go/jpeg-libafl.toml @@ -22,7 +22,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/difuzz_target_image_jpeg @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/jpeg/corpus -e /ets_jpeg.toml" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/jpeg/corpus -e /ets_jpeg.toml" casr_bin = "/sydr_image_jpeg" [cov] diff --git a/projects/image-go/png-libafl.toml b/projects/image-go/png-libafl.toml index 8b4260af8..1930d8bd6 100644 --- a/projects/image-go/png-libafl.toml +++ b/projects/image-go/png-libafl.toml @@ -22,7 +22,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/difuzz_target_image_png @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/png/corpus -e /ets_png.toml" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i 
/go-fuzz-corpus/png/corpus -e /ets_png.toml" casr_bin = "/sydr_image_png" [cov] diff --git a/projects/image-go/tiff-libafl.toml b/projects/image-go/tiff-libafl.toml index acfe144e3..c0bf792fc 100644 --- a/projects/image-go/tiff-libafl.toml +++ b/projects/image-go/tiff-libafl.toml @@ -22,7 +22,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/difuzz_target_image_tiff @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/tiff/corpus -e /ets_tiff.toml" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/tiff/corpus -e /ets_tiff.toml" casr_bin = "/sydr_image_tiff" [cov] diff --git a/projects/image-go/webp-libafl.toml b/projects/image-go/webp-libafl.toml index b20f52f6b..c9c9c7093 100644 --- a/projects/image-go/webp-libafl.toml +++ b/projects/image-go/webp-libafl.toml @@ -22,7 +22,7 @@ jobs = 2 [difuzz] path = "/directed_target/sydr/difuzz/libafl_difuzz" target = "/difuzz_target_image_webp @@" -args = "-j4 --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/webp/corpus -e /ets_webp.toml" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /go-fuzz-corpus/webp/corpus -e /ets_webp.toml" casr_bin = "/sydr_image_webp" [cov] diff --git a/projects/ollama/Dockerfile b/projects/ollama/Dockerfile new file mode 100644 index 000000000..9d9b2af8c --- /dev/null +++ b/projects/ollama/Dockerfile @@ -0,0 +1,62 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +ARG BASE_IMAGE="sydr/ubuntu22.04-sydr-fuzz" +FROM $BASE_IMAGE + +# Clone Ollama. +RUN git clone https://github.com/ollama/ollama.git /ollama + +WORKDIR /ollama + +RUN git checkout 05a43e078a89247dcc71c703c1bee2af97c1655d + +# Apply patch. +COPY ollama.patch build.sh ./ +RUN git apply ollama.patch + +# Extract corpuses. +COPY corpus.zip / +RUN unzip /corpus.zip -d / + +# Create directories for fuzz targets. +RUN mkdir sydr && cd sydr && mkdir -p convert parser server harmony wordpiece + +# Move fuzz targets. +RUN mkdir fuzz +COPY fuzz.go fuzz + +COPY server_manifest_sydr.go sydr/server +COPY parser_parsefile_sydr.go sydr/parser +COPY convert_tokenizer_sydr.go sydr/convert +COPY convert_vocabulary_sydr.go sydr/convert +COPY harmony_parser_sydr.go /ollama/sydr/harmony +COPY wordpiece_sydr.go /ollama/sydr/wordpiece + +# Build GGML; cap parallel jobs at the CPU count because a bare `make -j` is unbounded. +RUN mkdir -p build && cd build && \ + CC=clang-18 CXX=clang++-18 cmake --preset 'CPU' -DGGML_AVX_VNNI=OFF .. && \ + make -j"$(nproc)" + +# Install go-fuzz. +RUN go install github.com/dvyukov/go-fuzz/go-fuzz@latest && \ + go install github.com/dvyukov/go-fuzz/go-fuzz-build@latest && \ + go get github.com/dvyukov/go-fuzz/go-fuzz-dep + +# Build targets. +RUN ./build.sh + +WORKDIR / diff --git a/projects/ollama/Dockerfile_libafl b/projects/ollama/Dockerfile_libafl new file mode 100644 index 000000000..dbe401275 --- /dev/null +++ b/projects/ollama/Dockerfile_libafl @@ -0,0 +1,69 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +ARG BASE_IMAGE="sydr/ubuntu22.04-sydr-fuzz" +FROM $BASE_IMAGE + +ARG SYDR_ARCHIVE="./sydr.zip" + +WORKDIR / + +# Clone Ollama. +RUN git clone https://github.com/ollama/ollama.git /ollama + +WORKDIR /ollama + +RUN git checkout 05a43e078a89247dcc71c703c1bee2af97c1655d + +# Apply patch. +COPY ollama.patch build.sh ./ +RUN git apply ollama.patch + +# Extract corpuses. +COPY corpus.zip / +RUN unzip /corpus.zip -d / + +# Create directories for fuzz targets. +RUN mkdir sydr && cd sydr && mkdir -p convert/tokenizer convert/vocabulary \ + parser/parsefile server/manifest harmony/parser wordpiece/encode + +# Move fuzz targets. +RUN mkdir fuzz +COPY fuzz.go fuzz + +COPY server_manifest_sydr.go sydr/server/manifest/main.go +COPY parser_parsefile_sydr.go sydr/parser/parsefile/main.go +COPY convert_tokenizer_sydr.go sydr/convert/tokenizer/main.go +COPY convert_vocabulary_sydr.go sydr/convert/vocabulary/main.go +COPY harmony_parser_sydr.go sydr/harmony/parser/main.go +COPY wordpiece_sydr.go sydr/wordpiece/encode/main.go + +# Build GGML; cap parallel jobs at the CPU count because a bare `make -j` is unbounded. +RUN mkdir -p build && cd build && \ + CC=clang-18 CXX=clang++-18 cmake --preset 'CPU' -DGGML_AVX_VNNI=OFF .. && \ + make -j"$(nproc)" + +# Copy LibAFL-DiFuzz target template. +COPY directed_target /directed_target + +WORKDIR /directed_target + +# Build image for LibAFL-DiFuzz.
+ADD ${SYDR_ARCHIVE} ./ +RUN unzip -o ${SYDR_ARCHIVE} && rm ${SYDR_ARCHIVE} +RUN OUT_DIR=/ cargo make all + +WORKDIR / diff --git a/projects/ollama/README.md b/projects/ollama/README.md new file mode 100644 index 000000000..9e8faed05 --- /dev/null +++ b/projects/ollama/README.md @@ -0,0 +1,102 @@ +# Ollama + +Ollama is an application that lets you run large language models locally and offline. + +## Build Docker + + $ sudo docker build -t oss-sydr-fuzz-ollama . + +## Build LibAFL-DiFuzz Docker + +Pass `sydr.zip` as an argument: + + $ sudo docker build --build-arg SYDR_ARCHIVE="sydr.zip" -t oss-sydr-fuzz-libafl-ollama -f ./Dockerfile_libafl . + +## Run Hybrid Fuzzing + +Unzip Sydr (`sydr.zip`) in the `projects/ollama` directory: + + $ unzip sydr.zip + +Run docker: + + $ sudo docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /etc/localtime:/etc/localtime:ro --rm -it -v $PWD:/fuzz oss-sydr-fuzz-ollama /bin/bash + +Run docker for LibAFL-DiFuzz: + + $ sudo docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /etc/localtime:/etc/localtime:ro --rm -it -v $PWD:/fuzz oss-sydr-fuzz-libafl-ollama /bin/bash + +Change directory to `/fuzz`: + + # cd /fuzz + +Run hybrid fuzzing with libfuzzer: + + # sydr-fuzz -c convert_tokenizer-lf.toml run + +Run hybrid fuzzing with LibAFL-DiFuzz: + + # sydr-fuzz -c convert_tokenizer-libafl.toml run + +Minimize corpus (only for libfuzzer): + + # sydr-fuzz -c convert_tokenizer-lf.toml cmin + +Collect coverage: + + # sydr-fuzz -c convert_tokenizer-lf.toml cov-html + # sydr-fuzz -c convert_tokenizer-libafl.toml cov-html + +## Alternative Fuzz Targets + +The Ollama project has 10 alternative fuzz target configurations: 5 targets, each for libfuzzer and LibAFL-DiFuzz.
+ +### convert_vocabulary (libfuzzer) + + # cd /fuzz + # sydr-fuzz -c convert_vocabulary-lf.toml run + +### convert_vocabulary (LibAFL-DiFuzz) + + # cd /fuzz + # sydr-fuzz -c convert_vocabulary-libafl.toml run + +### server_manifest (libfuzzer) + + # cd /fuzz + # sydr-fuzz -c server_manifest-lf.toml run + +### server_manifest (LibAFL-DiFuzz) + + # cd /fuzz + # sydr-fuzz -c server_manifest-libafl.toml run + +### parser_parsefile (libfuzzer) + + # cd /fuzz + # sydr-fuzz -c parser_parsefile-lf.toml run + +### parser_parsefile (LibAFL-DiFuzz) + + # cd /fuzz + # sydr-fuzz -c parser_parsefile-libafl.toml run + +### harmony_parser (libfuzzer) + + # cd /fuzz + # sydr-fuzz -c harmony_parser-lf.toml run + +### harmony_parser (LibAFL-DiFuzz) + + # cd /fuzz + # sydr-fuzz -c harmony_parser-libafl.toml run + +### wordpiece (libfuzzer) + + # cd /fuzz + # sydr-fuzz -c wordpiece-lf.toml run + +### wordpiece (LibAFL-DiFuzz) + + # cd /fuzz + # sydr-fuzz -c wordpiece-libafl.toml run diff --git a/projects/ollama/build.sh b/projects/ollama/build.sh new file mode 100755 index 000000000..b03e2c1e3 --- /dev/null +++ b/projects/ollama/build.sh @@ -0,0 +1,79 @@ +#!/bin/bash -ex +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +# Set compiler and flags +export CC=clang-18 +export CXX=clang++-18 +export CFLAGS="-I/ollama/ml/backend/ggml/ggml/include -I/ollama/llama/llama.cpp/vendor -I/ollama/llama/llama.cpp/include -I/ollama/ml/backend/ggml/ggml/src/ggml-cpu" +export CXXFLAGS=$CFLAGS +export CGO_CFLAGS=$CFLAGS +export CGO_CXXFLAGS=$CFLAGS +export LDFLAGS="-ldl" +export CGO_LDFLAGS="-ldl" + +# LibFuzzer targets, one entry each: output name:package directory:fuzz function +LIBFUZZER_TARGETS=( + "parser_parsefile_fuzz:/ollama/fuzz:FuzzParseFile" + "convert_tokenizer_fuzz:/ollama/fuzz:FuzzParseVocabularyFromTokenizer" + "convert_vocabulary_fuzz:/ollama/fuzz:FuzzParseVocabulary" + "server_manifest_fuzz:/ollama/fuzz:FuzzParseNamedManifest" + "harmony_parser_fuzz:/ollama/fuzz:FuzzHarmonyParser" + "wordpiece_fuzz:/ollama/fuzz:FuzzWordPiece" +) + +# Sydr targets, one entry each: output name:package directory (the source file is NAME.go) +SYDR_TARGETS=( + "parser_parsefile_sydr:/ollama/sydr/parser" + "convert_tokenizer_sydr:/ollama/sydr/convert" + "convert_vocabulary_sydr:/ollama/sydr/convert" + "server_manifest_sydr:/ollama/sydr/server" + "harmony_parser_sydr:/ollama/sydr/harmony" + "wordpiece_sydr:/ollama/sydr/wordpiece" +) + +build_libfuzzer() { # Build /NAME: go-fuzz-build archive for the given fuzz function, linked with -fsanitize=fuzzer. + local output_name="$1" pkg_dir="$2" func="$3" + local output_path="/${output_name}" + + echo -e "Building libfuzzer target ${output_name}...\n" + cd "$pkg_dir" + go mod download + go-fuzz-build -libfuzzer -o "${output_path}.a" -func "$func" + $CC -fsanitize=fuzzer "${output_path}.a" -o "$output_path" $LDFLAGS + rm -f "${output_path}.a" +} + +build_sydr() { # Build /NAME from NAME.go inside the given package directory. + local output_name="$1" pkg_dir="$2" + local output_path="/${output_name}" + + echo -e "Building sydr target ${output_name}...\n" + cd "$pkg_dir" + go build -o "$output_path" "${output_name}.go" +} + +# Build LibFuzzer targets +for target in "${LIBFUZZER_TARGETS[@]}"; do + IFS=':' read -r output_name pkg_dir func <<< "$target" + build_libfuzzer "$output_name" "$pkg_dir" "$func" +done + +# Build Sydr targets +for target in "${SYDR_TARGETS[@]}"; do +
IFS=':' read -r output_name pkg_dir <<< "$target" + build_sydr "$output_name" "$pkg_dir" +done diff --git a/projects/ollama/convert_tokenizer-lf.toml b/projects/ollama/convert_tokenizer-lf.toml new file mode 100644 index 000000000..53b664fe7 --- /dev/null +++ b/projects/ollama/convert_tokenizer-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[sydr] +target = "/convert_tokenizer_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/convert_tokenizer_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/convert/corpus_tokenizer" + +[cov] +target = "/convert_tokenizer_sydr @@" +source = "/ollama" diff --git a/projects/ollama/convert_tokenizer-libafl.toml b/projects/ollama/convert_tokenizer-libafl.toml new file mode 100644 index 000000000..ab76544ce --- /dev/null +++ b/projects/ollama/convert_tokenizer-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[sydr] +target = "/convert_tokenizer_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_convert_tokenizer @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/convert/corpus_tokenizer -e /ets_convert_tokenizer.toml" +casr_bin = "/convert_tokenizer_sydr" + +[cov] +target = "/convert_tokenizer_coverage @@" +source = "/ollama" diff --git a/projects/ollama/convert_tokenizer_sydr.go b/projects/ollama/convert_tokenizer_sydr.go new file mode 100644 index 000000000..228d4769a --- /dev/null +++ b/projects/ollama/convert_tokenizer_sydr.go @@ -0,0 +1,23 @@ +package main + +import ( + "fmt" + "os" + + "github.com/ollama/ollama/fuzz" +) + +// main reads the file named by the first command-line argument and passes +// its contents to fuzz.FuzzParseVocabularyFromTokenizer. +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "usage: %s INPUT_FILE\n", os.Args[0]) + os.Exit(2) + } + data, err := os.ReadFile(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "reading input: %v\n", err) + os.Exit(1) + } + fuzz.FuzzParseVocabularyFromTokenizer(data) +} diff --git a/projects/ollama/convert_vocabulary-lf.toml b/projects/ollama/convert_vocabulary-lf.toml new file mode 100644 index 000000000..41f0f69b4 --- /dev/null +++ b/projects/ollama/convert_vocabulary-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +################################################################################ + +[sydr] +target = "/convert_vocabulary_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/convert_vocabulary_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/convert/corpus_vocabulary" + +[cov] +target = "/convert_vocabulary_sydr @@" +source = "/ollama" diff --git a/projects/ollama/convert_vocabulary-libafl.toml b/projects/ollama/convert_vocabulary-libafl.toml new file mode 100644 index 000000000..3ce806eff --- /dev/null +++ b/projects/ollama/convert_vocabulary-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/convert_vocabulary_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_convert_vocabulary @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/convert/corpus_vocabulary -e /ets_convert_vocabulary.toml" +casr_bin = "/convert_vocabulary_sydr" + +[cov] +target = "/convert_vocabulary_coverage @@" +source = "/ollama" diff --git a/projects/ollama/convert_vocabulary_sydr.go b/projects/ollama/convert_vocabulary_sydr.go new file mode 100644 index 000000000..d3af6baec --- /dev/null +++ b/projects/ollama/convert_vocabulary_sydr.go @@ -0,0 +1,23 @@ +package main + +import ( + "fmt" + "os" + + "github.com/ollama/ollama/fuzz" +) + +// main reads the file named by the first command-line argument and passes +// its contents to fuzz.FuzzParseVocabulary. +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "usage: %s INPUT_FILE\n", os.Args[0]) + os.Exit(2) + } + data, err := os.ReadFile(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "reading input: %v\n", err) + os.Exit(1) + } + fuzz.FuzzParseVocabulary(data) +} diff --git a/projects/ollama/corpus.zip b/projects/ollama/corpus.zip new file mode 100644 index 000000000..4deda937f Binary files /dev/null and b/projects/ollama/corpus.zip differ diff --git a/projects/ollama/directed_target/Makefile.toml b/projects/ollama/directed_target/Makefile.toml new file mode 100644 index 000000000..e18f789ef --- /dev/null +++ b/projects/ollama/directed_target/Makefile.toml @@ -0,0 +1,177 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +################################################################################ + +# Variables +[env] +PROJECT_DIR = { script = ["pwd"], condition = { env_not_set = ["PROJECT_DIR"] } } +EXAMPLE_DIR = { value = "/ollama" } +EXAMPLE_DIR_INSTR = { value = "/ollama-instr" } +DIFUZZ_DIR = { value = "/directed_target/sydr/difuzz", condition = { env_not_set = ["DIFUZZ_DIR"] } } +DIFUZZ_DIR_ABS = { script = ["realpath ${DIFUZZ_DIR}"] } +LIBFORKSERVER_DIR = { value = "${DIFUZZ_DIR_ABS}", condition = { env_not_set = ["LIBFORKSERVER_DIR"] } } +LIBFORKSERVER_DIR_ABS = { script = ["realpath ${LIBFORKSERVER_DIR}"] } +OUT_DIR = { value = "${PROJECT_DIR}", condition = { env_not_set = ["OUT_DIR"] } } +OUT_DIR_ABS = { script = ["realpath ${OUT_DIR}"] } +DIFUZZ_ARGS = { value = "--weights reverse -o difuzz -j8", condition = { env_not_set = ["DIFUZZ_ARGS"] } } +CARGO_TARGET_DIR = { value = "${PROJECT_DIR}/target", condition = { env_not_set = [ + "CARGO_TARGET_DIR", +] } } +PROFILE = { value = "release", condition = { env_not_set = ["PROFILE"] } } +FUZZER = '${DIFUZZ_DIR_ABS}/libafl_difuzz' +GOINSTR_DIFUZZ = '${DIFUZZ_DIR_ABS}/goinstr_difuzz' +GOINSTR_SANCOV = '${DIFUZZ_DIR_ABS}/goinstr_sancov' +GOINSTR_COVERAGE = '${DIFUZZ_DIR_ABS}/goinstr_coverage' + +[tasks.unsupported] +script_runner = "@shell" +script = ''' +echo "Cargo-make not integrated yet on this" +''' + +[tasks.debug] +linux_alias = "debug_unix" +mac_alias = "debug_unix" +windows_alias = "unsupported" + +[tasks.debug_unix] +script_runner = "@shell" +script = ''' +cd ${EXAMPLE_DIR}/sydr/convert/tokenizer +go build -o ${OUT_DIR_ABS}/convert_tokenizer_sydr +cd ${EXAMPLE_DIR}/sydr/convert/vocabulary +go build -o ${OUT_DIR_ABS}/convert_vocabulary_sydr +cd ${EXAMPLE_DIR}/sydr/parser/parsefile +go build -o ${OUT_DIR_ABS}/parser_parsefile_sydr +cd ${EXAMPLE_DIR}/sydr/server/manifest +go build -o ${OUT_DIR_ABS}/server_manifest_sydr +cd ${EXAMPLE_DIR}/sydr/harmony/parser +go build -o ${OUT_DIR_ABS}/harmony_parser_sydr +cd 
${EXAMPLE_DIR}/sydr/wordpiece/encode +go build -o ${OUT_DIR_ABS}/wordpiece_sydr +''' + +[tasks.coverage] +linux_alias = "coverage_unix" +mac_alias = "coverage_unix" +windows_alias = "unsupported" + +[tasks.coverage_unix] +script_runner = "@shell" +script = ''' +cd ${EXAMPLE_DIR} +${GOINSTR_COVERAGE} -i sydr/convert/tokenizer/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... -o ${OUT_DIR_ABS}/convert_tokenizer_coverage sydr/convert/tokenizer/main.go +${GOINSTR_COVERAGE} -i sydr/convert/tokenizer/main.go -a remove -l info + +${GOINSTR_COVERAGE} -i sydr/convert/vocabulary/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... -o ${OUT_DIR_ABS}/convert_vocabulary_coverage sydr/convert/vocabulary/main.go +${GOINSTR_COVERAGE} -i sydr/convert/vocabulary/main.go -a remove -l info + +${GOINSTR_COVERAGE} -i sydr/parser/parsefile/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... -o ${OUT_DIR_ABS}/parser_parsefile_coverage sydr/parser/parsefile/main.go +${GOINSTR_COVERAGE} -i sydr/parser/parsefile/main.go -a remove -l info + +${GOINSTR_COVERAGE} -i sydr/server/manifest/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... -o ${OUT_DIR_ABS}/server_manifest_coverage sydr/server/manifest/main.go +${GOINSTR_COVERAGE} -i sydr/server/manifest/main.go -a remove -l info + +${GOINSTR_COVERAGE} -i sydr/harmony/parser/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... -o ${OUT_DIR_ABS}/harmony_parser_coverage sydr/harmony/parser/main.go +${GOINSTR_COVERAGE} -i sydr/harmony/parser/main.go -a remove -l info + +${GOINSTR_COVERAGE} -i sydr/wordpiece/encode/main.go -a insert -l info +go build -cover -covermode=atomic -coverpkg=./... 
-o ${OUT_DIR_ABS}/wordpiece_coverage sydr/wordpiece/encode/main.go +${GOINSTR_COVERAGE} -i sydr/wordpiece/encode/main.go -a remove -l info +''' + +[tasks.difuzz] +linux_alias = "difuzz_unix" +mac_alias = "difuzz_unix" +windows_alias = "unsupported" + +[tasks.difuzz_unix] +script_runner = "@shell" +script = ''' +${DIFUZZ_DIR_ABS}/difuzz-go -r tokenizer.main -c ${PROJECT_DIR}/config_convert_tokenizer.toml -p ${EXAMPLE_DIR}/sydr/convert/tokenizer/main.go -e ${OUT_DIR_ABS}/ets_convert_tokenizer.toml ${DIFUZZ_ARGS} +${DIFUZZ_DIR_ABS}/difuzz-go -r vocabulary.main -c ${PROJECT_DIR}/config_convert_vocabulary.toml -p ${EXAMPLE_DIR}/sydr/convert/vocabulary/main.go -e ${OUT_DIR_ABS}/ets_convert_vocabulary.toml ${DIFUZZ_ARGS} +${DIFUZZ_DIR_ABS}/difuzz-go -r parsefile.main -c ${PROJECT_DIR}/config_parser_parsefile.toml -p ${EXAMPLE_DIR}/sydr/parser/parsefile/main.go -e ${OUT_DIR_ABS}/ets_parser_parsefile.toml ${DIFUZZ_ARGS} +${DIFUZZ_DIR_ABS}/difuzz-go -r manifest.main -c ${PROJECT_DIR}/config_server_manifest.toml -p ${EXAMPLE_DIR}/sydr/server/manifest/main.go -e ${OUT_DIR_ABS}/ets_server_manifest.toml ${DIFUZZ_ARGS} +${DIFUZZ_DIR_ABS}/difuzz-go -r parser.main -c ${PROJECT_DIR}/config_harmony_parser.toml -p ${EXAMPLE_DIR}/sydr/harmony/parser/main.go -e ${OUT_DIR_ABS}/ets_harmony_parser.toml ${DIFUZZ_ARGS} +${DIFUZZ_DIR_ABS}/difuzz-go -r encode.main -c ${PROJECT_DIR}/config_wordpiece.toml -p ${EXAMPLE_DIR}/sydr/wordpiece/encode/main.go -e ${OUT_DIR_ABS}/ets_wordpiece.toml ${DIFUZZ_ARGS} +''' + +[tasks.target] +linux_alias = "target_unix" +mac_alias = "target_unix" +windows_alias = "unsupported" + +[tasks.target_unix] +script_runner = "@shell" +script = ''' +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_convert_tokenizer.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i ${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/convert/tokenizer +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_convert_tokenizer 
+${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_convert_tokenizer.toml -keep-ets -l info + +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_convert_vocabulary.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i ${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/convert/vocabulary +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_convert_vocabulary +${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_convert_vocabulary.toml -keep-ets -l info + +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_parser_parsefile.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i ${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/parser/parsefile +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_parser_parsefile +${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_parser_parsefile.toml -keep-ets -l info + +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_server_manifest.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i ${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/server/manifest +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_server_manifest +${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_server_manifest.toml -keep-ets -l info + +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_harmony_parser.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i ${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/harmony/parser +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_harmony_parser +${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_harmony_parser.toml -keep-ets -l info + +${GOINSTR_DIFUZZ} -a insert -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_wordpiece.toml -l info -j 8 +${GOINSTR_SANCOV} -a insert -i 
${EXAMPLE_DIR} -o / -l info -j 8 +cd ${EXAMPLE_DIR_INSTR}/sydr/wordpiece/encode +CGO_LDFLAGS="-L${LIBFORKSERVER_DIR_ABS}" go build -o ${OUT_DIR_ABS}/difuzz_target_wordpiece +${GOINSTR_DIFUZZ} -a remove -i ${EXAMPLE_DIR} -o / -e ${OUT_DIR_ABS}/ets_wordpiece.toml -keep-ets -l info +''' +dependencies = ["difuzz"] + +# Clean all built artifacts in PROJECT_DIR, including the *_sydr, *_coverage and ets_*.toml outputs of the debug, coverage and difuzz tasks +[tasks.cleanall] +script_runner = "@shell" +script = ''' +cd ${PROJECT_DIR} +rm -rf corpus crashes target ${CARGO_TARGET_DIR} difuzz difuzz_target_* debug_* coverage_* *_sydr *_coverage Cargo.lock ets.toml ets_*.toml fuzzer.log target.log .cur_input* +pkill difuzz_ || true +''' + +[tasks.all] +linux_alias = "all_unix" +mac_alias = "all_unix" +windows_alias = "unsupported" + +[tasks.all_unix] +dependencies = ["target", "debug", "coverage"] diff --git a/projects/ollama/directed_target/config_convert_tokenizer.toml b/projects/ollama/directed_target/config_convert_tokenizer.toml new file mode 100644 index 000000000..be94f412d --- /dev/null +++ b/projects/ollama/directed_target/config_convert_tokenizer.toml @@ -0,0 +1,31 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +################################################################################ + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 257 + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 270 + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 272 + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 274 diff --git a/projects/ollama/directed_target/config_convert_vocabulary.toml b/projects/ollama/directed_target/config_convert_vocabulary.toml new file mode 100644 index 000000000..25dd98470 --- /dev/null +++ b/projects/ollama/directed_target/config_convert_vocabulary.toml @@ -0,0 +1,39 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 290 + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 297 + +[[target]] +file = "/ollama/convert/tokenizer.go" +line = 263 + +[[target]] +file = "/ollama/convert/tokenizer_spm.go" +line = 47 + +[[target]] +file = "/ollama/convert/tokenizer_spm.go" +line = 49 + +[[target]] +file = "/ollama/convert/tokenizer_spm.go" +line = 94 diff --git a/projects/ollama/directed_target/config_harmony_parser.toml b/projects/ollama/directed_target/config_harmony_parser.toml new file mode 100644 index 000000000..184fc8aad --- /dev/null +++ b/projects/ollama/directed_target/config_harmony_parser.toml @@ -0,0 +1,35 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[[target]] +file = "/ollama/harmony/harmonyparser.go" +line = 106 + +[[target]] +file = "/ollama/harmony/harmonyparser.go" +line = 198 + +[[target]] +file = "/ollama/harmony/harmonyparser.go" +line = 212 + +[[target]] +file = "/ollama/harmony/harmonyparser.go" +line = 221 + +[[target]] +file = "/ollama/harmony/harmonyparser.go" +line = 228 diff --git a/projects/ollama/directed_target/config_parser_parsefile.toml b/projects/ollama/directed_target/config_parser_parsefile.toml new file mode 100644 index 000000000..e10f63dd8 --- /dev/null +++ b/projects/ollama/directed_target/config_parser_parsefile.toml @@ -0,0 +1,35 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[[target]] +file = "/ollama/parser/parser.go" +line = 396 + +[[target]] +file = "/ollama/parser/parser.go" +line = 406 + +[[target]] +file = "/ollama/parser/parser.go" +line = 430 + +[[target]] +file = "/ollama/parser/parser.go" +line = 441 + +[[target]] +file = "/ollama/parser/parser.go" +line = 475 diff --git a/projects/ollama/directed_target/config_server_manifest.toml b/projects/ollama/directed_target/config_server_manifest.toml new file mode 100644 index 000000000..3b7ad030a --- /dev/null +++ b/projects/ollama/directed_target/config_server_manifest.toml @@ -0,0 +1,27 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[[target]] +file = "/ollama/server/manifest.go" +line = 75 + +[[target]] +file = "/ollama/server/manifest.go" +line = 89 + +[[target]] +file = "/ollama/server/manifest.go" +line = 94 diff --git a/projects/ollama/directed_target/config_wordpiece.toml b/projects/ollama/directed_target/config_wordpiece.toml new file mode 100644 index 000000000..84dcfb74a --- /dev/null +++ b/projects/ollama/directed_target/config_wordpiece.toml @@ -0,0 +1,31 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[[target]] +file = "/ollama/model/wordpiece.go" +line = 118 + +[[target]] +file = "/ollama/model/wordpiece.go" +line = 144 + +[[target]] +file = "/ollama/model/vocabulary.go" +line = 72 + +[[target]] +file = "/ollama/model/vocabulary.go" +line = 80 diff --git a/projects/ollama/fuzz.go b/projects/ollama/fuzz.go new file mode 100644 index 000000000..e813c1018 --- /dev/null +++ b/projects/ollama/fuzz.go @@ -0,0 +1,160 @@ +package fuzz + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + + "github.com/ollama/ollama/convert" + model "github.com/ollama/ollama/model" + "github.com/ollama/ollama/parser" + "github.com/ollama/ollama/server" + "github.com/ollama/ollama/harmony" + typesmodel "github.com/ollama/ollama/types/model" +) + +func FuzzParseVocabularyFromTokenizer(data []byte) int { + tmpDir, err := os.MkdirTemp("", "fuzz-") + if err != nil { + return 0 + } + defer os.RemoveAll(tmpDir) + + tokenizerFile := tmpDir + "/tokenizer.json" + if err := os.WriteFile(tokenizerFile, data, 0644); err != nil { + return 0 + } + + fsys := os.DirFS(tmpDir) + _, err = convert.ParseVocabularyFromTokenizer(fsys) + if err != nil { + return 0 + } + + return 1 +} + +func FuzzParseVocabulary(data []byte) int { + if FuzzParseVocabularyFromTokenizer(data) == 1 { + return 1 + } + + tmpDir, err := os.MkdirTemp("", "fuzz-") + if err != nil { + return 0 + } + defer os.RemoveAll(tmpDir) + + modelFile := tmpDir + "/tokenizer.model" + if err := os.WriteFile(modelFile, data, 0644); err != nil 
{ + return 0 + } + + fsys := os.DirFS(tmpDir) + _, err = convert.ParseVocabulary(fsys) + if err != nil { + return 0 + } + + return 1 +} + +func FuzzParseFile(data []byte) int { + _, err := parser.ParseFile(bytes.NewReader(data)) + if err != nil { + return 0 + } + return 1 +} + +func FuzzParseNamedManifest(data []byte) int { + const maxSize = 100 * 1024 + if len(data) > maxSize { + return 0 + } + + var manifest struct { + SchemaVersion int `json:"schemaVersion"` + MediaType string `json:"mediaType"` + Config struct { + MediaType string `json:"mediaType"` + Size int64 `json:"size"` + Digest string `json:"digest"` + } `json:"config"` + Layers []struct { + MediaType string `json:"mediaType"` + Size int64 `json:"size"` + Digest string `json:"digest"` + } `json:"layers"` + } + + if err := json.Unmarshal(data, &manifest); err != nil { + return 0 + } + + name := typesmodel.Name{ + Host: "registry.ollama.ai", + Namespace: "library", + Model: "fuzz-test", + Tag: "latest", + } + + manifestDir, err := os.MkdirTemp("", "ollama-manifest-fuzz") + if err != nil { + return 0 + } + defer os.RemoveAll(manifestDir) + + manifestPath := filepath.Join(manifestDir, name.Filepath()) + if err := os.MkdirAll(filepath.Dir(manifestPath), 0755); err != nil { + return 0 + } + + if err := os.WriteFile(manifestPath, data, 0644); err != nil { + return 0 + } + + if _, err := server.ParseNamedManifest(name); err != nil { + return 0 + } + + return 1 +} + +func FuzzHarmonyParser(data []byte) int { + if len(data) == 0 { + return -1 + } + + parser := harmony.HarmonyParser{ + MessageStartTag: "<|start|>", + MessageEndTag: "<|end|>", + HeaderEndTag: "<|message|>", + } + parser.ParseHeader(string(data)) + + gotEvents := parser.AddContent(string(data)) + if len(gotEvents) == 0 { + return 1 + } + + return 0 +} + +func FuzzWordPiece(data []byte) int { + wpm := model.NewWordPiece( + &model.Vocabulary{ + Values: []string{"[UNK]", "[CLS]", "[SEP]", "▁hello", "▁world", "s", "▁!", "▁@", "▁#", "▁abc", "▁a", "▁b", 
"▁c", "▁s", "a", "b", "c", "d", "z"}, + AddBOS: true, + AddEOS: true, + BOS: []int32{1}, + EOS: []int32{2}, + }) + + _, err := wpm.Encode(string(data), true) + if err != nil { + return 1 + } + return 0 +} diff --git a/projects/ollama/harmony_parser-lf.toml b/projects/ollama/harmony_parser-lf.toml new file mode 100644 index 000000000..a2639eada --- /dev/null +++ b/projects/ollama/harmony_parser-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[sydr] +target = "/harmony_parser_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/harmony_parser_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/harmony" + +[cov] +target = "/harmony_parser_sydr @@" +source = "/ollama" diff --git a/projects/ollama/harmony_parser-libafl.toml b/projects/ollama/harmony_parser-libafl.toml new file mode 100644 index 000000000..26a368caf --- /dev/null +++ b/projects/ollama/harmony_parser-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[sydr] +target = "/harmony_parser_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_harmony_parser @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/harmony -e /ets_harmony_parser.toml" +casr_bin = "/harmony_parser_sydr" + +[cov] +target = "/harmony_parser_coverage @@" +source = "/ollama" diff --git a/projects/ollama/harmony_parser_sydr.go b/projects/ollama/harmony_parser_sydr.go new file mode 100644 index 000000000..f3c92ebdc --- /dev/null +++ b/projects/ollama/harmony_parser_sydr.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" + + "github.com/ollama/ollama/fuzz" +) + +func main() { + data, _ := os.ReadFile(os.Args[1]) + fuzz.FuzzHarmonyParser(data) +} diff --git a/projects/ollama/ollama.patch b/projects/ollama/ollama.patch new file mode 100644 index 000000000..4a3de8ae7 --- /dev/null +++ b/projects/ollama/ollama.patch @@ -0,0 +1,35 @@ +diff --git a/convert/tokenizer.go b/convert/tokenizer.go +index 41d0310a..72d495c8 100644 +--- a/convert/tokenizer.go ++++ b/convert/tokenizer.go +@@ -325,3 +325,15 @@ func (sv SpecialVocabulary) Key() string { + + panic("unknown special vocabulary type") + } ++ ++// ParseVocabularyFromTokenizer is the exported wrapper for parseVocabularyFromTokenizer ++// for use in fuzz testing. 
++func ParseVocabularyFromTokenizer(fsys fs.FS) (*Vocabulary, error) { ++ return parseVocabularyFromTokenizer(fsys) ++} ++ ++// ParseVocabulary is the exported wrapper for parseVocabulary ++// for use in fuzz testing. ++func ParseVocabulary(fsys fs.FS) (*Vocabulary, error) { ++ return parseVocabulary(fsys) ++} +diff --git a/harmony/harmonyparser.go b/harmony/harmonyparser.go +index 3ec2c21f..868c8c8e 100644 +--- a/harmony/harmonyparser.go ++++ b/harmony/harmonyparser.go +@@ -263,6 +263,10 @@ func (s *HarmonyParser) parseHeader(raw string) HarmonyHeader { + return harmonyHeader + } + ++func (s *HarmonyParser) ParseHeader(raw string) HarmonyHeader { ++ return s.parseHeader(raw) ++} ++ + // longest overlap between suffix of s and prefix of delim + func overlap(s, delim string) int { + max := min(len(delim), len(s)) diff --git a/projects/ollama/parser_parsefile-lf.toml b/projects/ollama/parser_parsefile-lf.toml new file mode 100644 index 000000000..4793442c6 --- /dev/null +++ b/projects/ollama/parser_parsefile-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/parser_parsefile_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/parser_parsefile_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/parser/corpus_parsefile" + +[cov] +target = "/parser_parsefile_sydr @@" +source = "/ollama" diff --git a/projects/ollama/parser_parsefile-libafl.toml b/projects/ollama/parser_parsefile-libafl.toml new file mode 100644 index 000000000..4305a8dd9 --- /dev/null +++ b/projects/ollama/parser_parsefile-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/parser_parsefile_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_parser_parsefile @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/parser/corpus_parsefile -e /ets_parser_parsefile.toml" +casr_bin = "/parser_parsefile_sydr" + +[cov] +target = "/parser_parsefile_coverage @@" +source = "/ollama" diff --git a/projects/ollama/parser_parsefile_sydr.go b/projects/ollama/parser_parsefile_sydr.go new file mode 100644 index 000000000..0471a2999 --- /dev/null +++ b/projects/ollama/parser_parsefile_sydr.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" + + "github.com/ollama/ollama/fuzz" +) + +func main() { + data, _ := os.ReadFile(os.Args[1]) + fuzz.FuzzParseFile(data) +} diff --git a/projects/ollama/server_manifest-lf.toml b/projects/ollama/server_manifest-lf.toml new file mode 100644 index 000000000..cc15756fd --- /dev/null +++ b/projects/ollama/server_manifest-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/server_manifest_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/server_manifest_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/server/corpus_manifest" + +[cov] +target = "/server_manifest_sydr @@" +source = "/ollama" diff --git a/projects/ollama/server_manifest-libafl.toml b/projects/ollama/server_manifest-libafl.toml new file mode 100644 index 000000000..23dc9b3a0 --- /dev/null +++ b/projects/ollama/server_manifest-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/server_manifest_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_server_manifest @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/server/corpus_manifest -e /ets_server_manifest.toml" +casr_bin = "/server_manifest_sydr" + +[cov] +target = "/server_manifest_coverage @@" +source = "/ollama" diff --git a/projects/ollama/server_manifest_sydr.go b/projects/ollama/server_manifest_sydr.go new file mode 100644 index 000000000..59807f9d8 --- /dev/null +++ b/projects/ollama/server_manifest_sydr.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" + + "github.com/ollama/ollama/fuzz" +) + +func main() { + data, _ := os.ReadFile(os.Args[1]) + fuzz.FuzzParseNamedManifest(data) +} diff --git a/projects/ollama/wordpiece-lf.toml b/projects/ollama/wordpiece-lf.toml new file mode 100644 index 000000000..2326d94e8 --- /dev/null +++ b/projects/ollama/wordpiece-lf.toml @@ -0,0 +1,28 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +[sydr] +target = "/wordpiece_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[libfuzzer] +path = "/wordpiece_fuzz" +args = "-jobs=1000 -workers=4 -rss_limit_mb=8192 /corpus/wordpiece" + +[cov] +target = "/wordpiece_sydr @@" +source = "/ollama" diff --git a/projects/ollama/wordpiece-libafl.toml b/projects/ollama/wordpiece-libafl.toml new file mode 100644 index 000000000..3d0691c4a --- /dev/null +++ b/projects/ollama/wordpiece-libafl.toml @@ -0,0 +1,30 @@ +# Copyright 2025 ISP RAS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +[sydr] +target = "/wordpiece_sydr @@" +args = "-s 90 --wait-jobs -j2" +jobs = 2 + +[difuzz] +path = "/directed_target/sydr/difuzz/libafl_difuzz" +target = "/difuzz_target_wordpiece @@" +args = "-j4 --panic-analysis go --sync-limit 200 --sync-jobs 2 -l64 -i /corpus/wordpiece -e /ets_wordpiece.toml" +casr_bin = "/wordpiece_sydr" + +[cov] +target = "/wordpiece_coverage @@" +source = "/ollama" diff --git a/projects/ollama/wordpiece_sydr.go b/projects/ollama/wordpiece_sydr.go new file mode 100644 index 000000000..4ea3e9d5d --- /dev/null +++ b/projects/ollama/wordpiece_sydr.go @@ -0,0 +1,12 @@ +package main + +import ( + "os" + + "github.com/ollama/ollama/fuzz" +) + +func main() { + data, _ := os.ReadFile(os.Args[1]) + fuzz.FuzzWordPiece(data) +}