Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
26f3f61
docs: add sd.cpp-webui as an available frontend (#738)
daniandtheweb Jul 23, 2025
ab835f7
fix: correct head dim check and L_k padding of flash attention (#736)
Green-Sky Jul 23, 2025
bd1eaef
fix: convert f64 to f32 and i64 to i32 when loading weights
leejet Jul 23, 2025
fb86bf4
docs: add LocalAI to README's UIs (#741)
mudler Jul 24, 2025
eed97a5
sync: update ggml
leejet Jul 24, 2025
f54524f
sync: update ggml
leejet Jul 28, 2025
8c3c788
feat: upgrade musa sdk to rc4.2.0 (#732)
yeahdongcn Jul 28, 2025
59080d3
feat: change image dimensions requirement for DiT models (#742)
stduhpf Jul 28, 2025
7eb30d0
feat: add missing models and parameters to image metadata (#743)
wbruna Jul 28, 2025
f6b9aa1
refector: optimize the usage of tensor_types
leejet Jul 28, 2025
6167e29
feat: support build against system installed GGML library (#749)
Seas0 Aug 2, 2025
f7f05fb
chore: avoid setting GGML_MAX_NAME when building against external ggm…
wbruna Aug 2, 2025
5b8996f
Conv2D direct support (#744)
daniandtheweb Aug 2, 2025
5900ef6
sync: update ggml, make cuda im2col a little faster
leejet Aug 2, 2025
0e86d90
chore: add Nvidia 30 series (cuda arch 86) to build
nekopendev Sep 1, 2025
eea77cb
feat: throttle model loading progress updates (#782)
wbruna Sep 1, 2025
a7c7905
docs: add missing dash to docs/chroma.md (#771)
SmallAndSoft Sep 1, 2025
f0fa7dd
docs: add compile option needed by Ninja (#770)
SmallAndSoft Sep 1, 2025
4c6475f
feat: show usage on unknown arg (#767)
stduhpf Sep 1, 2025
2eb3845
fix: typo in the verbose long flag (#783)
wbruna Sep 3, 2025
cb1d975
feat: add wan2.1/2.2 support (#778)
leejet Sep 6, 2025
21ce9fe
feat: add support for timestep boundary based automatic expert routin…
stduhpf Sep 6, 2025
141a4b4
feat: add flow shift parameter (for SD3 and Wan) (#780)
stduhpf Sep 6, 2025
d7f430c
docs: update docs and help message
leejet Sep 6, 2025
675208d
chore: update to c++17
leejet Sep 7, 2025
1c07fb6
docs: update docs/wan.md
leejet Sep 7, 2025
f8fe4e7
fix: add flash attn support check (#803)
leejet Sep 7, 2025
c587a43
feat: support incrementing ref image index (omni-kontext) (#755)
stduhpf Sep 7, 2025
c648001
feat: add detailed tensor loading time stat (#793)
leejet Sep 7, 2025
abb115c
fix: clarify lora quant support and small fixes (#792)
hartmark Sep 8, 2025
ff4fdbb
fix: accept NULL in sd_img_gen_params_t::input_id_images_path (#809)
wbruna Sep 10, 2025
abb36d6
chore: update flash attention warnings (#805)
wbruna Sep 10, 2025
ac5a215
fix: use {} for params init instead of memset (#781)
wbruna Sep 10, 2025
b017918
chore: remove sd3 flash attention warn (#812)
leejet Sep 10, 2025
87cdbd5
feat: use log_printf to print ggml logs (#545)
clibdev Sep 11, 2025
6bbaf16
chore: add install() support in CMakeLists.txt (#540)
clibdev Sep 11, 2025
49d6570
feat: add SmoothStep Scheduler (#813)
Green-Sky Sep 11, 2025
fce6afc
feat: add sd3 flash attn support (#815)
leejet Sep 11, 2025
ddc4a18
fix: make tiled VAE reuse the compute buffer (#821)
wbruna Sep 14, 2025
48956ff
feat: reduce CLIP memory usage with no embeddings (#768)
wbruna Sep 14, 2025
5869987
fix: make weight override more robust against ggml changes (#760)
wbruna Sep 14, 2025
b54bec3
fix: do not force VAE type to f32 on SDXL (#716)
wbruna Sep 14, 2025
c607fc3
feat: use Euler sampling by default for SD3 and Flux (#753)
wbruna Sep 14, 2025
9e7befa
fix: harden for large files (#643)
Green-Sky Sep 14, 2025
a6a8569
feat: Add SYCL Dockerfile (#651)
richiejp Sep 14, 2025
dc46993
feat: increase work_ctx memory buffer size (#814)
leejet Sep 14, 2025
288e2d6
docs: update docs
leejet Sep 14, 2025
2c9b1e2
feat: add VAE encoding tiling support and adaptive overlap (#484)
stduhpf Sep 14, 2025
52a97b3
feat: add vace support (#819)
leejet Sep 14, 2025
55c2e05
feat: optimize tensor loading time (#790)
rmatif Sep 14, 2025
0ebe6fe
refactor: simplify the logic of pm id image loading (#827)
leejet Sep 14, 2025
8376dfb
feat: add sgm_uniform scheduler, simple scheduler, and support for Ni…
rmatif Sep 16, 2025
8909523
refactor: move tiling cacl and debug print into the tiling code branc…
Green-Sky Sep 16, 2025
97ad3e7
refactor: simplify DPM++ (2S) Ancestral (#667)
vmobilis Sep 16, 2025
79426d5
chore: set release tag by commit count
leejet Sep 16, 2025
1e5f207
chore: fix workflow (#836)
leejet Sep 17, 2025
567f9f1
fix: avoid multithreading issues in the model loader
leejet Sep 17, 2025
171b222
fix: avoid segfault for pix2pix models without reference images (#766)
wbruna Sep 17, 2025
fd693ac
refactor: remove unused --normalize-input parameter (#835)
leejet Sep 17, 2025
057abde
merge with master fd693ac6a2ab
wbruna Sep 20, 2025
1e0d282
fix: correct tensor deduplication logic (#844)
rmatif Sep 24, 2025
513f36d
docs: include Vulkan compatibility for LoRA quants (#845)
wbruna Sep 24, 2025
98ba155
docs: HipBLAS / ROCm build instruction fix (#843)
Stefan-Olt Sep 24, 2025
f3140ea
fix: tensor loading thread count (#854)
wbruna Sep 24, 2025
2abe945
fix: optimize the handling of CLIP embedding weight (#840)
leejet Sep 24, 2025
1ba30ce
sync: update ggml
leejet Sep 24, 2025
6ad46bb
sync: update ggml
leejet Sep 25, 2025
35843c7
fix: optimize the handling of embedding weight (#859)
leejet Sep 25, 2025
11f436c
feat: add support for Flux Controls and Flex.2 (#692)
stduhpf Oct 10, 2025
b451728
docs: update README.md (#866)
sharuzzaman Oct 10, 2025
940a201
chore: fix dockerfile libgomp1 dependency + improvements (#852)
SergeantSerk Oct 10, 2025
e12d5e0
fix: ensure directory iteration results are sorted by filename (#858)
leejet Oct 10, 2025
02af48a
chore: fix vulkan ci (#878)
leejet Oct 10, 2025
e70d020
feat: add support for more esrgan models & x2 & x1 models (#855)
pedroCabrera Oct 12, 2025
5b261b9
feat: add a stand-alone upscale mode (#865)
wbruna Oct 12, 2025
aa68b87
refactor: deal with default img-cfg-scale at the library level (#869)
wbruna Oct 12, 2025
beb99a2
feat: add Qwen Image support (#851)
leejet Oct 12, 2025
9727c6b
fix: resolve VAE tiling problem in Qwen Image (#873)
wbruna Oct 12, 2025
1c32fa0
fix: avoid generating black images when running T5 on the GPU (#882)
leejet Oct 12, 2025
5436f6b
fix: correct canny preprocessor (#861)
wbruna Oct 13, 2025
c64994d
fix: better progress display for second-order samplers (#834)
wbruna Oct 13, 2025
2e9242e
feat: add Qwen Image Edit support (#877)
leejet Oct 13, 2025
4fa6350
Merge 'master-323-2e9242e' into apg_merge_master
wbruna Oct 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ jobs:
runs-on: windows-2025

env:
VULKAN_VERSION: 1.3.261.1
VULKAN_VERSION: 1.4.328.1

strategy:
matrix:
Expand All @@ -163,7 +163,7 @@ jobs:
- build: "avx512"
defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
- build: "cuda12"
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;86;80;75"
# - build: "rocm5.5"
# defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
- build: 'vulkan'
Expand Down Expand Up @@ -199,9 +199,9 @@ jobs:
version: 1.11.1
- name: Install Vulkan SDK
id: get_vulkan
if: ${{ matrix.build == 'vulkan' }}
if: ${{ matrix.build == 'vulkan' }} https://sdk.lunarg.com/sdk/download/1.4.328.1/windows/vulkansdk-windows-X64-1.4.328.1.exe
run: |
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
Expand Down Expand Up @@ -254,15 +254,15 @@ jobs:

- name: Copy and pack Cuda runtime
id: pack_cuda_runtime
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' && matrix.build == 'cuda12' ) || github.event.inputs.create_release == 'true' }}
if: ${{ matrix.build == 'cuda12' && (github.event_name == 'push' && github.ref == 'refs/heads/master' || github.event.inputs.create_release == 'true') }}
run: |
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
$dst='.\build\bin\cudart\'
robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
7z a cudart-sd-bin-win-cu12-x64.zip $dst\*

- name: Upload Cuda runtime
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' && matrix.build == 'cuda12' ) || github.event.inputs.create_release == 'true' }}
if: ${{ matrix.build == 'cuda12' && (github.event_name == 'push' && github.ref == 'refs/heads/master' || github.event.inputs.create_release == 'true') }}
uses: actions/upload-artifact@v4
with:
name: sd-cudart-sd-bin-win-cu12-x64.zip
Expand All @@ -288,6 +288,11 @@ jobs:
- windows-latest-cmake

steps:
- name: Clone
uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Download artifacts
id: download-artifact
uses: actions/download-artifact@v4
Expand All @@ -296,20 +301,27 @@ jobs:
pattern: sd-*
merge-multiple: true

- name: Get commit count
id: commit_count
run: |
echo "count=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT

- name: Get commit hash
id: commit
uses: pr-mpt/actions-commit-hash@v2

- name: Create release
id: create_release
if: ${{ github.event_name == 'workflow_dispatch' || github.ref_name == 'master' }}
uses: anzz1/action-create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}
tag_name: ${{ format('{0}-{1}-{2}', env.BRANCH_NAME, steps.commit_count.outputs.count, steps.commit.outputs.short) }}

- name: Upload release
id: upload_release
if: ${{ github.event_name == 'workflow_dispatch' || github.ref_name == 'master' }}
uses: actions/github-script@v3
with:
github-token: ${{secrets.GITHUB_TOKEN}}
Expand Down
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
build*/
cmake-build-*/
test/
.vscode/
.idea/
.cache/
*.swp
.vscode/
*.bat
*.bin
*.exe
*.gguf
output*.png
models*
*.log
*.log
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "ggml"]
path = ggml
url = https://github.com/ggerganov/ggml.git
url = https://github.com/ggml-org/ggml.git
23 changes: 19 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ option(SD_SYCL "sd: sycl backend" OFF)
option(SD_MUSA "sd: musa backend" OFF)
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
#option(SD_BUILD_SERVER "sd: build server example" ON)

if(SD_CUDA)
Expand Down Expand Up @@ -118,23 +119,37 @@ endif()

set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

# see https://github.com/ggerganov/ggml/pull/682
add_definitions(-DGGML_MAX_NAME=128)
if (NOT SD_USE_SYSTEM_GGML)
# see https://github.com/ggerganov/ggml/pull/682
add_definitions(-DGGML_MAX_NAME=128)
endif()

# deps
# Only add ggml if it hasn't been added yet
if (NOT TARGET ggml)
add_subdirectory(ggml)
if (SD_USE_SYSTEM_GGML)
find_package(ggml REQUIRED)
if (NOT ggml_FOUND)
message(FATAL_ERROR "System-installed GGML library not found.")
endif()
add_library(ggml ALIAS ggml::ggml)
else()
add_subdirectory(ggml)
endif()
endif()

add_subdirectory(thirdparty)

target_link_libraries(${SD_LIB} PUBLIC ggml zip)
target_include_directories(${SD_LIB} PUBLIC . thirdparty)
target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)


if (SD_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()

set(SD_PUBLIC_HEADERS stable-diffusion.h)
set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")

install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
13 changes: 9 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build
FROM ubuntu:$UBUNTU_VERSION AS build

RUN apt-get update && apt-get install -y build-essential git cmake
RUN apt-get update && apt-get install -y --no-install-recommends build-essential git cmake

WORKDIR /sd.cpp

COPY . .

RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
RUN cmake . -B ./build
RUN cmake --build ./build --config Release --parallel

FROM ubuntu:$UBUNTU_VERSION as runtime
FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
apt-get install --yes --no-install-recommends libgomp1 && \
apt-get clean

COPY --from=build /sd.cpp/build/bin/sd /sd

Expand Down
7 changes: 4 additions & 3 deletions Dockerfile.musa
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
ARG MUSA_VERSION=rc3.1.1
ARG MUSA_VERSION=rc4.2.0
ARG UBUNTU_VERSION=22.04

FROM mthreads/musa:${MUSA_VERSION}-devel-ubuntu22.04 as build
FROM mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64 as build

RUN apt-get update && apt-get install -y ccache cmake git

Expand All @@ -15,7 +16,7 @@ RUN mkdir build && cd build && \
-DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release && \
cmake --build . --config Release

FROM mthreads/musa:${MUSA_VERSION}-runtime-ubuntu22.04 as runtime
FROM mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64 as runtime

COPY --from=build /sd.cpp/build/bin/sd /sd

Expand Down
19 changes: 19 additions & 0 deletions Dockerfile.sycl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Multi-stage image that builds the `sd` binary with the SYCL backend
# (-DSD_SYCL=ON) using the Intel oneAPI compilers.
#
# SYCL_VERSION selects the intel/oneapi-basekit image tag used by both
# stages; override it with `--build-arg SYCL_VERSION=<tag>`.
ARG SYCL_VERSION=2025.1.0-0

# ---- build stage: configure and compile sd.cpp with icx / icpx ----
FROM intel/oneapi-basekit:${SYCL_VERSION}-devel-ubuntu24.04 AS build

# Keep update + install + list cleanup in a single layer so the package
# index is never cached stale and never persisted in the layer.
RUN apt-get update && \
    apt-get install -y cmake && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /sd.cpp

COPY . .

# Configure out-of-tree with -S/-B instead of `mkdir build && cd build`,
# so no RUN step depends on a shell-local working directory.
RUN cmake -S . -B build \
      -DCMAKE_C_COMPILER=icx \
      -DCMAKE_CXX_COMPILER=icpx \
      -DSD_SYCL=ON \
      -DCMAKE_BUILD_TYPE=Release

# Build in its own layer, using every core available to the builder.
RUN cmake --build build --config Release -j"$(nproc)"

# ---- runtime stage: only the compiled binary is carried over ----
# NOTE(review): this stage reuses the -devel basekit image, presumably so the
# oneAPI/SYCL shared libraries are present at run time; confirm whether a
# smaller runtime-only base image would be sufficient.
FROM intel/oneapi-basekit:${SYCL_VERSION}-devel-ubuntu24.04 AS runtime

COPY --from=build /sd.cpp/build/bin/sd /sd

# Exec form: /sd is the container's main process and receives the
# `docker run` arguments directly.
ENTRYPOINT [ "/sd" ]
Loading
Loading