real-stanford
diff --git a/‎alper_env.yaml
Lines changed: 246 additions & 0 deletions b/‎alper_env.yaml
Lines changed: 246 additions & 0 deletions
diff --git a/‎diffusion_policy/common/schedulers.py
Lines changed: 131 additions & 0 deletions b/‎diffusion_policy/common/schedulers.py
Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,246 @@
+name: robodiff
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_kmp_llvm
+  - aom=3.6.1=h59595ed_0
+  - blas=2.116=mkl
+  - blas-devel=3.9.0=16_linux64_mkl
+  - brotli-python=1.1.0=py310hf71b8c6_2
+  - bzip2=1.0.8=h4bc722e_7
+  - ca-certificates=2024.8.30=hbcca054_0
+  - certifi=2024.8.30=pyhd8ed1ab_0
+  - cffi=1.17.1=py310h8deb56e_0
+  - charset-normalizer=3.4.0=pyhd8ed1ab_0
+  - cpython=3.10.15=py310hd8ed1ab_2
+  - cuda-cudart=12.4.127=0
+  - cuda-cupti=12.4.127=0
+  - cuda-libraries=12.4.1=0
+  - cuda-nvrtc=12.4.127=0
+  - cuda-nvtx=12.4.127=0
+  - cuda-opencl=12.6.77=0
+  - cuda-runtime=12.4.1=0
+  - cuda-version=12.6=3
+  - expat=2.6.3=h5888daf_0
+  - ffmpeg=4.4.2=gpl_hdf48244_113
+  - filelock=3.16.1=pyhd8ed1ab_0
+  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+  - font-ttf-inconsolata=3.000=h77eed37_0
+  - font-ttf-source-code-pro=2.038=h77eed37_0
+  - font-ttf-ubuntu=0.83=h77eed37_3
+  - fontconfig=2.14.2=h14ed4e7_0
+  - fonts-conda-ecosystem=1=0
+  - fonts-conda-forge=1=0
+  - freetype=2.12.1=h267a509_2
+  - gettext=0.22.5=he02047a_3
+  - gettext-tools=0.22.5=he02047a_3
+  - giflib=5.2.2=hd590300_0
+  - gmp=6.3.0=hac33072_2
+  - gmpy2=2.1.5=py310he8512ff_2
+  - gnutls=3.7.9=hb077bed_0
+  - h2=4.1.0=pyhd8ed1ab_0
+  - hpack=4.0.0=pyh9f0ad1d_0
+  - hyperframe=6.0.1=pyhd8ed1ab_0
+  - icu=75.1=he02047a_0
+  - idna=3.10=pyhd8ed1ab_0
+  - jinja2=3.1.4=pyhd8ed1ab_0
+  - lame=3.100=h166bdaf_1003
+  - lcms2=2.16=hb7c19ff_0
+  - ld_impl_linux-64=2.43=h712a8e2_1
+  - lerc=4.0.0=h27087fc_0
+  - libasprintf=0.22.5=he8f35ee_3
+  - libasprintf-devel=0.22.5=he8f35ee_3
+  - libblas=3.9.0=16_linux64_mkl
+  - libcblas=3.9.0=16_linux64_mkl
+  - libcublas=12.4.5.8=0
+  - libcufft=11.2.1.3=0
+  - libcufile=1.11.1.6=0
+  - libcurand=10.3.7.77=0
+  - libcusolver=11.6.1.9=0
+  - libcusparse=12.3.1.170=0
+  - libdeflate=1.22=hb9d3cd8_0
+  - libdrm=2.4.123=hb9d3cd8_0
+  - libegl=1.7.0=ha4b6fd6_1
+  - libexpat=2.6.3=h5888daf_0
+  - libffi=3.4.2=h7f98852_5
+  - libgcc=14.2.0=h77fa898_1
+  - libgcc-ng=14.2.0=h69a702a_1
+  - libgettextpo=0.22.5=he02047a_3
+  - libgettextpo-devel=0.22.5=he02047a_3
+  - libgfortran=14.2.0=h69a702a_1
+  - libgfortran-ng=14.2.0=h69a702a_1
+  - libgfortran5=14.2.0=hd5240d6_1
+  - libgl=1.7.0=ha4b6fd6_1
+  - libglvnd=1.7.0=ha4b6fd6_1
+  - libglx=1.7.0=ha4b6fd6_1
+  - libgomp=14.2.0=h77fa898_1
+  - libhwloc=2.11.1=default_hecaa2ac_1000
+  - libiconv=1.17=hd590300_2
+  - libidn2=2.3.7=hd590300_0
+  - libjpeg-turbo=3.0.0=hd590300_1
+  - liblapack=3.9.0=16_linux64_mkl
+  - liblapacke=3.9.0=16_linux64_mkl
+  - libnpp=12.2.5.30=0
+  - libnsl=2.0.1=hd590300_0
+  - libnvfatbin=12.6.77=0
+  - libnvjitlink=12.4.127=0
+  - libnvjpeg=12.3.1.117=0
+  - libpciaccess=0.18=hd590300_0
+  - libpng=1.6.44=hadc24fc_0
+  - libsqlite=3.46.1=hadc24fc_0
+  - libstdcxx=14.2.0=hc0a3c3a_1
+  - libstdcxx-ng=14.2.0=h4852527_1
+  - libtasn1=4.19.0=h166bdaf_0
+  - libtiff=4.7.0=he137b08_1
+  - libunistring=0.9.10=h7f98852_0
+  - libuuid=2.38.1=h0b41bf4_0
+  - libva=2.22.0=h8a09558_1
+  - libvpx=1.13.1=h59595ed_0
+  - libwebp=1.4.0=h2c329e2_0
+  - libwebp-base=1.4.0=hd590300_0
+  - libxcb=1.17.0=h8a09558_0
+  - libxcrypt=4.4.36=hd590300_1
+  - libxml2=2.12.7=he7c6b58_4
+  - libzlib=1.3.1=hb9d3cd8_2
+  - llvm-openmp=15.0.7=h0cdce71_0
+  - markupsafe=3.0.1=py310h89163eb_1
+  - mkl=2022.1.0=h84fe81f_915
+  - mkl-devel=2022.1.0=ha770c72_916
+  - mkl-include=2022.1.0=h84fe81f_915
+  - mpc=1.3.1=h24ddda3_1
+  - mpfr=4.2.1=h90cbb55_3
+  - mpmath=1.3.0=pyhd8ed1ab_0
+  - ncurses=6.5=he02047a_1
+  - nettle=3.9.1=h7ab15ed_0
+  - networkx=3.4.1=pyhd8ed1ab_0
+  - numpy=2.1.2=py310hd6e36ab_0
+  - openh264=2.3.1=hcb278e6_2
+  - openjpeg=2.5.2=h488ebb8_0
+  - openssl=3.3.2=hb9d3cd8_0
+  - p11-kit=0.24.1=hc5aa10d_0
+  - pillow=11.0.0=py310hfeaa1f3_0
+  - pip=24.2=pyh8b19718_1
+  - pthread-stubs=0.4=hb9d3cd8_1002
+  - pycparser=2.22=pyhd8ed1ab_0
+  - pysocks=1.7.1=pyha2e5f31_6
+  - python=3.10.15=h4a871b0_2_cpython
+  - python_abi=3.10=5_cp310
+  - pytorch=2.5.0=py3.10_cuda12.4_cudnn9.1.0_0
+  - pytorch-cuda=12.4=hc786d27_7
+  - pytorch-mutex=1.0=cuda
+  - pyyaml=6.0.2=py310ha75aee5_1
+  - readline=8.2=h8228510_1
+  - requests=2.32.3=pyhd8ed1ab_0
+  - svt-av1=1.4.1=hcb278e6_0
+  - tbb=2021.13.0=h84d6215_0
+  - tk=8.6.13=noxft_h4845f30_101
+  - torchaudio=2.5.0=py310_cu124
+  - torchtriton=3.1.0=py310
+  - torchvision=0.20.0=py310_cu124
+  - typing_extensions=4.12.2=pyha770c72_0
+  - tzdata=2024b=hc8b5060_0
+  - urllib3=2.2.3=pyhd8ed1ab_0
+  - wayland=1.23.1=h3e06ad9_0
+  - wayland-protocols=1.37=hd8ed1ab_0
+  - wheel=0.44.0=pyhd8ed1ab_0
+  - x264=1!164.3095=h166bdaf_2
+  - x265=3.5=h924138e_3
+  - xorg-libx11=1.8.10=h4f16b4b_0
+  - xorg-libxau=1.0.11=hb9d3cd8_1
+  - xorg-libxdmcp=1.1.5=hb9d3cd8_0
+  - xorg-libxext=1.3.6=hb9d3cd8_0
+  - xorg-libxfixes=6.0.1=hb9d3cd8_0
+  - xorg-xorgproto=2024.1=hb9d3cd8_1
+  - xz=5.2.6=h166bdaf_0
+  - yaml=0.2.5=h7f98852_2
+  - zstandard=0.23.0=py310ha39cb0e_1
+  - zstd=1.5.6=ha6fb4c9_0
+  - pip:
+      - aiosignal==1.3.1
+      - antlr4-python3-runtime==4.9.3
+      - asciitree==0.3.3
+      - attrs==24.2.0
+      - av==13.1.0
+      - blessed==1.20.0
+      - click==8.1.7
+      - cloudpickle==3.1.0
+      - contourpy==1.3.0
+      - cycler==0.12.1
+      - diffusers==0.31.0
+      - dill==0.3.9
+      - docker-pycreds==0.4.0
+      - egl-probe==1.0.2
+      - einops==0.8.0
+      - etils==1.10.0
+      - evdev==1.7.1
+      - fasteners==0.19
+      - fonttools==4.54.1
+      - frozenlist==1.5.0
+      - fsspec==2024.9.0
+      - gitdb==4.0.11
+      - gitpython==3.1.43
+      - glfw==2.7.0
+      - gpustat==1.1.1
+      - gym==0.26.2
+      - gym-notices==0.0.8
+      - h5py==3.12.1
+      - huggingface-hub==0.26.0
+      - hydra-core==1.3.2
+      - imagecodecs==2024.9.22
+      - imageio==2.36.0
+      - imageio-ffmpeg==0.5.1
+      - importlib-metadata==8.5.0
+      - importlib-resources==6.4.5
+      - jsonschema==4.23.0
+      - jsonschema-specifications==2024.10.1
+      - kiwisolver==1.4.7
+      - lazy-loader==0.4
+      - llvmlite==0.43.0
+      - markdown==3.7
+      - matplotlib==3.9.2
+      - msgpack==1.1.0
+      - mujoco==3.2.4
+      - numba==0.60.0
+      - numcodecs==0.13.1
+      - nvidia-ml-py==12.560.30
+      - omegaconf==2.3.0
+      - opencv-python==4.10.0.84
+      - packaging==24.1
+      - platformdirs==4.3.6
+      - protobuf==5.28.2
+      - psutil==6.1.0
+      - pygame==2.6.1
+      - pymunk==6.9.0
+      - pynput==1.7.7
+      - pyopengl==3.1.7
+      - pyparsing==3.2.0
+      - python-xlib==0.33
+      - pytz==2024.2
+      - ray==2.39.0
+      - referencing==0.35.1
+      - regex==2024.9.11
+      - robosuite==1.4.1
+      - rpds-py==0.21.0
+      - safetensors==0.4.5
+      - scikit-image==0.24.0
+      - scipy==1.14.1
+      - sentry-sdk==2.17.0
+      - setproctitle==1.3.3
+      - setuptools==75.2.0
+      - shapely==2.0.6
+      - six==1.16.0
+      - smmap==5.0.1
+      - sympy==1.13.1
+      - tensorboardx==2.6.2.2
+      - termcolor==2.5.0
+      - threadpoolctl==3.5.0
+      - tifffile==2024.9.20
+      - tqdm==4.66.5
+      - wandb==0.18.5
+      - wcwidth==0.2.13
+      - zarr==2.18.3
+      - zipp==3.20.2
+prefix: /local/vondrick/alper/miniforge3/envs/robodiff
@@ -0,0 +1,131 @@
+
+# Copyright 2024 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.utils import BaseOutput, logging
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
def logit(x):
    """
    Return the element-wise log-odds of `x`, computed as `log(x) - log(1 - x)`.

    Args:
        x (`torch.Tensor`):
            Input values. No clamping is applied here, so entries equal to 0 or 1 produce infinities.

    Returns:
        `torch.Tensor`: The log-odds, with the broadcasted shape of the two logarithm terms.
    """
    log_p = torch.log(x)
    log_one_minus_p = torch.log(1 - x)
    return log_p - log_one_minus_p
+
def logit_normal_pdf(x, m, s):
    """
    Evaluate the logit-normal probability density on (0, 1).

    The density is `1 / (s * sqrt(2 * pi)) * 1 / (x * (1 - x)) * exp(-(logit(x) - m)**2 / (2 * s**2))`.

    Args:
        x (`torch.Tensor` or array-like / `float`):
            Points at which to evaluate the density. Values are clamped to `[1e-7, 1 - 1e-7]` so the endpoints
            0 and 1 do not produce divisions by zero or infinite logits.
        m (`float`):
            Mean of the underlying normal distribution (in logit space).
        s (`float`):
            Standard deviation of the underlying normal distribution (in logit space).

    Returns:
        `torch.Tensor`: Density values with the same shape as `x`. The result is detached from any autograd graph
        that `x` belongs to.
    """
    # `as_tensor(...).detach()` accepts tensors and plain numbers alike without the copy-construct warning that
    # `torch.tensor(tensor)` raises, while still cutting the result off from `x`'s autograd graph.
    x = torch.as_tensor(x).detach().clamp(1e-7, 1 - 1e-7)

    normalizer = 1 / (s * math.sqrt(2 * math.pi))
    # Jacobian of the logit transform. The parentheses matter: `1 / x * (1 - x)` would evaluate to `(1 - x) / x`.
    jacobian = 1 / (x * (1 - x))
    gaussian = torch.exp(-(torch.logit(x) - m) ** 2 / (2 * s ** 2))
    return normalizer * jacobian * gaussian
+
@dataclass
class FlowMatchEulerDiscreteSchedulerOutput(BaseOutput):
    """
    Container returned by the scheduler's `step` method.

    Args:
        prev_sample (`torch.FloatTensor`):
            The sample produced by one scheduler step. It is meant to be fed back as the `sample` argument of the
            next `step` call in the sampling loop.
    """

    prev_sample: torch.FloatTensor
+
+
class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
    """
    Flow-matching scheduler with a fixed-step Euler update.

    With the normalized time `t = timestep / num_train_timesteps`, `add_noise` builds the linear interpolation
    `t * original_samples + (1 - t) * noise`, so `t = 0` is pure noise and `t = 1` is the clean sample. `step`
    advances a sample by `model_output / num_inference_steps`, i.e. it treats the model output as the rate of
    change of the sample over the unit time interval.

    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
    methods the library implements for all schedulers such as loading and saving.

    Args:
        num_train_timesteps (`int`, defaults to 1024):
            Number of discrete training timesteps; also the divisor that maps a timestep index to `[0, 1)`.
        prediction_type (`str`, defaults to `"flow"`):
            Stored on the scheduler and in its config. No method of this class reads it.
        sampling_weight (`str`, defaults to `"logit_normal"`):
            How `sample_timesteps` draws training timesteps. `"logit_normal"` weights the indices with a
            logit-normal density; any other value falls back to uniform sampling.
    """

    _compatibles = []
    order = 1

    @register_to_config
    def __init__(
        self,
        num_train_timesteps: int = 1024,
        prediction_type: str = 'flow',
        sampling_weight: str = 'logit_normal',
    ):
        self.prediction_type = prediction_type
        self.num_train_timesteps = num_train_timesteps
        self.sampling_weight = sampling_weight

        # Filled in by `set_timesteps`; `step` refuses to run while this is still `None`.
        self.timesteps = None

    def add_noise(self, original_samples: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor:
        """
        Interpolate between `noise` and `original_samples` at the given timesteps.

        Args:
            original_samples (`torch.Tensor`):
                Clean samples.
            noise (`torch.Tensor`):
                Noise to blend in; must broadcast against `original_samples`.
            timesteps (`torch.Tensor`):
                Timestep values. They are moved to the device of `original_samples`, cast to float and divided by
                `num_train_timesteps` before being used as blend weights.

        Returns:
            `torch.Tensor`: `original_samples * t + noise * (1 - t)` with `t` the normalized timesteps.
        """
        t = timesteps.to(original_samples.device).float() / self.num_train_timesteps

        # Append singleton axes so the per-timestep weights broadcast over the remaining sample dimensions.
        missing_dims = original_samples.ndim - t.ndim
        if missing_dims > 0:
            t = t[(...,) + (None,) * missing_dims]

        return original_samples * t + noise * (1 - t)

    def sample_timesteps(self, bsz: int, device) -> torch.Tensor:
        """
        Draw `bsz` training timestep indices in `[0, num_train_timesteps)`.

        Args:
            bsz (`int`):
                Number of indices to draw.
            device:
                Device on which the sampling tensors are created.

        Returns:
            `torch.Tensor`: Integer (`long`) tensor of shape `(bsz,)`.
        """
        if self.sampling_weight == 'logit_normal':
            grid = torch.linspace(0, 1, self.num_train_timesteps, device=device)
            # The constant floor keeps every index at a strictly positive probability.
            weights = logit_normal_pdf(grid, m=0.0, s=1.0) + 1e-3
            weights = weights / weights.sum()
            return torch.multinomial(weights, bsz, replacement=True).long()

        # Any other `sampling_weight` value means uniform sampling.
        return torch.randint(0, self.num_train_timesteps, (bsz,), device=device).long()

    def set_timesteps(self, num_inference_steps: int) -> None:
        """
        Set the inference schedule to `num_inference_steps` evenly spaced timesteps.

        The schedule starts at 0 and excludes `num_train_timesteps` itself. The method name and signature follow
        the scheduler interface used by the Diffusion Policy inference code.

        Args:
            num_inference_steps (`int`):
                Number of Euler steps to take at inference time; must be at least 1.

        Raises:
            ValueError: If `num_inference_steps` is smaller than 1.
        """
        if num_inference_steps < 1:
            raise ValueError(f"`num_inference_steps` must be at least 1, got {num_inference_steps}.")
        # Build n + 1 points including the end point, then drop the end point.
        self.timesteps = np.linspace(0, self.num_train_timesteps, num_inference_steps + 1)[:-1]

    def step(self, model_output, timestep, sample, generator=None, **kwargs):
        """
        Advance `sample` by one Euler step of size `1 / len(self.timesteps)`.

        Args:
            model_output (`torch.Tensor`):
                Output of the model for the current sample.
            timestep:
                Current timestep. Accepted for interface compatibility; the update does not depend on it.
            sample (`torch.Tensor`):
                Current sample.
            generator:
                Accepted for interface compatibility; unused.
            **kwargs:
                Ignored.

        Returns:
            [`FlowMatchEulerDiscreteSchedulerOutput`]: Holds `sample + model_output * dt` as `prev_sample`.

        Raises:
            ValueError: If `set_timesteps` has not been called yet.
        """
        if self.timesteps is None or len(self.timesteps) == 0:
            raise ValueError(
                "Number of inference steps is not set; call `set_timesteps` before calling `step`."
            )

        dt = 1.0 / len(self.timesteps)
        prev_sample = model_output * dt + sample

        return FlowMatchEulerDiscreteSchedulerOutput(prev_sample=prev_sample)

    def __len__(self):
        """Return the number of training timesteps stored in the config."""
        return self.config.num_train_timesteps
+    
if __name__ == "__main__":
    # Manual smoke test: print the output of `add_noise` before and after configuring the inference schedule,
    # then take a single `step`.
    scheduler = FlowMatchEulerDiscreteScheduler(1024)
    demo_timesteps = torch.tensor([1., 2., 3., 4., 5.])

    # `timesteps` must be passed by keyword (or third): the second positional parameter of `add_noise` is `noise`,
    # so passing the timestep tensor there together with `noise=` raises "got multiple values for argument".
    # The five timesteps broadcast against the single (1, 1024) sample.
    print(scheduler.add_noise(torch.randn(1, 1024), noise=torch.randn(1, 1024), timesteps=demo_timesteps))

    for num_inference_steps in (8, 16):
        scheduler.set_timesteps(num_inference_steps)
        print(scheduler.add_noise(torch.randn(1, 1024), noise=torch.randn(1, 1024), timesteps=demo_timesteps))

    # do a step
    print(scheduler.step(torch.randn(1, 1024), demo_timesteps, torch.randn(1, 1024)))