From 51d1d95ec0ee8b42a5d8dd39a5087d762ec58b81 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 11 Feb 2024 16:37:32 -0300 Subject: [PATCH 01/43] Autocast --- src/Native/LibTorchSharp/THSTorch.cpp | 112 +++++++++++++++++- src/Native/LibTorchSharp/THSTorch.h | 34 +++++- .../PInvoke/LibTorchSharp.THSTorch.cs | 40 +++++++ src/TorchSharp/Tensor/torch.Autocast.cs | 79 ++++++++++++ 4 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 src/TorchSharp/Tensor/torch.Autocast.cs diff --git a/src/Native/LibTorchSharp/THSTorch.cpp b/src/Native/LibTorchSharp/THSTorch.cpp index b846557bc..1a170913c 100644 --- a/src/Native/LibTorchSharp/THSTorch.cpp +++ b/src/Native/LibTorchSharp/THSTorch.cpp @@ -323,4 +323,114 @@ double THSSpecial_erf_scalar(const double x) double THSSpecial_erfc_scalar(const double x) { return erfc(x); -} \ No newline at end of file +} + +bool THSTorch_is_torch_function_mode_enabled() +{ + return at::impl::torch_function_mode_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L911 +} + +bool THSTorch_is_autocast_cache_enabled() +{ + return at::autocast::is_autocast_cache_enabled(); +} + +bool THSTorch_is_autocast_cpu_enabled() +{ + return at::autocast::is_cpu_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L523 +} + +bool THSTorch_is_autocast_gpu_enabled() +{ + return at::autocast::is_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/amp/autocast_mode.py#L363 +} +bool THSTorch_is_autocast_xpu_enabled() +{ + return at::autocast::is_xpu_enabled(); +} +bool THSTorch_is_autocast_hpu_enabled() +{ + return at::autocast::is_hpu_enabled(); +} + +#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) +bool THSTorch_is_autocast_ipu_enabled() +{ + return at::autocast::is_ipu_enabled(); +} + +bool THSTorch_is_autocast_xla_enabled() +{ + return at::autocast::is_xla_enabled(); +} + +#endif + +int8_t THSTorch_get_autocast_cpu_dtype() +{ + return (int8_t)at::autocast::get_autocast_cpu_dtype(); +} + +int8_t THSTorch_get_autocast_gpu_dtype() +{ + //TODO: Implement AUTOCAST AMP AND GRADSCALER + + //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable C# Can handle all of that function (if exists) + //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py + + + //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L629 + //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/aten/src/ATen/autocast_mode.h#L20 + return (int8_t)at::autocast::get_autocast_gpu_dtype(); +} + +int8_t THSTorch_get_autocast_xpu_dtype() +{ + return (int8_t)at::autocast::get_autocast_xpu_dtype(); +} + + +int THSTorch_autocast_increment_nesting() +{ + return at::autocast::increment_nesting(); +} + +int THSTorch_autocast_decremental_nesting() +{ + return at::autocast::decrement_nesting(); +} + +void THSTorch_set_autocast_enabled(bool enabled) +{ + at::autocast::set_enabled(enabled); +} + +void THSTorch_set_autocast_cache_enabled(bool enabled) +{ + at::autocast::set_autocast_cache_enabled(enabled); +} + +void THSTorch_set_autocast_cpu_dtype(int8_t dtype) +{ + at::autocast::set_autocast_cpu_dtype((c10::ScalarType)dtype); +} + +void THSTorch_set_autocast_gpu_dtype(int8_t dtype) +{ + at::autocast::set_autocast_gpu_dtype((c10::ScalarType)dtype); +} + +void THSTorch_set_autocast_xpu_dtype(int8_t dtype) +{ + at::autocast::set_autocast_xpu_dtype((c10::ScalarType)dtype); +} + +void THSTorch_clear_autocast_cache() +{ + at::autocast::clear_cache(); +} + +/*bool THSTorch_jit_is_scripting() +{ + +}*/ \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSTorch.h b/src/Native/LibTorchSharp/THSTorch.h index 9ab80e828..dd9483f5f 100644 --- a/src/Native/LibTorchSharp/THSTorch.h +++ b/src/Native/LibTorchSharp/THSTorch.h @@ -4,7 +4,8 @@ #include "../Stdafx.h" #include "Utils.h" - +#include +//#include // API. // Sets manually the seed. @@ -91,3 +92,34 @@ EXPORT_API(void) THSTorch_dispose_scalar(Scalar scalar); EXPORT_API(double) THSSpecial_erf_scalar(const double x); EXPORT_API(double) THSSpecial_erfc_scalar(const double x); + +EXPORT_API(bool) THSTorch_is_torch_function_mode_enabled(); + +//Maybe the best work is call THSTorch_is_autocast_enabled(enum of devices c# as int8_t); +EXPORT_API(bool) THSTorch_is_autocast_cache_enabled(); +EXPORT_API(bool) THSTorch_is_autocast_cpu_enabled(); +EXPORT_API(bool) THSTorch_is_autocast_gpu_enabled(); +EXPORT_API(bool) THSTorch_is_autocast_xpu_enabled(); +EXPORT_API(bool) THSTorch_is_autocast_hpu_enabled(); + +#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) +EXPORT_API(bool) THSTorch_is_autocast_ipu_enabled(); +EXPORT_API(bool) THSTorch_is_autocast_xla_enabled(); +#endif + +EXPORT_API(int8_t) THSTorch_get_autocast_cpu_dtype(); +EXPORT_API(int8_t) THSTorch_get_autocast_gpu_dtype(); +EXPORT_API(int8_t) THSTorch_get_autocast_xpu_dtype(); + +EXPORT_API(int) THSTorch_autocast_increment_nesting(); +EXPORT_API(int) THSTorch_autocast_decrement_nesting(); + +EXPORT_API(void) THSTorch_set_autocast_enabled(bool enabled); +EXPORT_API(void) THSTorch_set_autocast_cache_enabled(bool enabled); +EXPORT_API(void) THSTorch_set_autocast_cpu_dtype(int8_t dtype); +EXPORT_API(void) THSTorch_set_autocast_gpu_dtype(int8_t dtype); +EXPORT_API(void) THSTorch_set_autocast_xpu_dtype(int8_t dtype); + +EXPORT_API(void) THSTorch_clear_autocast_cache(); + +//EXPORT_API(bool) THSTorch_jit_is_scripting(); \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs index 3d3919ee3..fb609e286 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs @@ -108,5 +108,45 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSTorch_set_num_interop_threads(int threads); + + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_torch_function_mode_enabled(); + + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_autocast_cache_enabled(); + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_autocast_cpu_enabled(); + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_autocast_gpu_enabled(); + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_autocast_xpu_enabled(); + [DllImport("LibTorchSharp")] + internal static extern bool THSTorch_is_autocast_hpu_enabled(); + + [DllImport("LibTorchSharp")] + internal static extern sbyte THSTorch_get_autocast_cpu_dtype(); + [DllImport("LibTorchSharp")] + internal static extern sbyte THSTorch_get_autocast_gpu_dtype(); + [DllImport("LibTorchSharp")] + internal static extern sbyte THSTorch_get_autocast_xpu_dtype(); + + [DllImport("LibTorchSharp")] + internal static extern int THSTorch_autocast_increment_nesting(); + [DllImport("LibTorchSharp")] + internal static extern int THSTorch_autocast_decrement_nesting(); + + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_enabled(bool enabled); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_cache_enabled(bool enabled); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_cpu_dtype(sbyte dtype); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_gpu_dtype(sbyte dtype); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_xpu_dtype(sbyte dtype); + + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_clear_autocast_cache(); } } diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs new file mode 100644 index 000000000..6745133be --- /dev/null +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -0,0 +1,79 @@ +using System; +using static TorchSharp.PInvoke.NativeMethods; + +namespace TorchSharp +{ + public static partial class torch + { + public static bool is_autocast_cache_enabled() + { + return THSTorch_is_autocast_cache_enabled(); + } + public static bool is_autocast_cpu_enabled() + { + return THSTorch_is_autocast_cpu_enabled(); + } + public static bool is_autocast_gpu_enabled() + { + return THSTorch_is_autocast_gpu_enabled(); + } + public static bool is_autocast_xpu_enabled() + { + return THSTorch_is_autocast_xpu_enabled(); + } + public static bool is_autocast_hpu_enabled() + { + return THSTorch_is_autocast_hpu_enabled(); + } + + public static ScalarType get_autocast_cpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_cpu_dtype(); + } + public static ScalarType get_autocast_gpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_gpu_dtype(); + } + public static ScalarType get_autocast_xpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_xpu_dtype(); + } + + public static int autocast_increment_nesting() + { + return THSTorch_autocast_increment_nesting(); + } + + public static int autocast_decrement_nesting() + { + return THSTorch_autocast_decrement_nesting(); + } + + public static void set_autocast_enabled(bool enabled) + { + THSTorch_set_autocast_enabled(enabled); + } + public static void set_autocast_cache_enabled(bool enabled) + { + THSTorch_set_autocast_cache_enabled(enabled); + } + + public static void set_autocast_cpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_cpu_dtype((sbyte)dtype); + } + public static void set_autocast_gpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_gpu_dtype((sbyte)dtype); + } + public static void set_autocast_xpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_xpu_dtype((sbyte)dtype); + } + + public static void clear_autocast_cache() + { + THSTorch_clear_autocast_cache(); + } + } +} \ No newline at end of file From 29b490026f9e600ec75b022cbc9dadab5330c46e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sat, 17 Feb 2024 19:17:16 -0300 Subject: [PATCH 02/43] Added some features --- .gitignore | 1 + src/Native/CMakeSettings.json | 16 ++-- src/Native/LibTorchSharp/CMakeLists.txt | 2 +- src/Native/LibTorchSharp/THSTensor.cpp | 15 ++++ src/Native/LibTorchSharp/THSTensor.h | 4 + src/TorchSharp/Amp/AutocastMode.cs | 54 +++++++++++++ src/TorchSharp/Amp/GradScaler.cs | 66 ++++++++++++++++ .../PInvoke/LibTorchSharp.THSTensor.cs | 2 + src/TorchSharp/Tensor/Tensor.cs | 9 +++ src/TorchSharp/Torch.cs | 25 +++++- src/TorchSharp/TorchSharp.csproj | 78 ------------------- 11 files changed, 187 insertions(+), 85 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastMode.cs create mode 100644 src/TorchSharp/Amp/GradScaler.cs delete mode 100644 src/TorchSharp/TorchSharp.csproj diff --git a/.gitignore b/.gitignore index bab8676e1..f34d405aa 100644 --- a/.gitignore +++ b/.gitignore @@ -272,3 +272,4 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py +/src/TorchSharp/TorchSharp.csproj diff --git a/src/Native/CMakeSettings.json b/src/Native/CMakeSettings.json index 9204f06eb..f47283578 100644 --- a/src/Native/CMakeSettings.json +++ b/src/Native/CMakeSettings.json @@ -1,15 +1,21 @@ -{ +{ "configurations": [ { "name": "x64-Debug", - "generator": "Ninja", + "generator": "Visual Studio 17 2022 Win64", "configurationType": "Debug", "inheritEnvironments": [ "msvc_x64_x64" ], "buildRoot": "${projectDir}\\out\\build\\${name}", "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "" + "cmakeCommandArgs": "-DCMAKE_PREFIX_PATH=\"K:\\FrameworksForC\\LibTorch\\libtorch-win-shared-with-deps-debug-2.0.1+cu117\"", + "ctestCommandArgs": "", + "variables": [ + { + "name": "Torch_DIR", + "value": "K:/FrameworksForC/LibTorch/libtorch-win-shared-with-deps-debug-2.0.1+cu117", + "type": "PATH" + } + ] } ] } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 17c2b7fcf..544ac3e22 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -64,7 +64,7 @@ add_library(LibTorchSharp SHARED ${SOURCES} ${RESOURCES}) target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) -set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 14) +set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 17) if(APPLE) set_target_properties(LibTorchSharp PROPERTIES INSTALL_RPATH "@loader_path;@executable_path;") diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index 2bdc96a83..f4617b5f7 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -1836,6 +1836,21 @@ Tensor THSTensor_to_type_and_device(const Tensor tensor, int8_t scalar_type, con ); } +/*Tensor THSTensor_device_and_non_blocking(const Tensor tensor, const int device_type, const int device_index, const bool non_blocking) +{ + CATCH_RETURN_Tensor( + auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); + res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + ); +}*/ +Tensor THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index,const bool non_blocking) +{ + CATCH_RETURN_Tensor( + auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); + res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + ); +} + Tensor THSTensor_triu(const Tensor tensor, const int64_t diagonal, const bool inplace) { CATCH_TENSOR(inplace ? tensor->triu_(diagonal) : tensor->triu(diagonal)); diff --git a/src/Native/LibTorchSharp/THSTensor.h b/src/Native/LibTorchSharp/THSTensor.h index 6af55912b..63bb976d7 100644 --- a/src/Native/LibTorchSharp/THSTensor.h +++ b/src/Native/LibTorchSharp/THSTensor.h @@ -1333,6 +1333,10 @@ EXPORT_API(Tensor) THSTensor_to_type(const Tensor tensor, int8_t scalar_type, co EXPORT_API(Tensor) THSTensor_to_type_and_device(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index, const bool copy); +//EXPORT_API(Tensor) THSTensor_device_and_non_blocking(const Tensor tensor, const int device_type, const int device_index, const bool non_blocking); + +EXPORT_API(Tensor) THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index, const bool non_blocking); + EXPORT_API(void) THSTensor_topk(const Tensor tensor, Tensor* (*allocator)(size_t length), const int k, const int64_t dim, const bool largest, const bool sorted); EXPORT_API(Tensor) THSTensor_trunc(const Tensor tensor); diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs new file mode 100644 index 000000000..7b9af69eb --- /dev/null +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -0,0 +1,54 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace TorchSharp.Amp +{ + public class AutocastMode : IDisposable + { + private bool Enabled, Prev; + private torch.ScalarType Dtype; + private torch.ScalarType fast_dtype; + private torch.Device Device; + public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) + { + fast_dtype = dtype.Value; + if (dev.type == DeviceType.CUDA) + fast_dtype = torch.get_autocast_gpu_dtype(); + if (dev.type == DeviceType.CPU) + fast_dtype = torch.get_autocast_cpu_dtype(); + + bool _cache_enabled = torch.is_autocast_cache_enabled(); + if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast + Enabled = false; + if (dtype.HasValue) + fast_dtype = dtype.Value; + if(cache_enabled.HasValue) + _cache_enabled=cache_enabled.Value; + + if (dev.type == DeviceType.CUDA) { + if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported()) + throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16."); + } + this.Enabled = enabled; + + this.Prev = torch.is_autocast_cpu_enabled(); + if (dev.type == DeviceType.CUDA) { + this.Prev = torch.is_autocast_gpu_enabled(); + } + throw new NotImplementedException(); + } + public void Dispose() + { + if (Device.type == DeviceType.CUDA) { + if(torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + torch.set_autocast_gpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(this.Prev); + } + throw new NotImplementedException(); + } + } +} diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs new file mode 100644 index 000000000..6da7a9dab --- /dev/null +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace TorchSharp.Amp +{ + public class GradScaler + { + private bool Enabled; + + private torch.Tensor _scale, _growth_tracker; + + private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; + + //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py + public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, + float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) + { + Debug.Assert(dev == torch.CPU || dev == torch.CUDA); + this.Enabled = enabled; + this.InitScale = init_scale; + this.GrowthFactor = growth_factor; + this.BackoffFactor = backoff_factor; + this.GrowthInterval = growth_interval; + this.InitGrowthTracker = 0.0f; + throw new NotImplementedException(); + } + + private void LazyInitScaleGrowthTracker(torch.Device dev) + { + this._scale = torch.full(0, this.InitScale, torch.ScalarType.Float32, device: dev); + this._growth_tracker = torch.full(0, this.InitGrowthTracker, torch.ScalarType.Float32, device: dev); + } + + //private check_scale_growth_tracker + public torch.Tensor scale(torch.Tensor output) + { + if (!Enabled) + return output; + if (_scale.numel() == 0) + this.LazyInitScaleGrowthTracker(output.device); + return output * this._scale.to(output.device, output.dtype, true); + } + + public torch.Tensor unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16) + { + return false; + } + + public void unscale(torch.optim.Optimizer optimizer) + { + if (!Enabled) + return; + + + } + /*public IList scale(IList outputs) + { + + + }*/ + } +} \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index c82b659a3..28b3b6f2f 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -293,6 +293,8 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSTensor_to_type_and_device_and_non_blocking(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool non_blocking); [DllImport("LibTorchSharp")] internal static extern void THSTensor_set_(IntPtr tensor, IntPtr source); diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index b8b457063..83924753e 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -794,6 +794,15 @@ public Tensor to(ScalarType type, torch.Device device, bool copy = false, bool d return new Tensor(res); } + public Tensor to(torch.Device device, ScalarType type, bool non_blocking) + { + torch.InitializeDevice(device); + var res = NativeMethods.THSTensor_to_type_and_device_and_non_blocking(Handle, (sbyte)type, (int)device.type, device.index, non_blocking); + if (res == IntPtr.Zero) + CheckForErrors(); + return new Tensor(res); + } + /// /// Cast the tensor to the given element type. /// diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index 9028d2bdb..5523c8e53 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -406,7 +406,6 @@ public static void vector_to_parameters(Tensor vec, IEnumerable= 11) + return true; + } + + return check_bf16_tensor_supported(torch.CUDA); + } + + private static bool check_bf16_tensor_supported(torch.Device dev) + { + try { + var va = torch.tensor(new float[] { 1.0f }, dtype: torch.bfloat16, device: dev); + return true; + } catch { + return false; + } + } } /// diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj deleted file mode 100644 index 5a102f34e..000000000 --- a/src/TorchSharp/TorchSharp.csproj +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - net6.0;netstandard2.0 - 9.0 - TorchSharp - true - false - false - false - $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - True - True - TensorTyped.tt - - - - - - - $(PackDependsOn); - RealPack - - True - ..\..\build\TorchSharp.snk - - - - - - - - - - - - - - - - - - - - - From defd582da252fe90d5f43f90a963e5797cdb6ea5 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 13:32:16 -0300 Subject: [PATCH 03/43] Fix mistake gitignore --- .gitignore | 1 - src/Native/LibTorchSharp/THSTensor.cpp | 2 +- src/TorchSharp/Amp/AutocastMode.cs | 6 +- src/TorchSharp/TorchSharp.csproj | 88 ++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 src/TorchSharp/TorchSharp.csproj diff --git a/.gitignore b/.gitignore index f34d405aa..bab8676e1 100644 --- a/.gitignore +++ b/.gitignore @@ -272,4 +272,3 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py -/src/TorchSharp/TorchSharp.csproj diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index f4617b5f7..97499ab42 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -1847,7 +1847,7 @@ Tensor THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t { CATCH_RETURN_Tensor( auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); - res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + res = ResultTensor(tensor->to(device, at::ScalarType(scalar_type),non_blocking, false)); ); } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 7b9af69eb..c7fdaa857 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -9,9 +9,9 @@ namespace TorchSharp.Amp public class AutocastMode : IDisposable { private bool Enabled, Prev; - private torch.ScalarType Dtype; - private torch.ScalarType fast_dtype; - private torch.Device Device; + //private torch.ScalarType Dtype = torch.ScalarType.Float32; + private torch.ScalarType fast_dtype = torch.ScalarType.Float32; + private torch.Device Device = new torch.Device(DeviceType.CUDA); public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { fast_dtype = dtype.Value; diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj new file mode 100644 index 000000000..ef6d6ff94 --- /dev/null +++ b/src/TorchSharp/TorchSharp.csproj @@ -0,0 +1,88 @@ + + + + + + netstandard2.0 + 9.0 + TorchSharp + true + false + false + false + $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + True + True + TensorTyped.tt + + + + + + + $(PackDependsOn); + RealPack + + True + ..\..\build\TorchSharp.snk + + + + + 4 + + + + + 4 + + + + + + + + + + + + + + + + + + + + + From d5324020a35dccd93e67f890131d34fd9f352652 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 15:37:17 -0300 Subject: [PATCH 04/43] AMP --- src/Native/LibTorchSharp/THSTorch.cpp | 4 +- src/Native/LibTorchSharp/Utils.h | 17 ++++- src/TorchSharp/Amp/AutocastMode.cs | 68 +++++++++++++++++-- src/TorchSharp/NN/Module.cs | 25 ++++++- .../Tensor/Factories/Tensor.Factories.cs | 6 ++ .../Tensor/Factories/tensor_float.cs | 10 ++- src/TorchSharp/Tensor/torch.Autocast.cs | 17 +++++ 7 files changed, 134 insertions(+), 13 deletions(-) diff --git a/src/Native/LibTorchSharp/THSTorch.cpp b/src/Native/LibTorchSharp/THSTorch.cpp index 1a170913c..93f550de6 100644 --- a/src/Native/LibTorchSharp/THSTorch.cpp +++ b/src/Native/LibTorchSharp/THSTorch.cpp @@ -375,7 +375,7 @@ int8_t THSTorch_get_autocast_gpu_dtype() { //TODO: Implement AUTOCAST AMP AND GRADSCALER - //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable C# Can handle all of that function (if exists) + //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable can handle all of that function (if exists) //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py @@ -395,7 +395,7 @@ int THSTorch_autocast_increment_nesting() return at::autocast::increment_nesting(); } -int THSTorch_autocast_decremental_nesting() +int THSTorch_autocast_decrement_nesting() { return at::autocast::decrement_nesting(); } diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h index 4c3606491..cc0242af1 100644 --- a/src/Native/LibTorchSharp/Utils.h +++ b/src/Native/LibTorchSharp/Utils.h @@ -4,7 +4,7 @@ #include #include "torch/torch.h" - +#include extern thread_local char *torch_last_err; typedef torch::Tensor *Tensor; @@ -59,8 +59,21 @@ struct TensorArray { // Return undefined tensors as nullptr to C# inline Tensor ResultTensor(const at::Tensor & res) { - if (res.defined()) + if (res.defined()) { + /*at::Tensor* resT = new torch::Tensor(res); + if (at::autocast::is_autocast_cache_enabled()){ + if (res.is_cuda()) { + ::std::cout << "IS CUDA" << std::endl; + resT->to(at::autocast::get_autocast_gpu_dtype()); + } + if (res.is_cpu()) { + ::std::cout << "IS CPU" << std::endl; + resT->to(at::autocast::get_autocast_cpu_dtype()); + } + } + return resT;*/ return new torch::Tensor(res); + } else return nullptr; } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index c7fdaa857..43d3805fa 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -6,20 +6,42 @@ namespace TorchSharp.Amp { - public class AutocastMode : IDisposable + public static class Autocast + { + public static torch.Tensor AutoCast(this torch.Tensor input) + { + return AutocastMode.GetInstance().CastTensor(input); + } + } + //TODO: Should make Singleton and IDisposable on ENTER + public sealed class AutocastMode : IDisposable { private bool Enabled, Prev; //private torch.ScalarType Dtype = torch.ScalarType.Float32; private torch.ScalarType fast_dtype = torch.ScalarType.Float32; private torch.Device Device = new torch.Device(DeviceType.CUDA); - public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) + private static AutocastMode instance; + /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) + { + if(instance ==null) + instance = new AutocastMode(dev, dtype, enabled, cache_enabled); + return instance; + }*/ + public static AutocastMode GetInstance() { - fast_dtype = dtype.Value; + return instance ?? (instance = new AutocastMode(torch.CUDA, cache_enabled:true)); + } + + private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) + { + //var la = torch.tensor(9); + fast_dtype = dtype ?? torch.ScalarType.Float32; if (dev.type == DeviceType.CUDA) fast_dtype = torch.get_autocast_gpu_dtype(); if (dev.type == DeviceType.CPU) fast_dtype = torch.get_autocast_cpu_dtype(); - + IntPtr ptr = IntPtr.Zero; + bool _cache_enabled = torch.is_autocast_cache_enabled(); if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast Enabled = false; @@ -38,17 +60,49 @@ public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabl if (dev.type == DeviceType.CUDA) { this.Prev = torch.is_autocast_gpu_enabled(); } - throw new NotImplementedException(); + + torch.set_autocast_cache_enabled(_cache_enabled); + torch.set_autocast_enabled(this.Enabled); + //throw new NotImplementedException(); } + + /*internal void Cast(torch.Tensor tensor) + { + tensor.to(fast_dtype, tensor.device); + }*/ + + internal torch.Tensor CastTensor(torch.Tensor tensor) + { + if (!Enabled) + return tensor; + return tensor.to(fast_dtype, tensor.device); + } + /*public IDisposable Enter() + { + + return this; + }*/ public void Dispose() { + this.Enabled = false; if (Device.type == DeviceType.CUDA) { if(torch.autocast_decrement_nesting() == 0) torch.clear_autocast_cache(); torch.set_autocast_gpu_dtype(this.fast_dtype); - torch.set_autocast_enabled(this.Prev); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + + if (Device.type == DeviceType.CPU) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); } - throw new NotImplementedException(); + //throw new NotImplementedException(); } } } diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs index 4ca8a3258..911f29fd9 100644 --- a/src/TorchSharp/NN/Module.cs +++ b/src/TorchSharp/NN/Module.cs @@ -681,6 +681,8 @@ public virtual void register_buffer(string name, Tensor tensor, bool persistent if (!_internal_buffers.TryAdd(name, (tensor, persistent))) throw new InvalidOperationException($"Tensor {name} is already registered."); + + } /// @@ -700,6 +702,13 @@ public virtual void register_parameter(string name, Parameter param) if (!_internal_params.TryAdd(name, param)) throw new InvalidOperationException($"Parameter {name} is already registered."); + + /*if (is_autocast_cache_enabled()) { + if (is_autocast_gpu_enabled()) + param = param.to(get_autocast_dtype(CUDA)).AsParameter(); + if (is_autocast_cpu_enabled()) + param = param.to(get_autocast_dtype(CPU)).AsParameter(); + }*/ } /// @@ -740,7 +749,15 @@ public virtual void register_module(string name, Module submodule) } submodule.RegisterComponents(); - + if (!is_autocast_cache_enabled()) { + _internal_submodules.Add(name, submodule); + return; + } + if (is_autocast_gpu_enabled()) + submodule = submodule.to(get_autocast_dtype(CUDA)); + if (is_autocast_cpu_enabled()) + submodule = submodule.to(get_autocast_dtype(CPU)); + _internal_submodules.Add(name, submodule); } } @@ -1042,6 +1059,8 @@ protected virtual void RegisterComponents() _areComponentsRegistered = true; } + + protected static (Device device, ScalarType dtype) GetDefaultDeviceAndType(Device device = null, ScalarType? dtype = null) { if (!dtype.HasValue) @@ -1295,6 +1314,10 @@ public TResult call(T input) input = modified; } + /*if (is_autocast_cache_enabled()) { //Should i cast this for better managment??? + if(input is Tensor) + }*/ + var result = forward(input); // Call post-hooks, if available. diff --git a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs index 9bc1c562f..899342207 100644 --- a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs +++ b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs @@ -179,6 +179,12 @@ private static Tensor _tensor_generic(Array rawArray, ReadOnlySpan dimensi tensor.rename_(names); } + if (!is_autocast_cache_enabled()) + return tensor; + if (is_autocast_gpu_enabled()) + tensor = tensor.to(get_autocast_gpu_dtype()); + if (is_autocast_cpu_enabled()) + tensor = tensor.to(get_autocast_cpu_dtype()); return tensor; } } diff --git a/src/TorchSharp/Tensor/Factories/tensor_float.cs b/src/TorchSharp/Tensor/Factories/tensor_float.cs index 562c826f2..f33d1b90a 100644 --- a/src/TorchSharp/Tensor/Factories/tensor_float.cs +++ b/src/TorchSharp/Tensor/Factories/tensor_float.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; #nullable enable @@ -18,7 +19,14 @@ public static Tensor tensor(float scalar, Device? device = null, bool requires_g device = InitializeDevice(device); var handle = THSTensor_newFloat32Scalar(scalar, (int)device.type, device.index, requires_grad); if (handle == IntPtr.Zero) { CheckForErrors(); } - return new Tensor(handle); + + + var t = new Tensor(handle).AutoCast(); + /*if (is_autocast_cache_enabled()) { + if (is_autocast_gpu_enabled()) + return t.to(get_autocast_gpu_dtype()); //this work, but should put that on all tensor factorie... + }*/ + return t; } /// diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs index 6745133be..e3fc33f52 100644 --- a/src/TorchSharp/Tensor/torch.Autocast.cs +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -9,6 +9,15 @@ public static bool is_autocast_cache_enabled() { return THSTorch_is_autocast_cache_enabled(); } + + public static bool is_autocast_enabled(Device device) + { + if(device.type == DeviceType.CPU) + return THSTorch_is_autocast_cpu_enabled(); + if(device.type == DeviceType.CUDA) + return THSTorch_is_autocast_gpu_enabled(); + return THSTorch_is_autocast_cache_enabled(); + } public static bool is_autocast_cpu_enabled() { return THSTorch_is_autocast_cpu_enabled(); @@ -26,6 +35,14 @@ public static bool is_autocast_hpu_enabled() return THSTorch_is_autocast_hpu_enabled(); } + public static ScalarType get_autocast_dtype(Device device) + { + if (device.type == DeviceType.CPU) + return get_autocast_cpu_dtype(); + if (device.type == DeviceType.CUDA) + return get_autocast_gpu_dtype(); + return ScalarType.Float32; + } public static ScalarType get_autocast_cpu_dtype() { return (ScalarType)THSTorch_get_autocast_cpu_dtype(); From 0b839dbbb5bff741162ddd14ac270660325f3fca Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 21:21:49 -0300 Subject: [PATCH 05/43] Add Print Modules Still in progress --- src/Native/LibTorchSharp/THSConvolution.cpp | 8 ++++++++ src/Native/LibTorchSharp/THSNN.cpp | 12 ++++++++++++ src/Native/LibTorchSharp/THSNN.h | 5 +++++ src/Native/LibTorchSharp/Utils.h | 1 - src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs | 3 +++ src/TorchSharp/Tensor/torch.Utilities.cs | 6 ++++++ 6 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Native/LibTorchSharp/THSConvolution.cpp b/src/Native/LibTorchSharp/THSConvolution.cpp index e1500d939..27e2e62a7 100644 --- a/src/Native/LibTorchSharp/THSConvolution.cpp +++ b/src/Native/LibTorchSharp/THSConvolution.cpp @@ -683,6 +683,7 @@ void THSNN_Conv1d_set_weight(const NNModule module, const Tensor weight) set_weight(module, weight); } + NNModule THSNN_Conv2d_ctor(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelSize, const int64_t stride, const int64_t padding, const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias, @@ -757,6 +758,13 @@ void THSNN_Conv2d_set_weight(const NNModule module, const Tensor weight) set_weight(module, weight); } +/*void THSNN_Conv2d_print_options(const NNModule module) { + auto opt = (*module)->as()->options; + ::std::cout << "Conv2d (" << std::to_string(opt.in_channels()) << "," << std::to_string(opt.out_channels()) << ")" << std::endl; +}*/ + + + NNModule THSNN_Conv3d_ctor(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelSize, const int64_t stride, const int64_t padding, const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias, diff --git a/src/Native/LibTorchSharp/THSNN.cpp b/src/Native/LibTorchSharp/THSNN.cpp index 12b6a461a..a164f0f67 100644 --- a/src/Native/LibTorchSharp/THSNN.cpp +++ b/src/Native/LibTorchSharp/THSNN.cpp @@ -1334,4 +1334,16 @@ Tensor THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key, auto mask = attention_mask == nullptr ? c10::nullopt : c10::optional(*attention_mask); CATCH_TENSOR(torch::scaled_dot_product_attention(*query, *key, *value, mask, p, casual)); +} + +void THSNN_Print_Module(const NNModule module) { + if (auto* conv = (*module)->as()) + { + auto opt = conv->options; + ::std::cout << conv->name() << "(" << opt.in_channels() << "," << opt.out_channels() << ", K=" << opt.kernel_size() <<", S=" << opt.stride() << ")" << std::endl; //TODO: Add padding + } + if (auto* bn = (*module)->as()) { + auto opt = bn->options; + ::std::cout << bn->name() << "(" << opt.num_features() << ", Eps=" << opt.eps() << ", M=" << (opt.momentum().has_value() ? opt.momentum().value() : 0) << ")" << std::endl; //TODO: Add another data + } } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSNN.h b/src/Native/LibTorchSharp/THSNN.h index 07d247d87..49d293113 100644 --- a/src/Native/LibTorchSharp/THSNN.h +++ b/src/Native/LibTorchSharp/THSNN.h @@ -145,6 +145,7 @@ EXPORT_API(Tensor) THSNN_Conv2d_weight(const NNModule module); EXPORT_API(void) THSNN_Conv2d_set_weight(const NNModule module, const Tensor weight); EXPORT_API(Tensor) THSNN_Conv2d_bias(const NNModule module); EXPORT_API(void) THSNN_Conv2d_set_bias(const NNModule module, const Tensor bias); +//EXPORT_API(void) THSNN_Conv2d_print_options(const NNModule module); EXPORT_API(NNModule) THSNN_Conv3d_ctor(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelSize, const int64_t stride, const int64_t padding, const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias, NNAnyModule* outAsAnyModule); EXPORT_API(NNModule) THSNN_Conv3d_ctor_1(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelX, const int64_t kernelY, const int64_t kernelZ, const int64_t strideX, const int64_t strideY, const int64_t strideZ, const int64_t paddingX, const int64_t paddingY, const int64_t paddingZ, const int64_t dilationX, const int64_t dilationY, const int64_t dilationZ, const int64_t paddingMode, const int64_t groups, const bool bias, NNAnyModule* outAsAnyModule); EXPORT_API(Tensor) THSNN_Conv3d_forward(const NNModule module, const Tensor tensor); @@ -592,3 +593,7 @@ EXPORT_API(PackedSequence) THSNN_pack_padded_sequence(Tensor input, Tensor lengt EXPORT_API(void) THSNN_pad_packed_sequence(PackedSequence sequence, bool batch_first, double padding_value, int64_t total_length, Tensor* res1, Tensor* res2); EXPORT_API(Tensor) THSNN_pad_sequence(const Tensor* sequences, const int sequences_len, bool batch_first, double padding_value); EXPORT_API(PackedSequence) THSNN_pack_sequence(const Tensor* sequences, int sequences_len, bool enforce_sorted); + + +// Printer Modules +EXPORT_API(void) THSNN_Print_Module(const NNModule module); diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h index cc0242af1..892e0e2ec 100644 --- a/src/Native/LibTorchSharp/Utils.h +++ b/src/Native/LibTorchSharp/Utils.h @@ -2,7 +2,6 @@ #pragma once #include - #include "torch/torch.h" #include extern thread_local char *torch_last_err; diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs index 8bef36230..870e4e647 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs @@ -1318,6 +1318,9 @@ internal static extern IntPtr THSNN_custom_module( [DllImport("LibTorchSharp")] internal static extern IntPtr THSNN_MaxUnpool2d_ctor(IntPtr pkernelSize, int kernelSizeLength, IntPtr pstrides, int stridesLength, IntPtr pPadding, int paddingLength, out IntPtr pBoxedModule); + + [DllImport("LibTorchSharp")] + internal static extern void THSNN_Print_Module(torch.nn.Module.HType module); } #pragma warning restore CA2101 } diff --git a/src/TorchSharp/Tensor/torch.Utilities.cs b/src/TorchSharp/Tensor/torch.Utilities.cs index 42745a786..91d79539a 100644 --- a/src/TorchSharp/Tensor/torch.Utilities.cs +++ b/src/TorchSharp/Tensor/torch.Utilities.cs @@ -2,6 +2,7 @@ #nullable enable using System; using System.Diagnostics.Contracts; +using TorchSharp.PInvoke; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -79,5 +80,10 @@ public static ScalarType promote_types(ScalarType type1, ScalarType type2) [Obsolete("not implemented", true)] public static void _assert(Func condition, string message) => throw new NotImplementedException(); + + public static void PrintModule(torch.nn.Module module) + { + NativeMethods.THSNN_Print_Module(module.handle); + } } } \ No newline at end of file From 98cabfa4496b1a9bb1bbc996cbf931dd73fd2961 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 22:49:43 -0300 Subject: [PATCH 06/43] Add some printing module --- src/Native/LibTorchSharp/THSNN.cpp | 47 +++++++++++++++++--- src/TorchSharp/NN/Dropout2d.cs | 4 +- src/TorchSharp/NN/Normalization/LayerNorm.cs | 4 +- src/TorchSharp/Tensor/torch.Utilities.cs | 14 ++++++ 4 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/Native/LibTorchSharp/THSNN.cpp b/src/Native/LibTorchSharp/THSNN.cpp index a164f0f67..430c17f5e 100644 --- a/src/Native/LibTorchSharp/THSNN.cpp +++ b/src/Native/LibTorchSharp/THSNN.cpp @@ -1337,13 +1337,48 @@ Tensor THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key, } void THSNN_Print_Module(const NNModule module) { - if (auto* conv = (*module)->as()) + std::ostringstream oss; + const std::string name = module->get()->name(); + oss << name << "("; + if (auto* conv2 = (*module)->as()) { - auto opt = conv->options; - ::std::cout << conv->name() << "(" << opt.in_channels() << "," << opt.out_channels() << ", K=" << opt.kernel_size() <<", S=" << opt.stride() << ")" << std::endl; //TODO: Add padding + const auto opt = &conv2->options; + oss << opt->in_channels() << "," << opt->out_channels() << ", K=" << opt->kernel_size(); + oss << ", S=" << opt->stride() << ", P=" << opt->padding().index() << ", D=" << opt->dilation(); + oss << ", G=" << opt->groups() << ", B=" << opt->bias(); } - if (auto* bn = (*module)->as()) { - auto opt = bn->options; - ::std::cout << bn->name() << "(" << opt.num_features() << ", Eps=" << opt.eps() << ", M=" << (opt.momentum().has_value() ? opt.momentum().value() : 0) << ")" << std::endl; //TODO: Add another data + if (auto* bn2 = (*module)->as()) { + const auto opt = &bn2->options; + oss << opt->num_features() << ", Eps=" << opt->eps() << ", M=" << (opt->momentum().has_value() ? std::to_string(opt->momentum().value()) : "NaN"); + oss << ", A=" << opt->affine() << ", T=" << opt->track_running_stats(); } + if(auto* ln = (*module)->as()) //This not printed because the TorchSharp not have a ctor of LayerNorm + { + const auto opt = ln->options; + oss << opt.eps() << ", Elem=" << opt.elementwise_affine() << ", N=["; + for(int64_t i=0;i< static_cast(opt.normalized_shape().size());i++) + oss << opt.normalized_shape()[i] << ((i == static_cast(opt.normalized_shape().size()-1)) ? "]" : ","); + } + if (const auto* d2 = (*module)->as()) //This not printed because the TorchSharp not have a ctor of Dropout2d + { + auto opt = d2->options; + oss << opt.p() << ", Inplace=" << opt.inplace(); + } + if(auto* avp2 = (*module)->as()) + { + const auto opt = &avp2->options; + oss << "["; + for (int64_t i = 0; i < opt->output_size().size(); i++) + oss << opt->output_size()->at(i).value() << ((i == opt->output_size().size() - 1) ? "]" : ","); + } + if (auto* amp2 = (*module)->as()) + { + const auto opt = &2->options; + oss << "["; + for (int64_t i = 0; i < opt->output_size().size(); i++) + oss << opt->output_size()->at(i).value() << ((i == opt->output_size().size() - 1) ? "]" : ","); + } + + oss << ")"; + std::cout << oss.str() << std::endl; } \ No newline at end of file diff --git a/src/TorchSharp/NN/Dropout2d.cs b/src/TorchSharp/NN/Dropout2d.cs index 363cb40d5..49db468d7 100644 --- a/src/TorchSharp/NN/Dropout2d.cs +++ b/src/TorchSharp/NN/Dropout2d.cs @@ -33,8 +33,8 @@ public override Tensor forward(Tensor input) protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex = -1) => this; protected internal override nn.Module _to(ScalarType dtype) => this; - private bool inplace; - private double p; + internal bool inplace; //Set internal accesibility for PrintModule + internal double p; //Set internal accesibility for PrintModule } } diff --git a/src/TorchSharp/NN/Normalization/LayerNorm.cs b/src/TorchSharp/NN/Normalization/LayerNorm.cs index 7010e754e..6ed8dae45 100644 --- a/src/TorchSharp/NN/Normalization/LayerNorm.cs +++ b/src/TorchSharp/NN/Normalization/LayerNorm.cs @@ -18,8 +18,8 @@ namespace Modules /// public sealed class LayerNorm : torch.nn.Module { - private long[] _normalized_shape; - private double _eps; + internal long[] _normalized_shape; + internal double _eps; internal LayerNorm(long[] normalized_shape, double eps, bool elementwise_affine, bool bias, Device? device, ScalarType? dtype) : base(nameof(LayerNorm)) { diff --git a/src/TorchSharp/Tensor/torch.Utilities.cs b/src/TorchSharp/Tensor/torch.Utilities.cs index 91d79539a..7525ea6c9 100644 --- a/src/TorchSharp/Tensor/torch.Utilities.cs +++ b/src/TorchSharp/Tensor/torch.Utilities.cs @@ -2,6 +2,7 @@ #nullable enable using System; using System.Diagnostics.Contracts; +using TorchSharp.Modules; using TorchSharp.PInvoke; using static TorchSharp.PInvoke.NativeMethods; @@ -83,6 +84,19 @@ public static ScalarType promote_types(ScalarType type1, ScalarType type2) public static void PrintModule(torch.nn.Module module) { + if (module is Dropout2d drop2d) { + Console.WriteLine($"{module.GetName()}({drop2d.p}, {drop2d.inplace})"); + return; + } + + if (module is LayerNorm ln) { + string str= "["; + for (int i = 0; i < ln._normalized_shape.Length; i++) + str += ln._normalized_shape[i] + ","; + str = str.TrimEnd(',')+"]"; + Console.WriteLine($"{module.GetName()}({ln._eps}, {str})"); + return; + } NativeMethods.THSNN_Print_Module(module.handle); } } From 669b4facd7eac6dcd6ba01c25c2be0831c9ffe67 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 20 Feb 2024 16:08:27 -0300 Subject: [PATCH 07/43] Fix some dotnet build. Need fix tests --- .gitignore | 22 +++ .../FileRestitcher.Tests.csproj | 2 +- .../FileRestitcher/FileRestitcher.csproj | 6 +- src/Examples.Utils/Examples.Utils.csproj | 3 +- src/Examples.Utils/Vocab.cs | 9 +- src/Examples/Examples.csproj | 2 +- src/FSharp.Examples/FSharp.Examples.fsproj | 2 +- src/Native/build.cmd | 151 ------------------ src/TorchSharp/TorchSharp.csproj | 28 ++-- 9 files changed, 51 insertions(+), 174 deletions(-) delete mode 100644 src/Native/build.cmd diff --git a/.gitignore b/.gitignore index bab8676e1..a17061b33 100644 --- a/.gitignore +++ b/.gitignore @@ -272,3 +272,25 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py +/src/Native/CMakeFiles +/src/Native/LibTorchSharp/CMakeFiles +/src/Native/ALL_BUILD.vcxproj +/src/Native/ALL_BUILD.vcxproj.filters +/src/Native/build.cmd +/src/Native/CMakeCache.txt +/src/Native/cmake_install.cmake +/src/Native/INSTALL.vcxproj +/src/Native/INSTALL.vcxproj.filters +/src/Native/install_manifest.txt +/src/Native/LibTorchSharp/ALL_BUILD.vcxproj +/src/Native/LibTorchSharp/ALL_BUILD.vcxproj.filters +/src/Native/LibTorchSharp/cmake_install.cmake +/src/Native/LibTorchSharp/INSTALL.vcxproj +/src/Native/LibTorchSharp/INSTALL.vcxproj.filters +/src/Native/LibTorchSharp/LibTorchSharp.sln +/src/Native/LibTorchSharp/LibTorchSharp.vcxproj +/src/Native/LibTorchSharp/LibTorchSharp.vcxproj.filters +/src/Native/Project.sln +/src/Native/ZERO_CHECK.vcxproj +/src/Native/ZERO_CHECK.vcxproj.filters +/src/FSharp.Examples/FSharp.Examples.fsproj diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj index e76338122..bc96dbe96 100644 --- a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj @@ -3,7 +3,7 @@ false - + net472;netstandard2.0;$(TargetFrameworks) net6.0 net472;$(TargetFrameworks) diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj index bbfbab0cc..3b4d8b200 100644 --- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj @@ -1,11 +1,11 @@ - + false Library - netstandard2.0 + netstandard2.0;net6.0 false x64 - + diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 1f6d5a081..6a5a09eeb 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -5,7 +5,8 @@ 9.0 net6.0 - net472;$(TargetFrameworks) + net472;$(TargetFrameworks);netstandard2.0 + net6.0 diff --git a/src/Examples.Utils/Vocab.cs b/src/Examples.Utils/Vocab.cs index 743e4c55c..7a1deb298 100644 --- a/src/Examples.Utils/Vocab.cs +++ b/src/Examples.Utils/Vocab.cs @@ -88,12 +88,17 @@ public void Add(KeyValuePair item) { Add(item.Key, item.Value); } - +#if NETSTANDARD2_0 + public bool TryGetValue(string key, out int value) + { + return _dict.TryGetValue(key, out value); + } +#else public bool TryGetValue(string key, [MaybeNullWhen(false)] out int value) { return _dict.TryGetValue(key, out value); } - +#endif private Dictionary _dict = new Dictionary(); private int _last = 0; } diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj index f6fe32680..79c448399 100644 --- a/src/Examples/Examples.csproj +++ b/src/Examples/Examples.csproj @@ -5,7 +5,7 @@ true true - + net472;netstandard2.0;$(TargetFrameworks) 9.0 net6.0 net472;$(TargetFrameworks) diff --git a/src/FSharp.Examples/FSharp.Examples.fsproj b/src/FSharp.Examples/FSharp.Examples.fsproj index 900e25caa..a6ecbb723 100644 --- a/src/FSharp.Examples/FSharp.Examples.fsproj +++ b/src/FSharp.Examples/FSharp.Examples.fsproj @@ -6,7 +6,7 @@ true net6.0 - net472;$(TargetFrameworks) + net472;netstandard2.0;$(TargetFrameworks) net6.0 true Examples diff --git a/src/Native/build.cmd b/src/Native/build.cmd deleted file mode 100644 index c805b2608..000000000 --- a/src/Native/build.cmd +++ /dev/null @@ -1,151 +0,0 @@ -@if not defined _echo @echo off -setlocal - -:: Store current script directory before %~dp0 gets affected by another process later. -set __currentScriptDir=%~dp0 - -:SetupArgs -:: Initialize the args that will be passed to cmake -set __binDir=%__currentScriptDir%..\..\bin -set __rootDir=%__currentScriptDir%..\.. -set __CMakeBinDir="" -set __IntermediatesDir="" -set __BuildArch=x64 -set __VCBuildArch=x86_amd64 -set CMAKE_BUILD_TYPE=Debug -set LIBTORCH_PATH="" - -:Arg_Loop -if [%1] == [] goto :ToolsVersion -if /i [%1] == [Release] ( set CMAKE_BUILD_TYPE=Release&&shift&goto Arg_Loop) -if /i [%1] == [Debug] ( set CMAKE_BUILD_TYPE=Debug&&shift&goto Arg_Loop) - -if /i [%1] == [x86] ( set __BuildArch=x86&&set __VCBuildArch=x86&&shift&goto Arg_Loop) -if /i [%1] == [x64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) -if /i [%1] == [amd64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) - -if /i [%1] == [--libtorchpath] ( set LIBTORCH_PATH=%2&&shift&goto Arg_Loop) - -shift -goto :Arg_Loop - -:ToolsVersion -if defined VisualStudioVersion goto :RunVCVars - -set _VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -if exist %_VSWHERE% ( - for /f "usebackq tokens=*" %%i in (`%_VSWHERE% -latest -prerelease -property installationPath`) do set _VSCOMNTOOLS=%%i\Common7\Tools -) -if not exist "%_VSCOMNTOOLS%" set _VSCOMNTOOLS=%VS140COMNTOOLS% -if not exist "%_VSCOMNTOOLS%" goto :MissingVersion - - -set "VSCMD_START_DIR=%__currentScriptDir%" -call "%_VSCOMNTOOLS%\VsDevCmd.bat" - -:RunVCVars -if "%VisualStudioVersion%"=="17.0" ( - goto :VS2022 -) else if "%VisualStudioVersion%"=="16.0" ( - goto :VS2019 -) else if "%VisualStudioVersion%"=="15.0" ( - goto :VS2017 -) else if "%VisualStudioVersion%"=="14.0" ( - goto :VS2015 -) - -:MissingVersion -:: Can't find VS 2015, 2017 or 2019 -echo Error: Visual Studio 2015, 2017 or 2019 required -echo Please see https://github.com/dotnet/machinelearning/tree/master/Documentation for build instructions. -exit /b 1 - -:VS2022 -:: Setup vars for VS2022 -set __PlatformToolset=v143 -set __VSVersion=17 2022 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2019 -:: Setup vars for VS2019 -set __PlatformToolset=v142 -set __VSVersion=16 2019 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2017 -:: Setup vars for VS2017 -set __PlatformToolset=v141 -set __VSVersion=15 2017 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS150COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2015 -:: Setup vars for VS2015build -set __PlatformToolset=v140 -set __VSVersion=14 2015 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS140COMNTOOLS%..\..\VC\vcvarsall.bat" %__VCBuildArch% -) - -:SetupDirs -:: Setup to cmake the native components -echo Commencing native build of dotnet/machinelearning -echo. - -if %__CMakeBinDir% == "" ( - set "__CMakeBinDir=%__binDir%\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" -) -if %__IntermediatesDir% == "" ( - set "__IntermediatesDir=%__binDir%\obj\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" -) -set "__CMakeBinDir=%__CMakeBinDir:\=/%" -set "__IntermediatesDir=%__IntermediatesDir:\=/%" - -:: Check that the intermediate directory exists so we can place our cmake build tree there -if not exist "%__IntermediatesDir%" md "%__IntermediatesDir%" - -:: Regenerate the VS solution - -set "__gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat" -set "__source-code-path=%__currentScriptDir%" - -echo Calling "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% -pushd "%__IntermediatesDir%" -call "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% -popd - -:CheckForProj -:: Check that the project created by Cmake exists -if exist "%__IntermediatesDir%\INSTALL.vcxproj" goto BuildNativeProj -goto :Failure - -:BuildNativeProj -:: Build the project created by Cmake -set __msbuildArgs=/p:Platform=%__BuildArch% /p:PlatformToolset="%__PlatformToolset%" - -cd %__rootDir% - -echo msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% -call msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% -IF ERRORLEVEL 1 ( - goto :Failure -) -echo Done building Native components -exit /B 0 - -:Failure -:: Build failed -echo Failed to generate native component build project! -exit /b 1 diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj index ef6d6ff94..054f5c18a 100644 --- a/src/TorchSharp/TorchSharp.csproj +++ b/src/TorchSharp/TorchSharp.csproj @@ -3,14 +3,14 @@ - netstandard2.0 - 9.0 - TorchSharp - true - false - false - false - $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) + netstandard2.0;net6.0 + 9.0 + TorchSharp + true + false + false + false + $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) @@ -49,12 +49,12 @@ - - $(PackDependsOn); - RealPack - - True - ..\..\build\TorchSharp.snk + + $(PackDependsOn); + RealPack + + True + ..\..\build\TorchSharp.snk From 394041426e75864e182b0e4bcb0ceb2289351f2f Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 30 Jun 2024 19:39:43 -0300 Subject: [PATCH 08/43] Fast tensor accessor for ToArray() --- src/Examples.Utils/Examples.Utils.csproj | 8 +- src/TorchSharp/Amp/AutocastDisposedManager.cs | 10 +++ src/TorchSharp/Amp/AutocastDisposedScope.cs | 10 +++ .../Tensor/Factories/tensor_float.cs | 3 +- src/TorchSharp/Utils/TensorAccessor.cs | 79 ++++++++++++++++--- 5 files changed, 97 insertions(+), 13 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastDisposedManager.cs create mode 100644 src/TorchSharp/Amp/AutocastDisposedScope.cs diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 6a5a09eeb..d8ce3a24a 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -21,7 +21,13 @@ - + + + + + + + diff --git a/src/TorchSharp/Amp/AutocastDisposedManager.cs b/src/TorchSharp/Amp/AutocastDisposedManager.cs new file mode 100644 index 000000000..d4ec1ccd7 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposedManager.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + class AutocastDisposedManager + { + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposedScope.cs b/src/TorchSharp/Amp/AutocastDisposedScope.cs new file mode 100644 index 000000000..7c771d16f --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposedScope.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + class AutocastDisposedScope + { + } +} diff --git a/src/TorchSharp/Tensor/Factories/tensor_float.cs b/src/TorchSharp/Tensor/Factories/tensor_float.cs index f33d1b90a..e50943689 100644 --- a/src/TorchSharp/Tensor/Factories/tensor_float.cs +++ b/src/TorchSharp/Tensor/Factories/tensor_float.cs @@ -21,7 +21,8 @@ public static Tensor tensor(float scalar, Device? device = null, bool requires_g if (handle == IntPtr.Zero) { CheckForErrors(); } - var t = new Tensor(handle).AutoCast(); + //var t = new Tensor(handle).AutoCast(); + var t = new Tensor(handle); /*if (is_autocast_cache_enabled()) { if (is_autocast_gpu_enabled()) return t.to(get_autocast_gpu_dtype()); //this work, but should put that on all tensor factorie... diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 9514003f2..ab9846eec 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -38,16 +38,28 @@ internal TensorAccessor(torch.Tensor tensor) _tensor = tensor; // Keep the tensor alive now that everything is alright. } + /// + /// This is important for performance because only called with CopyTo, CopyFrom. Is not necesary in each invocation call tensor.numel() because that use intensive CPU. + /// This temporary count avoid so much use CPU. The Property act as method. + /// If tensor is for example 640*640*3 = 1.228.800, property invoke 1 millons times!!! + /// If we only want copy is not necesary call that method so many times. + /// + private long TempCount = -1; public long Count => (_tensor is not null ? _tensor.numel() : 0); public bool IsReadOnly => false; + public T[] ToArray() { if (_tensor.ndim < 2) return (T[])ToNDArray(); - var result = new T[Count]; + var shps = _tensor.shape; + TempCount = 1; + for(int i=0;i array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } + idx += 1; + } + } + public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0) { int idx = arrayIndex; @@ -251,6 +273,16 @@ public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0) } } + public void CopyFrom(ReadOnlySpan array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; } + idx += 1; + } + } + /// /// Translates a linear index within the span represented by the accessor to a linear index /// used by the underlying tensor. The two should only be different if the tensor is a view @@ -274,7 +306,27 @@ private static long TranslateIndex(long idx, torch.Tensor tensor) return result; } + /// + /// WARNING: Test purpose not use in production + /// + private long TranslateIndexNonStatic(long idx, torch.Tensor tensor) + { + if (idx >= TempCount || idx < 0) + throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); + + if (tensor.is_contiguous() || idx == 0) return idx; + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + idx = Math.DivRem(idx, shape[i], out long s); + result += s * strides[i]; + } + + return result; + } private static long TranslateIndex(long[] idx, torch.Tensor tensor) { long result = 0; @@ -347,15 +399,18 @@ internal static T ReadItemAt(torch.Tensor tensor, long index) private IEnumerable GetSubsequentIndices(long startingIndex) { - if (startingIndex < 0 || startingIndex >= Count) + TempCount = Count; + + if (startingIndex < 0 || startingIndex >= TempCount) throw new ArgumentOutOfRangeException(nameof(startingIndex)); - if (Count <= 1) { - if (Count == 0) { + if (TempCount <= 1) { + if (TempCount == 0) { return Enumerable.Empty(); } - return (new long[] { 0 }).AsEnumerable(); + return new List() { 0 }; + //return (new long[] { 0 }).AsEnumerable(); } if (_tensor.is_contiguous()) { @@ -371,7 +426,6 @@ private IEnumerable GetSubsequentIndices(long startingIndex) return MultiDimensionIndices(startingIndex); } - private IEnumerable MultiDimensionIndices(long startingIndex) { long[] shape = _tensor.shape; @@ -379,7 +433,8 @@ private IEnumerable MultiDimensionIndices(long startingIndex) long[] inds = new long[stride.Length]; long index = startingIndex; - long offset = TranslateIndex(startingIndex, _tensor); + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production while (true) { @@ -387,7 +442,7 @@ private IEnumerable MultiDimensionIndices(long startingIndex) yield return offset; - if (index >= Count) break; + if (index >= TempCount) break; for (int i = inds.Length - 1; ; i--) { Debug.Assert(i >= 0); @@ -408,21 +463,23 @@ private IEnumerable MultiDimensionIndices(long startingIndex) private IEnumerable SimpleIndices(long startingIndex, long stride) { long index = startingIndex; - long offset = TranslateIndex(startingIndex, _tensor); + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production - while (index < Count) { + while (index < TempCount) { yield return offset; offset += stride; index += 1; } } + private IEnumerable ContiguousIndices(long startingIndex) { // If there was an overload for Enumerable.Range that // produced long integers, we wouldn't need this implementation. long index = startingIndex; - while (index < Count) { + while (index < TempCount) { yield return index; index += 1; } From 5062339fe0cc4989f286bcd5812c00b4f920bc4a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 30 Jun 2024 20:02:32 -0300 Subject: [PATCH 09/43] fix local build dotnet --- src/Examples/AdversarialExampleGeneration.cs | 2 ++ src/Examples/SequenceToSequence.cs | 7 +++++++ src/Examples/TextClassification.cs | 2 ++ src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs | 6 +++--- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Examples/AdversarialExampleGeneration.cs b/src/Examples/AdversarialExampleGeneration.cs index 7bfc174b2..49bd10956 100644 --- a/src/Examples/AdversarialExampleGeneration.cs +++ b/src/Examples/AdversarialExampleGeneration.cs @@ -34,6 +34,8 @@ public class AdversarialExampleGeneration { #if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); #endif // NET472_OR_GREATER diff --git a/src/Examples/SequenceToSequence.cs b/src/Examples/SequenceToSequence.cs index 436c05a67..8ff2c6dc5 100644 --- a/src/Examples/SequenceToSequence.cs +++ b/src/Examples/SequenceToSequence.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using static TorchSharp.torch; using static TorchSharp.torch.nn; +using System.Text.RegularExpressions; namespace TorchSharp.Examples { @@ -26,6 +27,8 @@ public class SequenceToSequence // This path assumes that you're running this on Windows. #if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); #endif // NET472_OR_GREATER @@ -251,7 +254,11 @@ private void InitWeights() public override Tensor forward(Tensor t, Tensor mask) { +#if !NETSTANDARD2_0 var src = pos_encoder.call(encoder.call(t) * MathF.Sqrt(ninputs)); +#else + var src = pos_encoder.call(encoder.call(t) * (float)Math.Sqrt(ninputs)); +#endif var enc = transformer_encoder.call(src, mask); return decoder.call(enc); } diff --git a/src/Examples/TextClassification.cs b/src/Examples/TextClassification.cs index 8fb175718..4cdc79bc1 100644 --- a/src/Examples/TextClassification.cs +++ b/src/Examples/TextClassification.cs @@ -36,6 +36,8 @@ public class TextClassification // This path assumes that you're running this on Windows. #if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); #endif // NET472_OR_GREATER diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index 4b38f5655..173ccd48a 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -288,12 +288,12 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_device(IntPtr handle, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); + [DllImport("LibTorchSharp")] + //internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy); + internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_type(IntPtr handle, sbyte scalar_type, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); - [DllImport("LibTorchSharp")] - internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); - internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_type_and_device_and_non_blocking(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool non_blocking); From 3a467af99a1afc640d780e52510ecf82c97e5c5a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 2 Jul 2024 18:16:42 -0300 Subject: [PATCH 10/43] Fast ToArray() TensorAccessor --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a17061b33..875954e1a 100644 --- a/.gitignore +++ b/.gitignore @@ -294,3 +294,4 @@ packages/ /src/Native/ZERO_CHECK.vcxproj /src/Native/ZERO_CHECK.vcxproj.filters /src/FSharp.Examples/FSharp.Examples.fsproj +/pkg/FileRestitcher From 18c7528a50173ac26e21a5ec4d833c84510608be Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 2 Jul 2024 18:28:45 -0300 Subject: [PATCH 11/43] Fast tensor accesor --- Directory.Build.props | 9 +++- src/Native/LibTorchSharp/Utils.h | 3 ++ src/TorchSharp/Amp/AutocastDisposeManager.cs | 29 ++++++++++++ src/TorchSharp/Amp/AutocastDisposeScope.cs | 23 ++++++++++ src/TorchSharp/Amp/AutocastDisposedManager.cs | 10 ----- src/TorchSharp/Amp/AutocastDisposedScope.cs | 10 ----- src/TorchSharp/Amp/AutocastMode.cs | 5 ++- src/TorchSharp/Tensor/Tensor.cs | 18 +++++++- src/TorchSharp/Utils/TensorAccessor.cs | 44 +++++++++++++++---- 9 files changed, 118 insertions(+), 33 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastDisposeManager.cs create mode 100644 src/TorchSharp/Amp/AutocastDisposeScope.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposedManager.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposedScope.cs diff --git a/Directory.Build.props b/Directory.Build.props index 1321ec4ff..aad7547a9 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,10 @@ + + true + $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cpu\libtorch + $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cu121\libtorch Debug Debug;Release <_DefaultArchitecture>$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLower()) @@ -133,7 +137,7 @@ .dylib.dwarf - + pytorch conda osx-arm64 @@ -152,6 +156,9 @@ $(LibTorchArchiveCoreName)-$(LibTorchVersion)$(LibTorchCudaLocalNameSuffix) $(IntermediateOutputRootPath)libtorch-cpu\$(LibTorchCpuLocalBase)\libtorch\share\cmake\Torch + + $(LibTorchPathCPU)\share\cmake\Torch + diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h index 892e0e2ec..42573753b 100644 --- a/src/Native/LibTorchSharp/Utils.h +++ b/src/Native/LibTorchSharp/Utils.h @@ -59,6 +59,9 @@ struct TensorArray { inline Tensor ResultTensor(const at::Tensor & res) { if (res.defined()) { + + //TODO: Autocast here only if is INNER-SCOPE + /*at::Tensor* resT = new torch::Tensor(res); if (at::autocast::is_autocast_cache_enabled()){ if (res.is_cuda()) { diff --git a/src/TorchSharp/Amp/AutocastDisposeManager.cs b/src/TorchSharp/Amp/AutocastDisposeManager.cs new file mode 100644 index 000000000..83c31f335 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposeManager.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public class AutocastDisposeManager + { + + /*[ThreadStatic] private static AutocastDisposeManager _threadAutocastSingleton; + + internal static AutocastDisposeManager ThreadAutocastSingleton => _threadAutocastSingleton ??= new AutocastDisposeManager(); + + internal AutocastDisposeScope CurrentAutocastDispose; + //internal HashSet Modules = new List(); + public AutocastDisposeManager() + { + CurrentAutocastDispose = new AutocastDisposeScope(this); + } + internal AutocastDisposeScope RegisterTensorAutocastScope(torch.Tensor t) + { + if (CurrentAutocastDispose == null) + return null; + CurrentAutocastDispose.Tensors.Add(t); + return CurrentAutocastDispose; + }*/ + + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposeScope.cs b/src/TorchSharp/Amp/AutocastDisposeScope.cs new file mode 100644 index 000000000..8f5df9490 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposeScope.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public sealed class AutocastDisposeScope : IDisposable + { + //private AutocastDisposeManager autocastDisposeManager; + public bool IsEnabled; + /*internal AutocastMode autocastMode = AutocastMode.GetInstance(); + internal HashSet Tensors = new HashSet(); + public AutocastDisposeScope(AutocastDisposeManager autocastDisposeManager) + { + this.autocastDisposeManager = autocastDisposeManager; + IsEnabled = true; + }*/ + public void Dispose() + { + IsEnabled = false; + } + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposedManager.cs b/src/TorchSharp/Amp/AutocastDisposedManager.cs deleted file mode 100644 index d4ec1ccd7..000000000 --- a/src/TorchSharp/Amp/AutocastDisposedManager.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - class AutocastDisposedManager - { - } -} diff --git a/src/TorchSharp/Amp/AutocastDisposedScope.cs b/src/TorchSharp/Amp/AutocastDisposedScope.cs deleted file mode 100644 index 7c771d16f..000000000 --- a/src/TorchSharp/Amp/AutocastDisposedScope.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - class AutocastDisposedScope - { - } -} diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 43d3805fa..07c8149d2 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -16,6 +16,7 @@ public static torch.Tensor AutoCast(this torch.Tensor input) //TODO: Should make Singleton and IDisposable on ENTER public sealed class AutocastMode : IDisposable { + //NEED "Register" all tensor in scope for uncasting outer-scope private bool Enabled, Prev; //private torch.ScalarType Dtype = torch.ScalarType.Float32; private torch.ScalarType fast_dtype = torch.ScalarType.Float32; @@ -29,7 +30,7 @@ public sealed class AutocastMode : IDisposable }*/ public static AutocastMode GetInstance() { - return instance ?? (instance = new AutocastMode(torch.CUDA, cache_enabled:true)); + return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) @@ -40,7 +41,7 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab fast_dtype = torch.get_autocast_gpu_dtype(); if (dev.type == DeviceType.CPU) fast_dtype = torch.get_autocast_cpu_dtype(); - IntPtr ptr = IntPtr.Zero; + //IntPtr ptr = IntPtr.Zero; bool _cache_enabled = torch.is_autocast_cache_enabled(); if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index c2055d0ec..81f97cafa 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -9,6 +9,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; +using TorchSharp.Amp; using TorchSharp.PInvoke; #nullable enable @@ -33,13 +34,25 @@ public partial class Tensor : IDisposable static long _peakCount = 0; internal DisposeScope? OwningDisposeScope { get; set; } - + //internal AutocastDisposeScope? AutocastDisposeScope; internal Tensor(IntPtr handle) { this.handle = handle; + + /*if (_totalCount > 0) { + //have used + AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); + this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED + }*/ System.Threading.Interlocked.Increment(ref _totalCount); _peakCount = Math.Max(_totalCount, _peakCount); OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); + + //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. + //DANGER: DONT USE THIS ON PRODUCTION + /*AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); + this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED*/ + //Should cast inner scope when get tensors for every each method? example prod, sum, div, reshape, etc??? } /// @@ -209,6 +222,9 @@ public IntPtr Handle { get { if (handle == IntPtr.Zero) throw new InvalidOperationException("Tensor invalid -- empty handle."); + + //AutocastDisposeScope.autocastMode.CastTensor(this); //This is wrong right??? + return handle; } } diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index ab9846eec..f0050c928 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Runtime.InteropServices; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp.Utils @@ -43,13 +44,13 @@ internal TensorAccessor(torch.Tensor tensor) /// This temporary count avoid so much use CPU. The Property act as method. /// If tensor is for example 640*640*3 = 1.228.800, property invoke 1 millons times!!! /// If we only want copy is not necesary call that method so many times. + /// For some reason the method numel() use so much cpu. /// - private long TempCount = -1; - public long Count => (_tensor is not null ? _tensor.numel() : 0); + internal long TempCount = -1; + public long Count => _tensor?.numel() ?? 0; public bool IsReadOnly => false; - public T[] ToArray() { if (_tensor.ndim < 2) @@ -59,6 +60,14 @@ public T[] ToArray() TempCount = 1; for(int i=0;i(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); + } + } + } var result = new T[TempCount]; CopyTo(result); return result; @@ -246,6 +255,18 @@ private void validate(long index) public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) { int idx = arrayIndex; + /*if (_tensor.is_contiguous()) { + if (typeof(T) == typeof(float)) { + float[] ff = new float[TempCount]; + Marshal.Copy(_tensor_data_ptr, ff, 0,ff.Length); + } + }*/ + //Because the contiguous cause arange from tensorIndex to Numel. So is not necesary "create" array of arange, i said "create" because in fact enumerable do not create itself. Very cool. + if (_tensor.is_contiguous()) { + for(long i= tensorIndex; i= array.Length) break; unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } @@ -399,7 +420,7 @@ internal static T ReadItemAt(torch.Tensor tensor, long index) private IEnumerable GetSubsequentIndices(long startingIndex) { - TempCount = Count; + //TempCount = Count; if (startingIndex < 0 || startingIndex >= TempCount) throw new ArgumentOutOfRangeException(nameof(startingIndex)); @@ -477,7 +498,7 @@ private IEnumerable ContiguousIndices(long startingIndex) { // If there was an overload for Enumerable.Range that // produced long integers, we wouldn't need this implementation. - + long index = startingIndex; while (index < TempCount) { yield return index; @@ -534,11 +555,16 @@ private void Dispose(bool disposing) #if true public IEnumerator GetEnumerator() { - if (Count <= 1) { - if (Count == 0) + if (TempCount <= 1) { + if (TempCount == 0) return Enumerable.Empty().GetEnumerator(); return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); } + /*if (Count <= 1) { + if (Count == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + }*/ if (_tensor.is_contiguous()) { return new SimpleAtorImpl(this, 1); @@ -568,7 +594,7 @@ private class SimpleAtorImpl : IEnumerator public SimpleAtorImpl(TensorAccessor span, long stride) { _span = span; - _count = span.Count; + _count = span.TempCount; Debug.Assert(_count > 0); _stride = stride; Reset(); @@ -623,7 +649,7 @@ public GeneralAtorImpl(TensorAccessor span, long[] stride) { Debug.Assert(stride.Length > 1); _span = span; - _count = span.Count; + _count = span.TempCount; Debug.Assert(_count > 0); _shape = span._tensor.shape; Debug.Assert(_shape.Length == stride.Length); From 728c9fb7100eeb893d15af636783972a6ab1a6c7 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 8 Jul 2024 22:22:43 -0300 Subject: [PATCH 12/43] fix accesor for every types --- Directory.Build.props | 2 +- TorchSharp.sln | 14 +++++++------- src/TorchSharp/Utils/TensorAccessor.cs | 8 +++----- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index aad7547a9..1dbeae229 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -6,7 +6,7 @@ - true + false $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cpu\libtorch $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cu121\libtorch Debug diff --git a/TorchSharp.sln b/TorchSharp.sln index 8cec25c7d..054c07bb3 100644 --- a/TorchSharp.sln +++ b/TorchSharp.sln @@ -34,7 +34,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TorchSharp", "TorchSharp", pkg\TorchSharp\TorchSharp.symbols.nupkgproj = pkg\TorchSharp\TorchSharp.symbols.nupkgproj EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{2B359162-062E-3C52-91D3-027A8542A58C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{E4C0DBEE-0815-311B-9065-137BB50BD793}" EndProject @@ -66,9 +66,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution azure-pipelines.yml = azure-pipelines.yml build\BranchInfo.props = build\BranchInfo.props DEVGUIDE.md = DEVGUIDE.md + global.json = global.json README.md = README.md RELEASENOTES.md = RELEASENOTES.md - global.json = global.json EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchVision", "src\TorchVision\TorchVision.csproj", "{DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}" @@ -107,10 +107,10 @@ Global {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.Build.0 = Release|Any CPU - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|Any CPU.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|x64.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|Any CPU.ActiveCfg = Release|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|x64.ActiveCfg = Release|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|Any CPU.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|x64.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|Any CPU.ActiveCfg = Release|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|x64.ActiveCfg = Release|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|Any CPU.ActiveCfg = Debug|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|x64.ActiveCfg = Debug|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|Any CPU.ActiveCfg = Release|x64 @@ -181,7 +181,7 @@ Global {6C323B05-9028-4B09-911C-3C03AE058BEE} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {42B45168-476D-4BFA-87B8-81A34E6295CD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {567456AD-B026-4CB6-B98D-4FC930C90223} = {D3D38B03-B557-484D-8348-8BADEE4DF592} - {2B359162-062E-3C52-91D3-027A8542A58C} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} {E4C0DBEE-0815-311B-9065-137BB50BD793} = {4DB9E84D-324C-408F-87A6-246E86205540} {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {D8C60CD8-8429-45F2-A755-47B6CD10FDF8} = {09EADF06-BE25-4228-AB53-95AE3E15B530} diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index f0050c928..f7f825ffc 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -61,11 +61,9 @@ public T[] ToArray() for(int i=0;i(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); - } + if (_tensor.is_contiguous()) { //This is very fast. And work VERY WELL + unsafe { + return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); } } var result = new T[TempCount]; From a9a611aeecfa85b75cc51021f2eeef0145493b5d Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 12 Jul 2024 13:43:16 -0300 Subject: [PATCH 13/43] GradScaler --- src/Native/LibTorchSharp/CMakeLists.txt | 2 + src/Native/LibTorchSharp/THSAmp.cpp | 15 +++ src/Native/LibTorchSharp/THSAmp.h | 13 ++ src/Native/LibTorchSharp/THSTensor.cpp | 13 ++ src/Native/LibTorchSharp/THSTensor.h | 3 + src/TorchSharp/Amp/GradScaler.cs | 121 +++++++++++++++--- .../PInvoke/LibTorchSharp.THSAmp.cs | 15 +++ .../PInvoke/LibTorchSharp.THSTensor.cs | 5 + .../PInvoke/LibTorchSharp.THSTorchCuda.cs | 2 + src/TorchSharp/Tensor/Tensor.cs | 29 +++++ src/TorchSharp/Tensor/torch.Amp.cs | 17 +++ 11 files changed, 216 insertions(+), 19 deletions(-) create mode 100644 src/Native/LibTorchSharp/THSAmp.cpp create mode 100644 src/Native/LibTorchSharp/THSAmp.h create mode 100644 src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs create mode 100644 src/TorchSharp/Tensor/torch.Amp.cs diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index a592475ad..c0852a2a1 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -9,6 +9,7 @@ find_package(Torch REQUIRED PATHS ${LIBTORCH_PATH}) set(SOURCES cifar10.h crc32c.h + THSAmp.h THSAutograd.h THSData.h THSJIT.h @@ -21,6 +22,7 @@ set(SOURCES cifar10.cpp crc32c.c THSActivation.cpp + THSAmp.cpp THSAutograd.cpp THSConvolution.cpp THSData.cpp diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp new file mode 100644 index 000000000..56ea1ac18 --- /dev/null +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -0,0 +1,15 @@ +// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. +#include "THSAmp.h" + +#include +#include + +/*void THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale) +{ + torch::_amp_foreach_non_finite_check_and_unscale_(self, found_inf, inv_scale); +}*/ + +void THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale) +{ + torch::_amp_foreach_non_finite_check_and_unscale_(toTensors((torch::Tensor**)self, tLength),found_inf,inv_scale); +} diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h new file mode 100644 index 000000000..c85eb0609 --- /dev/null +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -0,0 +1,13 @@ +// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. +#pragma once + +#include "../Stdafx.h" + +#include "torch/torch.h" + +#include "Utils.h" + +//https://github.com/pytorch/pytorch/blob/main/torch/_meta_registrations.py#L5957 +//EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale); + +EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale); diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index 5a41bdca0..970dbdeb6 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -2226,3 +2226,16 @@ Tensor THSTensor_unflatten_names(Tensor tensor, const char** names, const int64_ return nullptr; } + +bool THSTensor_is_coalesce(Tensor tensor) +{ + return tensor->is_coalesced(); +} + +Tensor THSTensor_coalesce(Tensor tensor) +{ + CATCH( + return ResultTensor(tensor->coalesce()); + ); + return nullptr; +} \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSTensor.h b/src/Native/LibTorchSharp/THSTensor.h index 36468d995..b889ca055 100644 --- a/src/Native/LibTorchSharp/THSTensor.h +++ b/src/Native/LibTorchSharp/THSTensor.h @@ -1743,3 +1743,6 @@ EXPORT_API(Tensor) THSTensor_kaiser_window(const int64_t len, bool periodic, dou EXPORT_API(Tensor) THSTensor_stft(const Tensor x, int64_t n_fft, int64_t hop_length, int64_t win_length, const Tensor window, bool normalized, int64_t onesided, bool return_complex); EXPORT_API(Tensor) THSTensor_istft(const Tensor x, int64_t n_fft, int64_t hop_length, int64_t win_length, const Tensor window, bool center, bool normalized, int64_t onesided, int64_t length, bool return_complex); + +EXPORT_API(Tensor) THSTensor_coalesce(const Tensor x); +EXPORT_API(bool) THSTensor_is_coalesce(const Tensor x); \ No newline at end of file diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index 6da7a9dab..ac10ef6ea 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using TorchSharp.Modules; namespace TorchSharp.Amp { @@ -20,19 +21,19 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) { Debug.Assert(dev == torch.CPU || dev == torch.CUDA); - this.Enabled = enabled; - this.InitScale = init_scale; - this.GrowthFactor = growth_factor; - this.BackoffFactor = backoff_factor; - this.GrowthInterval = growth_interval; - this.InitGrowthTracker = 0.0f; + Enabled = enabled; + InitScale = init_scale; + GrowthFactor = growth_factor; + BackoffFactor = backoff_factor; + GrowthInterval = growth_interval; + InitGrowthTracker = 0.0f; throw new NotImplementedException(); } private void LazyInitScaleGrowthTracker(torch.Device dev) { - this._scale = torch.full(0, this.InitScale, torch.ScalarType.Float32, device: dev); - this._growth_tracker = torch.full(0, this.InitGrowthTracker, torch.ScalarType.Float32, device: dev); + _scale = torch.full(0, InitScale, torch.ScalarType.Float32, device: dev); + _growth_tracker = torch.full(0, InitGrowthTracker, torch.ScalarType.Int32, device: dev); } //private check_scale_growth_tracker @@ -40,27 +41,109 @@ public torch.Tensor scale(torch.Tensor output) { if (!Enabled) return output; - if (_scale.numel() == 0) - this.LazyInitScaleGrowthTracker(output.device); - return output * this._scale.to(output.device, output.dtype, true); + if (_scale.is_null()) + LazyInitScaleGrowthTracker(output.device); + return output * _scale.to(output.device, output.dtype, true); } - public torch.Tensor unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16) + public IList scale(IList outputs) { - return false; + apply_scale(outputs); + return outputs; } + private class MultiDeviceReplicator + { + private torch.Tensor master; - public void unscale(torch.optim.Optimizer optimizer) + internal Dictionary per_device_tensors = new Dictionary(); + public MultiDeviceReplicator(torch.Tensor master_tensor) + { + master = master_tensor; + } + + public torch.Tensor Get(torch.Device device) + { + torch.Tensor retval=null; + if (!per_device_tensors.ContainsKey(device)) { + retval = master.to(device, true, non_blocking: true); + per_device_tensors.Add(device, retval); + } + return retval; + } + } + + private torch.Tensor apply_scale(torch.Tensor scale) { - if (!Enabled) - return; + IList stash = new List(); + if (stash.Count == 0) { + if (_scale.is_null()) { + LazyInitScaleGrowthTracker(scale.device); + } + stash.Add(new MultiDeviceReplicator(_scale)); + } + return scale * stash[0].Get(scale.device); + } - + private void apply_scale(IList scales) + { + for (int i = 0; i < scales.Count; i++) + scales[i] = apply_scale(scales[i]); } - /*public IList scale(IList outputs) + public Dictionary unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16) { + var per_device_inv_scale = new MultiDeviceReplicator(inv_scale); + var per_device_found_inf= new MultiDeviceReplicator(found_inf); + Dictionary>> per_device_and_dtype_grads = new Dictionary>>(); + + using (torch.no_grad()) { + if (optimizer is AdamW adamW){ //Some optimizer have parameter tensor for unscale_grads i need that. + using (var enumer = adamW.parameters().GetEnumerator()) { + while (enumer.MoveNext()) { + var param = enumer.Current; + if (param.is_null()) + continue; + if (!allow_fp16 && param.dtype == torch.ScalarType.Float16) + throw new Exception("Attempting to unscale FP16 Gradients"); + torch.Tensor to_unscale; + if (param.grad.is_sparse) { + if (param.grad.dtype == torch.ScalarType.Float16) { + + param.grad = param.grad.coalesce(); + } + + to_unscale = param.grad.SparseValues; + } else { + to_unscale = param.grad; + } + if (!per_device_and_dtype_grads.ContainsKey(to_unscale.device)) { + per_device_and_dtype_grads.Add(to_unscale.device, new Dictionary>()); + per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List()); + per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale); + } else { + if (!per_device_and_dtype_grads[to_unscale.device].ContainsKey(to_unscale.dtype)) { + per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List()); + } else { + per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale); + } + } - }*/ + } + } + + foreach (var d in per_device_and_dtype_grads) + foreach (var g in d.Value) + torch._amp_foreach_non_finite_check_and_unscale_(g.Value, per_device_found_inf.Get(d.Key), per_device_inv_scale.Get(d.Key)); + } + } + + return per_device_found_inf.per_device_tensors; + } + + public void unscale(torch.optim.Optimizer optimizer) + { + if (!Enabled) + return; + } } } \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs new file mode 100644 index 000000000..5b1716bf3 --- /dev/null +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -0,0 +1,15 @@ +// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. +#nullable enable +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace TorchSharp.PInvoke +{ + internal static partial class NativeMethods + { + [DllImport("LibTorchSharp")] + internal static extern void THSAmp_amp_foreach_non_finite_check_and_unscale_(IntPtr tensors, long tLength, IntPtr found_inf, IntPtr inv_scale); + + } +} \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index 173ccd48a..2428223d9 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -2110,6 +2110,11 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, internal static extern IntPtr THSTensor_histogram_out_t(IntPtr input, IntPtr bins, IntPtr weight, bool density, out IntPtr hist, out IntPtr bin_edges, out IntPtr r_bin_edges); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_histogram_out_i(IntPtr input, long bins, IntPtr range, int length, IntPtr weight, bool density, out IntPtr hist, out IntPtr bin_edges, out IntPtr r_bin_edges); + + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSTensor_coalesce(IntPtr input); + [DllImport("LibTorchSharp")] + internal static extern bool THSTensor_is_coalesce(IntPtr input); } #pragma warning restore CA2101 } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs index fc67a88de..531b47d76 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs @@ -19,5 +19,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSTorchCuda_synchronize(long device_index); + + } } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 81f97cafa..167fcb738 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -261,6 +261,7 @@ internal IntPtr MoveHandle() /// public long numel() => NumberOfElements; + public bool is_null() => handle == IntPtr.Zero; /// /// Get the size of each element in the tensor. /// @@ -294,6 +295,21 @@ public bool is_nonzero() return res != 0; } + public bool is_coalesce() + { + var res = NativeMethods.THSTensor_is_coalesce(Handle); + CheckForErrors(); + return res; + } + + public Tensor coalesce() + { + var res = NativeMethods.THSTensor_coalesce(Handle); + if(res == IntPtr.Zero) + CheckForErrors(); + return new Tensor(res); + } + public bool is_cuda => device.type == DeviceType.CUDA; public bool is_meta => device.type == DeviceType.META; @@ -716,6 +732,7 @@ public bool is_sparse { public void backward(IList? grad_tensors = null, bool create_graph = false, bool retain_graph = false, IList? inputs = null) => torch.autograd.backward(new[] { this }, grad_tensors, create_graph, retain_graph, inputs); + /// /// Creates a tensor by loading it from a file. /// @@ -7427,5 +7444,17 @@ public static Tensor WrappedTensorDisposeScope(Func expr) var result = expr(); return result.MoveToOuterDisposeScope(); } + + public static void _amp_foreach_non_finite_check_and_unscale(Tensor found_inf, Tensor inv_scale) + { + if (found_inf.numel() == 1) + throw new Exception("found_inf must be a 1-element tensor."); + if (found_inf.numel() == 1) + throw new Exception("found_inf must be a 1-element tensor."); + if (found_inf.numel() == 1) + throw new Exception("found_inf must be a 1-element tensor."); + if (found_inf.numel() == 1) + throw new Exception("found_inf must be a 1-element tensor."); + } } } \ No newline at end of file diff --git a/src/TorchSharp/Tensor/torch.Amp.cs b/src/TorchSharp/Tensor/torch.Amp.cs new file mode 100644 index 000000000..dfa4245fd --- /dev/null +++ b/src/TorchSharp/Tensor/torch.Amp.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using static TorchSharp.PInvoke.NativeMethods; + +namespace TorchSharp +{ + public static partial class torch + { + public static void _amp_foreach_non_finite_check_and_unscale_(IList tensors, Tensor found_inf, Tensor inv_scale) + { + using var ts = new PinnedArray(); + IntPtr tens = ts.CreateArray(tensors.Select(x => x.Handle).ToArray()); + THSAmp_amp_foreach_non_finite_check_and_unscale_(tens, ts.Array.Length, found_inf.Handle, inv_scale.Handle); + } + } +} From 4a406ece7e7b9a0119300cb2230c6c02b9712b2b Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 14 Jul 2024 14:50:13 -0300 Subject: [PATCH 14/43] Trying fix build for azure --- .../FileRestitcher.Tests/FileRestitcher.Tests.csproj | 8 ++++++-- src/Examples/Examples.csproj | 7 +++++-- src/TorchSharp/Torch.cs | 2 +- src/TorchVision/models/VGG.cs | 6 +++--- .../TorchSharpTest.WithCudaBinaries.csproj | 1 + test/TorchSharpTest/TorchSharpTest.csproj | 1 + 6 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj index 37f37a9bb..39dc54a1b 100644 --- a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj @@ -1,4 +1,4 @@ - + false @@ -14,7 +14,11 @@ - + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + runtime; build; native; contentfiles; analyzers; buildtransitive all diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj index 10d6171e7..37ec4b75d 100644 --- a/src/Examples/Examples.csproj +++ b/src/Examples/Examples.csproj @@ -5,9 +5,12 @@ true true - net472;netstandard2.0;$(TargetFrameworks) + 9.0 - net6.0 + + net6.0 true false false diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index 6a6bbec0f..d10254a2c 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -158,7 +158,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr var torchsharpLoc = Path.GetDirectoryName(typeof(torch).Assembly.Location); var packagesDir = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..", "..", "..")); var torchsharpHome = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..")); - + //torchsharpLoc = @"K:\Proyects_Repos\TorchSharp"; trace.AppendLine($" torchsharpLoc = {torchsharpLoc}"); trace.AppendLine($" packagesDir = {packagesDir}"); trace.AppendLine($" torchsharpHome = {torchsharpHome}"); diff --git a/src/TorchVision/models/VGG.cs b/src/TorchVision/models/VGG.cs index e79f9ddec..cb6ff9f7f 100644 --- a/src/TorchVision/models/VGG.cs +++ b/src/TorchVision/models/VGG.cs @@ -332,9 +332,9 @@ public class VGG : Module { "VGG19", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 256, 0, 512, 512, 512, 512, 0, 512, 512, 512, 512, 0 } } }; - private readonly Module features; - private readonly Module avgpool; - private readonly Module classifier; + public readonly Module features; + public readonly Module avgpool; + public readonly Module classifier; protected override void Dispose(bool disposing) { diff --git a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj index 055fb9ffc..c7ef48fd8 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj +++ b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj @@ -12,6 +12,7 @@ false trx $(OutputPath) + Debug;Release;LibTorch2.3.1 diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index 2de45fe06..d0d7ace08 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -13,6 +13,7 @@ trx $(OutputPath) 10.0 + Debug;Release;LibTorch2.3.1 From 280c8d59df7db5990efc6fe27d1bd474f27abf1a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 16 Jul 2024 23:03:16 -0300 Subject: [PATCH 15/43] Range sequential --- src/Examples/Examples.csproj | 4 ++-- src/TorchSharp/Amp/AutocastManager.cs | 11 +++++++++++ src/TorchSharp/Amp/GradScaler.cs | 19 ++++++++++++++++--- src/TorchSharp/NN/Sequential.cs | 7 ++++++- .../Tensor/Factories/Tensor.Factories.cs | 6 +++--- test/TorchSharpTest/TorchSharpTest.csproj | 3 +-- 6 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastManager.cs diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj index 37ec4b75d..9b7a980b9 100644 --- a/src/Examples/Examples.csproj +++ b/src/Examples/Examples.csproj @@ -5,8 +5,8 @@ true true - + + net472;netstandard2.0;$(TargetFrameworks) 9.0 diff --git a/src/TorchSharp/Amp/AutocastManager.cs b/src/TorchSharp/Amp/AutocastManager.cs new file mode 100644 index 000000000..d1808d316 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastManager.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public class AutocastManager + { + + } +} diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index ac10ef6ea..060ad64ee 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -11,11 +11,10 @@ namespace TorchSharp.Amp public class GradScaler { private bool Enabled; - private torch.Tensor _scale, _growth_tracker; - private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; + private Dictionary> _per_optimizer_states = new Dictionary>(); //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) @@ -27,7 +26,8 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac BackoffFactor = backoff_factor; GrowthInterval = growth_interval; InitGrowthTracker = 0.0f; - throw new NotImplementedException(); + + throw new NotImplementedException("This need to finish"); } private void LazyInitScaleGrowthTracker(torch.Device dev) @@ -35,6 +35,7 @@ private void LazyInitScaleGrowthTracker(torch.Device dev) _scale = torch.full(0, InitScale, torch.ScalarType.Float32, device: dev); _growth_tracker = torch.full(0, InitGrowthTracker, torch.ScalarType.Int32, device: dev); } + //private Dictionary //private check_scale_growth_tracker public torch.Tensor scale(torch.Tensor output) @@ -140,10 +141,22 @@ private void apply_scale(IList scales) return per_device_found_inf.per_device_tensors; } + private Tuple check_scale_growth_tracker(string name) + { + var fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."; + Debug.Assert(_scale.is_null(), $"Attempted {name} but {nameof(_scale)} is None {fix}"); + Debug.Assert(_growth_tracker.is_null(), $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); + return new Tuple(_scale, _growth_tracker); + } + public void unscale(torch.optim.Optimizer optimizer) { if (!Enabled) return; + + check_scale_growth_tracker(nameof(unscale)); + + } } } \ No newline at end of file diff --git a/src/TorchSharp/NN/Sequential.cs b/src/TorchSharp/NN/Sequential.cs index 711be65d1..2796aa913 100644 --- a/src/TorchSharp/NN/Sequential.cs +++ b/src/TorchSharp/NN/Sequential.cs @@ -31,7 +31,6 @@ public Sequential append(string name, torch.nn.IModule module) Add(name, module); return this; } - internal void Add(string name, torch.nn.IModule sm) { var submodule = (torch.nn.Module)sm; @@ -51,6 +50,12 @@ public Sequential append(torch.nn.IModule module) return this; } + public Sequential append(IList> modules) + { + for (int i = 0; i < modules.Count; i++) + Add(_modules.Count.ToString(), modules[i]); + return this; + } internal void Add(torch.nn.IModule module) { var name = _modules.Count.ToString(); diff --git a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs index 67c28bd10..eee072261 100644 --- a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs +++ b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs @@ -165,7 +165,7 @@ private static Tensor _tensor_generic(Array rawArray, ReadOnlySpan dimensi unsafe { void *ptr = null; - IntPtr iPtr = (IntPtr)ptr; + IntPtr iPtr = (IntPtr)ptr; //Warning: Unused variable fixed (long* shape = dimensions) { var handle = THSTensor_new(dataArrayAddr, deleter, (IntPtr)shape, dimensions.Length, origType, (sbyte)dtype.Value, (int)device.type, device.index, requires_grad); @@ -224,8 +224,8 @@ private static Tensor _tensor_generic(Memory rawArray, ReadOnlySpan deleters.TryAdd(deleter, deleter); // keep the delegate alive void *ptr = null; - IntPtr iPtr = (IntPtr)ptr; - + IntPtr iPtr = (IntPtr)ptr; //Warning: Unused variable + fixed (long* shape = dimensions) { var handle = THSTensor_new(dataArrayAddr, deleter, (IntPtr)shape, dimensions.Length, origType, (sbyte)dtype.Value, (int)device.type, device.index, requires_grad); diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index d0d7ace08..808aa1ccf 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -114,7 +114,7 @@ - + @@ -123,7 +123,6 @@ - true true From 3c42a87bf4770d04fda2f67fc7ce1bca826b5598 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 19 Jul 2024 17:00:57 -0300 Subject: [PATCH 16/43] AMPManager --- src/TorchSharp/Amp/AMPManager.cs | 89 ++++++++++++++++++ src/TorchSharp/Amp/AutocastDisposeManager.cs | 29 ------ src/TorchSharp/Amp/AutocastDisposeScope.cs | 23 ----- src/TorchSharp/Amp/AutocastManager.cs | 11 --- src/TorchSharp/Amp/AutocastMode.cs | 97 ++++++++++++++------ src/TorchSharp/Amp/GradScaler.cs | 7 +- src/TorchSharp/NN/Convolution/Conv1D.cs | 28 +++++- src/TorchSharp/NN/Convolution/Conv2D.cs | 60 +++++++++++- src/TorchSharp/NN/Module.cs | 10 ++ src/TorchSharp/NN/Parameter.cs | 13 +++ src/TorchSharp/Tensor/Tensor.cs | 13 ++- src/TorchSharp/Utils/ModuleInfo.cs | 46 ++++++++++ src/TorchSharp/Utils/UnorderedMap.cs | 55 +++++++++++ 13 files changed, 376 insertions(+), 105 deletions(-) create mode 100644 src/TorchSharp/Amp/AMPManager.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposeManager.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposeScope.cs delete mode 100644 src/TorchSharp/Amp/AutocastManager.cs create mode 100644 src/TorchSharp/Utils/ModuleInfo.cs create mode 100644 src/TorchSharp/Utils/UnorderedMap.cs diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs new file mode 100644 index 000000000..1ac24476a --- /dev/null +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; +using Google.Protobuf.WellKnownTypes; +using TorchSharp.PInvoke; +using TorchSharp.Utils; + +namespace TorchSharp.Amp +{ + public class AMPManager : IDisposable + { + //TODO: Make Singleton THREADSAFE + public UnorderedMap TensorPtrs; + private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); + + private AMPManager() { } + + public bool IsEnabled => autocastMode.Enabled; + private static AMPManager Instance; + //bool disposedValue; + + public static AMPManager GetInstance() + { + return Instance ??= new AMPManager(); + } + + private void To(IntPtr ptr, torch.ScalarType type) + { + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + torch.CheckForErrors(); + } + private void Revert() + { + using (var enumer = TensorPtrs.GetEnumerator()) + while (enumer.MoveNext()) + To(enumer.Current.Key, enumer.Current.Value); + TensorPtrs.Clear(); //Or should use Stack for POP?? May better performance and better ram usage + } + + public void Add(IntPtr ptr) + { + if (!autocastMode.Enabled) { + + if (TensorPtrs.ContainsKey(ptr)) + To(ptr, TensorPtrs[ptr]); + return; + } + + TensorPtrs[ptr] = (torch.ScalarType)NativeMethods.THSTensor_type(ptr); + To(ptr, autocastMode.GetFastType()); //TODO: Set scalar autocast + } + + public IDisposable Enter() + { + return null; + } + protected virtual void Dispose(bool disposing) + { + Revert(); + autocastMode.Dispose(); + /*if (!disposedValue) { + if (disposing) { + + + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + }*/ + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + ~AMPManager() + { + Dispose(false); + } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposeManager.cs b/src/TorchSharp/Amp/AutocastDisposeManager.cs deleted file mode 100644 index 83c31f335..000000000 --- a/src/TorchSharp/Amp/AutocastDisposeManager.cs +++ /dev/null @@ -1,29 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public class AutocastDisposeManager - { - - /*[ThreadStatic] private static AutocastDisposeManager _threadAutocastSingleton; - - internal static AutocastDisposeManager ThreadAutocastSingleton => _threadAutocastSingleton ??= new AutocastDisposeManager(); - - internal AutocastDisposeScope CurrentAutocastDispose; - //internal HashSet Modules = new List(); - public AutocastDisposeManager() - { - CurrentAutocastDispose = new AutocastDisposeScope(this); - } - internal AutocastDisposeScope RegisterTensorAutocastScope(torch.Tensor t) - { - if (CurrentAutocastDispose == null) - return null; - CurrentAutocastDispose.Tensors.Add(t); - return CurrentAutocastDispose; - }*/ - - } -} diff --git a/src/TorchSharp/Amp/AutocastDisposeScope.cs b/src/TorchSharp/Amp/AutocastDisposeScope.cs deleted file mode 100644 index 8f5df9490..000000000 --- a/src/TorchSharp/Amp/AutocastDisposeScope.cs +++ /dev/null @@ -1,23 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public sealed class AutocastDisposeScope : IDisposable - { - //private AutocastDisposeManager autocastDisposeManager; - public bool IsEnabled; - /*internal AutocastMode autocastMode = AutocastMode.GetInstance(); - internal HashSet Tensors = new HashSet(); - public AutocastDisposeScope(AutocastDisposeManager autocastDisposeManager) - { - this.autocastDisposeManager = autocastDisposeManager; - IsEnabled = true; - }*/ - public void Dispose() - { - IsEnabled = false; - } - } -} diff --git a/src/TorchSharp/Amp/AutocastManager.cs b/src/TorchSharp/Amp/AutocastManager.cs deleted file mode 100644 index d1808d316..000000000 --- a/src/TorchSharp/Amp/AutocastManager.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public class AutocastManager - { - - } -} diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 07c8149d2..0287e02d6 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Security.Cryptography; using System.Text; using System.Threading.Tasks; @@ -17,22 +18,33 @@ public static torch.Tensor AutoCast(this torch.Tensor input) public sealed class AutocastMode : IDisposable { //NEED "Register" all tensor in scope for uncasting outer-scope - private bool Enabled, Prev; + internal bool Enabled, Prev; //private torch.ScalarType Dtype = torch.ScalarType.Float32; - private torch.ScalarType fast_dtype = torch.ScalarType.Float32; - private torch.Device Device = new torch.Device(DeviceType.CUDA); + internal torch.ScalarType fast_dtype = torch.ScalarType.Float32; + public torch.Device Device = new torch.Device(DeviceType.CUDA); private static AutocastMode instance; + bool disposedValue; + /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) - { - if(instance ==null) - instance = new AutocastMode(dev, dtype, enabled, cache_enabled); - return instance; - }*/ +{ +if(instance ==null) +instance = new AutocastMode(dev, dtype, enabled, cache_enabled); +return instance; +}*/ public static AutocastMode GetInstance() { return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); } + public torch.ScalarType GetFastType() + { + var ft = torch.ScalarType.Float32; + if (Device.type == DeviceType.CUDA) + ft = torch.get_autocast_gpu_dtype(); + if (Device.type == DeviceType.CPU) + ft = torch.get_autocast_cpu_dtype(); + return ft; + } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { //var la = torch.tensor(9); @@ -78,32 +90,57 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) return tensor; return tensor.to(fast_dtype, tensor.device); } - /*public IDisposable Enter() - { - return this; - }*/ - public void Dispose() + private void Dispose(bool disposing) { - this.Enabled = false; - if (Device.type == DeviceType.CUDA) { - if(torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - torch.set_autocast_gpu_dtype(this.fast_dtype); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } + if (!disposedValue) { + if (disposing) { - if (Device.type == DeviceType.CPU) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cpu_dtype(this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); + this.Enabled = false; + if (Device.type == DeviceType.CUDA) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + torch.set_autocast_gpu_dtype(this.fast_dtype); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + + if (Device.type == DeviceType.CPU) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + //throw new NotImplementedException(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; } - //throw new NotImplementedException(); } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~AutocastMode() + // { + // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + /*public IDisposable Enter() +{ + + return this; +}*/ } } diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index 060ad64ee..899c295cb 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -13,7 +13,6 @@ public class GradScaler private bool Enabled; private torch.Tensor _scale, _growth_tracker; private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; - private Dictionary> _per_optimizer_states = new Dictionary>(); //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, @@ -54,9 +53,9 @@ public torch.Tensor scale(torch.Tensor output) } private class MultiDeviceReplicator { - private torch.Tensor master; + private readonly torch.Tensor master; - internal Dictionary per_device_tensors = new Dictionary(); + internal readonly Dictionary per_device_tensors = new Dictionary(); public MultiDeviceReplicator(torch.Tensor master_tensor) { master = master_tensor; @@ -155,8 +154,6 @@ public void unscale(torch.optim.Optimizer optimizer) return; check_scale_growth_tracker(nameof(unscale)); - - } } } \ No newline at end of file diff --git a/src/TorchSharp/NN/Convolution/Conv1D.cs b/src/TorchSharp/NN/Convolution/Conv1D.cs index 9e9706e07..cf381af20 100644 --- a/src/TorchSharp/NN/Convolution/Conv1D.cs +++ b/src/TorchSharp/NN/Convolution/Conv1D.cs @@ -27,6 +27,10 @@ namespace Modules { public abstract class Convolution : torch.nn.Module { + internal long _dimension, _in_channel, _out_channel, _kernel,_stride, _padding,_dilation,_groups; + internal PaddingModes _paddingModes; + internal (long, long)? _kernels, _strides, _paddings, _dilations; + internal bool _bias; protected Convolution(IntPtr handle, IntPtr boxedHandle, long input_channels) : base(handle, boxedHandle) { this.input_channels = input_channels; @@ -113,7 +117,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv1d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv1d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); } /// @@ -135,7 +149,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding == Padding.Valid ? 0 : -1, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv1d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv1d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = (long)padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); } public static partial class functional diff --git a/src/TorchSharp/NN/Convolution/Conv2D.cs b/src/TorchSharp/NN/Convolution/Conv2D.cs index 28b37eef2..1143db639 100644 --- a/src/TorchSharp/NN/Convolution/Conv2D.cs +++ b/src/TorchSharp/NN/Convolution/Conv2D.cs @@ -12,8 +12,37 @@ namespace Modules { public sealed class Conv2d : Convolution { + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels) : base(handle, boxedHandle, input_channels) { } + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels, long in_channels, long out_channels, long kernelSize, long padding, long stride = 1, long dilation = 1, PaddingModes padding_mode = PaddingModes.Zeros, long groups = 1, bool bias = true) + : base(handle, boxedHandle, input_channels) + { + _dimension = 2; //because is conv 2D; 2 dimension + _in_channel = in_channels; + _out_channel = out_channels; + _kernel = kernelSize; + _stride = stride; + _padding = padding; + _dilation = dilation; + _paddingModes = padding_mode; + _groups = groups; + _bias = bias; + } + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels, long in_channels, long out_channels, (long, long) kernelSize, Padding padding, (long, long)? stride = null, (long, long)? dilation = null, PaddingModes padding_mode = PaddingModes.Zeros, long groups = 1, bool bias = true) + : base(handle, boxedHandle, input_channels) + { + _dimension = 2; //because is conv 2D; 2 dimension + _in_channel = in_channels; + _out_channel = out_channels; + _kernels = kernelSize; + _strides = stride; + _padding = (long)padding; + _dilations = dilation; + _paddingModes = padding_mode; + _groups = groups; + _bias = bias; + } public override Tensor forward(Tensor input) { if (ValidateShape(input, 2)) { @@ -78,7 +107,19 @@ public static Conv2d Conv2d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv2d_ctor(in_channels, out_channels, kernelSize, stride, padding, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + + return new Conv2d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); + //return conv2d.MoveModule(device, dtype); } /// @@ -104,7 +145,17 @@ public static Conv2d Conv2d(long in_channels, long out_channels, (long, long) ke var res = THSNN_Conv2d_ctor_1(in_channels, out_channels, kernelSize.Item1, kernelSize.Item2, stride.Value.Item1, stride.Value.Item2, padding.Value.Item1, padding.Value.Item2, dilation.Value.Item1, dilation.Value.Item2, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv2d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernels = kernelSize, + _strides = stride, + _paddings = padding, + _dilations = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); } /// @@ -126,7 +177,7 @@ public static Conv2d Conv2d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv2d_ctor(in_channels, out_channels, kernelSize, stride, padding == Padding.Valid ? 0 : -1, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv2d(res, boxedHandle, in_channels, in_channels, out_channels, kernelSize, (long)padding, stride, dilation, padding_mode, groups, bias).MoveModule(device, dtype); } /// @@ -151,7 +202,8 @@ public static Conv2d Conv2d(long in_channels, long out_channels, (long, long) ke var res = THSNN_Conv2d_ctor_1(in_channels, out_channels, kernelSize.Item1, kernelSize.Item2, stride.Value.Item1, stride.Value.Item2, padding == Padding.Valid ? 0 : -1, 0, dilation.Value.Item1, dilation.Value.Item2, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + + return new Conv2d(res, boxedHandle, in_channels, in_channels, out_channels, kernelSize, padding,stride, dilation, padding_mode ,groups,bias).MoveModule(device, dtype); } public static partial class functional diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs index 1398ab4e3..19b64d8a9 100644 --- a/src/TorchSharp/NN/Module.cs +++ b/src/TorchSharp/NN/Module.cs @@ -778,6 +778,16 @@ public virtual void register_module(string name, Module submodule) } } + public virtual void unregister_module(string name) + { + if (_internal_submodules.ContainsKey(name)) + _internal_submodules.Remove(name); + } + public virtual void unregister_module(Module module) + { + unregister_module(module.GetName()); + } + protected void ConditionallyRegisterParameter(string name, Tensor value) { if (value is null) { diff --git a/src/TorchSharp/NN/Parameter.cs b/src/TorchSharp/NN/Parameter.cs index 81e9051d8..cd3b66b44 100644 --- a/src/TorchSharp/NN/Parameter.cs +++ b/src/TorchSharp/NN/Parameter.cs @@ -36,6 +36,19 @@ internal Parameter(System.IntPtr handle) : base(handle) { } + /// + /// For prevent cast as torch.Tensor i provided the data method for get Tensor. + /// https://github.com/ultralytics/ultralytics/blob/dcde8bd23d12bbb4867ebf45f936dd37c2445974/ultralytics/nn/modules/conv.py#L78 + /// + /// + public torch.Tensor data { + get { + return new Tensor(base.handle); + } + set { + handle = value.handle; + } + } }; } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 167fcb738..601544619 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -34,11 +34,13 @@ public partial class Tensor : IDisposable static long _peakCount = 0; internal DisposeScope? OwningDisposeScope { get; set; } + //internal AutocastDisposeScope? AutocastDisposeScope; internal Tensor(IntPtr handle) { this.handle = handle; - + if (AMPManager.GetInstance().IsEnabled) + AMPManager.GetInstance().Add(handle); //MMM.... This is the more abstract of any method Tensor right???? /*if (_totalCount > 0) { //have used AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); @@ -922,6 +924,15 @@ public Tensor to(ScalarType type, torch.Device device, bool copy = false, bool d return new Tensor(res); } + /*internal static void to(this IntPtr ptr, ScalarType type) + { + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + CheckForErrors(); + if (disposeAfter) + this.Dispose(); + return new Tensor(res); + }*/ public Tensor to(torch.Device device, ScalarType type, bool non_blocking) { torch.InitializeDevice(device); diff --git a/src/TorchSharp/Utils/ModuleInfo.cs b/src/TorchSharp/Utils/ModuleInfo.cs new file mode 100644 index 000000000..800dc977d --- /dev/null +++ b/src/TorchSharp/Utils/ModuleInfo.cs @@ -0,0 +1,46 @@ +using System; +using System.Collections.Generic; +using System.Text; +using TorchSharp.Modules; + +namespace TorchSharp.Utils +{ + public static class ModuleInfo + { + + public class ConvInfo + { + public long Dimension,InChannel,OutChannel, PaddingMode; + public object Kernel, Dilation, Stride; + public ConvInfo(Convolution conv) + { + InChannel = conv._in_channel; + OutChannel = conv._out_channel; + if (conv._kernels.HasValue) { + Kernel = conv._kernels.Value; + } + else { + Kernel = conv._kernel; + } + + //TODO: Make all props; + throw new NotImplementedException("Need finish"); + } + + public (long, long)? CastTuple(object obj) + { + if (obj.GetType() == typeof((long,long))) + return obj as (long, long)?; + if (obj is long l) + return (l, l); + return null; + } + + public long CastValue(object obj) + { + var v = CastTuple(obj); + return v?.Item1 ?? 0; + } + } + } +} diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs new file mode 100644 index 000000000..7db88a94c --- /dev/null +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -0,0 +1,55 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Utils +{ + public class UnorderedMap : Dictionary, IDisposable + { + bool disposedValue; + + public UnorderedMap() { } + public new TValue this[TKey tk] { + get { + if (this.ContainsKey(tk)) + return base[tk]; + return default(TValue); + } + set { + if (!this.ContainsKey(tk)) { + this.Add(tk, value); + return; + } + base[tk] = value; + } + } + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) { + if (disposing) { + base.Clear(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + } + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~UnorderedMap() + // { + // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + } +} From 7cd7f9cfecfdb2e3958e1638f89899638d99836e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sat, 20 Jul 2024 00:13:24 -0300 Subject: [PATCH 17/43] Amp --- src/TorchSharp/Amp/AMPManager.cs | 4 ++-- src/TorchSharp/Tensor/Tensor.cs | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 1ac24476a..29c5da90c 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -11,7 +11,7 @@ namespace TorchSharp.Amp public class AMPManager : IDisposable { //TODO: Make Singleton THREADSAFE - public UnorderedMap TensorPtrs; + public UnorderedMap TensorPtrs= new UnorderedMap(); private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); private AMPManager() { } @@ -36,7 +36,6 @@ private void Revert() using (var enumer = TensorPtrs.GetEnumerator()) while (enumer.MoveNext()) To(enumer.Current.Key, enumer.Current.Value); - TensorPtrs.Clear(); //Or should use Stack for POP?? May better performance and better ram usage } public void Add(IntPtr ptr) @@ -60,6 +59,7 @@ protected virtual void Dispose(bool disposing) { Revert(); autocastMode.Dispose(); + TensorPtrs.Dispose(); /*if (!disposedValue) { if (disposing) { diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 601544619..0e5b76537 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -39,8 +39,9 @@ public partial class Tensor : IDisposable internal Tensor(IntPtr handle) { this.handle = handle; - if (AMPManager.GetInstance().IsEnabled) - AMPManager.GetInstance().Add(handle); //MMM.... This is the more abstract of any method Tensor right???? + /*if (AMPManager.GetInstance().IsEnabled) + AMPManager.GetInstance().Add(handle); //MMM.... This is the more abstract of any method Tensor right????*/ + /*if (_totalCount > 0) { //have used AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); From 0c2769a28ab805dc14fc5344e9e47c8edc4e239e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 21 Jul 2024 14:50:54 -0300 Subject: [PATCH 18/43] fix azure devops? --- .gitignore | 24 +- .../FileRestitcher.csproj.nuget.dgspec.json | 96 ++++++ .../FileRestitcher.csproj.nuget.g.props | 16 + .../FileRestitcher.csproj.nuget.g.targets | 6 + .../project.assets.json | 276 ++++++++++++++++++ .../project.nuget.cache | 11 + src/Native/build.cmd | 151 ++++++++++ src/TorchSharp/NN/Linear.cs | 19 +- src/TorchVision/models/ResNet.cs | 4 +- 9 files changed, 576 insertions(+), 27 deletions(-) create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache create mode 100644 src/Native/build.cmd diff --git a/.gitignore b/.gitignore index 875954e1a..ed21b9d11 100644 --- a/.gitignore +++ b/.gitignore @@ -272,26 +272,4 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py -/src/Native/CMakeFiles -/src/Native/LibTorchSharp/CMakeFiles -/src/Native/ALL_BUILD.vcxproj -/src/Native/ALL_BUILD.vcxproj.filters -/src/Native/build.cmd -/src/Native/CMakeCache.txt -/src/Native/cmake_install.cmake -/src/Native/INSTALL.vcxproj -/src/Native/INSTALL.vcxproj.filters -/src/Native/install_manifest.txt -/src/Native/LibTorchSharp/ALL_BUILD.vcxproj -/src/Native/LibTorchSharp/ALL_BUILD.vcxproj.filters -/src/Native/LibTorchSharp/cmake_install.cmake -/src/Native/LibTorchSharp/INSTALL.vcxproj -/src/Native/LibTorchSharp/INSTALL.vcxproj.filters -/src/Native/LibTorchSharp/LibTorchSharp.sln -/src/Native/LibTorchSharp/LibTorchSharp.vcxproj -/src/Native/LibTorchSharp/LibTorchSharp.vcxproj.filters -/src/Native/Project.sln -/src/Native/ZERO_CHECK.vcxproj -/src/Native/ZERO_CHECK.vcxproj.filters -/src/FSharp.Examples/FSharp.Examples.fsproj -/pkg/FileRestitcher +.vscode/settings.json \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json new file mode 100644 index 000000000..fc625189a --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json @@ -0,0 +1,96 @@ +{ + "format": 1, + "restore": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": {} + }, + "projects": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "projectName": "FileRestitcher", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + ], + "configFilePaths": [ + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + ], + "originalTargetFrameworks": [ + "net6.0", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "projectReferences": {} + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": {} + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + } + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "frameworkReferences": { + "Microsoft.NETCore.App": { + "privateAssets": "all" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + } + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props new file mode 100644 index 000000000..1e9807451 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props @@ -0,0 +1,16 @@ + + + + True + NuGet + $(MSBuildThisFileDirectory)project.assets.json + $(UserProfile)\.nuget\packages\ + C:\Users\Dimitri\.nuget\packages\;C:\Program Files (x86)\Progress\ToolboxNuGetPackages + PackageReference + 6.8.0 + + + + + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets new file mode 100644 index 000000000..2192724bc --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json new file mode 100644 index 000000000..1f13839e4 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json @@ -0,0 +1,276 @@ +{ + "version": 3, + "targets": { + ".NETStandard,Version=v2.0": { + "Microsoft.NETCore.Platforms/1.1.0": { + "type": "package", + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + } + }, + "NETStandard.Library/2.0.3": { + "type": "package", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0" + }, + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + }, + "build": { + "build/netstandard2.0/NETStandard.Library.targets": {} + } + } + }, + "net6.0": {} + }, + "libraries": { + "Microsoft.NETCore.Platforms/1.1.0": { + "sha512": "kz0PEW2lhqygehI/d6XsPCQzD7ff7gUJaVGPVETX611eadGsA3A877GdSlU0LRVMCTH/+P3o2iDTak+S08V2+A==", + "type": "package", + "path": "microsoft.netcore.platforms/1.1.0", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "ThirdPartyNotices.txt", + "dotnet_library_license.txt", + "lib/netstandard1.0/_._", + "microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "microsoft.netcore.platforms.nuspec", + "runtime.json" + ] + }, + "NETStandard.Library/2.0.3": { + "sha512": "st47PosZSHrjECdjeIzZQbzivYBJFv6P2nv4cj2ypdI204DO+vZ7l5raGMiX4eXMJ53RfOIg+/s4DHVZ54Nu2A==", + "type": "package", + "path": "netstandard.library/2.0.3", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "LICENSE.TXT", + "THIRD-PARTY-NOTICES.TXT", + "build/netstandard2.0/NETStandard.Library.targets", + "build/netstandard2.0/ref/Microsoft.Win32.Primitives.dll", + "build/netstandard2.0/ref/System.AppContext.dll", + "build/netstandard2.0/ref/System.Collections.Concurrent.dll", + "build/netstandard2.0/ref/System.Collections.NonGeneric.dll", + "build/netstandard2.0/ref/System.Collections.Specialized.dll", + "build/netstandard2.0/ref/System.Collections.dll", + "build/netstandard2.0/ref/System.ComponentModel.Composition.dll", + "build/netstandard2.0/ref/System.ComponentModel.EventBasedAsync.dll", + "build/netstandard2.0/ref/System.ComponentModel.Primitives.dll", + "build/netstandard2.0/ref/System.ComponentModel.TypeConverter.dll", + "build/netstandard2.0/ref/System.ComponentModel.dll", + "build/netstandard2.0/ref/System.Console.dll", + "build/netstandard2.0/ref/System.Core.dll", + "build/netstandard2.0/ref/System.Data.Common.dll", + "build/netstandard2.0/ref/System.Data.dll", + "build/netstandard2.0/ref/System.Diagnostics.Contracts.dll", + "build/netstandard2.0/ref/System.Diagnostics.Debug.dll", + "build/netstandard2.0/ref/System.Diagnostics.FileVersionInfo.dll", + "build/netstandard2.0/ref/System.Diagnostics.Process.dll", + "build/netstandard2.0/ref/System.Diagnostics.StackTrace.dll", + "build/netstandard2.0/ref/System.Diagnostics.TextWriterTraceListener.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tools.dll", + "build/netstandard2.0/ref/System.Diagnostics.TraceSource.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tracing.dll", + "build/netstandard2.0/ref/System.Drawing.Primitives.dll", + "build/netstandard2.0/ref/System.Drawing.dll", + "build/netstandard2.0/ref/System.Dynamic.Runtime.dll", + "build/netstandard2.0/ref/System.Globalization.Calendars.dll", + "build/netstandard2.0/ref/System.Globalization.Extensions.dll", + "build/netstandard2.0/ref/System.Globalization.dll", + "build/netstandard2.0/ref/System.IO.Compression.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.Compression.ZipFile.dll", + "build/netstandard2.0/ref/System.IO.Compression.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.DriveInfo.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Primitives.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Watcher.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.IsolatedStorage.dll", + "build/netstandard2.0/ref/System.IO.MemoryMappedFiles.dll", + "build/netstandard2.0/ref/System.IO.Pipes.dll", + "build/netstandard2.0/ref/System.IO.UnmanagedMemoryStream.dll", + "build/netstandard2.0/ref/System.IO.dll", + "build/netstandard2.0/ref/System.Linq.Expressions.dll", + "build/netstandard2.0/ref/System.Linq.Parallel.dll", + "build/netstandard2.0/ref/System.Linq.Queryable.dll", + "build/netstandard2.0/ref/System.Linq.dll", + "build/netstandard2.0/ref/System.Net.Http.dll", + "build/netstandard2.0/ref/System.Net.NameResolution.dll", + "build/netstandard2.0/ref/System.Net.NetworkInformation.dll", + "build/netstandard2.0/ref/System.Net.Ping.dll", + "build/netstandard2.0/ref/System.Net.Primitives.dll", + "build/netstandard2.0/ref/System.Net.Requests.dll", + "build/netstandard2.0/ref/System.Net.Security.dll", + "build/netstandard2.0/ref/System.Net.Sockets.dll", + "build/netstandard2.0/ref/System.Net.WebHeaderCollection.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.Client.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.dll", + "build/netstandard2.0/ref/System.Net.dll", + "build/netstandard2.0/ref/System.Numerics.dll", + "build/netstandard2.0/ref/System.ObjectModel.dll", + "build/netstandard2.0/ref/System.Reflection.Extensions.dll", + "build/netstandard2.0/ref/System.Reflection.Primitives.dll", + "build/netstandard2.0/ref/System.Reflection.dll", + "build/netstandard2.0/ref/System.Resources.Reader.dll", + "build/netstandard2.0/ref/System.Resources.ResourceManager.dll", + "build/netstandard2.0/ref/System.Resources.Writer.dll", + "build/netstandard2.0/ref/System.Runtime.CompilerServices.VisualC.dll", + "build/netstandard2.0/ref/System.Runtime.Extensions.dll", + "build/netstandard2.0/ref/System.Runtime.Handles.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.RuntimeInformation.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.dll", + "build/netstandard2.0/ref/System.Runtime.Numerics.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Formatters.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Json.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Primitives.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Xml.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.dll", + "build/netstandard2.0/ref/System.Runtime.dll", + "build/netstandard2.0/ref/System.Security.Claims.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Algorithms.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Csp.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Encoding.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Primitives.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.X509Certificates.dll", + "build/netstandard2.0/ref/System.Security.Principal.dll", + "build/netstandard2.0/ref/System.Security.SecureString.dll", + "build/netstandard2.0/ref/System.ServiceModel.Web.dll", + "build/netstandard2.0/ref/System.Text.Encoding.Extensions.dll", + "build/netstandard2.0/ref/System.Text.Encoding.dll", + "build/netstandard2.0/ref/System.Text.RegularExpressions.dll", + "build/netstandard2.0/ref/System.Threading.Overlapped.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.Parallel.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.dll", + "build/netstandard2.0/ref/System.Threading.Thread.dll", + "build/netstandard2.0/ref/System.Threading.ThreadPool.dll", + "build/netstandard2.0/ref/System.Threading.Timer.dll", + "build/netstandard2.0/ref/System.Threading.dll", + "build/netstandard2.0/ref/System.Transactions.dll", + "build/netstandard2.0/ref/System.ValueTuple.dll", + "build/netstandard2.0/ref/System.Web.dll", + "build/netstandard2.0/ref/System.Windows.dll", + "build/netstandard2.0/ref/System.Xml.Linq.dll", + "build/netstandard2.0/ref/System.Xml.ReaderWriter.dll", + "build/netstandard2.0/ref/System.Xml.Serialization.dll", + "build/netstandard2.0/ref/System.Xml.XDocument.dll", + "build/netstandard2.0/ref/System.Xml.XPath.XDocument.dll", + "build/netstandard2.0/ref/System.Xml.XPath.dll", + "build/netstandard2.0/ref/System.Xml.XmlDocument.dll", + "build/netstandard2.0/ref/System.Xml.XmlSerializer.dll", + "build/netstandard2.0/ref/System.Xml.dll", + "build/netstandard2.0/ref/System.dll", + "build/netstandard2.0/ref/mscorlib.dll", + "build/netstandard2.0/ref/netstandard.dll", + "build/netstandard2.0/ref/netstandard.xml", + "lib/netstandard1.0/_._", + "netstandard.library.2.0.3.nupkg.sha512", + "netstandard.library.nuspec" + ] + } + }, + "projectFileDependencyGroups": { + ".NETStandard,Version=v2.0": [ + "NETStandard.Library >= 2.0.3" + ], + "net6.0": [] + }, + "packageFolders": { + "C:\\Users\\Dimitri\\.nuget\\packages\\": {}, + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages": {} + }, + "project": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "projectName": "FileRestitcher", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + ], + "configFilePaths": [ + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + ], + "originalTargetFrameworks": [ + "net6.0", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "projectReferences": {} + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": {} + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + } + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "frameworkReferences": { + "Microsoft.NETCore.App": { + "privateAssets": "all" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache new file mode 100644 index 000000000..2e00179eb --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache @@ -0,0 +1,11 @@ +{ + "version": 2, + "dgSpecHash": "GQbFl6JNwUfeVMRAQIxv+0FH84dIn8y+ZsWz3KR/dVMkJNNXpooEgJaT2UFkLhFNLf08uGLF+sf+HuE1qkdsqQ==", + "success": true, + "projectFilePath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "expectedPackageFiles": [ + "C:\\Users\\Dimitri\\.nuget\\packages\\microsoft.netcore.platforms\\1.1.0\\microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\netstandard.library\\2.0.3\\netstandard.library.2.0.3.nupkg.sha512" + ], + "logs": [] +} \ No newline at end of file diff --git a/src/Native/build.cmd b/src/Native/build.cmd new file mode 100644 index 000000000..96ec8cacf --- /dev/null +++ b/src/Native/build.cmd @@ -0,0 +1,151 @@ +@if not defined _echo @echo off +setlocal + +:: Store current script directory before %~dp0 gets affected by another process later. +set __currentScriptDir=%~dp0 + +:SetupArgs +:: Initialize the args that will be passed to cmake +set __binDir=%__currentScriptDir%..\..\bin +set __rootDir=%__currentScriptDir%..\.. +set __CMakeBinDir="" +set __IntermediatesDir="" +set __BuildArch=x64 +set __VCBuildArch=x86_amd64 +set CMAKE_BUILD_TYPE=Debug +set LIBTORCH_PATH="" + +:Arg_Loop +if [%1] == [] goto :ToolsVersion +if /i [%1] == [Release] ( set CMAKE_BUILD_TYPE=Release&&shift&goto Arg_Loop) +if /i [%1] == [Debug] ( set CMAKE_BUILD_TYPE=Debug&&shift&goto Arg_Loop) + +if /i [%1] == [x86] ( set __BuildArch=x86&&set __VCBuildArch=x86&&shift&goto Arg_Loop) +if /i [%1] == [x64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) +if /i [%1] == [amd64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) + +if /i [%1] == [--libtorchpath] ( set LIBTORCH_PATH=%2&&shift&goto Arg_Loop) + +shift +goto :Arg_Loop + +:ToolsVersion +if defined VisualStudioVersion goto :RunVCVars + +set _VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" +if exist %_VSWHERE% ( + for /f "usebackq tokens=*" %%i in (`%_VSWHERE% -latest -prerelease -property installationPath`) do set _VSCOMNTOOLS=%%i\Common7\Tools +) +if not exist "%_VSCOMNTOOLS%" set _VSCOMNTOOLS=%VS140COMNTOOLS% +if not exist "%_VSCOMNTOOLS%" goto :MissingVersion + + +set "VSCMD_START_DIR=%__currentScriptDir%" +call "%_VSCOMNTOOLS%\VsDevCmd.bat" + +:RunVCVars +if "%VisualStudioVersion%"=="17.0" ( + goto :VS2022 +) else if "%VisualStudioVersion%"=="16.0" ( + goto :VS2019 +) else if "%VisualStudioVersion%"=="15.0" ( + goto :VS2017 +) else if "%VisualStudioVersion%"=="14.0" ( + goto :VS2015 +) + +:MissingVersion +:: Can't find VS 2015, 2017 or 2019 +echo Error: Visual Studio 2015, 2017 or 2019 required +echo Please see https://github.com/dotnet/machinelearning/tree/master/Documentation for build instructions. +exit /b 1 + +:VS2022 +:: Setup vars for VS2022 +set __PlatformToolset=v143 +set __VSVersion=17 2022 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2019 +:: Setup vars for VS2019 +set __PlatformToolset=v142 +set __VSVersion=16 2019 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2017 +:: Setup vars for VS2017 +set __PlatformToolset=v141 +set __VSVersion=15 2017 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS150COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2015 +:: Setup vars for VS2015build +set __PlatformToolset=v140 +set __VSVersion=14 2015 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS140COMNTOOLS%..\..\VC\vcvarsall.bat" %__VCBuildArch% +) + +:SetupDirs +:: Setup to cmake the native components +echo Commencing native build of dotnet/machinelearning +echo. + +if %__CMakeBinDir% == "" ( + set "__CMakeBinDir=%__binDir%\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" +) +if %__IntermediatesDir% == "" ( + set "__IntermediatesDir=%__binDir%\obj\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" +) +set "__CMakeBinDir=%__CMakeBinDir:\=/%" +set "__IntermediatesDir=%__IntermediatesDir:\=/%" + +:: Check that the intermediate directory exists so we can place our cmake build tree there +if not exist "%__IntermediatesDir%" md "%__IntermediatesDir%" + +:: Regenerate the VS solution + +set "__gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat" +set "__source-code-path=%__currentScriptDir%" + +echo Calling "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% +pushd "%__IntermediatesDir%" +call "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% +popd + +:CheckForProj +:: Check that the project created by Cmake exists +if exist "%__IntermediatesDir%\INSTALL.vcxproj" goto BuildNativeProj +goto :Failure + +:BuildNativeProj +:: Build the project created by Cmake +set __msbuildArgs=/p:Platform=%__BuildArch% /p:PlatformToolset="%__PlatformToolset%" + +cd %__rootDir% + +echo msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% +call msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% +IF ERRORLEVEL 1 ( + goto :Failure +) +echo Done building Native components +exit /B 0 + +:Failure +:: Build failed +echo Failed to generate native component build project! +exit /b 1 \ No newline at end of file diff --git a/src/TorchSharp/NN/Linear.cs b/src/TorchSharp/NN/Linear.cs index 4595582d7..e1b7b205c 100644 --- a/src/TorchSharp/NN/Linear.cs +++ b/src/TorchSharp/NN/Linear.cs @@ -11,10 +11,25 @@ namespace TorchSharp namespace Modules { + public class LinearInfo + { + public long InFeatures { get; } + public long OutFeatures { get; } + public LinearInfo(long inFeatures, long outFeatures) + { + InFeatures = inFeatures; + OutFeatures = outFeatures; + } + } public sealed class Linear : torch.nn.Module { - internal Linear(IntPtr handle, IntPtr boxedHandle) : base(handle, boxedHandle) + public LinearInfo linearInfo; + /*internal Linear(IntPtr handle, IntPtr boxedHandle) : base(handle, boxedHandle) + { + }*/ + internal Linear(IntPtr handle, IntPtr boxedHandle, long inFeat, long outFeat) : base(handle, boxedHandle) { + linearInfo = new LinearInfo(inFeat, outFeat); } public override Tensor forward(Tensor tensor) @@ -71,7 +86,7 @@ public static Linear Linear(long inputSize, long outputSize, bool hasBias = true var res = THSNN_Linear_ctor(inputSize, outputSize, hasBias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Linear(res, boxedHandle).MoveModule(device, dtype); + return new Linear(res, boxedHandle, inputSize, outputSize).MoveModule(device, dtype); } public static partial class functional diff --git a/src/TorchVision/models/ResNet.cs b/src/TorchVision/models/ResNet.cs index 654d587c3..5eee7e5a2 100644 --- a/src/TorchVision/models/ResNet.cs +++ b/src/TorchVision/models/ResNet.cs @@ -581,7 +581,7 @@ public class ResNet : Module private readonly Module avgpool; private readonly Module flatten; - private readonly Module fc; + public readonly Module fc; private readonly Func> norm_layer; @@ -803,7 +803,7 @@ public ResNet(string name, break; } } - + if (zero_init_residual) { foreach (var (_, m) in named_modules()) { From eafdd1eccea359a27350c8c91af81f2631d0531e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 21 Jul 2024 15:42:50 -0300 Subject: [PATCH 19/43] fix test? --- src/TorchSharp/Utils/FastTensorAccessor.cs | 712 +++++++++++++++++++++ src/TorchSharp/Utils/TensorAccessor.cs | 97 +-- test/TorchSharpTest/TorchSharpTest.csproj | 7 +- 3 files changed, 739 insertions(+), 77 deletions(-) create mode 100644 src/TorchSharp/Utils/FastTensorAccessor.cs diff --git a/src/TorchSharp/Utils/FastTensorAccessor.cs b/src/TorchSharp/Utils/FastTensorAccessor.cs new file mode 100644 index 000000000..142b95d6c --- /dev/null +++ b/src/TorchSharp/Utils/FastTensorAccessor.cs @@ -0,0 +1,712 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.InteropServices; +using static TorchSharp.PInvoke.NativeMethods; + +namespace TorchSharp.Utils +{ + /// + /// TensorAccessor is used to present the contents of a tensor or tensor view to the .NET world as an ordered collection + /// of values that integrates well with things like LINQ and foreach loops in the .NET world. + /// + /// The type of the tensor elements. + public sealed class FastTensorAccessor : IDisposable, IEnumerable where T : unmanaged + { + internal FastTensorAccessor(torch.Tensor tensor) + { + if (tensor.device_type != DeviceType.CPU) { + throw new InvalidOperationException("Reading data from non-CPU memory is not supported. Move or copy the tensor to the cpu before reading."); + } + + var strides = tensor.stride(); + for (var i = 0; i < strides.Length; i++) { + if (strides[i] < 0) + throw new NotImplementedException($"Negative tensor strides are not currently supported. tensor.strides({i}) == {strides[i]}"); + } + + // Get the data from native code. + + unsafe { + var res = THSTensor_data(tensor.Handle); + if (res == IntPtr.Zero) { torch.CheckForErrors(); } + // NOTE: there is no safety here. + _tensor_data_ptr = res; + } + + _tensor = tensor; // Keep the tensor alive now that everything is alright. + } + + /// + /// This is important for performance because only called with CopyTo, CopyFrom. Is not necesary in each invocation call tensor.numel() because that use intensive CPU. + /// This temporary count avoid so much use CPU. The Property act as method. + /// If tensor is for example 640*640*3 = 1.228.800, property invoke 1 millons times!!! + /// If we only want copy is not necesary call that method so many times. + /// For some reason the method numel() use so much cpu. + /// + internal long TempCount = -1; + public long Count => _tensor?.numel() ?? 0; + + public bool IsReadOnly => false; + + public T[] ToArray() + { + if (_tensor.ndim < 2) + return (T[])ToNDArray(); + + var shps = _tensor.shape; + TempCount = 1; + for (int i = 0; i < shps.Length; i++) + TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape + + if (_tensor.is_contiguous()) { //This is very fast. And work VERY WELL + unsafe { + return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); + } + } + var result = new T[TempCount]; + CopyTo(result); + return result; + } + + /// + /// Extract tensor data as a multi-dimensional .NET array, with the same number of dimensions as the tensor. + /// + /// An array object, which should be cast to the concrete array type. + public Array ToNDArray() + { + var shape = _tensor.shape; + var strides = _tensor.stride(); + switch (_tensor.ndim) { + default: + return ToNDArray(shape, strides); + case 0: + unsafe { + var result = new T[1]; + T* ptr = (T*)_tensor_data_ptr; + result[0] = ptr[0]; + return result; + } + case 1: + unsafe { + var result = new T[shape[0]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + result[i0] = ptr[off0]; + } + return result; + } + case 2: + unsafe { + var result = new T[shape[0], shape[1]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + result[i0, i1] = ptr[off1]; + } + } + return result; + } + case 3: + unsafe { + var result = new T[shape[0], shape[1], shape[2]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + result[i0, i1, i2] = ptr[off2]; + } + } + } + return result; + } + case 4: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + result[i0, i1, i2, i3] = ptr[off3]; + } + } + } + } + return result; + } + case 5: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3], shape[4]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + for (long i4 = 0, off4 = off3; i4 < shape[4]; i4++, off4 += strides[4]) { + result[i0, i1, i2, i3, i4] = ptr[off4]; + } + } + } + } + } + return result; + } + case 6: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3], shape[4], shape[5]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + for (long i4 = 0, off4 = off3; i4 < shape[4]; i4++, off4 += strides[4]) { + for (long i5 = 0, off5 = off4; i5 < shape[5]; i5++, off5 += strides[5]) { + result[i0, i1, i2, i3, i4, i5] = ptr[off5]; + } + } + } + } + } + } + return result; + } + } + } + + private Array ToNDArray(long[] shape, long[] strides) + { + Array array = Array.CreateInstance(typeof(T), shape); + long[] indexes = new long[_tensor.ndim]; + long[] off = new long[_tensor.ndim]; + + while (true) { + unsafe { + T* ptr = (T*)_tensor_data_ptr; + array.SetValue(ptr[off[array.Rank - 1]], indexes); + } + + for (int i = array.Rank - 1; i >= 0; i--) { + if (indexes[i] < shape[i] - 1) { + indexes[i]++; + off[i] += strides[i]; + for (int j = i; j < array.Rank - 1; j++) + off[j + 1] = off[j]; + break; + } else { + if (i == 0) { + return array; + } + indexes[i] = 0; + } + } + } + } + + /// + /// Access elements of the underlying tensor / tensor view. + /// + /// A linear index into the data. + /// + public T this[params long[] indices] { + get { + long index = 0; + if (indices.Length == 1) { + index = indices[0]; + validate(index); + unsafe { + T* ptr = (T*)_tensor_data_ptr; + return ptr[TranslateIndex(index, _tensor)]; + } + } else { + unsafe { + T* ptr = (T*)_tensor_data_ptr; + return ptr[TranslateIndex(indices, _tensor)]; + } + } + } + set { + long index = 0; + if (indices.Length == 1) { + index = indices[0]; + validate(index); + unsafe { + T* ptr = (T*)_tensor_data_ptr; + ptr[TranslateIndex(indices, _tensor)] = value; + } + } else { + unsafe { + T* ptr = (T*)_tensor_data_ptr; + ptr[TranslateIndex(indices, _tensor)] = value; + } + } + } + } + + private void validate(long index) + { + if (index >= Count) throw new IndexOutOfRangeException(); + } + + public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + /*if (_tensor.is_contiguous()) { + if (typeof(T) == typeof(float)) { + float[] ff = new float[TempCount]; + Marshal.Copy(_tensor_data_ptr, ff, 0,ff.Length); + } + }*/ + //Because the contiguous cause arange from tensorIndex to Numel. So is not necesary "create" array of arange, i said "create" because in fact enumerable do not create itself. Very cool. + if (_tensor.is_contiguous()) { + for (long i = tensorIndex; i < TempCount; i++) + unsafe { array[i] = ((T*)_tensor_data_ptr)[i]; } + return; + } + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } + idx += 1; + } + } + + public void CopyTo(Span array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } + idx += 1; + } + } + + public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; } + idx += 1; + } + } + + public void CopyFrom(ReadOnlySpan array, int arrayIndex = 0, long tensorIndex = 0) + { + int idx = arrayIndex; + foreach (int offset in GetSubsequentIndices(tensorIndex)) { + if (idx >= array.Length) break; + unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; } + idx += 1; + } + } + + /// + /// Translates a linear index within the span represented by the accessor to a linear index + /// used by the underlying tensor. The two should only be different if the tensor is a view + /// rather than an allocated tensor. + /// + private static long TranslateIndex(long idx, torch.Tensor tensor) + { + if (idx >= tensor.numel() || idx < 0) + throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); + + if (tensor.is_contiguous() || idx == 0) return idx; + + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + idx = Math.DivRem(idx, shape[i], out long s); + result += s * strides[i]; + } + + return result; + } + /// + /// WARNING: Test purpose not use in production + /// + private long TranslateIndexNonStatic(long idx, torch.Tensor tensor) + { + if (idx >= TempCount || idx < 0) + throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); + + if (tensor.is_contiguous() || idx == 0) return idx; + + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + idx = Math.DivRem(idx, shape[i], out long s); + result += s * strides[i]; + } + + return result; + } + private static long TranslateIndex(long[] idx, torch.Tensor tensor) + { + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + if (idx[i] >= shape[i] || idx[i] < 0) + throw new IndexOutOfRangeException($"{idx[i]} >= {shape[i]} in dimension {i}."); + result += idx[i] * strides[i]; + } + + return result; + } + + internal static T ReadItemAt(torch.Tensor tensor, long index) + { + if (tensor.device_type != DeviceType.CPU) { + throw new InvalidOperationException("Reading data from non-CPU memory is not supported. Move or copy the tensor to the cpu before reading."); + } + + tensor.ValidateType(typeof(T)); + + var strides = tensor.stride(); + for (var i = 0; i < strides.Length; i++) { + if (strides[i] < 0) + throw new NotImplementedException($"Negative tensor strides are not currently supported. tensor.strides({i}) == {strides[i]}"); + } + + unsafe { + var res = THSTensor_data(tensor.Handle); + if (res == IntPtr.Zero) { torch.CheckForErrors(); } + // NOTE: there is no safety here. + T* ptr = (T*)res; + return ptr[TranslateIndex(index, tensor)]; + } + } + + /// + /// Compare two tensors element-wise. + /// + /// A tensor + /// Another tensor + /// + public static bool operator ==(FastTensorAccessor left, FastTensorAccessor right) + { + if (left.Count != right.Count) return false; + + var lEnum = left.GetEnumerator(); + var rEnum = right.GetEnumerator(); + + while (lEnum.MoveNext() && rEnum.MoveNext()) { + if (!lEnum.Current.Equals(rEnum.Current)) + return false; + } + return true; + } + + /// + /// Compare two tensors element-wise. + /// + /// A tensor + /// Another tensor + /// + public static bool operator !=(FastTensorAccessor left, FastTensorAccessor right) + { + return !(left == right); + } + + + private IEnumerable GetSubsequentIndices(long startingIndex) + { + //TempCount = Count; + + if (startingIndex < 0 || startingIndex >= TempCount) + throw new ArgumentOutOfRangeException(nameof(startingIndex)); + + if (TempCount <= 1) { + if (TempCount == 0) { + return Enumerable.Empty(); + } + + return new List() { 0 }; + //return (new long[] { 0 }).AsEnumerable(); + } + + if (_tensor.is_contiguous()) { + return ContiguousIndices(startingIndex); + } + + var stride = _tensor.stride(); + Debug.Assert(stride.Length > 0); + + if (stride.Length == 1) { + return SimpleIndices(startingIndex, stride[0]); + } + + return MultiDimensionIndices(startingIndex); + } + private IEnumerable MultiDimensionIndices(long startingIndex) + { + long[] shape = _tensor.shape; + long[] stride = _tensor.stride(); + long[] inds = new long[stride.Length]; + + long index = startingIndex; + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + + while (true) { + + index += 1; + + yield return offset; + + if (index >= TempCount) break; + + for (int i = inds.Length - 1; ; i--) { + Debug.Assert(i >= 0); + offset += stride[i]; + if (++inds[i] < shape[i]) + break; + + // Overflow of current dimension so rewind accordingly. + // Can't overflow the final (left-most) dimension. + Debug.Assert(i > 0); + // Note: for perf, this multiplication could be done once up front and cached in an array. + offset -= inds[i] * stride[i]; + inds[i] = 0; + } + } + } + + private IEnumerable SimpleIndices(long startingIndex, long stride) + { + long index = startingIndex; + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + + while (index < TempCount) { + yield return offset; + offset += stride; + index += 1; + } + } + + private IEnumerable ContiguousIndices(long startingIndex) + { + // If there was an overload for Enumerable.Range that + // produced long integers, we wouldn't need this implementation. + + long index = startingIndex; + while (index < TempCount) { + yield return index; + index += 1; + } + } + + + /// + /// Compare two tensors element-wise. + /// + /// Another tensor + /// + public override bool Equals(object obj) + { + var left = this; + var right = obj as FastTensorAccessor; + if (right == null) return false; + + if (left._tensor_data_ptr == right._tensor_data_ptr) return true; + if (left.Count != right.Count) return false; + for (long i = 0; i < left.Count; i++) { + if (!left[i].Equals(right[i])) return false; + } + return true; + } + + public override int GetHashCode() + { + return base.GetHashCode(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + private void Dispose(bool disposing) + { + _tensor_data_ptr = IntPtr.Zero; + // Clear the tensor that we've been keeping alive. + _tensor = null; + } + + private torch.Tensor _tensor; // Keeping it alive. + private IntPtr _tensor_data_ptr; + +#if true + public IEnumerator GetEnumerator() + { + if (TempCount <= 1) { + if (TempCount == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + } + /*if (Count <= 1) { + if (Count == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + }*/ + + if (_tensor.is_contiguous()) { + return new SimpleAtorImpl(this, 1); + } + + var stride = _tensor.stride(); + Debug.Assert(stride.Length > 0); + + if (stride.Length == 1) { + return new SimpleAtorImpl(this, stride[0]); + } + + return new GeneralAtorImpl(this, stride); + } + + private class SimpleAtorImpl : IEnumerator + { + private FastTensorAccessor _span; + private readonly long _count; + private readonly long _stride; + + // State. + private long _index; + private long _offset; + private T _current; + + public SimpleAtorImpl(FastTensorAccessor span, long stride) + { + _span = span; + _count = span.TempCount; + Debug.Assert(_count > 0); + _stride = stride; + Reset(); + } + + public T Current => _current; + object IEnumerator.Current => Current; + + public void Dispose() + { + _span = null; + Reset(); + } + + public bool MoveNext() + { + if (_index < 0) { + _index = 0; + _offset = 0; + } else if (++_index >= _count) { + Reset(); + return false; + } else { + _offset += _stride; + } + + unsafe { _current = ((T*)_span._tensor_data_ptr)[_offset]; } + return true; + } + + public void Reset() + { + _index = -1; + _offset = -1; + _current = default; + } + } + + private class GeneralAtorImpl : IEnumerator + { + private FastTensorAccessor _span; + private readonly long _count; + private readonly long[] _shape; + private readonly long[] _stride; + private readonly long[] _inds; + + // State. + private long _index; + private long _offset; + + public GeneralAtorImpl(FastTensorAccessor span, long[] stride) + { + Debug.Assert(stride.Length > 1); + _span = span; + _count = span.TempCount; + Debug.Assert(_count > 0); + _shape = span._tensor.shape; + Debug.Assert(_shape.Length == stride.Length); + _stride = stride; + _inds = new long[stride.Length]; + Reset(); + } + + public T Current { get; private set; } + + object IEnumerator.Current => Current; + + public void Dispose() + { + // Just clear the span field. + _span = null; + } + + public bool MoveNext() + { + if (_index < 0) { + _index = 0; + _offset = 0; + Array.Clear(_inds, 0, _inds.Length); + } else if (++_index >= _count) { + Reset(); + return false; + } else { + for (int i = _inds.Length - 1; ; i--) { + Debug.Assert(i >= 0); + _offset += _stride[i]; + if (++_inds[i] < _shape[i]) + break; + + // Overflow of current dimension so rewind accordingly. + // Can't overflow the final (left-most) dimension. + Debug.Assert(i > 0); + // Note: for perf, this multiplication could be done once up front and cached in an array. + _offset -= _inds[i] * _stride[i]; + _inds[i] = 0; + } + } + + unsafe { Current = ((T*)_span._tensor_data_ptr)[_offset]; } + return true; + } + + public void Reset() + { + _index = -1; + _offset = -1; + Current = default; + } + } +#else + public IEnumerator GetEnumerator() + { + return new TensorAccessorEnumerator(this); + } +#endif + } +} diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index f7f825ffc..31641529b 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -39,15 +39,7 @@ internal TensorAccessor(torch.Tensor tensor) _tensor = tensor; // Keep the tensor alive now that everything is alright. } - /// - /// This is important for performance because only called with CopyTo, CopyFrom. Is not necesary in each invocation call tensor.numel() because that use intensive CPU. - /// This temporary count avoid so much use CPU. The Property act as method. - /// If tensor is for example 640*640*3 = 1.228.800, property invoke 1 millons times!!! - /// If we only want copy is not necesary call that method so many times. - /// For some reason the method numel() use so much cpu. - /// - internal long TempCount = -1; - public long Count => _tensor?.numel() ?? 0; + public long Count => (_tensor is not null ? _tensor.numel() : 0); public bool IsReadOnly => false; @@ -56,17 +48,18 @@ public T[] ToArray() if (_tensor.ndim < 2) return (T[])ToNDArray(); - var shps = _tensor.shape; - TempCount = 1; - for(int i=0;i(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); } } - var result = new T[TempCount]; + + var result = new T[Count]; CopyTo(result); return result; } @@ -253,18 +246,6 @@ private void validate(long index) public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) { int idx = arrayIndex; - /*if (_tensor.is_contiguous()) { - if (typeof(T) == typeof(float)) { - float[] ff = new float[TempCount]; - Marshal.Copy(_tensor_data_ptr, ff, 0,ff.Length); - } - }*/ - //Because the contiguous cause arange from tensorIndex to Numel. So is not necesary "create" array of arange, i said "create" because in fact enumerable do not create itself. Very cool. - if (_tensor.is_contiguous()) { - for(long i= tensorIndex; i= array.Length) break; unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } @@ -325,27 +306,7 @@ private static long TranslateIndex(long idx, torch.Tensor tensor) return result; } - /// - /// WARNING: Test purpose not use in production - /// - private long TranslateIndexNonStatic(long idx, torch.Tensor tensor) - { - if (idx >= TempCount || idx < 0) - throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); - - if (tensor.is_contiguous() || idx == 0) return idx; - - long result = 0; - var shape = tensor.shape; - var strides = tensor.stride(); - - for (var i = shape.Length - 1; i >= 0; i--) { - idx = Math.DivRem(idx, shape[i], out long s); - result += s * strides[i]; - } - return result; - } private static long TranslateIndex(long[] idx, torch.Tensor tensor) { long result = 0; @@ -418,18 +379,15 @@ internal static T ReadItemAt(torch.Tensor tensor, long index) private IEnumerable GetSubsequentIndices(long startingIndex) { - //TempCount = Count; - - if (startingIndex < 0 || startingIndex >= TempCount) + if (startingIndex < 0 || startingIndex >= Count) throw new ArgumentOutOfRangeException(nameof(startingIndex)); - if (TempCount <= 1) { - if (TempCount == 0) { + if (Count <= 1) { + if (Count == 0) { return Enumerable.Empty(); } - return new List() { 0 }; - //return (new long[] { 0 }).AsEnumerable(); + return (new long[] { 0 }).AsEnumerable(); } if (_tensor.is_contiguous()) { @@ -445,6 +403,7 @@ private IEnumerable GetSubsequentIndices(long startingIndex) return MultiDimensionIndices(startingIndex); } + private IEnumerable MultiDimensionIndices(long startingIndex) { long[] shape = _tensor.shape; @@ -452,8 +411,7 @@ private IEnumerable MultiDimensionIndices(long startingIndex) long[] inds = new long[stride.Length]; long index = startingIndex; - //long offset = TranslateIndex(startingIndex, _tensor); - long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + long offset = TranslateIndex(startingIndex, _tensor); while (true) { @@ -461,7 +419,7 @@ private IEnumerable MultiDimensionIndices(long startingIndex) yield return offset; - if (index >= TempCount) break; + if (index >= Count) break; for (int i = inds.Length - 1; ; i--) { Debug.Assert(i >= 0); @@ -482,23 +440,21 @@ private IEnumerable MultiDimensionIndices(long startingIndex) private IEnumerable SimpleIndices(long startingIndex, long stride) { long index = startingIndex; - //long offset = TranslateIndex(startingIndex, _tensor); - long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + long offset = TranslateIndex(startingIndex, _tensor); - while (index < TempCount) { + while (index < Count) { yield return offset; offset += stride; index += 1; } } - private IEnumerable ContiguousIndices(long startingIndex) { // If there was an overload for Enumerable.Range that // produced long integers, we wouldn't need this implementation. - + long index = startingIndex; - while (index < TempCount) { + while (index < Count) { yield return index; index += 1; } @@ -553,16 +509,11 @@ private void Dispose(bool disposing) #if true public IEnumerator GetEnumerator() { - if (TempCount <= 1) { - if (TempCount == 0) - return Enumerable.Empty().GetEnumerator(); - return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); - } - /*if (Count <= 1) { + if (Count <= 1) { if (Count == 0) return Enumerable.Empty().GetEnumerator(); return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); - }*/ + } if (_tensor.is_contiguous()) { return new SimpleAtorImpl(this, 1); @@ -592,7 +543,7 @@ private class SimpleAtorImpl : IEnumerator public SimpleAtorImpl(TensorAccessor span, long stride) { _span = span; - _count = span.TempCount; + _count = span.Count; Debug.Assert(_count > 0); _stride = stride; Reset(); @@ -647,7 +598,7 @@ public GeneralAtorImpl(TensorAccessor span, long[] stride) { Debug.Assert(stride.Length > 1); _span = span; - _count = span.TempCount; + _count = span.Count; Debug.Assert(_count > 0); _shape = span._tensor.shape; Debug.Assert(_shape.Length == stride.Length); diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index 808aa1ccf..065301040 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -13,7 +13,6 @@ trx $(OutputPath) 10.0 - Debug;Release;LibTorch2.3.1 @@ -114,7 +113,7 @@ - + @@ -123,6 +122,7 @@ + true true @@ -132,5 +132,4 @@ Obsolete,ExcludeFromCodeCoverage - - + \ No newline at end of file From c0883d9fad6686c38d33b6713332397b61e47c86 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 21 Jul 2024 16:31:07 -0300 Subject: [PATCH 20/43] fix mac test? --- src/TorchSharp/NN/Module.cs | 4 ++-- src/TorchSharp/Torch.cs | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs index 19b64d8a9..f7309ed51 100644 --- a/src/TorchSharp/NN/Module.cs +++ b/src/TorchSharp/NN/Module.cs @@ -765,7 +765,7 @@ public virtual void register_module(string name, Module submodule) } submodule.RegisterComponents(); - if (!is_autocast_cache_enabled()) { + /*if (!is_autocast_cache_enabled()) { _internal_submodules.Add(name, submodule); return; } @@ -773,7 +773,7 @@ public virtual void register_module(string name, Module submodule) submodule = submodule.to(get_autocast_dtype(CUDA)); if (is_autocast_cpu_enabled()) submodule = submodule.to(get_autocast_dtype(CPU)); - + */ _internal_submodules.Add(name, submodule); } } diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index d10254a2c..bc019d8df 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -53,7 +53,8 @@ public static partial class torch public static string __version__ => libtorchPackageVersion; - internal static bool TryLoadNativeLibraryFromFile(string path, StringBuilder trace) { + internal static bool TryLoadNativeLibraryFromFile(string path, StringBuilder trace) + { bool ok; try { trace.AppendLine($" Trying to load native component {path}"); @@ -158,7 +159,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr var torchsharpLoc = Path.GetDirectoryName(typeof(torch).Assembly.Location); var packagesDir = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..", "..", "..")); var torchsharpHome = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..")); - //torchsharpLoc = @"K:\Proyects_Repos\TorchSharp"; + trace.AppendLine($" torchsharpLoc = {torchsharpLoc}"); trace.AppendLine($" packagesDir = {packagesDir}"); trace.AppendLine($" torchsharpHome = {torchsharpHome}"); @@ -204,8 +205,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr throw new NotSupportedException(message); } } - } - else { + } else { trace.AppendLine(" Giving up, TorchSharp.dll does not appear to have been loaded from package directories"); } if (!ok) { @@ -214,7 +214,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr throw new NotSupportedException(message); } } - + // Record the successful load if (useCudaBackend) @@ -265,8 +265,7 @@ private static bool CopyNativeComponentsIntoSingleDirectory(string packagesDir, public static bool TryInitializeDeviceType(DeviceType deviceType) { - if (deviceType == DeviceType.MPS && !isAppleSilicon) - { + if (deviceType == DeviceType.MPS && !isAppleSilicon) { return false; } @@ -280,8 +279,7 @@ public static bool TryInitializeDeviceType(DeviceType deviceType) public static void InitializeDeviceType(DeviceType deviceType) { - if (deviceType == DeviceType.MPS && !isAppleSilicon) - { + if (deviceType == DeviceType.MPS && !isAppleSilicon) { throw new InvalidOperationException($"Torch device type 'MPS' is not available on this platform."); } From 9ac78bd7ec50600fa137a97e05402b1121e357c3 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Wed, 24 Jul 2024 19:08:23 -0300 Subject: [PATCH 21/43] AMP Problem outscope --- src/Examples.Utils/Examples.Utils.csproj | 2 +- src/TorchSharp/Amp/AMPManager.cs | 133 +++++++++++++++++++---- src/TorchSharp/Amp/AutocastMode.cs | 25 ++++- src/TorchSharp/Tensor/Tensor.cs | 29 ++--- src/TorchSharp/Utils/UnorderedMap.cs | 16 ++- 5 files changed, 161 insertions(+), 44 deletions(-) diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 11a1f2b91..60dc0a292 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -26,7 +26,7 @@ - + diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 29c5da90c..870728dca 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -1,65 +1,154 @@ using System; using System.Collections.Generic; -using System.Runtime.InteropServices; -using System.Text; -using Google.Protobuf.WellKnownTypes; +using System.Diagnostics; using TorchSharp.PInvoke; -using TorchSharp.Utils; namespace TorchSharp.Amp { public class AMPManager : IDisposable { + //TODO: Make Singleton THREADSAFE - public UnorderedMap TensorPtrs= new UnorderedMap(); + public class TensorConverter + { + //public torch.Tensor Tensor; + public IntPtr PrevHandle; + public IntPtr Handle; + public torch.ScalarType Dtype; + public torch.ScalarType FastDtype; + public TensorCalledIn Called, Status; + public enum TensorCalledIn + { + OutSide, + InsideEnter + } + + public TensorConverter(IntPtr handle) + { + this.PrevHandle = handle; + this.Handle = handle; + this.Dtype = (torch.ScalarType)NativeMethods.THSTensor_type(handle); + this.FastDtype = AutocastMode.GetInstance().GetFastType(); + + Status = TensorConverter.TensorCalledIn.InsideEnter; + } + /*public TensorConverter(torch.Tensor tensor) : this(tensor.handle) + { + this.Tensor = tensor; + }*/ + } + + public IList TensorsCasts = new List(); + public bool IsEnter = false; + public bool IsDisposed = false; + /*public UnorderedMap TensorPtrs= new UnorderedMap(); + public UnorderedMap TensorMap= new UnorderedMap();*/ private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); private AMPManager() { } public bool IsEnabled => autocastMode.Enabled; private static AMPManager Instance; - //bool disposedValue; - public static AMPManager GetInstance() { return Instance ??= new AMPManager(); } - private void To(IntPtr ptr, torch.ScalarType type) + private torch.ScalarType GetType(IntPtr handle) + { + return (torch.ScalarType)NativeMethods.THSTensor_type(handle); + } + private IntPtr To(IntPtr ptr, torch.ScalarType type) { + Debug.WriteLine($"{nameof(AMPManager)} Tensor converting from: {(torch.ScalarType)NativeMethods.THSTensor_type(ptr)} to: {type}"); var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); if (res == IntPtr.Zero) torch.CheckForErrors(); + return res; } private void Revert() { - using (var enumer = TensorPtrs.GetEnumerator()) - while (enumer.MoveNext()) - To(enumer.Current.Key, enumer.Current.Value); + for (int i = 0; i < TensorsCasts.Count; i++) { + var tc = TensorsCasts[i]; + //var tt = new torch.Tensor(tc.Handle); + //var t = new torch.Tensor(tc.Handle) { handle = To(tc.Handle, tc.Dtype) }; + //var t = new torch.Tensor(tc.Handle).to(tc.Dtype); + tc.Handle= To(tc.Handle, tc.Dtype); + if (tc.Handle != tc.PrevHandle) + tc.PrevHandle = To(tc.PrevHandle, tc.Dtype); + } + //Cast Work very well but UNCASTING (if outscope, not working i dont know why...) + //TensorsCasts.Clear(); } + - public void Add(IntPtr ptr) + private int ExistsHandle(IntPtr handle) { - if (!autocastMode.Enabled) { - - if (TensorPtrs.ContainsKey(ptr)) - To(ptr, TensorPtrs[ptr]); - return; + for (int i = 0; i < TensorsCasts.Count; i++) + if (TensorsCasts[i].PrevHandle == handle || TensorsCasts[i].Handle == handle) + return i; + return -1; + } + + public IntPtr Work(IntPtr handle, IntPtr prev) + { + + /*if (IsDisposed && !IsEnter) { + Revert(); //Is for cleaned all + return IntPtr.Zero; + }*/ + var idx = ExistsHandle(handle); + Console.WriteLine($"PTR: {handle}, PREV: {prev}, IDX: {idx}"); + if (idx == -1) { + var tc = new TensorConverter(handle) { Called = IsEnter + ? TensorConverter.TensorCalledIn.InsideEnter + : TensorConverter.TensorCalledIn.OutSide + }; + if (IsEnter) + tc.Handle = To(tc.Handle, tc.FastDtype); + TensorsCasts.Add(tc); + return tc.Handle; } + var tcidx = TensorsCasts[idx]; + if (!IsEnter && IsDisposed) { + if (tcidx.Called == TensorConverter.TensorCalledIn.OutSide) { //Is created outside so this can revert + //Is From Outside and is disposed, the tensor is created Outside so i will revert this + tcidx.PrevHandle = tcidx.Handle; + tcidx.Handle = To(tcidx.Handle, tcidx.Dtype); + } + return tcidx.Handle; + } + if (GetType(tcidx.Handle) == tcidx.FastDtype) + return tcidx.Handle; - TensorPtrs[ptr] = (torch.ScalarType)NativeMethods.THSTensor_type(ptr); - To(ptr, autocastMode.GetFastType()); //TODO: Set scalar autocast + if (IsEnter) { + tcidx.PrevHandle = tcidx.Handle; + tcidx.Handle = To(tcidx.Handle, tcidx.FastDtype); + } + return tcidx.Handle; } - + public IDisposable Enter() { - return null; + IsEnter = true; + IsDisposed = false; + Debug.WriteLine($"{nameof(AMPManager)} Enter call"); + return this; } protected virtual void Dispose(bool disposing) { + + Debug.WriteLine($"{nameof(AMPManager)} Disposed call"); Revert(); + + IsDisposed = true; + IsEnter = false; + + //Work(IntPtr.Zero, IntPtr.Zero); autocastMode.Dispose(); - TensorPtrs.Dispose(); + //Revert(); + /*TensorPtrs.Dispose(); + TensorMap.Dispose();*/ /*if (!disposedValue) { if (disposing) { diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 0287e02d6..720fb3e67 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -23,7 +23,7 @@ public sealed class AutocastMode : IDisposable internal torch.ScalarType fast_dtype = torch.ScalarType.Float32; public torch.Device Device = new torch.Device(DeviceType.CUDA); private static AutocastMode instance; - bool disposedValue; + //bool disposedValue; /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) { @@ -93,7 +93,26 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) private void Dispose(bool disposing) { - if (!disposedValue) { + this.Enabled = false; + if (Device.type == DeviceType.CUDA) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + torch.set_autocast_gpu_dtype(this.fast_dtype); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + + if (Device.type == DeviceType.CPU) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + //disposedValue = true; + /*if (!disposedValue) { if (disposing) { this.Enabled = false; @@ -121,7 +140,7 @@ private void Dispose(bool disposing) // TODO: free unmanaged resources (unmanaged objects) and override finalizer // TODO: set large fields to null disposedValue = true; - } + }*/ } // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 0e5b76537..2ec774b2e 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -38,24 +38,18 @@ public partial class Tensor : IDisposable //internal AutocastDisposeScope? AutocastDisposeScope; internal Tensor(IntPtr handle) { - this.handle = handle; - /*if (AMPManager.GetInstance().IsEnabled) - AMPManager.GetInstance().Add(handle); //MMM.... This is the more abstract of any method Tensor right????*/ - - /*if (_totalCount > 0) { - //have used - AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); - this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED - }*/ - System.Threading.Interlocked.Increment(ref _totalCount); - _peakCount = Math.Max(_totalCount, _peakCount); - OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. //DANGER: DONT USE THIS ON PRODUCTION - /*AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); - this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED*/ - //Should cast inner scope when get tensors for every each method? example prod, sum, div, reshape, etc??? + if (AMPManager.GetInstance().IsEnabled) { + this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? + } else { + this.handle = handle; + } + + System.Threading.Interlocked.Increment(ref _totalCount); + _peakCount = Math.Max(_totalCount, _peakCount); + OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); } /// @@ -226,8 +220,9 @@ public IntPtr Handle { if (handle == IntPtr.Zero) throw new InvalidOperationException("Tensor invalid -- empty handle."); - //AutocastDisposeScope.autocastMode.CastTensor(this); //This is wrong right??? - + /*if (AMPManager.GetInstance().IsEnabled) { + this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? + }*/ return handle; } } diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index 7db88a94c..f890d7a56 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -1,5 +1,7 @@ using System; +using System.Collections; using System.Collections.Generic; +using System.Linq; using System.Text; namespace TorchSharp.Utils @@ -9,11 +11,23 @@ public class UnorderedMap : Dictionary, IDisposable bool disposedValue; public UnorderedMap() { } + private static bool IsCollectionType(Type type) + { + if (!type.GetGenericArguments().Any()) + return false; + Type genericTypeDefinition = type.GetGenericTypeDefinition(); + var collectionTypes = new[] { typeof(IEnumerable<>), typeof(ICollection<>), typeof(IList<>), typeof(List<>), typeof(IList) }; + return collectionTypes.Any(x => x.IsAssignableFrom(genericTypeDefinition)); + } public new TValue this[TKey tk] { get { if (this.ContainsKey(tk)) return base[tk]; - return default(TValue); + var t = typeof(TValue); + if (!IsCollectionType(t)) + return default; + base[tk] = (TValue)(IList)Activator.CreateInstance(typeof(List<>).MakeGenericType(t.GetGenericArguments())); + return base[tk]; } set { if (!this.ContainsKey(tk)) { From 21ce055d6e9083fb0c92b6dbd91e3ffc917cf0e6 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 3 Sep 2024 17:25:54 -0300 Subject: [PATCH 22/43] some gradscaler. Need grad_scale and found_inf attr in optimizer --- src/Native/LibTorchSharp/CMakeLists.txt | 5 + src/Native/LibTorchSharp/THSAmp.cpp | 23 ++- src/Native/LibTorchSharp/THSAmp.h | 12 +- src/Native/LibTorchSharp/THSCuda.cpp | 15 +- src/Native/LibTorchSharp/THSCuda.h | 4 +- src/TorchSharp/Amp/GradScaler.cs | 145 ++++++++++++++++-- .../PInvoke/LibTorchSharp.THSAmp.cs | 9 ++ src/TorchSharp/Tensor/torch.Amp.cs | 29 ++++ src/TorchSharp/Utils/UnorderedMap.cs | 10 +- 9 files changed, 229 insertions(+), 23 deletions(-) diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 1565eae2d..f94d70302 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -1,8 +1,11 @@ project(LibTorchSharp) find_package(CUDA) +IF(CUDA_FOUND) include_directories(${CUDA_INCLUDE_DIRS}) link_directories(${CUDA_LIBRARY_DIRS}) +add_compile_definitions(TORCHSHARP_CUDA_TOOLKIT_FOUND) +ENDIF() if(APPLE AND NOT LIBTORCH_ARCH STREQUAL "arm64") include_directories("/usr/local/include" "/usr/local/opt/llvm/include") @@ -79,7 +82,9 @@ include_directories(${TORCH_INCLUDE_DIRS}) add_library(LibTorchSharp SHARED ${SOURCES} ${RESOURCES}) +IF(CUDA_FOUND) target_link_libraries(LibTorchSharp ${CUDA_LIBRARIES}) +ENDIF() target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index 2f6a603e5..0b4f29cb8 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -3,6 +3,8 @@ #include #include +#include "torch/torch.h" +#include "torch/cuda.h" /*void THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale) { @@ -12,14 +14,25 @@ void THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale) { torch::_amp_foreach_non_finite_check_and_unscale_(toTensors((torch::Tensor**)self, tLength),found_inf,inv_scale); - } -/*void THSAmp_amp_update_scale_(Tensor* self, const int64_t tLength, __resharper_unknown_type& found_inf, const __resharper_unknown_type& inv_scale) -{ - torch::_amp_update_scale() -}*/ +Tensor THSAmp_amp_update_scale_(at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval) { + CATCH_TENSOR(torch::_amp_update_scale_(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) +} +Tensor THSAmp_amp_update_scale_out(at::Tensor& out, const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval){ + CATCH_TENSOR(torch::_amp_update_scale_out(out, self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) +} +Tensor THSAmp_amp_update_scale_outf(const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, at::Tensor& out){ + CATCH_TENSOR(torch::_amp_update_scale_outf(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval, out);) +} +Tensor THSAMP_amp_update_scale(const at::Tensor& self, const at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor* sec) +{ + std::tuple res; + CATCH(res = torch::_amp_update_scale(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) + *sec = ResultTensor(std::get<1>(res)); + return ResultTensor(std::get<0>(res)); +} bool THSAmp_is_torch_function_mode_enabled() { diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 27183ef14..3a0718db4 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -2,16 +2,20 @@ #pragma once #include "../Stdafx.h" - -#include "torch/torch.h" - #include "Utils.h" //https://github.com/pytorch/pytorch/blob/main/torch/_meta_registrations.py#L5957 //EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale); EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale); -//EXPORT_API(void) THSAmp_amp_update_scale_(at::Tensor& found_inf, const at::Tensor& inv_scale); + +//EXPORT_API(void) THSAmp_amp_update_scale_(const at::Tensor& self, const at::Tensor& inv_scale); + +EXPORT_API(Tensor) THSAmp_amp_update_scale_(at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); +EXPORT_API(Tensor) THSAmp_amp_update_scale_out(at::Tensor& out, const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); +EXPORT_API(Tensor) THSAmp_amp_update_scale_outf(const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, at::Tensor& out); +EXPORT_API(Tensor) THSAMP_amp_update_scale(const at::Tensor& self, const at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor* sec); + EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); //Maybe the best work is call THSTorch_is_autocast_enabled(enum of devices c# as int8_t); diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index 475187beb..01d583229 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -4,22 +4,31 @@ #include #include - +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND cudaDeviceProp THSCuda_get_device_prop() { int device = 0; cudaDeviceProp cdp; - //cudaGetDeviceProperties_v2(&cdp, device); - cudaGetDeviceProperties(&cdp, device); + //cudaGetDeviceProperties(&cdp, device); + cudaGetDeviceProperties_v2(&cdp, device); return cdp; } +#endif int THSCuda_get_major_compute_capability() { +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND return THSCuda_get_device_prop().major; +#else + return -1; +#endif } int THSCuda_get_minor_compute_capability() { +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND return THSCuda_get_device_prop().minor; +#else + return -1; +#endif } diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index 2c6e6c17f..c951dd7a2 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -6,11 +6,13 @@ #include "torch/torch.h" #include "Utils.h" - +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND #include "cuda.h" #include "cuda_runtime_api.h" cudaDeviceProp THSCuda_get_device_prop(); +#endif + EXPORT_API(int) THSCuda_get_major_compute_capability(); EXPORT_API(int) THSCuda_get_minor_compute_capability(); \ No newline at end of file diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index be4833f4f..b2cbd3988 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -4,18 +4,23 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using Tensorboard; using TorchSharp.Modules; using TorchSharp.Utils; namespace TorchSharp.Amp { - public class GradScaler + public class GradScaler : IDisposable { private bool Enabled; public torch.Device device; private torch.Tensor _scale, _growth_tracker; - private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; + private float InitScale, InitGrowthTracker; + public float _growth_factor { set; get; } + public float _backoff_factor { set; get; } + private int _growth_interval { set; get; } private UnorderedMap> _per_optimizer_states = new UnorderedMap>(); + bool disposedValue; public enum OptState { @@ -38,9 +43,9 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac device = dev; Enabled = enabled; InitScale = init_scale; - GrowthFactor = growth_factor; - BackoffFactor = backoff_factor; - GrowthInterval = growth_interval; + this._growth_factor = growth_factor; + _backoff_factor = backoff_factor; + _growth_interval = growth_interval; InitGrowthTracker = 0.0f; throw new NotImplementedException("This need to finish"); @@ -218,17 +223,44 @@ public void unscale(torch.optim.Optimizer optimizer) //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L398 var f = optimizer.GetType().GetField("_step_support_amp_scaling"); if (f != null && f.GetValue(optimizer) is bool b && !b) { + bool has_grad_scaler = false;//I dont know how deal this... + if (has_grad_scaler) { + } else { + if (optimizer_state["stage"] is OptState optstate && optstate == OptState.Ready) + check_inf_per_device(optimizer); + var scaler = _get_scale_async(); + Debug.Assert(!scaler.is_null(), "!scaler.is_null()"); + torch.Tensor found_inf; + if (optimizer_state["found_inf_per_device"] is torch.Tensor[] ts) { + for (int i = 0; i < ts.Length; i++) + ts[i].to(scaler.device, true); + found_inf=torch.sum(torch.cat(ts)); + } + //if(optimizer is SGD ad) + //Info: All optimizer have grad_scale and found_inf //https://github.com/pytorch/pytorch/blob/main/torch/optim/adam.py, etc. + //DANGER: Optimizer in TorchShapr not have grad_scaler or found_inf, we need grad_scale for https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L440 + + //optimizer.GetType().GetField("grad_scale").GetValue(optimizer) as torch.Tensor t + } + retval = optimizer.step().item(); + optimizer_state["stage"] = OptState.Stepped; + //https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L445 + return retval; } if (optimizer_state["stage"] is OptState state1 && state1 == OptState.Ready) unscale(optimizer); - Debug.Assert((optimizer_state["found_inf_per_device"] as float[]).Length > 0, "(optimizer_state['found_inf_per_device'] as float[]).Length > 0"); - + Debug.Assert((optimizer_state["found_inf_per_device"] as torch.Tensor[]).Length > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); retval = maybe_opt_step(optimizer, optimizer_state); optimizer_state["stage"] = OptState.Stepped; return retval; } + private torch.Tensor _get_scale_async() + { + return _scale; + } + /// /// /// @@ -252,9 +284,104 @@ public void update(object new_scale = null) _scale.copy_(t); } } else { - //var found_infs = + IList found_infs = new List(); + foreach (var state in _per_optimizer_states) + foreach (var found_inf in state.Value) + if(found_inf.Value is torch.Tensor t) + found_infs.Add(t); + Debug.Assert(found_infs.Count > 0, "No inf checks were recorded prior to update."); + torch.Tensor found_inf_combined = found_infs[0]; + if (found_infs.Count > 1) + for (int i = 1; i < found_infs.Count; i++) + found_inf_combined += found_infs[i]; + torch.amp_update_scale_(_scale, _growth_tracker, found_inf_combined, (double)_growth_factor, (double)_backoff_factor, (long)_growth_interval); + + } + //TODO: Implement defaultdict https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L531 + } + + public float get_scale() + { + if (this.Enabled) { + + var scale = _get_scale_async(); + if (scale.is_null()) + return InitScale; + return scale.item(); + } + return 1.0f; + } + + public bool IsEnabled() + { + return this.Enabled; + } + + public UnorderedMap state_dict() + { + if (Enabled) { + var res = new UnorderedMap(); + res["scale"] = get_scale(); + res[nameof(_growth_factor)] = _growth_factor; + res[nameof(_backoff_factor)] = _backoff_factor; + res[nameof(_growth_interval)] = _growth_interval; + res[nameof(_growth_tracker)] = _growth_tracker; + return res; } - + return null; + } + + public void load_state_dict(Dictionary state_dict) + { + if (!Enabled) + return; + if (state_dict.Count == 0) + throw new Exception("The source state dict is empty, possibly because it was saved from a disabled instance of GradScaler."); + //TODO: implement reflection to set field/properties based on state_dict + } + + torch.Tensor check_inf_per_device(torch.optim.Optimizer optimizer) + { + _scale = check_scale_growth_tracker(nameof(check_inf_per_device)).Item1; + var dummy_inv_scale = torch.full(new ReadOnlySpan(new long[] { 0 }), 1.0f, torch.ScalarType.Float32, _scale.device); + var foundd_inf = torch.full(new ReadOnlySpan(new long[] { 0 }), 0.0f, torch.ScalarType.Float32, _scale.device); + _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"] = unscale_grads(optimizer, dummy_inv_scale, foundd_inf, true); + return _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"] as torch.Tensor; + } + + private object _found_inf_per_device(torch.optim.Optimizer optimizer) + { + return _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"]; + } + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) { + if (disposing) { + _per_optimizer_states.Dispose(); + _growth_tracker.Dispose(); + _scale.Dispose(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + } + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~GradScaler() + // { + // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); } } } \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index 984637336..7829da992 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -11,6 +11,14 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_amp_foreach_non_finite_check_and_unscale_(IntPtr tensors, long tLength, IntPtr found_inf, IntPtr inv_scale); [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_(IntPtr self, IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_out(IntPtr outt,IntPtr self, IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_outf(IntPtr self,IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, IntPtr outt); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAMP_amp_update_scale(IntPtr self,IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, out IntPtr sec); + [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_torch_function_mode_enabled(); [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); @@ -49,5 +57,6 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_clear_autocast_cache(); + } } \ No newline at end of file diff --git a/src/TorchSharp/Tensor/torch.Amp.cs b/src/TorchSharp/Tensor/torch.Amp.cs index dfa4245fd..319afe65c 100644 --- a/src/TorchSharp/Tensor/torch.Amp.cs +++ b/src/TorchSharp/Tensor/torch.Amp.cs @@ -13,5 +13,34 @@ public static void _amp_foreach_non_finite_check_and_unscale_(IList tens IntPtr tens = ts.CreateArray(tensors.Select(x => x.Handle).ToArray()); THSAmp_amp_foreach_non_finite_check_and_unscale_(tens, ts.Array.Length, found_inf.Handle, inv_scale.Handle); } + + public static torch.Tensor amp_update_scale_(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAmp_amp_update_scale_(self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static torch.Tensor amp_update_scale_out(Tensor outt, Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAmp_amp_update_scale_out(outt.Handle, self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static torch.Tensor amp_update_scale_outf(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, Tensor outt) + { + var res = THSAmp_amp_update_scale_outf(self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval, outt.Handle); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static (torch.Tensor, torch.Tensor) amp_update_scale(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAMP_amp_update_scale(self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval, out var res1); + if(res == IntPtr.Zero || res1 == IntPtr.Zero) + torch.CheckForErrors(); + return (new Tensor(res), new Tensor(res1)); + } } } diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index f890d7a56..92446906a 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -9,7 +9,8 @@ namespace TorchSharp.Utils public class UnorderedMap : Dictionary, IDisposable { bool disposedValue; - + private TValue default_dict; + //TODO: Add DefautlDict behaviour public UnorderedMap() { } private static bool IsCollectionType(Type type) { @@ -21,6 +22,8 @@ private static bool IsCollectionType(Type type) } public new TValue this[TKey tk] { get { + /*if (!this.ContainsKey(tk) && default_dict == null) + return default_dict;*/ if (this.ContainsKey(tk)) return base[tk]; var t = typeof(TValue); @@ -38,6 +41,11 @@ private static bool IsCollectionType(Type type) } } + public void SetDefaultDict(TValue def) + { + this.default_dict = def; + } + protected virtual void Dispose(bool disposing) { if (!disposedValue) { From c70b5237b80d68a735ca5effbe79f998b29d9f52 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 3 Sep 2024 19:54:49 -0300 Subject: [PATCH 23/43] update v2.4.0 --- src/Native/LibTorchSharp/THSAmp.cpp | 76 +++---------------- src/Native/LibTorchSharp/THSAmp.h | 22 +----- src/TorchSharp/Amp/AutocastMode.cs | 40 ++++------ .../PInvoke/LibTorchSharp.THSAmp.cs | 24 +----- src/TorchSharp/Tensor/torch.Autocast.cs | 59 +++----------- 5 files changed, 42 insertions(+), 179 deletions(-) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index 0b4f29cb8..c1fa3cd9e 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -44,60 +44,25 @@ bool THSAmp_is_autocast_cache_enabled() return at::autocast::is_autocast_cache_enabled(); } -bool THSAmp_is_autocast_cpu_enabled() +bool THSAmp_is_autocast_enabled(int8_t device) { - return at::autocast::is_cpu_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L523 + return at::autocast::is_autocast_enabled((at::DeviceType)device); } -bool THSAmp_is_autocast_gpu_enabled() +int8_t THSAmp_get_autocast_dtype(int8_t device) { - return at::autocast::is_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/amp/autocast_mode.py#L363 + return (int8_t)at::autocast::get_autocast_dtype((at::DeviceType)device); } -bool THSAmp_is_autocast_xpu_enabled() -{ - return at::autocast::is_xpu_enabled(); -} -bool THSAmp_is_autocast_hpu_enabled() -{ - return at::autocast::is_hpu_enabled(); -} - -#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) -bool THSAmp_is_autocast_ipu_enabled() -{ - return at::autocast::is_ipu_enabled(); -} - -bool THSAmp_is_autocast_xla_enabled() -{ - return at::autocast::is_xla_enabled(); -} - -#endif -int8_t THSAmp_get_autocast_cpu_dtype() +void THSAmp_set_autocast_dtype(int8_t device, int8_t dtype) { - return (int8_t)at::autocast::get_autocast_cpu_dtype(); + at::autocast::set_autocast_dtype((at::DeviceType)device, (at::ScalarType)dtype); } -int8_t THSAmp_get_autocast_gpu_dtype() +void THSAmp_set_autocast_enabled(int8_t device, bool enabled) { - //TODO: Implement AUTOCAST AMP AND GRADSCALER - - //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable can handle all of that function (if exists) - //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py - - //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L629 - //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/aten/src/ATen/autocast_mode.h#L20 - return (int8_t)at::autocast::get_autocast_gpu_dtype(); + at::autocast::set_autocast_enabled((at::DeviceType)device, enabled); } - -int8_t THSAmp_get_autocast_xpu_dtype() -{ - return (int8_t)at::autocast::get_autocast_xpu_dtype(); -} - - int THSAmp_autocast_increment_nesting() { return at::autocast::increment_nesting(); @@ -108,32 +73,11 @@ int THSAmp_autocast_decrement_nesting() return at::autocast::decrement_nesting(); } -void THSAmp_set_autocast_enabled(bool enabled) +void THSAmp_clear_autocast_cache() { - at::autocast::set_enabled(enabled); + at::autocast::clear_cache(); } - void THSAmp_set_autocast_cache_enabled(bool enabled) { at::autocast::set_autocast_cache_enabled(enabled); -} - -void THSAmp_set_autocast_cpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_cpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_set_autocast_gpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_gpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_set_autocast_xpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_xpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_clear_autocast_cache() -{ - at::autocast::clear_cache(); } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 3a0718db4..23d56fb2c 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -18,31 +18,17 @@ EXPORT_API(Tensor) THSAMP_amp_update_scale(const at::Tensor& self, const at::Ten EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); -//Maybe the best work is call THSTorch_is_autocast_enabled(enum of devices c# as int8_t); EXPORT_API(bool) THSAmp_is_autocast_cache_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_cpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_gpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_xpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_hpu_enabled(); -#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) -EXPORT_API(bool) THSAmp_is_autocast_ipu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_xla_enabled(); -#endif - -EXPORT_API(int8_t) THSAmp_get_autocast_cpu_dtype(); -EXPORT_API(int8_t) THSAmp_get_autocast_gpu_dtype(); -EXPORT_API(int8_t) THSAmp_get_autocast_xpu_dtype(); +EXPORT_API(bool) THSAmp_is_autocast_enabled(int8_t device); +EXPORT_API(int8_t) THSAmp_get_autocast_dtype(int8_t device); +EXPORT_API(void) THSAmp_set_autocast_enabled(int8_t device, bool enabled); +EXPORT_API(void) THSAmp_set_autocast_dtype(int8_t device, int8_t dtype); EXPORT_API(int) THSAmp_autocast_increment_nesting(); EXPORT_API(int) THSAmp_autocast_decrement_nesting(); -EXPORT_API(void) THSAmp_set_autocast_enabled(bool enabled); EXPORT_API(void) THSAmp_set_autocast_cache_enabled(bool enabled); -EXPORT_API(void) THSAmp_set_autocast_cpu_dtype(int8_t dtype); -EXPORT_API(void) THSAmp_set_autocast_gpu_dtype(int8_t dtype); -EXPORT_API(void) THSAmp_set_autocast_xpu_dtype(int8_t dtype); - EXPORT_API(void) THSAmp_clear_autocast_cache(); //EXPORT_API(bool) THSTorch_jit_is_scripting(); \ No newline at end of file diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 63821e64f..fa7512bb5 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -39,21 +39,23 @@ public static AutocastMode GetInstance() public torch.ScalarType GetFastType() { - var ft = torch.ScalarType.Float32; + return torch.get_autocast_dtype(Device.type); + /*var ft = torch.ScalarType.Float32; if (Device.type == DeviceType.CUDA) ft = torch.get_autocast_gpu_dtype(); if (Device.type == DeviceType.CPU) ft = torch.get_autocast_cpu_dtype(); - return ft; + return ft;*/ } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { //var la = torch.tensor(9); fast_dtype = dtype ?? torch.ScalarType.Float32; - if (dev.type == DeviceType.CUDA) - fast_dtype = torch.get_autocast_gpu_dtype(); + fast_dtype = torch.get_autocast_dtype(dev.type); + /*if (dev.type == DeviceType.CUDA) + fast_dtype = torch.get_autocast_dtype(dev); if (dev.type == DeviceType.CPU) - fast_dtype = torch.get_autocast_cpu_dtype(); + fast_dtype = torch.get_autocast_cpu_dtype();*/ //IntPtr ptr = IntPtr.Zero; bool _cache_enabled = torch.is_autocast_cache_enabled(); @@ -74,11 +76,10 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab this.Enabled = enabled; - this.Prev = torch.is_autocast_cpu_enabled(); + this.Prev = torch.is_autocast_enabled(DeviceType.CPU); if (dev.type == DeviceType.CUDA) { - this.Prev = torch.is_autocast_gpu_enabled(); + this.Prev = torch.is_autocast_enabled(dev.type); } - torch.set_autocast_cache_enabled(_cache_enabled); torch.set_autocast_enabled(this.Enabled); //throw new NotImplementedException(); @@ -99,23 +100,12 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) private void Dispose(bool disposing) { this.Enabled = false; - if (Device.type == DeviceType.CUDA) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - torch.set_autocast_gpu_dtype(this.fast_dtype); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } - - if (Device.type == DeviceType.CPU) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cpu_dtype(this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cache_enabled(Device.type, this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); } public void Dispose() diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index 7829da992..a91d4816a 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -23,23 +23,9 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_cpu_enabled(); + internal static extern bool THSAmp_is_autocast_enabled(int device_type); [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_gpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_xpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_hpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_ipu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_xla_enabled(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_cpu_dtype(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_gpu_dtype(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_xpu_dtype(); + internal static extern sbyte THSAmp_get_autocast_dtype(int device_type); [DllImport("LibTorchSharp")] internal static extern int THSAmp_autocast_increment_nesting(); [DllImport("LibTorchSharp")] @@ -49,11 +35,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_set_autocast_cache_enabled(bool enabled); [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_cpu_dtype(sbyte dtype); - [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_gpu_dtype(sbyte dtype); - [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_xpu_dtype(sbyte dtype); + internal static extern void THSAmp_set_autocast_dtype(int device_type, sbyte dtype); [DllImport("LibTorchSharp")] internal static extern void THSAmp_clear_autocast_cache(); diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs index e295c8e62..d817e4ab9 100644 --- a/src/TorchSharp/Tensor/torch.Autocast.cs +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -10,52 +10,22 @@ public static bool is_autocast_cache_enabled() return THSAmp_is_autocast_cache_enabled(); } - public static bool is_autocast_enabled(Device device) + public static bool is_autocast_enabled(DeviceType device) { - if(device.type == DeviceType.CPU) - return THSAmp_is_autocast_cpu_enabled(); - if(device.type == DeviceType.CUDA) - return THSAmp_is_autocast_gpu_enabled(); - return THSAmp_is_autocast_cache_enabled(); - } - public static bool is_autocast_cpu_enabled() - { - return THSAmp_is_autocast_cpu_enabled(); + return THSAmp_is_autocast_enabled((int)device); + //return THSAmp_is_autocast_cache_enabled(); } - public static bool is_autocast_gpu_enabled() + public static ScalarType get_autocast_dtype(DeviceType device) { - return THSAmp_is_autocast_gpu_enabled(); - } - public static bool is_autocast_xpu_enabled() - { - return THSAmp_is_autocast_xpu_enabled(); - } - public static bool is_autocast_hpu_enabled() - { - return THSAmp_is_autocast_hpu_enabled(); - } - - public static ScalarType get_autocast_dtype(Device device) - { - if (device.type == DeviceType.CPU) + return (ScalarType)THSAmp_get_autocast_dtype((int)device); + /*if (device.type == DeviceType.CPU) return get_autocast_cpu_dtype(); if (device.type == DeviceType.CUDA) return get_autocast_gpu_dtype(); - return ScalarType.Float32; - } - public static ScalarType get_autocast_cpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_cpu_dtype(); - } - public static ScalarType get_autocast_gpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_gpu_dtype(); - } - public static ScalarType get_autocast_xpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_xpu_dtype(); + return ScalarType.Float32;*/ } + public static int autocast_increment_nesting() { return THSAmp_autocast_increment_nesting(); @@ -74,18 +44,9 @@ public static void set_autocast_cache_enabled(bool enabled) { THSAmp_set_autocast_cache_enabled(enabled); } - - public static void set_autocast_cpu_dtype(ScalarType dtype) - { - THSAmp_set_autocast_cpu_dtype((sbyte)dtype); - } - public static void set_autocast_gpu_dtype(ScalarType dtype) - { - THSAmp_set_autocast_gpu_dtype((sbyte)dtype); - } - public static void set_autocast_xpu_dtype(ScalarType dtype) + public static void set_autocast_cache_enabled(DeviceType device, ScalarType dtype) { - THSAmp_set_autocast_xpu_dtype((sbyte)dtype); + THSAmp_set_autocast_dtype((int)device, (sbyte)dtype); } public static void clear_autocast_cache() From 36b79b9f30a03db72e620edf65ea1756a8e6266d Mon Sep 17 00:00:00 2001 From: Dimitri Date: Wed, 4 Sep 2024 21:07:30 -0300 Subject: [PATCH 24/43] some advance --- src/TorchSharp/Amp/AMPManager.cs | 33 ++++++++++++++++++++-------- src/TorchSharp/Amp/AutocastMode.cs | 35 +++++++++++++++--------------- src/TorchSharp/Amp/GradScaler.cs | 8 ++++++- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 0262f8934..9d79d59e7 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -16,7 +16,7 @@ public class TensorConverter public IntPtr PrevHandle; public IntPtr Handle; public torch.ScalarType Dtype; - public torch.ScalarType FastDtype; + public torch.ScalarType FastDtype = torch.ScalarType.Float32; public TensorCalledIn Called, Status; public enum TensorCalledIn { @@ -44,15 +44,26 @@ public TensorConverter(IntPtr handle) public bool IsDisposed = false; /*public UnorderedMap TensorPtrs= new UnorderedMap(); public UnorderedMap TensorMap= new UnorderedMap();*/ - private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); + private AutocastMode autocastMode=null; + public bool IsEnabled { + get { + if (autocastMode == null) + return false; + return autocastMode.Enabled; + } + } - private AMPManager() { } + private AMPManager(bool enabled) + { + if (!torch.cuda_is_available()) + return; + autocastMode = AutocastMode.GetInstance(enabled); + } - public bool IsEnabled => autocastMode.Enabled; private static AMPManager Instance; - public static AMPManager GetInstance() + public static AMPManager GetInstance(bool enabled = false) { - return Instance ??= new AMPManager(); + return Instance ??= new AMPManager(enabled); } private torch.ScalarType GetType(IntPtr handle) @@ -67,7 +78,8 @@ public IntPtr AutoCast(IntPtr handle) public torch.Tensor AutoCast(torch.Tensor tensor) { - return tensor.to(AutocastMode.GetInstance().GetFastType()); + return new torch.Tensor(AutoCast(tensor.Handle)); + //return tensor.to(AutocastMode.GetInstance().GetFastType()); } public static IntPtr To(IntPtr ptr, torch.ScalarType type) { @@ -154,8 +166,11 @@ public IntPtr Work(IntPtr handle, IntPtr prev) public IDisposable Enter() { + if (!torch.cuda_is_available()) + return this; IsEnter = true; IsDisposed = false; + autocastMode.SetEnabled(true, torch.CUDA); Debug.WriteLine($"{nameof(AMPManager)} Enter call"); return this; } @@ -184,10 +199,10 @@ protected virtual void Dispose(bool disposing) } // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources - ~AMPManager() + /*~AMPManager() { Dispose(false); - } + }*/ public void Dispose() { diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index fa7512bb5..808df715b 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -32,43 +32,39 @@ public sealed class AutocastMode : IDisposable instance = new AutocastMode(dev, dtype, enabled, cache_enabled); return instance; }*/ - public static AutocastMode GetInstance() + public static AutocastMode GetInstance(bool enabled=false) { - return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); + return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled:enabled,cache_enabled:true); } public torch.ScalarType GetFastType() { return torch.get_autocast_dtype(Device.type); - /*var ft = torch.ScalarType.Float32; - if (Device.type == DeviceType.CUDA) - ft = torch.get_autocast_gpu_dtype(); - if (Device.type == DeviceType.CPU) - ft = torch.get_autocast_cpu_dtype(); - return ft;*/ } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) + { + if (!torch.cuda_is_available()) + return; + Process(dev, dtype, enabled, cache_enabled); + } + + private void Process(torch.Device dev, torch.ScalarType? dtype=null, bool enabled=true, bool? cache_enabled=null) { //var la = torch.tensor(9); fast_dtype = dtype ?? torch.ScalarType.Float32; fast_dtype = torch.get_autocast_dtype(dev.type); - /*if (dev.type == DeviceType.CUDA) - fast_dtype = torch.get_autocast_dtype(dev); - if (dev.type == DeviceType.CPU) - fast_dtype = torch.get_autocast_cpu_dtype();*/ //IntPtr ptr = IntPtr.Zero; - + bool _cache_enabled = torch.is_autocast_cache_enabled(); if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast Enabled = false; if (dtype.HasValue) fast_dtype = dtype.Value; - if(cache_enabled.HasValue) - _cache_enabled=cache_enabled.Value; + if (cache_enabled.HasValue) + _cache_enabled = cache_enabled.Value; if (dev.type == DeviceType.CPU) { - } - else if (dev.type == DeviceType.CUDA) { + } else if (dev.type == DeviceType.CUDA) { if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported()) throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16."); @@ -82,7 +78,6 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab } torch.set_autocast_cache_enabled(_cache_enabled); torch.set_autocast_enabled(this.Enabled); - //throw new NotImplementedException(); } /*internal void Cast(torch.Tensor tensor) @@ -97,6 +92,10 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) return tensor.to(fast_dtype, tensor.device); } + internal void SetEnabled(bool enabled, torch.Device dev) + { + Process(dev, null, enabled, true); + } private void Dispose(bool disposing) { this.Enabled = false; diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index b2cbd3988..f9070f3c2 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -201,7 +201,13 @@ public void unscale(torch.optim.Optimizer optimizer) private float? maybe_opt_step(torch.optim.Optimizer optimizer, UnorderedMap optimizer_state) { //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L351 - throw new NotImplementedException(); + float? retval=0; + foreach(var d in optimizer_state) + if (d.Value is torch.Tensor t) + retval += t.item(); + if (retval==0) + retval = optimizer.step().item(); + return retval; } public float? step(torch.optim.Optimizer optimizer, params object[] obj) From 376f4fbb4af0a028d1d541b0533b966f5120ec7c Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 8 Sep 2024 09:13:19 -0300 Subject: [PATCH 25/43] Improve autocastmode --- src/Native/LibTorchSharp/THSAmp.cpp | 6 + src/Native/LibTorchSharp/THSAmp.h | 2 + src/TorchSharp/Amp/AMPManager.cs | 2 +- src/TorchSharp/Amp/AutocastMode.cs | 148 ++++++++++++------ src/TorchSharp/LinearAlgebra.cs | 5 +- src/TorchSharp/NN/Convolution/Conv1D.cs | 3 +- src/TorchSharp/NN/Convolution/Conv2D.cs | 3 +- src/TorchSharp/NN/Convolution/Conv3D.cs | 3 +- .../NN/Convolution/ConvTranspose1D.cs | 3 +- .../NN/Convolution/ConvTranspose2D.cs | 3 +- .../NN/Convolution/ConvTranspose3D.cs | 3 +- src/TorchSharp/NN/Linear.cs | 3 +- src/TorchSharp/NN/Recurrent/GRUCell.cs | 3 +- src/TorchSharp/NN/Recurrent/LSTMCell.cs | 3 +- src/TorchSharp/NN/Recurrent/RNNCell.cs | 3 +- .../PInvoke/LibTorchSharp.THSAmp.cs | 4 +- src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs | 7 +- src/TorchSharp/Tensor/Tensor.Math.cs | 6 +- src/TorchSharp/Tensor/Tensor.Trig.cs | 3 + src/TorchSharp/Tensor/Tensor.cs | 14 +- src/TorchSharp/Tensor/torch.Autocast.cs | 19 ++- src/TorchSharp/TorchSharp.csproj | 4 + src/TorchSharp/Utils/UnorderedMap.cs | 59 +++++++ 23 files changed, 222 insertions(+), 87 deletions(-) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index c1fa3cd9e..79c6da9f2 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -44,6 +44,12 @@ bool THSAmp_is_autocast_cache_enabled() return at::autocast::is_autocast_cache_enabled(); } +bool THSAmp_is_autocast_available(int8_t device) +{ + return at::autocast::is_autocast_available((c10::DeviceType)device); +} + + bool THSAmp_is_autocast_enabled(int8_t device) { return at::autocast::is_autocast_enabled((at::DeviceType)device); diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 23d56fb2c..4ae115dda 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -20,6 +20,8 @@ EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); EXPORT_API(bool) THSAmp_is_autocast_cache_enabled(); +EXPORT_API(bool) THSAmp_is_autocast_available(int8_t device); + EXPORT_API(bool) THSAmp_is_autocast_enabled(int8_t device); EXPORT_API(int8_t) THSAmp_get_autocast_dtype(int8_t device); EXPORT_API(void) THSAmp_set_autocast_enabled(int8_t device, bool enabled); diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 9d79d59e7..c5a120b03 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -49,7 +49,7 @@ public bool IsEnabled { get { if (autocastMode == null) return false; - return autocastMode.Enabled; + return autocastMode.IsEnabled; } } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 808df715b..dacfc9721 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -1,9 +1,13 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; using System.Security.Cryptography; using System.Text; using System.Threading.Tasks; +using TorchSharp.PInvoke; +using TorchSharp.Utils; namespace TorchSharp.Amp { @@ -17,21 +21,17 @@ public static torch.Tensor AutoCast(this torch.Tensor input) //TODO: Should make Singleton and IDisposable on ENTER public sealed class AutocastMode : IDisposable { - //NEED "Register" all tensor in scope for uncasting outer-scope - public bool Enabled=false; - internal bool Prev; - //private torch.ScalarType Dtype = torch.ScalarType.Float32; + public bool _enabled=false; + public bool IsEnter = false; + public bool IsDisposed = false; + private bool prev_cache_enabled, prev; + private torch.ScalarType prev_fastdtype; + //internal bool Prev; + private bool _cache_enabled=false; internal torch.ScalarType fast_dtype = torch.ScalarType.Float32; - public torch.Device Device = new torch.Device(DeviceType.CUDA); + internal torch.ScalarType? dtype = torch.ScalarType.Float32; + public DeviceType device = DeviceType.CUDA; private static AutocastMode instance; - //bool disposedValue; - - /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) -{ -if(instance ==null) -instance = new AutocastMode(dev, dtype, enabled, cache_enabled); -return instance; -}*/ public static AutocastMode GetInstance(bool enabled=false) { return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled:enabled,cache_enabled:true); @@ -39,72 +39,118 @@ public static AutocastMode GetInstance(bool enabled=false) public torch.ScalarType GetFastType() { - return torch.get_autocast_dtype(Device.type); + return torch.get_autocast_dtype(device); } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { - if (!torch.cuda_is_available()) - return; - Process(dev, dtype, enabled, cache_enabled); - } - - private void Process(torch.Device dev, torch.ScalarType? dtype=null, bool enabled=true, bool? cache_enabled=null) - { - //var la = torch.tensor(9); - fast_dtype = dtype ?? torch.ScalarType.Float32; - fast_dtype = torch.get_autocast_dtype(dev.type); + /*dtype_by_methods[nameof(torch.matmul), DeviceType.CUDA] = torch.ScalarType.Float16; + dtype_by_methods[nameof(torch.matmul), DeviceType.CUDA] = torch.ScalarType.Float16;*/ + //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float16 + if (dtype == null) + dtype = torch.get_autocast_dtype(dev.type); + this.device = dev.type; + if (!torch.is_autocast_available(device)) + throw new Exception($"User specified an unsupported autocast device_type {device}"); + fast_dtype = torch.get_autocast_dtype(device); + //TODO: is_autocast_available(); //IntPtr ptr = IntPtr.Zero; - bool _cache_enabled = torch.is_autocast_cache_enabled(); - if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast - Enabled = false; - if (dtype.HasValue) + _cache_enabled = torch.is_autocast_cache_enabled(); + if (enabled && !torch.cuda_is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast + enabled = false; + if (this.dtype.HasValue) fast_dtype = dtype.Value; if (cache_enabled.HasValue) _cache_enabled = cache_enabled.Value; - if (dev.type == DeviceType.CPU) { + if (dev.type == DeviceType.CPU) { + if (fast_dtype != torch.ScalarType.Float16 || fast_dtype != torch.ScalarType.BFloat16) { + Debug.WriteLine($"In CPU autocast, but the target d type is not suported. Disabling autocast. CPU autocast only supports dtype of {torch.ScalarType.Float16} or {torch.ScalarType.BFloat16}"); + enabled = false; + } } else if (dev.type == DeviceType.CUDA) { if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported()) throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16."); } + this._enabled = enabled; + } + private torch.ScalarType GetType(IntPtr handle) + { + return (torch.ScalarType)NativeMethods.THSTensor_type(handle); + } - this.Enabled = enabled; - - this.Prev = torch.is_autocast_enabled(DeviceType.CPU); - if (dev.type == DeviceType.CUDA) { - this.Prev = torch.is_autocast_enabled(dev.type); - } - torch.set_autocast_cache_enabled(_cache_enabled); - torch.set_autocast_enabled(this.Enabled); + public static IntPtr AutoCast(IntPtr handle) + { + return ToIf(handle, GetInstance().GetFastType()); + } + public static IntPtr AutoCast(IntPtr handle, torch.ScalarType dtype) + { + return ToIf(handle, dtype); } - /*internal void Cast(torch.Tensor tensor) + + public static torch.Tensor AutoCast(torch.Tensor tensor) { - tensor.to(fast_dtype, tensor.device); - }*/ + return new torch.Tensor(AutoCast(tensor.Handle)); + //return tensor.to(AutocastMode.GetInstance().GetFastType()); + } + public static IntPtr To(IntPtr ptr, torch.ScalarType type) + { + Debug.WriteLine($"{nameof(AutocastMode)} Tensor converting from: {(torch.ScalarType)NativeMethods.THSTensor_type(ptr)} to: {type}"); + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + torch.CheckForErrors(); + return res; + } + public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) + { + if (!GetInstance()._enabled) + return ptr; + /*if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) + return ptr;*/ + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + torch.CheckForErrors(); + return res; + } + public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type, DeviceType device_type) + { + bool is_elegible = (torch.ScalarType)NativeMethods.THSTensor_type(ptr) != torch.ScalarType.Float64 && (DeviceType)NativeMethods.THSTensor_device_type(ptr) == device_type; + + if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) + return ptr; + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + torch.CheckForErrors(); + return res; + } - internal torch.Tensor CastTensor(torch.Tensor tensor) + public static bool IsAutocastEnabled(DeviceType device = DeviceType.CUDA) { - if (!Enabled) - return tensor; - return tensor.to(fast_dtype, tensor.device); + return torch.is_autocast_enabled(!torch.cuda_is_available() ? DeviceType.CPU : device); } - internal void SetEnabled(bool enabled, torch.Device dev) + public IDisposable Enter() { - Process(dev, null, enabled, true); + prev_cache_enabled = torch.is_autocast_cache_enabled(); + prev = torch.is_autocast_enabled(device); + prev_fastdtype = torch.get_autocast_dtype(device); + torch.set_autocast_enabled(device, _enabled); + torch.set_autocast_dtype(device, fast_dtype); + torch.autocast_increment_nesting(); + torch.set_autocast_cache_enabled(_cache_enabled); + return this; } + private void Dispose(bool disposing) { - this.Enabled = false; + this._enabled = false; if (torch.autocast_decrement_nesting() == 0) torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cache_enabled(Device.type, this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); + torch.set_autocast_enabled(device, prev); + torch.set_autocast_dtype(device, prev_fastdtype); + torch.set_autocast_cache_enabled(prev_cache_enabled); } public void Dispose() diff --git a/src/TorchSharp/LinearAlgebra.cs b/src/TorchSharp/LinearAlgebra.cs index c9964d536..43d9ed82d 100644 --- a/src/TorchSharp/LinearAlgebra.cs +++ b/src/TorchSharp/LinearAlgebra.cs @@ -2,6 +2,7 @@ using System; using System.Linq; using System.Collections.Generic; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; #nullable enable @@ -440,7 +441,7 @@ public static Tensor multi_dot(IList tensors) throw new ArgumentException(nameof(tensors)); } if (tensors.Count == 1) { - tensors[0] = Amp.AMPManager.GetInstance().AutoCast(tensors[0]); + tensors[0] = AutocastMode.AutoCast(tensors[0]); return tensors[0]; } @@ -449,7 +450,7 @@ public static Tensor multi_dot(IList tensors) var res = THSLinalg_multi_dot(tensorsRef, parray.Array.Length); if (res == IntPtr.Zero) torch.CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv1D.cs b/src/TorchSharp/NN/Convolution/Conv1D.cs index 0064020fd..dd7b4c263 100644 --- a/src/TorchSharp/NN/Convolution/Conv1D.cs +++ b/src/TorchSharp/NN/Convolution/Conv1D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -194,7 +195,7 @@ public static Tensor conv1d(Tensor input, Tensor weight, Tensor? bias = null, (IntPtr)pdilation, dilationArray.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv2D.cs b/src/TorchSharp/NN/Convolution/Conv2D.cs index 277b695eb..4008b51fa 100644 --- a/src/TorchSharp/NN/Convolution/Conv2D.cs +++ b/src/TorchSharp/NN/Convolution/Conv2D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -238,7 +239,7 @@ public static Tensor conv2d(Tensor input, Tensor weight, Tensor? bias = null, (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv3D.cs b/src/TorchSharp/NN/Convolution/Conv3D.cs index e8a670b7d..ef37aaa6a 100644 --- a/src/TorchSharp/NN/Convolution/Conv3D.cs +++ b/src/TorchSharp/NN/Convolution/Conv3D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -181,7 +182,7 @@ public static Tensor conv3d(Tensor input, Tensor weight, Tensor? bias = null, (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs index 954e4ab1b..9700a58b7 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -117,7 +118,7 @@ public static Tensor conv_transpose1d(Tensor input, Tensor weight, Tensor? bias (IntPtr)pdilation, dilations.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs index 8a074dce1..63fc0d6e5 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -148,7 +149,7 @@ public static Tensor conv_transpose2d(Tensor input, Tensor weight, Tensor? bias (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs index 4362a8738..faeb279ad 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -144,7 +145,7 @@ public static Tensor conv_transpose3d(Tensor input, Tensor weight, Tensor? bias (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Linear.cs b/src/TorchSharp/NN/Linear.cs index 675952cef..68b34ffd5 100644 --- a/src/TorchSharp/NN/Linear.cs +++ b/src/TorchSharp/NN/Linear.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -104,7 +105,7 @@ public static Tensor linear(Tensor input, Tensor weights, Tensor? bias = null) IntPtr bPtr = bias?.Handle ?? IntPtr.Zero; var res = THSNN_functional_linear(input.Handle, weights.Handle, bPtr); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Recurrent/GRUCell.cs b/src/TorchSharp/NN/Recurrent/GRUCell.cs index 50be405e1..610762542 100644 --- a/src/TorchSharp/NN/Recurrent/GRUCell.cs +++ b/src/TorchSharp/NN/Recurrent/GRUCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -106,7 +107,7 @@ public static GRUCell GRUCell(long inputSize, long hiddenSize, bool bias = true, { var res = THSNN_GRUCell_ctor(inputSize, hiddenSize, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); //TODO: Research if this work... + res = AutocastMode.AutoCast(res); return new GRUCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/NN/Recurrent/LSTMCell.cs b/src/TorchSharp/NN/Recurrent/LSTMCell.cs index 2449348fb..44f6e5bbc 100644 --- a/src/TorchSharp/NN/Recurrent/LSTMCell.cs +++ b/src/TorchSharp/NN/Recurrent/LSTMCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -108,7 +109,7 @@ public static LSTMCell LSTMCell(long inputSize, long hiddenSize, bool bias = tru { var res = THSNN_LSTMCell_ctor(inputSize, hiddenSize, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new LSTMCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/NN/Recurrent/RNNCell.cs b/src/TorchSharp/NN/Recurrent/RNNCell.cs index 0557dfe2e..05bf7088b 100644 --- a/src/TorchSharp/NN/Recurrent/RNNCell.cs +++ b/src/TorchSharp/NN/Recurrent/RNNCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -112,7 +113,7 @@ public static RNNCell RNNCell(long inputSize, long hiddenSize, NonLinearities no { var res = THSNN_RNNCell_ctor(inputSize, hiddenSize, (long)nonLinearity, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new RNNCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index a91d4816a..cfc9cda91 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -23,6 +23,8 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); [DllImport("LibTorchSharp")] + internal static extern bool THSAmp_is_autocast_available(int device_type); + [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_enabled(int device_type); [DllImport("LibTorchSharp")] internal static extern sbyte THSAmp_get_autocast_dtype(int device_type); @@ -31,7 +33,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern int THSAmp_autocast_decrement_nesting(); [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_enabled(bool enabled); + internal static extern void THSAmp_set_autocast_enabled(int device_type, bool enabled); [DllImport("LibTorchSharp")] internal static extern void THSAmp_set_autocast_cache_enabled(bool enabled); [DllImport("LibTorchSharp")] diff --git a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs index 9f62cda4a..6289990a4 100644 --- a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs +++ b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; using System.Linq; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -171,7 +172,7 @@ public Tensor matmul(Tensor target) { var res = THSTensor_matmul(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -184,7 +185,7 @@ public Tensor mm(Tensor target) { var res = THSTensor_mm(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -197,7 +198,7 @@ public Tensor mv(Tensor target) { var res = THSTensor_mv(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.Math.cs b/src/TorchSharp/Tensor/Tensor.Math.cs index 4970a9658..32db3a478 100644 --- a/src/TorchSharp/Tensor/Tensor.Math.cs +++ b/src/TorchSharp/Tensor/Tensor.Math.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. #nullable enable using System; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -270,7 +271,7 @@ public Tensor addmm(Tensor mat1, Tensor mat2, float beta = 1, float alpha = 1) var res = THSTensor_addmm(Handle, mat1.Handle, mat2.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -302,7 +303,7 @@ public Tensor addmv(Tensor mat, Tensor vec, float beta = 1.0f, float alpha = 1.0 var res = THSTensor_addmv(Handle, mat.Handle, vec.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -1387,6 +1388,7 @@ public Tensor pow(Tensor exponent) { var res = THSTensor_pow(Handle, exponent.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float32 return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.Trig.cs b/src/TorchSharp/Tensor/Tensor.Trig.cs index d377e967c..39e8f048b 100644 --- a/src/TorchSharp/Tensor/Tensor.Trig.cs +++ b/src/TorchSharp/Tensor/Tensor.Trig.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; using System.Diagnostics.Contracts; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -39,6 +40,7 @@ public Tensor asin() var res = THSTensor_asin(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -70,6 +72,7 @@ public Tensor acos() var res = THSTensor_acos(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 696e07d13..0fe6eb971 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -45,13 +45,7 @@ public partial class Tensor : IDisposable }*/ internal Tensor(IntPtr handle) { - //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. - //DANGER: DONT USE THIS ON PRODUCTION - /*if (AMPManager.GetInstance().IsEnabled) { - this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? - } else {*/ - this.handle = handle; - //} + this.handle = handle; System.Threading.Interlocked.Increment(ref _totalCount); _peakCount = Math.Max(_totalCount, _peakCount); OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); @@ -3119,7 +3113,7 @@ public Tensor baddbmm(Tensor batch1, Tensor batch2, float beta = 1, float alpha { var res = NativeMethods.THSTensor_baddbmm(Handle, batch1.Handle, batch2.Handle, beta, alpha); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -3132,7 +3126,7 @@ public Tensor bmm(Tensor batch2) { var res = NativeMethods.THSTensor_bmm(Handle, batch2.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -4488,7 +4482,7 @@ public Tensor prelu(Tensor target) { var res = NativeMethods.THSTensor_prelu(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs index d817e4ab9..12e86d46d 100644 --- a/src/TorchSharp/Tensor/torch.Autocast.cs +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -10,6 +10,11 @@ public static bool is_autocast_cache_enabled() return THSAmp_is_autocast_cache_enabled(); } + public static bool is_autocast_available(DeviceType device) + { + //https://github.com/pytorch/pytorch/blob/main/torch/csrc/autograd/init.cpp + return THSAmp_is_autocast_available((int)device); + } public static bool is_autocast_enabled(DeviceType device) { return THSAmp_is_autocast_enabled((int)device); @@ -18,11 +23,6 @@ public static bool is_autocast_enabled(DeviceType device) public static ScalarType get_autocast_dtype(DeviceType device) { return (ScalarType)THSAmp_get_autocast_dtype((int)device); - /*if (device.type == DeviceType.CPU) - return get_autocast_cpu_dtype(); - if (device.type == DeviceType.CUDA) - return get_autocast_gpu_dtype(); - return ScalarType.Float32;*/ } @@ -36,9 +36,14 @@ public static int autocast_decrement_nesting() return THSAmp_autocast_decrement_nesting(); } - public static void set_autocast_enabled(bool enabled) + public static void set_autocast_enabled(DeviceType device, bool enabled) + { + THSAmp_set_autocast_enabled((int)device,enabled); + } + + public static void set_autocast_dtype(DeviceType device, ScalarType dtype) { - THSAmp_set_autocast_enabled(enabled); + THSAmp_set_autocast_dtype((int)device, (sbyte)dtype); } public static void set_autocast_cache_enabled(bool enabled) { diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj index 054f5c18a..d5cb1135d 100644 --- a/src/TorchSharp/TorchSharp.csproj +++ b/src/TorchSharp/TorchSharp.csproj @@ -19,6 +19,10 @@ + + + + diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index 92446906a..6eb073b1d 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -6,6 +6,65 @@ namespace TorchSharp.Utils { + public class Dictionary : Dictionary, TValue>, IDictionary, TValue> + { + + public TValue this[TKey1 key1, TKey2 key2] { + get { return base[Tuple.Create(key1, key2)]; } + set { base[Tuple.Create(key1, key2)] = value; } + } + + public void Add(TKey1 key1, TKey2 key2, TValue value) + { + base.Add(Tuple.Create(key1, key2), value); + } + + public bool ContainsKey(TKey1 key1, TKey2 key2) + { + return base.ContainsKey(Tuple.Create(key1, key2)); + } + } + + public class UnorderedMap : Dictionary, IDisposable + { + bool disposedValue; + public new TValue this[TKey1 tk1, TKey2 tk2] { + get { + /*if (!this.ContainsKey(tk) && default_dict == null) + return default_dict;*/ + if (this.ContainsKey(tk1, tk2)) + return base[tk1, tk2]; + return default; + } + set { + if (!this.ContainsKey(tk1, tk2)) { + this.Add(tk1, tk2, value); + return; + } + base[tk1, tk2] = value; + } + } + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) { + if (disposing) { + base.Clear(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + } + } + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + } public class UnorderedMap : Dictionary, IDisposable { bool disposedValue; From 9f4a48b3a31ada2d52375c045818796806937ff8 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 18 Oct 2024 15:55:37 -0300 Subject: [PATCH 26/43] Some Autocast f16, f32 --- src/Native/LibTorchSharp/THSNN.cpp | 7 ++ src/Native/LibTorchSharp/THSNN.h | 1 + src/TorchSharp/Amp/AutocastMode.cs | 72 ++++++++++++++++--- src/TorchSharp/NN/Activation/Softmin.cs | 2 + src/TorchSharp/NN/Activation/Softplus.cs | 2 + src/TorchSharp/NN/Bilinear.cs | 12 ++++ src/TorchSharp/NN/CosineSimilarity.cs | 3 + src/TorchSharp/NN/Losses.cs | 18 +++++ src/TorchSharp/NN/Normalization/GroupNorm.cs | 3 + src/TorchSharp/NN/Normalization/LayerNorm.cs | 3 + src/TorchSharp/NN/PairwiseDistance.cs | 2 + src/TorchSharp/NN/Vision.cs | 11 +++ src/TorchSharp/Special.cs | 5 +- src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs | 14 ++++ src/TorchSharp/Tensor/Tensor.Math.cs | 50 +++++++++++++ src/TorchSharp/Tensor/Tensor.Trig.cs | 11 +++ src/TorchSharp/Tensor/Tensor.cs | 15 ++++ .../Tensor/torch.OtherOperations.cs | 2 + 18 files changed, 224 insertions(+), 9 deletions(-) diff --git a/src/Native/LibTorchSharp/THSNN.cpp b/src/Native/LibTorchSharp/THSNN.cpp index 430c17f5e..a9ac0bbcf 100644 --- a/src/Native/LibTorchSharp/THSNN.cpp +++ b/src/Native/LibTorchSharp/THSNN.cpp @@ -1336,6 +1336,13 @@ Tensor THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key, CATCH_TENSOR(torch::scaled_dot_product_attention(*query, *key, *value, mask, p, casual)); } +Tensor THSNN_normalize(Tensor input, float p, const int64_t* dim, float eps, Tensor out) +{ + auto opts = torch::nn::functional::NormalizeFuncOptions().p(p).eps(eps).dim(*dim); + CATCH_TENSOR(torch::nn::functional::normalize(*input, opts)) + //CATCH_TENSOR(torch::scaled_dot_product_attention(*query, *key, *value, mask, p, casual)); +} + void THSNN_Print_Module(const NNModule module) { std::ostringstream oss; const std::string name = module->get()->name(); diff --git a/src/Native/LibTorchSharp/THSNN.h b/src/Native/LibTorchSharp/THSNN.h index 65edf3c2e..cf79593eb 100644 --- a/src/Native/LibTorchSharp/THSNN.h +++ b/src/Native/LibTorchSharp/THSNN.h @@ -579,6 +579,7 @@ EXPORT_API(Tensor) THSNN_PairwiseDistance_forward(const NNModule module, const EXPORT_API(Tensor) THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key, const Tensor value, const Tensor attention_mask, double p, bool casual); +EXPORT_API(Tensor) THSNN_normalize(const Tensor input, float p, const int64_t* dim, float eps, Tensor out); // Initializers EXPORT_API(void) THSNN_initUniform(Tensor twrapper, double low, double high); diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index dacfc9721..e6200a3c8 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -43,8 +43,6 @@ public torch.ScalarType GetFastType() } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { - /*dtype_by_methods[nameof(torch.matmul), DeviceType.CUDA] = torch.ScalarType.Float16; - dtype_by_methods[nameof(torch.matmul), DeviceType.CUDA] = torch.ScalarType.Float16;*/ //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float16 if (dtype == null) dtype = torch.get_autocast_dtype(dev.type); @@ -52,9 +50,6 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab if (!torch.is_autocast_available(device)) throw new Exception($"User specified an unsupported autocast device_type {device}"); fast_dtype = torch.get_autocast_dtype(device); - //TODO: is_autocast_available(); - //IntPtr ptr = IntPtr.Zero; - _cache_enabled = torch.is_autocast_cache_enabled(); if (enabled && !torch.cuda_is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast enabled = false; @@ -84,12 +79,55 @@ public static IntPtr AutoCast(IntPtr handle) { return ToIf(handle, GetInstance().GetFastType()); } + public static (IntPtr h1, IntPtr h2) AutoCast(IntPtr handle1, IntPtr handle2) + { + var ft = GetInstance().GetFastType(); + return (ToIf(handle1, ft), ToIf(handle2, ft)); + } + public static (IntPtr h1, IntPtr h2, IntPtr h3) AutoCast(IntPtr handle1, IntPtr handle2, IntPtr handle3) + { + var ft = GetInstance().GetFastType(); + return (ToIf(handle1, ft), ToIf(handle2, ft), ToIf(handle3, ft)); + } + public static (IntPtr h1, IntPtr h2) AutoCast(IntPtr handle1, IntPtr handle2, torch.ScalarType dtype) + { + return (ToIf(handle1, dtype), ToIf(handle2, dtype)); + } + + public static (IntPtr h1, IntPtr h2, IntPtr h3) AutoCast(IntPtr handle1, IntPtr handle2, IntPtr handle3, torch.ScalarType dtype) + { + return (ToIf(handle1, dtype), ToIf(handle2, dtype), ToIf(handle3, dtype)); + } + + + /*public static IntPtr[] AutoCast(params IntPtr[] handles) + { + var stsel =handles.Select(x => (torch.ScalarType)NativeMethods.THSTensor_type(x)); + if (AutocastMode.IsAutocastEnabled(this.device.type)) { + var st = (ScalarType)THSTensor_type(Handle); + var st1 = (ScalarType)THSTensor_type(tensor1.Handle); + var st2 = (ScalarType)THSTensor_type(tensor2.Handle); + var sts = new ScalarType[] { st, st1, st2 }; + if (sts.All(x => x == ScalarType.Float16)) { + var f16 = ScalarType.Float16; + handle = AutocastMode.AutoCast(handle, f16); + tensor1.handle = AutocastMode.AutoCast(tensor1.handle, f16); + tensor2.handle = AutocastMode.AutoCast(tensor2.handle, f16); + + } + var f32 = ScalarType.Float32; + if (sts.Any(x => x == f32)) { + handle = AutocastMode.AutoCast(handle, f32); + tensor1.handle = AutocastMode.AutoCast(tensor1.handle, f32); + tensor2.handle = AutocastMode.AutoCast(tensor2.handle, f32); + } + } + }*/ public static IntPtr AutoCast(IntPtr handle, torch.ScalarType dtype) { return ToIf(handle, dtype); } - public static torch.Tensor AutoCast(torch.Tensor tensor) { return new torch.Tensor(AutoCast(tensor.Handle)); @@ -97,16 +135,29 @@ public static torch.Tensor AutoCast(torch.Tensor tensor) } public static IntPtr To(IntPtr ptr, torch.ScalarType type) { - Debug.WriteLine($"{nameof(AutocastMode)} Tensor converting from: {(torch.ScalarType)NativeMethods.THSTensor_type(ptr)} to: {type}"); + Debug.WriteLine($"{nameof(AutocastMode)} Tensor converting from: {GetDtype(ptr)} to: {type}"); var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); if (res == IntPtr.Zero) torch.CheckForErrors(); return res; } + + private static torch.ScalarType GetDtype(IntPtr ptr) + { + return (torch.ScalarType)NativeMethods.THSTensor_type(ptr); + } + + private static DeviceType GetDeviceType(IntPtr ptr) + { + return (DeviceType)NativeMethods.THSTensor_device_type(ptr); + } public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) { + if (!GetInstance()._enabled) return ptr; + if (GetDtype(ptr) == type) //if already have same dtype is not necesary convert to dtype, right??? + return ptr; /*if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) return ptr;*/ var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); @@ -116,7 +167,7 @@ public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) } public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type, DeviceType device_type) { - bool is_elegible = (torch.ScalarType)NativeMethods.THSTensor_type(ptr) != torch.ScalarType.Float64 && (DeviceType)NativeMethods.THSTensor_device_type(ptr) == device_type; + bool is_elegible = GetDtype(ptr) != torch.ScalarType.Float64 && GetDeviceType(ptr) == device_type; if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) return ptr; @@ -152,6 +203,11 @@ private void Dispose(bool disposing) torch.set_autocast_dtype(device, prev_fastdtype); torch.set_autocast_cache_enabled(prev_cache_enabled); } + + /*~AutocastMode() + { + + }*/ public void Dispose() { diff --git a/src/TorchSharp/NN/Activation/Softmin.cs b/src/TorchSharp/NN/Activation/Softmin.cs index e3fa3040a..2969d4dc3 100644 --- a/src/TorchSharp/NN/Activation/Softmin.cs +++ b/src/TorchSharp/NN/Activation/Softmin.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -49,6 +50,7 @@ public static Softmin Softmin(long dim) { var handle = THSNN_Softmin_ctor(dim, out var boxedHandle); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } + handle = AutocastMode.AutoCast(handle, ScalarType.Float32); //Should put this here??? return new Softmin(handle, boxedHandle); } diff --git a/src/TorchSharp/NN/Activation/Softplus.cs b/src/TorchSharp/NN/Activation/Softplus.cs index 7e46662d0..017754338 100644 --- a/src/TorchSharp/NN/Activation/Softplus.cs +++ b/src/TorchSharp/NN/Activation/Softplus.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -50,6 +51,7 @@ public static Softplus Softplus(double beta = 1.0, double threshold = 20.0) { var handle = THSNN_Softplus_ctor(beta, threshold, out var boxedHandle); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } + handle = AutocastMode.AutoCast(handle, ScalarType.Float32); //Should put this here return new Softplus(handle, boxedHandle); } diff --git a/src/TorchSharp/NN/Bilinear.cs b/src/TorchSharp/NN/Bilinear.cs index 8ba4efebb..f8fb7b7da 100644 --- a/src/TorchSharp/NN/Bilinear.cs +++ b/src/TorchSharp/NN/Bilinear.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -7,6 +8,7 @@ #nullable enable namespace TorchSharp { + using System.Linq; using Modules; namespace Modules @@ -93,6 +95,16 @@ public static Tensor bilinear(Tensor input1, Tensor input2, Tensor weight, Tenso IntPtr bPtr = bias?.Handle ?? IntPtr.Zero; var res = THSNN_functional_bilinear(input1.Handle, input2.Handle, weight.Handle, bPtr); if (res == IntPtr.Zero) { CheckForErrors(); } + /*if (AutocastMode.IsAutocastEnabled()) { + var st = input1.dtype; + var st1 = input2.dtype; + var st2 = weight.dtype; + var sts = new[] { st, st1, st2 }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float32); + }*/ return new Tensor(res); } } diff --git a/src/TorchSharp/NN/CosineSimilarity.cs b/src/TorchSharp/NN/CosineSimilarity.cs index b4c4802ae..99f9b05a1 100644 --- a/src/TorchSharp/NN/CosineSimilarity.cs +++ b/src/TorchSharp/NN/CosineSimilarity.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -22,6 +23,7 @@ public override Tensor forward(Tensor input1, Tensor input2) { var res = THSNN_CosineSimilarity_forward(handle, input1.Handle, input2.Handle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res= AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } } @@ -41,6 +43,7 @@ public static CosineSimilarity CosineSimilarity(long dim = 1, double eps = 1e-8) { var handle = THSNN_CosineSimilarity_ctor(dim, eps, out var boxedHandle); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } + handle = AutocastMode.AutoCast(handle, ScalarType.Float32); return new CosineSimilarity(handle, boxedHandle); } diff --git a/src/TorchSharp/NN/Losses.cs b/src/TorchSharp/NN/Losses.cs index 5e514bef5..9aae89088 100644 --- a/src/TorchSharp/NN/Losses.cs +++ b/src/TorchSharp/NN/Losses.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -365,6 +366,7 @@ public static Tensor binary_cross_entropy_with_logits(Tensor input, Tensor targe { var res = THSNN_binary_cross_entropy_with_logits(input.Handle, target.Handle, weight?.Handle ?? IntPtr.Zero, (long)reduction, pos_weights?.Handle ?? IntPtr.Zero); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -435,6 +437,7 @@ public static Tensor cosine_embedding_loss(Tensor input1, Tensor input2, Tensor { var res = THSNN_cosine_embedding_loss(input1.Handle, input2.Handle, target.Handle, margin, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -514,6 +517,7 @@ public static Tensor multi_label_margin_loss(Tensor input, Tensor target, Reduct { var res = THSNN_multilabel_margin_loss(input.Handle, target.Handle, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -547,6 +551,7 @@ public static Tensor multi_margin_loss(Tensor input, Tensor target, int p = 1, d IntPtr h = (weight is null) ? IntPtr.Zero : weight.Handle; var res = THSNN_multi_margin_loss(input.Handle, target.Handle, p, margin, h, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -561,6 +566,7 @@ public static Tensor mse_loss(Tensor input, Tensor target, Reduction reduction = { var res = THSNN_mse_loss(input.Handle, target.Handle, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -620,6 +626,7 @@ public static Tensor kl_div(Tensor input, Tensor target, bool log_target = true, { var res = THSNN_kl_div_loss(input.Handle, target.Handle, (long)reduction, log_target); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -744,6 +751,7 @@ public override Tensor forward(Tensor input, Tensor target) var ii = ignore_index.HasValue ? ignore_index.Value : -100; var res = THSNN_cross_entropy(input.Handle, target.Handle, weight?.Handle ?? IntPtr.Zero, ii, ignore_index.HasValue, (long)reduction, label_smoothing); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + return new Tensor(res); } @@ -776,6 +784,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_binary_cross_entropy_with_logits(input.Handle, target.Handle, weight?.Handle ?? IntPtr.Zero, (long)reduction, pos_weights?.Handle ?? IntPtr.Zero); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -793,6 +802,7 @@ public override Tensor forward(Tensor input1, Tensor input2, Tensor target) { var res = THSNN_cosine_embedding_loss(input1.Handle, input2.Handle, target.Handle, margin, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -829,6 +839,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_hinge_embedding_loss(input.Handle, target.Handle, margin, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -863,6 +874,7 @@ public override Tensor forward(Tensor input1, Tensor input2, Tensor target) { var res = THSNN_margin_ranking_loss(input1.Handle, input2.Handle, target.Handle, margin, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -942,6 +954,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_l1_loss(input.Handle, target.Handle, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } } @@ -956,6 +969,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_nll_loss(input.Handle, target.Handle, weight?.Handle ?? IntPtr.Zero, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } } @@ -973,6 +987,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_poisson_loss(input.Handle, target.Handle, log_input, full, eps, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1046,6 +1061,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_smooth_l1_loss(input.Handle, target.Handle, (long)reduction, beta); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1062,6 +1078,7 @@ public override Tensor forward(Tensor input, Tensor target) { var res = THSNN_soft_margin_loss(input.Handle, target.Handle, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } } @@ -1080,6 +1097,7 @@ public override Tensor forward(Tensor anchor, Tensor positive, Tensor negative) { var res = THSNN_triplet_margin_loss(anchor.Handle, positive.Handle, negative.Handle, margin, p, eps, swap, (long)reduction); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } diff --git a/src/TorchSharp/NN/Normalization/GroupNorm.cs b/src/TorchSharp/NN/Normalization/GroupNorm.cs index e63b5c8c7..eca7e1665 100644 --- a/src/TorchSharp/NN/Normalization/GroupNorm.cs +++ b/src/TorchSharp/NN/Normalization/GroupNorm.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -25,6 +26,7 @@ public override Tensor forward(Tensor tensor) if (tensor.Dimensions < 3) throw new ArgumentException($"Invalid number of dimensions for GroupNorm argument: {tensor.Dimensions}"); var res = THSNN_GroupNorm_forward(handle.DangerousGetHandle(), tensor.Handle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + res= AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -79,6 +81,7 @@ public static GroupNorm GroupNorm(long num_groups, long num_channels, double eps unsafe { var handle = THSNN_GroupNorm_ctor(num_groups, num_channels, eps, affine, out var boxedHandle); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } + handle= AutocastMode.AutoCast(handle, ScalarType.Float32); return new GroupNorm(handle, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/NN/Normalization/LayerNorm.cs b/src/TorchSharp/NN/Normalization/LayerNorm.cs index 6ed8dae45..ca53a3733 100644 --- a/src/TorchSharp/NN/Normalization/LayerNorm.cs +++ b/src/TorchSharp/NN/Normalization/LayerNorm.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -28,9 +29,11 @@ internal LayerNorm(long[] normalized_shape, double eps, bool elementwise_affine, if (elementwise_affine) { weight = Parameter(torch.empty(normalized_shape, dtype, device)); + //weight.handle = AutocastMode.AutoCast(weight.handle, ScalarType.Float32); //This is correct??? if (bias) { this.bias = Parameter(torch.empty(normalized_shape, dtype, device)); + //bias.handle = AutocastMode.AutoCast(bias.handle, ScalarType.Float32); //This is correct??? } } diff --git a/src/TorchSharp/NN/PairwiseDistance.cs b/src/TorchSharp/NN/PairwiseDistance.cs index d652677dc..bac5bace2 100644 --- a/src/TorchSharp/NN/PairwiseDistance.cs +++ b/src/TorchSharp/NN/PairwiseDistance.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -41,6 +42,7 @@ public static PairwiseDistance PairwiseDistance(double p = 2.0, double eps = 1e- { var handle = THSNN_PairwiseDistance_ctor(p, eps, keep_dim, out var boxedHandle); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } + handle = AutocastMode.AutoCast(handle, ScalarType.Float32); return new PairwiseDistance(handle, boxedHandle); } diff --git a/src/TorchSharp/NN/Vision.cs b/src/TorchSharp/NN/Vision.cs index 5dd5fe6e2..654bef049 100644 --- a/src/TorchSharp/NN/Vision.cs +++ b/src/TorchSharp/NN/Vision.cs @@ -1,5 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using System.Linq; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; #nullable enable @@ -164,8 +166,17 @@ public static Tensor pad(Tensor input, long pad, PaddingModes mode = PaddingMode public static Tensor grid_sample(Tensor input, Tensor grid, GridSampleMode mode = GridSampleMode.Bilinear, GridSamplePaddingMode padding_mode = GridSamplePaddingMode.Zeros, bool? align_corners = null) { byte ac = (byte)((align_corners.HasValue) ? (align_corners.Value ? 1 : 2) : 0); + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { input.dtype, grid.dtype }; + if (sts.All(x => x == ScalarType.Float16)) + (input.handle, grid.handle) = AutocastMode.AutoCast(input.handle, grid.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (input.handle, grid.handle) = AutocastMode.AutoCast(input.handle, grid.handle, ScalarType.Float32); + } + var res = THSNN_grid_sample(input.Handle, grid.Handle, (byte)mode, (byte)padding_mode, ac); if (res == IntPtr.Zero) { torch.CheckForErrors(); } + return new Tensor(res); } diff --git a/src/TorchSharp/Special.cs b/src/TorchSharp/Special.cs index 59b98e91b..e27698477 100644 --- a/src/TorchSharp/Special.cs +++ b/src/TorchSharp/Special.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -674,10 +675,11 @@ public static Tensor logit(Tensor input) /// public static Tensor log_softmax(Tensor input, long dim, ScalarType? dtype = null) { - var dt = dtype.HasValue ? dtype.Value : input.dtype; + var dt = dtype ?? input.dtype; var res = THSSpecial_log_softmax(input.Handle, dim, (sbyte)dt); if (res == IntPtr.Zero) torch.CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -745,6 +747,7 @@ public static Tensor softmax(Tensor input, long dim, ScalarType? dtype = null) var res = THSSpecial_softmax(input.Handle, dim, (sbyte)dt); if (res == IntPtr.Zero) torch.CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs index 6289990a4..079c72e3e 100644 --- a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs +++ b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs @@ -18,6 +18,13 @@ public partial class Tensor public Tensor tensordot(Tensor b, long[] dims1, long[] dims2) { IntPtr res; + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { this.dtype, b.dtype }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, b.handle) = AutocastMode.AutoCast(handle, b.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, b.handle) = AutocastMode.AutoCast(handle, b.handle, ScalarType.Float32); + } unsafe { fixed (long* pdims1 = dims1, pdims2 = dims2) { res = THSLinalg_tensordot(Handle, b.Handle,(IntPtr)pdims1, dims1.Length,(IntPtr)pdims2, dims2.Length); @@ -248,6 +255,13 @@ public Tensor vdot(Tensor target) public Tensor dot(Tensor target) { if (shape.Length != 1 || target.shape.Length != 1 || shape[0] != target.shape[0]) throw new InvalidOperationException("dot arguments must have the same shape."); + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { this.dtype, target.dtype }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, target.handle) = AutocastMode.AutoCast(handle, target.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, target.handle) = AutocastMode.AutoCast(handle, target.handle, ScalarType.Float32); + } var res = THSTensor_dot(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } return new Tensor(res); diff --git a/src/TorchSharp/Tensor/Tensor.Math.cs b/src/TorchSharp/Tensor/Tensor.Math.cs index 32db3a478..0fec7e12f 100644 --- a/src/TorchSharp/Tensor/Tensor.Math.cs +++ b/src/TorchSharp/Tensor/Tensor.Math.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. #nullable enable using System; +using System.Linq; using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; @@ -158,6 +159,7 @@ public Tensor addbmm(Tensor batch1, Tensor batch2, float beta = 1, float alpha = var res = THSTensor_addbmm(Handle, batch1.Handle, batch2.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -187,6 +189,16 @@ public Tensor addbmm_(Tensor batch1, Tensor batch2, float beta = 1, float alpha /// public Tensor addcdiv(Tensor tensor1, Tensor tensor2, Scalar value) { + if (AutocastMode.IsAutocastEnabled(this.device.type)) { + var st = (ScalarType)THSTensor_type(Handle); + var st1 = (ScalarType)THSTensor_type(tensor1.Handle); + var st2 = (ScalarType)THSTensor_type(tensor2.Handle); + var sts = new[] { st, st1, st2 }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float32); + } var res = THSTensor_addcdiv(Handle, tensor1.Handle, tensor2.Handle, value.Handle); if (res == IntPtr.Zero) CheckForErrors(); @@ -238,6 +250,23 @@ public Tensor addcdiv_(Tensor tensor1, Tensor tensor2) /// public Tensor addcmul(Tensor tensor1, Tensor tensor2, Scalar value) { + if (AutocastMode.IsAutocastEnabled(this.device.type)) { + /* + * These ops don’t require a particular dtype for stability, but take multiple inputs and require that the inputs’ dtypes match. + * If all of the inputs are float16, the op runs in float16. + * If any of the inputs is float32, autocast casts all inputs to float32 and runs the op in float32. + * https://pytorch.org/docs/stable/amp.html + */ + var st = (ScalarType)THSTensor_type(Handle); + var st1 = (ScalarType)THSTensor_type(tensor1.Handle); + var st2 = (ScalarType)THSTensor_type(tensor2.Handle); + var sts = new[] { st, st1, st2 }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, tensor1.handle, tensor2.handle) = AutocastMode.AutoCast(handle, tensor1.handle, tensor2.handle, ScalarType.Float32); + } + var res = THSTensor_addcmul(Handle, tensor1.Handle, tensor2.Handle, value.Handle); if (res == IntPtr.Zero) CheckForErrors(); @@ -335,6 +364,7 @@ public Tensor addr(Tensor vec1, Tensor vec2, float beta = 1.0f, float alpha = 1. var res = THSTensor_addr(Handle, vec1.Handle, vec2.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -649,6 +679,7 @@ public Tensor cumsum(long dim, ScalarType? type = null) { var res = THSTensor_cumsum(Handle, dim, type.HasValue, (sbyte)type.GetValueOrDefault()); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -663,6 +694,7 @@ public Tensor cumprod(long dim, ScalarType? type = null) { var res = THSTensor_cumprod(Handle, dim, type.HasValue, (sbyte)type.GetValueOrDefault()); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -757,6 +789,7 @@ public Tensor exp() { var res = THSTensor_exp(Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -789,6 +822,7 @@ public Tensor expm1() { var res = THSTensor_expm1(Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1028,6 +1062,7 @@ public Tensor log() { var res = THSTensor_log(Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1111,6 +1146,7 @@ public Tensor log10() var res = THSTensor_log10(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1134,6 +1170,7 @@ public Tensor log1p() var res = THSTensor_log1p(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1157,6 +1194,7 @@ public Tensor log2() var res = THSTensor_log2(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1413,6 +1451,7 @@ public Tensor pow(Scalar exponent) { var res = THSTensor_pow_scalar(Handle, exponent.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1437,6 +1476,7 @@ public Tensor reciprocal() var res = THSTensor_reciprocal(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1532,6 +1572,7 @@ public Tensor rsqrt() { var res = THSTensor_rsqrt(Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -1793,6 +1834,15 @@ public Tensor true_divide_(Scalar other) return this; } + /*public Tensor rtruediv_(Tensor other) + { + var res = THSTensor_true_divide(other.Handle, Handle); + if(res == IntPtr.Zero) + CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); + return new Tensor(res); + }*/ + /// /// Returns a new tensor with the truncated integer values of the elements of input. /// diff --git a/src/TorchSharp/Tensor/Tensor.Trig.cs b/src/TorchSharp/Tensor/Tensor.Trig.cs index 39e8f048b..86e5f0865 100644 --- a/src/TorchSharp/Tensor/Tensor.Trig.cs +++ b/src/TorchSharp/Tensor/Tensor.Trig.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; using System.Diagnostics.Contracts; +using System.Linq; using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; @@ -143,6 +144,13 @@ public Tensor atan_() /// The second tensor public Tensor atan2(Tensor other) { + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { this.dtype, other.dtype }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, other.handle) = AutocastMode.AutoCast(handle, other.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, other.handle) = AutocastMode.AutoCast(handle, other.handle, ScalarType.Float32); + } var res = THSTensor_atan2(Handle, other.Handle); if (res == IntPtr.Zero) CheckForErrors(); @@ -219,6 +227,7 @@ public Tensor tan() var res = THSTensor_tan(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -265,6 +274,7 @@ public Tensor sinh() var res = THSTensor_sinh(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -288,6 +298,7 @@ public Tensor cosh() var res = THSTensor_cosh(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 0fe6eb971..322c13116 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -3449,6 +3449,7 @@ public Tensor erfinv() { var res = NativeMethods.THSTensor_erfinv(Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4417,6 +4418,7 @@ public Tensor dist(Tensor other, float p = 2.0f) { var res = NativeMethods.THSTensor_dist(Handle, other.Handle, p); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4428,6 +4430,7 @@ public Tensor norm(float p = 2.0f) { var res = NativeMethods.THSTensor_norm(Handle, p); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4438,6 +4441,7 @@ public Tensor norm(int dim, bool keepdim = false, float p = 2.0f) { var res = NativeMethods.THSTensor_norm_along_dimension(Handle, dim, keepdim, p); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4528,6 +4532,7 @@ public Tensor renorm(float p, long dim, float maxnorm) { var res = NativeMethods.THSTensor_renorm(Handle, p, dim, maxnorm); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4950,6 +4955,7 @@ public Tensor prod(ScalarType? type = null) { var res = NativeMethods.THSTensor_prod(Handle, type.HasValue, (sbyte)type.GetValueOrDefault()); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4960,6 +4966,7 @@ public Tensor prod(long dim, bool keepdim = false, ScalarType? type = null) { var res = NativeMethods.THSTensor_prod_along_dimensions(Handle, dim, keepdim, type.HasValue, (sbyte)type.GetValueOrDefault()); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -4970,6 +4977,7 @@ public Tensor sum(ScalarType? type = null) { var res = NativeMethods.THSTensor_sum(Handle, type.HasValue, (sbyte)type.GetValueOrDefault()); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -5844,6 +5852,13 @@ public Tensor scatter_(long dim, Tensor index, Tensor src) /// public Tensor scatter_add(long dim, Tensor index, Tensor src) { + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { this.dtype, index.dtype, src.dtype }; + if (sts.All(x => x == ScalarType.Float16)) + (handle, index.handle, src.handle) = AutocastMode.AutoCast(handle, index.handle, src.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, index.handle, src.handle) = AutocastMode.AutoCast(handle, index.handle, src.handle, ScalarType.Float32); + } var res = NativeMethods.THSTensor_scatter_add(Handle, dim, index.Handle, src.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } return new Tensor(res); diff --git a/src/TorchSharp/Tensor/torch.OtherOperations.cs b/src/TorchSharp/Tensor/torch.OtherOperations.cs index fb4568b5c..b09f2c82e 100644 --- a/src/TorchSharp/Tensor/torch.OtherOperations.cs +++ b/src/TorchSharp/Tensor/torch.OtherOperations.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; +using TorchSharp.Amp; using TorchSharp.PInvoke; using static TorchSharp.PInvoke.NativeMethods; @@ -166,6 +167,7 @@ public static Tensor cdist( var res = THSTensor_cdist(x1.Handle, x2.Handle, p, (long)compute_mode); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } From f84392b2eb35ad149450c22fd89d207ce35d5e09 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 18 Oct 2024 17:06:21 -0300 Subject: [PATCH 27/43] fix test jit, it is literally close --- test/TorchSharpTest/TestJIT.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/TorchSharpTest/TestJIT.cs b/test/TorchSharpTest/TestJIT.cs index 7fcb98708..74c635598 100644 --- a/test/TorchSharpTest/TestJIT.cs +++ b/test/TorchSharpTest/TestJIT.cs @@ -161,7 +161,8 @@ public void TestLoadJIT_3() Assert.Equal(new long[] { 10 }, t.shape); Assert.Equal(torch.float32, t.dtype); - Assert.True(torch.tensor(new float[] { 0.564213157f, -0.04519982f, -0.005117342f, 0.395530462f, -0.3780813f, -0.004734449f, -0.3221216f, -0.289159119f, 0.268511474f, 0.180702567f }).allclose(t)); + + Assert.True(torch.tensor(new float[] { 0.564213157f, -0.04519982f, -0.005117342f, 0.395530462f, -0.3780813f, -0.004734449f, -0.3221216f, -0.289159119f, 0.268511474f, 0.180702567f }).allclose(t, 1e-2, 1e-3 /*Really it is literally close with 0.0001 diff*/)); Assert.Throws(() => m.call(torch.ones(100))); } From 197c1e4ebe45e07e4d1fb19d5cec1168f56fd940 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sat, 19 Oct 2024 13:21:13 -0300 Subject: [PATCH 28/43] Test and some improve on autocast --- src/TorchSharp/Amp/AMPManager.cs | 1 + src/TorchSharp/Amp/AutocastMode.cs | 49 +-- src/TorchSharp/Amp/GradScaler.cs | 81 ++-- src/TorchSharp/Optimizers/Optimizer.cs | 5 + .../TestAutocast.cs | 169 +++++++++ .../TestGradScaler.cs | 345 ++++++++++++++++++ 6 files changed, 585 insertions(+), 65 deletions(-) create mode 100644 test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs create mode 100644 test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index c5a120b03..11bc1aaa2 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -6,6 +6,7 @@ namespace TorchSharp.Amp { + [Obsolete("Use AutocastMode instaed", true)] public class AMPManager : IDisposable { diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index e6200a3c8..2cf89b3dd 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -22,7 +22,7 @@ public static torch.Tensor AutoCast(this torch.Tensor input) public sealed class AutocastMode : IDisposable { public bool _enabled=false; - public bool IsEnter = false; + public bool IsEnter { private set; get; }=false; public bool IsDisposed = false; private bool prev_cache_enabled, prev; private torch.ScalarType prev_fastdtype; @@ -37,10 +37,6 @@ public static AutocastMode GetInstance(bool enabled=false) return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled:enabled,cache_enabled:true); } - public torch.ScalarType GetFastType() - { - return torch.get_autocast_dtype(device); - } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float16 @@ -70,7 +66,12 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab } this._enabled = enabled; } - private torch.ScalarType GetType(IntPtr handle) + + public torch.ScalarType GetFastType() + { + return torch.get_autocast_dtype(device); + } + private static torch.ScalarType GetDtype(IntPtr handle) { return (torch.ScalarType)NativeMethods.THSTensor_type(handle); } @@ -99,30 +100,6 @@ public static (IntPtr h1, IntPtr h2, IntPtr h3) AutoCast(IntPtr handle1, IntPtr return (ToIf(handle1, dtype), ToIf(handle2, dtype), ToIf(handle3, dtype)); } - - /*public static IntPtr[] AutoCast(params IntPtr[] handles) - { - var stsel =handles.Select(x => (torch.ScalarType)NativeMethods.THSTensor_type(x)); - if (AutocastMode.IsAutocastEnabled(this.device.type)) { - var st = (ScalarType)THSTensor_type(Handle); - var st1 = (ScalarType)THSTensor_type(tensor1.Handle); - var st2 = (ScalarType)THSTensor_type(tensor2.Handle); - var sts = new ScalarType[] { st, st1, st2 }; - if (sts.All(x => x == ScalarType.Float16)) { - var f16 = ScalarType.Float16; - handle = AutocastMode.AutoCast(handle, f16); - tensor1.handle = AutocastMode.AutoCast(tensor1.handle, f16); - tensor2.handle = AutocastMode.AutoCast(tensor2.handle, f16); - - } - var f32 = ScalarType.Float32; - if (sts.Any(x => x == f32)) { - handle = AutocastMode.AutoCast(handle, f32); - tensor1.handle = AutocastMode.AutoCast(tensor1.handle, f32); - tensor2.handle = AutocastMode.AutoCast(tensor2.handle, f32); - } - } - }*/ public static IntPtr AutoCast(IntPtr handle, torch.ScalarType dtype) { return ToIf(handle, dtype); @@ -142,19 +119,13 @@ public static IntPtr To(IntPtr ptr, torch.ScalarType type) return res; } - private static torch.ScalarType GetDtype(IntPtr ptr) - { - return (torch.ScalarType)NativeMethods.THSTensor_type(ptr); - } - private static DeviceType GetDeviceType(IntPtr ptr) { return (DeviceType)NativeMethods.THSTensor_device_type(ptr); } public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) { - - if (!GetInstance()._enabled) + if (!GetInstance()._enabled || !GetInstance().IsEnter) return ptr; if (GetDtype(ptr) == type) //if already have same dtype is not necesary convert to dtype, right??? return ptr; @@ -168,7 +139,7 @@ public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type, DeviceType device_type) { bool is_elegible = GetDtype(ptr) != torch.ScalarType.Float64 && GetDeviceType(ptr) == device_type; - + if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) return ptr; var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); @@ -191,11 +162,13 @@ public IDisposable Enter() torch.set_autocast_dtype(device, fast_dtype); torch.autocast_increment_nesting(); torch.set_autocast_cache_enabled(_cache_enabled); + IsEnter = true; return this; } private void Dispose(bool disposing) { + IsEnter = false; this._enabled = false; if (torch.autocast_decrement_nesting() == 0) torch.clear_autocast_cache(); diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index f9070f3c2..d5dbc9a46 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -1,10 +1,6 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Tensorboard; using TorchSharp.Modules; using TorchSharp.Utils; @@ -39,6 +35,7 @@ private UnorderedMap _refresh_per_optimizer_state() public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) { + //https://gist.github.com/dorpxam/67ad2bc222b2cf567d4a6fc298375e13 Debug.Assert(dev == torch.CPU || dev == torch.CUDA); device = dev; Enabled = enabled; @@ -48,6 +45,7 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac _growth_interval = growth_interval; InitGrowthTracker = 0.0f; + _per_optimizer_states.SetDefaultDict(_refresh_per_optimizer_state()); throw new NotImplementedException("This need to finish"); } @@ -231,22 +229,25 @@ public void unscale(torch.optim.Optimizer optimizer) if (f != null && f.GetValue(optimizer) is bool b && !b) { bool has_grad_scaler = false;//I dont know how deal this... if (has_grad_scaler) { - + throw new NotImplementedException(); } else { if (optimizer_state["stage"] is OptState optstate && optstate == OptState.Ready) check_inf_per_device(optimizer); var scaler = _get_scale_async(); Debug.Assert(!scaler.is_null(), "!scaler.is_null()"); - torch.Tensor found_inf; + torch.Tensor found_inf=null; if (optimizer_state["found_inf_per_device"] is torch.Tensor[] ts) { for (int i = 0; i < ts.Length; i++) ts[i].to(scaler.device, true); found_inf=torch.sum(torch.cat(ts)); } + + optimizer.grad_scale = (optimizer_state["stage"] as OptState?) == OptState.Unscaled ? null : scaler * (optimizer.grad_scale.is_null() ? 1 : optimizer.grad_scale); + optimizer.found_inf = found_inf; + //if(optimizer is SGD ad) //Info: All optimizer have grad_scale and found_inf //https://github.com/pytorch/pytorch/blob/main/torch/optim/adam.py, etc. - //DANGER: Optimizer in TorchShapr not have grad_scaler or found_inf, we need grad_scale for https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L440 - + //DANGER: Optimizer in TorchSharp not have grad_scaler or found_inf, we need grad_scale for https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L440 //optimizer.GetType().GetField("grad_scale").GetValue(optimizer) as torch.Tensor t } retval = optimizer.step().item(); @@ -256,7 +257,7 @@ public void unscale(torch.optim.Optimizer optimizer) } if (optimizer_state["stage"] is OptState state1 && state1 == OptState.Ready) unscale(optimizer); - Debug.Assert((optimizer_state["found_inf_per_device"] as torch.Tensor[]).Length > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); + Debug.Assert((optimizer_state["found_inf_per_device"] as torch.Tensor[])?.Length > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); retval = maybe_opt_step(optimizer, optimizer_state); optimizer_state["stage"] = OptState.Stepped; return retval; @@ -301,23 +302,49 @@ public void update(object new_scale = null) for (int i = 1; i < found_infs.Count; i++) found_inf_combined += found_infs[i]; torch.amp_update_scale_(_scale, _growth_tracker, found_inf_combined, (double)_growth_factor, (double)_backoff_factor, (long)_growth_interval); - } //TODO: Implement defaultdict https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L531 } + public void set_init_growth_tracker(long new_value) + { + InitGrowthTracker=new_value; + } + + public torch.Tensor get_scale_async() + { + return _scale; + } public float get_scale() { - if (this.Enabled) { + if (!this.Enabled) + return 1.0f; - var scale = _get_scale_async(); - if (scale.is_null()) - return InitScale; - return scale.item(); - } - return 1.0f; + var scale = _get_scale_async(); + if (scale.is_null()) + return InitScale; + return scale.item(); } + public float get_growth_factor() + { + return _growth_factor; + } + + public float get_backoff_factor() + { + return _backoff_factor; + } + + public int get_growth_interval() + { + return _growth_interval; + } + + public float get_init_growth_tracker() + { + return InitGrowthTracker; //TODO: Resarch this... should be int64_t??? + } public bool IsEnabled() { return this.Enabled; @@ -325,16 +352,16 @@ public bool IsEnabled() public UnorderedMap state_dict() { - if (Enabled) { - var res = new UnorderedMap(); - res["scale"] = get_scale(); - res[nameof(_growth_factor)] = _growth_factor; - res[nameof(_backoff_factor)] = _backoff_factor; - res[nameof(_growth_interval)] = _growth_interval; - res[nameof(_growth_tracker)] = _growth_tracker; - return res; - } - return null; + if (!Enabled) + return null; + + var res = new UnorderedMap(); + res["scale"] = get_scale(); + res[nameof(_growth_factor)] = _growth_factor; + res[nameof(_backoff_factor)] = _backoff_factor; + res[nameof(_growth_interval)] = _growth_interval; + res[nameof(_growth_tracker)] = _growth_tracker; + return res; } public void load_state_dict(Dictionary state_dict) diff --git a/src/TorchSharp/Optimizers/Optimizer.cs b/src/TorchSharp/Optimizers/Optimizer.cs index 9c40f0765..93cc48d0f 100644 --- a/src/TorchSharp/Optimizers/Optimizer.cs +++ b/src/TorchSharp/Optimizers/Optimizer.cs @@ -21,6 +21,8 @@ public static partial class optim /// public abstract partial class Optimizer : IDisposable { + internal Tensor grad_scale; + internal Tensor found_inf; /// /// Class wrapping PyTorch's optimzer object reference. /// @@ -85,6 +87,9 @@ public void Dispose() protected virtual void Dispose(bool disposing) { if (disposing && handle != null && !handle.IsInvalid) { + + grad_scale?.Dispose(); + found_inf?.Dispose(); handle.Dispose(); handle.SetHandleAsInvalid(); } diff --git a/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs b/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs new file mode 100644 index 000000000..5e715ba5a --- /dev/null +++ b/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs @@ -0,0 +1,169 @@ +using System; +using TorchSharp; +using TorchSharp.Amp; +using Xunit; + +using static TorchSharp.torch; +namespace TorchSharpTest.WithCudaBinaries +{ + public class TestAutocast + { + private static void CheckCUDA() + { + if (!torch.cuda_is_available()) + throw new Exception("CUDA IS NOT AVAILABLE"); + } + [Fact] + [TestOf("AutocastF16")] + public void TestAutocastF16() + { + CheckCUDA(); + var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + using (AutocastMode.GetInstance().Enter()) { + var c = a.matmul(b); + var d = a.addbmm(b, b); + var e = a.baddbmm(b, b); + var f = a.addmm(b, b); + var g = a.addr(vec1, vec2); + var h = a.mm(b); + var i = a.mv(vec1); + var j = a.bmm(b); + Assert.Equal(ScalarType.Float16,c.dtype); + Assert.Equal(ScalarType.Float16,d.dtype); + Assert.Equal(ScalarType.Float16,e.dtype); + Assert.Equal(ScalarType.Float16,f.dtype); + Assert.Equal(ScalarType.Float16,g.dtype); + Assert.Equal(ScalarType.Float16,h.dtype); + Assert.Equal(ScalarType.Float16,i.dtype); + Assert.Equal(ScalarType.Float16,j.dtype); + } + + /*Assert.Equal(ScalarType.Float16, c.dtype); + Assert.Equal(ScalarType.Float16, d.dtype); + Assert.Equal(ScalarType.Float16, e.dtype); + Assert.Equal(ScalarType.Float16, f.dtype); + Assert.Equal(ScalarType.Float16, g.dtype); + Assert.Equal(ScalarType.Float16, h.dtype); + Assert.Equal(ScalarType.Float16, i.dtype); + Assert.Equal(ScalarType.Float16, j.dtype);*/ + throw new NotImplementedException(); + } + + [Fact] + [TestOf("AutocastF16")] + public void TestAutocastF16Arithmetic() + { + //Like matmul, addmm, mm, mv, etc. + throw new NotImplementedException(); + } + + [Fact] + [TestOf("AutocastF16")] + public void TestAutocastF16Cell() + { + //Like GRUCell, LSTM, RNN + throw new NotImplementedException(); + } + + [Fact] + [TestOf("AutocastF16")] + public void TestAutocastF16Other() + { + //Like Linear, prelu, etc. + throw new NotImplementedException(); + } + + + + [Fact] + [TestOf("AutocastF16")] + public void TestAutocastF16Convolutions() + { + //Conv 1d,2d,3d, conv_transpose 1d,2d,3d + throw new NotImplementedException(); + } + [Fact] + [TestOf("AutocastF32")] + public void TestAutocastF32() + { + CheckCUDA(); + throw new NotImplementedException(); + } + + [Fact] + [TestOf("AutocastF32")] + public void TestAutocastF32Trigonometry() + { + CheckCUDA(); + var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + using (AutocastMode.GetInstance().Enter()) { + const ScalarType f32 = ScalarType.Float32; + var c = a.acos(); + var d = a.asin(); + var e = a.cosh(); + var f = a.tan(); + var g = a.sinh(); + Assert.Equal(f32, c.dtype); + Assert.Equal(f32, d.dtype); + Assert.Equal(f32, e.dtype); + Assert.Equal(f32, f.dtype); + Assert.Equal(f32, g.dtype); + } + } + + [Fact] + [TestOf("AutocastF32")] + public void TestAutocastF32Logarithmic() + { + CheckCUDA(); + var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + using (AutocastMode.GetInstance().Enter()) { + const ScalarType f32 = ScalarType.Float32; + var c = a.log(); + var d = a.log10(); + var e = a.log_softmax(1); + var f = a.log1p(); + var g = a.log2(); + Assert.Equal(f32, c.dtype); + Assert.Equal(f32, d.dtype); + Assert.Equal(f32, e.dtype); + Assert.Equal(f32, f.dtype); + Assert.Equal(f32, g.dtype); + } + } + [Fact] + [TestOf("AutocastF32")] + public void TestAutocastF32Loss() + { + CheckCUDA(); + var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + using (AutocastMode.GetInstance().Enter()) { + var c = torch.nn.L1Loss().forward(a,b); + var d = a.log10(); + var e = a.log_softmax(1); + var f = a.log1p(); + var g = a.log2(); + } + } + + [Fact] + [TestOf("AutocastFWidestType")] + public void TestAutocastFWidest() + { + //addcdiv,addcmul, atan2, bilinear,cross, dot,grid_sample, index_put (not implemented in TorchSharp), scatter_add, tensordot. + throw new NotImplementedException(); + } + } +} diff --git a/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs b/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs new file mode 100644 index 000000000..86f04597f --- /dev/null +++ b/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs @@ -0,0 +1,345 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using TorchSharp; +using TorchSharp.Amp; +using TorchSharp.Modules; +using Xunit; +using static TorchSharp.torch; +using static TorchSharp.torch.nn; +namespace TorchSharpTest.WithCudaBinaries +{ + public class TestGradScaler + { + internal DeviceType device = DeviceType.CUDA; + internal ScalarType dtype = ScalarType.Float32; + + private (Sequential modctrl, Sequential modscal, torch.optim.Optimizer optctrl, torch.optim.Optimizer optscal) create_scaling_model_optimizer(DeviceType dev = DeviceType.CUDA) + { + var mod_control =Sequential(torch.nn.Linear(8,8), torch.nn.Linear(8, 8)); + mod_control.to(dev); + var mod_scaling = Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)); + mod_scaling.to(dev); + + using (torch.no_grad()) { + + using (var enumer = mod_control.parameters().Zip(mod_scaling.parameters()).GetEnumerator()) + while (enumer.MoveNext()) + enumer.Current.Second.copy_(enumer.Current.First); + + var opt_control = torch.optim.SGD(mod_control.parameters(), 1.0f); + var opt_scaling = torch.optim.SGD(mod_scaling.parameters(), 1.0f); + return (mod_control, mod_scaling, opt_control, opt_scaling); + } + } + internal (Sequential modctrl, Sequential modscal, torch.optim.Optimizer optctrl, torch.optim.Optimizer optscal, List> data, MSELoss loss_fn, int skip_iter) create_scaling_case(DeviceType dev = DeviceType.CUDA, ScalarType dtype = ScalarType.Float32) + { + var data = new List>() { + new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), + new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), + new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), + new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), + }; + + var loss_fn = MSELoss(); + loss_fn.to(DeviceType.CUDA); + const int skip_iter = 2; + var csmo = create_scaling_model_optimizer(dev); + return (csmo.modctrl, csmo.modscal, csmo.optctrl, csmo.optscal, data, loss_fn, skip_iter); + } + internal void run_scaling_case(Action>, Sequential, torch.optim.Optimizer, GradScaler, MSELoss, int, bool> run, int unskipped, int skipped, double atol = 1e07) + { + const double rtol = 1e-7d; + bool[] enableds = new bool[] { true, false }; + foreach (var enabled in enableds) { + var res =create_scaling_case(); + var scaler = new GradScaler(new Device(DeviceType.CUDA), 128.0f, 2.0f, growth_interval: 1); + run.Invoke(res.data, res.modctrl, res.optctrl, scaler, res.loss_fn, res.skip_iter, false); + run.Invoke(res.data, res.modscal, res.optscal, scaler, res.loss_fn, res.skip_iter, true); + if (enabled) { + var net_growth = unskipped > 0 ? MathF.Pow(scaler.get_growth_factor(), unskipped) : 1.0f; + var net_backoff = skipped> 0 ? MathF.Pow(scaler.get_backoff_factor(), skipped) : 1.0f; + Assert.Equal(scaler.get_scale(), (128.0f * net_growth * net_backoff)); + + } else { + Assert.Equal(scaler.get_scale(), 1.0f); + } + + foreach(var seq in res.modctrl.parameters().Zip(res.modscal.parameters())){ + var c_grad = seq.First.grad; + var s_grad = seq.Second.grad; + if(!c_grad.is_null() && !s_grad.is_null()) + Assert.True(torch.allclose(seq.First.grad, seq.Second.grad, rtol, atol)); + var c_state = res.optctrl.ParamGroups; + var s_state = res.optscal.ParamGroups; + foreach(var c_s_state in c_state.Zip(s_state)) { + if (c_s_state.First is ParamGroup pg_c_state && c_s_state.Second is ParamGroup pg_s_state) { + foreach (var c_s_state_p in pg_c_state.Parameters.Zip(pg_s_state.Parameters)) + Assert.True(torch.allclose(c_s_state_p.First, c_s_state_p.Second, rtol, atol)); + } + } + Assert.True(torch.allclose(seq.First, seq.Second, rtol, atol)); + } + } + } + + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingUnscaleSparse() + { + var scaler = new GradScaler(new Device(device)); + var inv_scale = torch.full(1, 0.25, dtype, new Device(device)); + var found_inf = torch.empty(1, dtype, new Device(device)); + var cur = found_inf.device; + var i = torch.tensor(new long[,] { { 0, 1, 1 }, { 2, 0, 2 } }, ScalarType.Int64, new Device(DeviceType.CUDA)); + var v = torch.tensor(new float[] { 16.0f,32.0f,64.0f}, ScalarType.Float32, new Device(DeviceType.CUDA)); + var s = torch.sparse_coo_tensor(i,v, new long[]{2,3}, dtype, new Device(DeviceType.CUDA)); + + var p = s.clone(); + Assert.True(p.is_sparse); + var optA = torch.optim.SGD(new Parameter[] { new Parameter(p) }, 1.0); + p.grad = s.clone(); + found_inf.zero_(); + found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; + + Assert.Equal(found_inf.item(), 0.0f); + Assert.True(torch.equal(p.grad.to_dense(), (s/4).to_dense()).item()); + + v = torch.tensor(new float[] { 16.0f, 32.0f, float.PositiveInfinity }); + p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); + found_inf.zero_(); + found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; + Assert.Equal(found_inf.item(), 1.0f); + + v = torch.tensor(new float[] { 16.0f, 32.0f, float.NaN }); + p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); + found_inf.zero_(); + found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; + Assert.Equal(found_inf.item(), 1.0f); + + p = s.clone().to(ScalarType.Float16); + Assert.True(p.is_sparse); + var optB = torch.optim.SGD(new Parameter[] { new Parameter(p) }, 1.0); + + p.grad = s.clone().to(ScalarType.Float16); + found_inf.zero_(); + found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; + Assert.Equal(found_inf.item(), 0.0f); + Assert.True(torch.equal(p.grad.to_dense(), (s.to(ScalarType.Float16) / 4).to_dense()).item()); + + i = torch.tensor(new long[,] { { 0, 1, 0 }, { 2, 0, 2 } }); + v = torch.tensor(new float[] { 64000.0f, 32.0f, 64000.0f }); + p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); + found_inf.zero_(); + found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; + Assert.Equal(found_inf.item(), 0.0f); + } + + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingStateDict() + { + bool[] lazy_init_scale = new[] { true, false }; + foreach (var l in lazy_init_scale) { + var s0 = new GradScaler(new Device(DeviceType.CUDA), 3.0f, 4.0f, 0.5f, 2); + var s1 = new GradScaler(new Device(DeviceType.CUDA), 6.0f, 7.0f, 0.8f, 1); + s1.set_init_growth_tracker(7); + if (l) { + s1.scale(torch.full(1, 4.0f, ScalarType.Float32, new Device(DeviceType.CUDA, 0))); + Assert.Equal(s1.get_scale_async().dtype, ScalarType.Float32); + } + + var re = s0.state_dict(); + s1.load_state_dict(re); + + Assert.Equal(s1.get_scale(), 3.0f); + Assert.Equal(s1.get_growth_factor(), 0.5f); + Assert.Equal(s1.get_growth_interval(), 2); + Assert.Equal(s1.get_init_growth_tracker(), 0.0f); + } + } + + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScaleWillNotOverflow() + { + var model = torch.nn.Linear(5, 1).to(DeviceType.CUDA); + var optimizer = torch.optim.Adam(model.parameters()); + var scaler = new GradScaler(new Device(DeviceType.CUDA), 1e38f, MathF.Pow(2.0f, 4), growth_interval:1); + optimizer.zero_grad(); + var x = torch.randn(new long[]{1,5}).to(DeviceType.CUDA); + var y = 1e-30 * torch.randn(new long[]{1,1}).to(DeviceType.CUDA); + var l = torch.pow(model.forward(x) - y, 2).mean(); + scaler.scale(l).backward(); + scaler.step(optimizer); + scaler.update(); + Assert.True(!scaler.get_scale_async().isinf().item() && !scaler.get_scale_async().isnan().item()); + } + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingClipping() + { + run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( + (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { + const float max_norm = 0.2f; + int idx = 0; + foreach (var ipair in data) { + //ipair. + optimizer.zero_grad(); + var output = model.forward(ipair.Key); + var loss = loss_fn.forward(output, ipair.Value); + if (try_scaling_api) { + scaler.scale(loss).backward(); + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm * scaler.get_scale()); + if (idx == skip_iter && scaler.IsEnabled()) { + var weight = (model[1] as Linear)?.weight; + weight.grad.fill_(float.PositiveInfinity); + } + + scaler.step(optimizer); + scaler.update(); + } else { + loss.backward(); + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); + if (!scaler.IsEnabled() || (idx != skip_iter)) + optimizer.step(); + } + + idx++; + } + })), + 3, 1, 1e-5); + } + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingClippingSeparateUnscale() + { + run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( + (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { + const float max_norm = 0.2f; + int idx = 0; + foreach (var ipair in data) { + //ipair. + optimizer.zero_grad(); + var output = model.forward(ipair.Key); + var loss = loss_fn.forward(output, ipair.Value); + if (try_scaling_api) { + scaler.scale(loss).backward(); + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); + if (idx == skip_iter && scaler.IsEnabled()) { + var weight = (model[1] as Linear)?.weight; + weight.grad.fill_(float.PositiveInfinity); + } + + scaler.step(optimizer); + scaler.update(); + } else { + loss.backward(); + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); + if (!scaler.IsEnabled() || (idx != skip_iter)) + optimizer.step(); + } + + idx++; + } + })), + 3, 1); + } + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingPenalty() + { + + run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( + (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { + const float max_norm = 0.2f; + int idx = 0; + foreach (var ipair in data) { + //ipair. + optimizer.zero_grad(); + var output = model.forward(ipair.Key); + var loss = loss_fn.forward(output, ipair.Value); + List grad_params = new List(); + if (try_scaling_api) { + //throw new NotImplementedException(); + //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); + //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) + var inv_scale = 1.0f / scaler.get_scale(); + for (int i = 0; i < grad_params.Count; i++) + grad_params[i] *= inv_scale; + } else { + //throw new NotImplementedException(); + //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); + //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) + } + + var grad_norm = torch.zeros(new long[] { 1 }).to(ipair.Key.device); + for (int i = 0; i < grad_params.Count; i++) + grad_norm += grad_params[i].pow(2).sum(); + grad_norm = grad_norm.sqrt(); + loss = loss + grad_norm; + if (try_scaling_api) { + scaler.scale(loss).backward(); + if (idx == skip_iter && scaler.IsEnabled()) { + var weight = (model[1] as Linear)?.weight; + weight.grad.fill_(float.PositiveInfinity); + } + + scaler.step(optimizer); + scaler.update(); + } else { + loss.backward(); + if (!scaler.IsEnabled() || (idx != skip_iter)) { + optimizer.step(); + } + } + idx++; + + } + })), + 3, 1); + } + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingAccumulation() + { + run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( + (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { + const int iters_to_accumulate= 2; + int idx = 0; + foreach (var ipair in data) { + //ipair. + optimizer.zero_grad(); + var output = model.forward(ipair.Key); + var loss = loss_fn.forward(output, ipair.Value); + loss /= iters_to_accumulate; + + if (try_scaling_api) { + scaler.scale(loss).backward(); + } else { + loss.backward(); + } + + if ((idx + 1) % iters_to_accumulate == 0) { + if (try_scaling_api) { + scaler.step(optimizer); + scaler.update(); + optimizer.zero_grad(); + } else { + optimizer.step(); + optimizer.zero_grad(); + } + } + idx++; + } + })), + 2, 0); + } + [Fact] + [TestOf(nameof(GradScaler))] + public void TestGradScalingMultiple() + { + throw new NotImplementedException(); + } + } +} From 061ec44ac41ae23649e933a885ba6df7ee073de7 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 21 Oct 2024 10:18:17 -0300 Subject: [PATCH 29/43] cross between tensors, improve grad scaler and add normalize #1382 --- .gitignore | 3 +- Directory.Build.props | 11 +- src/Native/CMakeSettings.json | 14 +- src/Native/LibTorchSharp/CMakeLists.txt | 12 +- src/Native/LibTorchSharp/THSTensor.cpp | 46 +++++ src/Native/LibTorchSharp/THSTensor.h | 29 ++- src/TorchSharp/Amp/AutocastMode.cs | 28 ++- src/TorchSharp/Amp/GradScaler.cs | 31 +-- src/TorchSharp/NN/Normalization/Functional.cs | 8 + src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs | 3 + .../PInvoke/LibTorchSharp.THSTensor.cs | 10 + src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs | 14 +- src/TorchSharp/Tensor/Tensor.cs | 62 ++++++ .../Tensor/torch.OtherOperations.cs | 2 + src/TorchSharp/Utils/TensorAccessor.cs | 2 +- .../TestAutocast.cs | 184 +++++++++++++++--- .../TestGradScaler.cs | 33 ++-- test/TorchSharpTest/NN.cs | 10 + 18 files changed, 410 insertions(+), 92 deletions(-) diff --git a/.gitignore b/.gitignore index ed21b9d11..795c92477 100644 --- a/.gitignore +++ b/.gitignore @@ -272,4 +272,5 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py -.vscode/settings.json \ No newline at end of file +.vscode/settings.json +/Directory.Build.props Copia diff --git a/Directory.Build.props b/Directory.Build.props index b839e4140..f5687af68 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,10 +5,6 @@ - - false - $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cpu\libtorch - $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cu121\libtorch Debug Debug;Release <_DefaultArchitecture>$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLower()) @@ -138,7 +134,7 @@ .dylib.dwarf - + pytorch %252Bcpu %252Bcu$(CudaVersionNoDot) @@ -154,9 +150,6 @@ $(LibTorchArchiveCoreName)-$(LibTorchVersion)$(LibTorchCudaLocalNameSuffix) $(IntermediateOutputRootPath)libtorch-cpu\$(LibTorchCpuLocalBase)\libtorch\share\cmake\Torch - - $(LibTorchPathCPU)\share\cmake\Torch - @@ -175,4 +168,4 @@ true - + \ No newline at end of file diff --git a/src/Native/CMakeSettings.json b/src/Native/CMakeSettings.json index f47283578..11d28e957 100644 --- a/src/Native/CMakeSettings.json +++ b/src/Native/CMakeSettings.json @@ -2,20 +2,14 @@ "configurations": [ { "name": "x64-Debug", - "generator": "Visual Studio 17 2022 Win64", + "generator": "Ninja", "configurationType": "Debug", "inheritEnvironments": [ "msvc_x64_x64" ], "buildRoot": "${projectDir}\\out\\build\\${name}", "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "-DCMAKE_PREFIX_PATH=\"K:\\FrameworksForC\\LibTorch\\libtorch-win-shared-with-deps-debug-2.0.1+cu117\"", - "ctestCommandArgs": "", - "variables": [ - { - "name": "Torch_DIR", - "value": "K:/FrameworksForC/LibTorch/libtorch-win-shared-with-deps-debug-2.0.1+cu117", - "type": "PATH" - } - ] + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "" } ] } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index f94d70302..135887441 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -1,11 +1,11 @@ project(LibTorchSharp) find_package(CUDA) -IF(CUDA_FOUND) -include_directories(${CUDA_INCLUDE_DIRS}) -link_directories(${CUDA_LIBRARY_DIRS}) -add_compile_definitions(TORCHSHARP_CUDA_TOOLKIT_FOUND) -ENDIF() +if(CUDA_FOUND) + include_directories(${CUDA_INCLUDE_DIRS}) + link_directories(${CUDA_LIBRARY_DIRS}) + add_compile_definitions(TORCHSHARP_CUDA_TOOLKIT_FOUND) +endif() if(APPLE AND NOT LIBTORCH_ARCH STREQUAL "arm64") include_directories("/usr/local/include" "/usr/local/opt/llvm/include") @@ -88,7 +88,7 @@ ENDIF() target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) -set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 17) +set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 14) if(APPLE) set_target_properties(LibTorchSharp PROPERTIES INSTALL_RPATH "@loader_path;@executable_path;") diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index 65a31b46f..c66da4dcf 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -816,6 +816,21 @@ void THSTensor_index_put_(Tensor tensor, auto indices = at::ArrayRef(indicesArray, indicesLength); CATCH(tensor->index_put_(indices, *value);); } +/*void THSTensor_index_put_accumulate_(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value, + bool accumulate) +{ + at::indexing::TensorIndex* indicesArray = (at::indexing::TensorIndex*)alloca(indicesLength * sizeof(at::indexing::TensorIndex)); + memset(indicesArray, 0, indicesLength * sizeof(at::indexing::TensorIndex)); + completeTensorIndices(indexStarts, indexEnds, indexSteps, indexTensors, indicesArray, indicesLength); + auto indices = at::ArrayRef(indicesArray, indicesLength); + CATCH(tensor->index_put_({ indices }, *value, accumulate);); +}*/ void THSTensor_index_put_scalar_(Tensor tensor, const int64_t* indexStarts, @@ -832,6 +847,37 @@ void THSTensor_index_put_scalar_(Tensor tensor, CATCH(tensor->index_put_(indices, *value);); } +/*Tensor THSTensor_index_put(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value) +{ + at::indexing::TensorIndex* indicesArray = (at::indexing::TensorIndex*)alloca(indicesLength * sizeof(at::indexing::TensorIndex)); + memset(indicesArray, 0, indicesLength * sizeof(at::indexing::TensorIndex)); + completeTensorIndices(indexStarts, indexEnds, indexSteps, indexTensors, indicesArray, indicesLength); + auto indices = at::ArrayRef(indicesArray, indicesLength); + CATCH_TENSOR(tensor->index_put(indices, *value);); +}*/ + +/*Tensor THSTensor_index_put_accumulate(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value, + bool accumulate) +{ + at::indexing::TensorIndex* indicesArray = (at::indexing::TensorIndex*)alloca(indicesLength * sizeof(at::indexing::TensorIndex)); + memset(indicesArray, 0, indicesLength * sizeof(at::indexing::TensorIndex)); + completeTensorIndices(indexStarts, indexEnds, indexSteps, indexTensors, indicesArray, indicesLength); + auto indices = at::ArrayRef(indicesArray, indicesLength); + CATCH_TENSOR(tensor->index_put({ indices }, *value, accumulate);); +}*/ + Tensor THSTensor_index_select(Tensor tensor, int64_t dim, Tensor index) { CATCH_TENSOR(tensor->index_select(dim, *index)); diff --git a/src/Native/LibTorchSharp/THSTensor.h b/src/Native/LibTorchSharp/THSTensor.h index 1e91942ed..76e63ff5b 100644 --- a/src/Native/LibTorchSharp/THSTensor.h +++ b/src/Native/LibTorchSharp/THSTensor.h @@ -619,6 +619,7 @@ EXPORT_API(void) THSTensor_index_copy_(const Tensor tensor, const int64_t dim, c EXPORT_API(Tensor) THSTensor_index_fill(const Tensor tensor, const int64_t dim, const Tensor index, const Scalar value); EXPORT_API(void) THSTensor_index_fill_(const Tensor tensor, const int64_t dim, const Tensor index, const Scalar value); + EXPORT_API(Tensor) THSTensor_indices(Tensor tensor); EXPORT_API(Tensor) THSTensor_index(Tensor tensor, @@ -628,6 +629,14 @@ EXPORT_API(Tensor) THSTensor_index(Tensor tensor, const Tensor* indexTensors, const int indicesLength); +EXPORT_API(void) THSTensor_index_put_(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value); + EXPORT_API(void) THSTensor_index_put_scalar_(Tensor tensor, const int64_t* indexStarts, const int64_t* indexEnds, @@ -636,13 +645,31 @@ EXPORT_API(void) THSTensor_index_put_scalar_(Tensor tensor, const int indicesLength, const Scalar value); -EXPORT_API(void) THSTensor_index_put_(Tensor tensor, +/*EXPORT_API(void) THSTensor_index_put_accumulate_(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value, + bool accumulate);*/ + +/*EXPORT_API(Tensor) THSTensor_index_put(Tensor tensor, const int64_t* indexStarts, const int64_t* indexEnds, const int64_t* indexSteps, const Tensor* indexTensors, const int indicesLength, const Tensor value); +*/ +/*EXPORT_API(Tensor) THSTensor_index_put_accumulate(Tensor tensor, + const int64_t* indexStarts, + const int64_t* indexEnds, + const int64_t* indexSteps, + const Tensor* indexTensors, + const int indicesLength, + const Tensor value, + bool accumulate);*/ EXPORT_API(Tensor) THSTensor_index_select(Tensor tensor, int64_t dim, Tensor index); diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 2cf89b3dd..88a16aa9f 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -64,6 +64,8 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported()) throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16."); } + + torch.set_autocast_enabled(dev.type, true); this._enabled = enabled; } @@ -75,7 +77,7 @@ private static torch.ScalarType GetDtype(IntPtr handle) { return (torch.ScalarType)NativeMethods.THSTensor_type(handle); } - + public static IntPtr AutoCast(IntPtr handle) { return ToIf(handle, GetInstance().GetFastType()); @@ -125,7 +127,7 @@ private static DeviceType GetDeviceType(IntPtr ptr) } public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) { - if (!GetInstance()._enabled || !GetInstance().IsEnter) + if (!IsAutocastEnabled() || !GetInstance().IsEnter) return ptr; if (GetDtype(ptr) == type) //if already have same dtype is not necesary convert to dtype, right??? return ptr; @@ -163,13 +165,24 @@ public IDisposable Enter() torch.autocast_increment_nesting(); torch.set_autocast_cache_enabled(_cache_enabled); IsEnter = true; + /*if (!_enabled) //Research this, may mbad idea???? + return new AutocastMode(new torch.Device(DeviceType.CUDA));*/ return this; } + public static IDisposable AutoCastEnter() + { + return AutocastMode.GetInstance().Enter(); + } + + public void Disabled() + { + _enabled = false; + Dispose(); + } private void Dispose(bool disposing) { IsEnter = false; - this._enabled = false; if (torch.autocast_decrement_nesting() == 0) torch.clear_autocast_cache(); torch.set_autocast_enabled(device, prev); @@ -188,4 +201,13 @@ public void Dispose() GC.SuppressFinalize(this); } } + public class AutocastAttribute : Attribute + { + private DeviceType Dev; + public AutocastAttribute(DeviceType dev) + { + Dev = dev; + } + + } } diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index d5dbc9a46..d3d7a78b3 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -40,20 +40,24 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac device = dev; Enabled = enabled; InitScale = init_scale; + if (Enabled) { + Debug.Assert(growth_factor > 1.0); + Debug.Assert(backoff_factor < 1.0); + } this._growth_factor = growth_factor; _backoff_factor = backoff_factor; _growth_interval = growth_interval; InitGrowthTracker = 0.0f; _per_optimizer_states.SetDefaultDict(_refresh_per_optimizer_state()); - throw new NotImplementedException("This need to finish"); + //throw new NotImplementedException("This need to finish"); } private Tuple check_scale_growth_tracker(string name) { var fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."; - Debug.Assert(_scale.is_null(), $"Attempted {name} but {nameof(_scale)} is None {fix}"); - Debug.Assert(_growth_tracker.is_null(), $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); + Debug.Assert(_scale is null, $"Attempted {name} but {nameof(_scale)} is None {fix}"); + Debug.Assert(_growth_tracker is null, $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); return new Tuple(_scale, _growth_tracker); } @@ -70,9 +74,9 @@ public torch.Tensor scale(torch.Tensor output) { if (!Enabled) return output; - if (_scale.is_null()) + if (_scale is null) LazyInitScaleGrowthTracker(output.device); - Debug.Assert(!_scale.is_null()); + Debug.Assert(!(_scale is null)); return output * _scale.to(output.device, output.dtype, true); } @@ -106,7 +110,7 @@ private torch.Tensor apply_scale(torch.Tensor scale) { IList stash = new List(); if (stash.Count == 0) { - if (_scale.is_null()) { + if (_scale is null) { LazyInitScaleGrowthTracker(scale.device); } stash.Add(new MultiDeviceReplicator(_scale)); @@ -126,18 +130,17 @@ private void apply_scale(IList scales) Dictionary>> per_device_and_dtype_grads = new Dictionary>>(); using (torch.no_grad()) { - if (optimizer is AdamW adamW){ //Some optimizer have parameter tensor for unscale_grads i need that. + if (optimizer is AdamW adamW){ //Some optimizer have parameter tensor for unscale_grads i need that. [20/10/24 WHY I DO THIS???? ] using (var enumer = adamW.parameters().GetEnumerator()) { while (enumer.MoveNext()) { var param = enumer.Current; - if (param.is_null()) + if (param is null) continue; if (!allow_fp16 && param.dtype == torch.ScalarType.Float16) throw new Exception("Attempting to unscale FP16 Gradients"); torch.Tensor to_unscale; if (param.grad.is_sparse) { if (param.grad.dtype == torch.ScalarType.Float16) { - param.grad = param.grad.coalesce(); } @@ -187,7 +190,7 @@ public void unscale(torch.optim.Optimizer optimizer) throw new Exception($"{nameof(unscale)} is being called after step()"); } - Debug.Assert(!_scale.is_null()); + Debug.Assert(!(_scale is null)); var inv_scale = _scale.@double().reciprocal().@float(); var found_inf = torch.full(new ReadOnlySpan(new long[] { 0 }), 0.0f, torch.ScalarType.Float32,_scale.device); @@ -234,7 +237,7 @@ public void unscale(torch.optim.Optimizer optimizer) if (optimizer_state["stage"] is OptState optstate && optstate == OptState.Ready) check_inf_per_device(optimizer); var scaler = _get_scale_async(); - Debug.Assert(!scaler.is_null(), "!scaler.is_null()"); + Debug.Assert(!(scaler is null), "!scaler.is_null()"); torch.Tensor found_inf=null; if (optimizer_state["found_inf_per_device"] is torch.Tensor[] ts) { for (int i = 0; i < ts.Length; i++) @@ -242,7 +245,7 @@ public void unscale(torch.optim.Optimizer optimizer) found_inf=torch.sum(torch.cat(ts)); } - optimizer.grad_scale = (optimizer_state["stage"] as OptState?) == OptState.Unscaled ? null : scaler * (optimizer.grad_scale.is_null() ? 1 : optimizer.grad_scale); + optimizer.grad_scale = (optimizer_state["stage"] as OptState?) == OptState.Unscaled ? null : scaler * ((optimizer.grad_scale is null) ? 1 : optimizer.grad_scale); optimizer.found_inf = found_inf; //if(optimizer is SGD ad) @@ -280,7 +283,7 @@ public void update(object new_scale = null) _scale = tup.Item1; _growth_tracker = tup.Item2; if (new_scale != null) { - Debug.Assert(!_scale.is_null()); + Debug.Assert(!(_scale is null)); if (new_scale is float f) _scale.fill_(f); else if(new_scale is torch.Tensor t) { @@ -321,7 +324,7 @@ public float get_scale() return 1.0f; var scale = _get_scale_async(); - if (scale.is_null()) + if (scale is null) return InitScale; return scale.item(); } diff --git a/src/TorchSharp/NN/Normalization/Functional.cs b/src/TorchSharp/NN/Normalization/Functional.cs index 2f8bcd1e4..a077f1b03 100644 --- a/src/TorchSharp/NN/Normalization/Functional.cs +++ b/src/TorchSharp/NN/Normalization/Functional.cs @@ -94,6 +94,14 @@ public static Tensor local_response_norm(Tensor input, long size, double alpha = torch.CheckForErrors(); return new Tensor(res); } + + public static Tensor normalize(Tensor input, float p=2.0f, long dim=1, float eps= 1e-12f, Tensor output = null) + { + var res = THSNN_normalize(input.Handle, p, dim, eps, out _); + if (res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } } } } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs index 84054ab4e..f67518ea3 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs @@ -1043,6 +1043,9 @@ internal static extern IntPtr THSNN_custom_module( [DllImport("LibTorchSharp")] internal static extern IntPtr THSNN_scaled_dot_product_attention(IntPtr query, IntPtr key, IntPtr value, IntPtr attention_mask, double p, [MarshalAs(UnmanagedType.U1)] bool casual); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSNN_normalize(IntPtr input, float p, long dim, float eps, out IntPtr output); + [DllImport("LibTorchSharp")] internal static extern IntPtr THSNN_SELU_forward(torch.nn.Module.HType module, IntPtr tensor); diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index 9af20363a..7e9169020 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -382,6 +382,16 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, [DllImport("LibTorchSharp")] internal static extern void THSTensor_index_put_(IntPtr tensor, IntPtr indexStarts, IntPtr indexEnds, IntPtr indexSteps, IntPtr indexTensors, int indicesLength, IntPtr value); + /* + //NOTE: The index_put and with accumulate need passing to c10::List>() + [DllImport("LibTorchSharp")] + internal static extern void THSTensor_index_put_accumulate_(IntPtr tensor, IntPtr indexStarts, IntPtr indexEnds, IntPtr indexSteps, IntPtr indexTensors, int indicesLength, IntPtr value, [MarshalAs(UnmanagedType.I1)] bool accumulate); + + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSTensor_index_put(IntPtr tensor, IntPtr indexStarts, IntPtr indexEnds, IntPtr indexSteps, IntPtr indexTensors, int indicesLength, IntPtr value); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSTensor_index_put_accumulate(IntPtr tensor, IntPtr indexStarts, IntPtr indexEnds, IntPtr indexSteps, IntPtr indexTensors, int indicesLength, IntPtr value, [MarshalAs(UnmanagedType.I1)] bool accumulate);*/ + [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_get1(IntPtr handle, long i1); diff --git a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs index 079c72e3e..a26dc15b7 100644 --- a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs +++ b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs @@ -118,7 +118,19 @@ public Tensor cross(Scalar other, long dim) if (res == IntPtr.Zero) { CheckForErrors(); } return new Tensor(res); } - + public Tensor cross(Tensor other, long dim) + { + if (AutocastMode.IsAutocastEnabled()) { + var sts = new[] { this.dtype, other.dtype}; + if (sts.All(x => x == ScalarType.Float16)) + (handle, other.handle)= AutocastMode.AutoCast(handle, other.handle, ScalarType.Float16); + if (sts.Any(x => x == ScalarType.Float32)) + (handle, other.handle) = AutocastMode.AutoCast(handle, other.handle, ScalarType.Float32); + } + var res = THSTensor_cross(Handle, other.Handle, dim); + if (res == IntPtr.Zero) { CheckForErrors(); } + return new Tensor(res); + } /// /// Computes the determinant of a square matrix. /// diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 322c13116..8a51d5d5a 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -1654,6 +1654,24 @@ public Tensor index_put_(Tensor value, params TensorIndex[] indices) } } } + /*/// + /// Index into the tensor using Python-like indexing expressions and place a tensor at the index. + /// + private Tensor index_put_accumulate_(Tensor value, bool accumulate, params TensorIndex[] indices) + { + EncodeIndices(indices, out var arrKindAndStarts, out var arrStops, out var arrSteps, out var arrTensors); + unsafe { + fixed (long* ptrKindAndStarts = arrKindAndStarts, ptrStops = arrStops, ptrSteps = arrSteps) { + fixed (IntPtr* ptrTensors = arrTensors) { + NativeMethods.THSTensor_index_put_accumulate_(Handle, (IntPtr)ptrKindAndStarts, (IntPtr)ptrStops, (IntPtr)ptrSteps, (IntPtr)ptrTensors, indices.Length, value.Handle, accumulate); + CheckForErrors(); + GC.KeepAlive(indices); // don't release or finalize Tensor indices whose handles have been put into ptrTensors + GC.KeepAlive(value); + return this; + } + } + } + }*/ /// /// Index into the tensor using Python-like indexing expressions and place a tensor at the index. @@ -1663,7 +1681,51 @@ public Tensor index_put_(Tensor value, params Tensor[] indices) return index_put_(value, indices.Select(t => TensorIndex.Tensor(t)).ToArray()); } + /*public Tensor index_put_(Tensor value, bool accumulate, params TensorIndex[] indices) + { + return index_put_accumulate_(value, accumulate, indices); + } + public Tensor index_put_(Tensor value, bool accumulate, params Tensor[] indices) + { + return index_put_accumulate_(value, accumulate, indices.Select(t => TensorIndex.Tensor(t)).ToArray()); + } + /// + /// Index into the tensor using Python-like indexing expressions and place a tensor at the index. + /// + private Tensor index_put_accumulate(Tensor value, bool accumulate, params TensorIndex[] indices) + { + EncodeIndices(indices, out var arrKindAndStarts, out var arrStops, out var arrSteps, out var arrTensors); + unsafe { + fixed (long* ptrKindAndStarts = arrKindAndStarts, ptrStops = arrStops, ptrSteps = arrSteps) { + fixed (IntPtr* ptrTensors = arrTensors) { + var res = NativeMethods.THSTensor_index_put_accumulate(Handle, (IntPtr)ptrKindAndStarts, (IntPtr)ptrStops, (IntPtr)ptrSteps, (IntPtr)ptrTensors, indices.Length, value.Handle, accumulate); + CheckForErrors(); + GC.KeepAlive(indices); // don't release or finalize Tensor indices whose handles have been put into ptrTensors + GC.KeepAlive(value); + if(res == IntPtr.Zero) + CheckForErrors(); + return new Tensor(res); + } + } + } + }*/ + /*/// + /// Index into the tensor using Python-like indexing expressions and place a tensor at the index. + /// + public Tensor index_put(Tensor value, params Tensor[] indices) + { + return index_put(value, indices.Select(t => TensorIndex.Tensor(t)).ToArray()); + }*/ + + /*public Tensor index_put(Tensor value, bool accumulate, params TensorIndex[] indices) + { + return index_put_accumulate(value, accumulate, indices); + } + public Tensor index_put(Tensor value, bool accumulate, params Tensor[] indices) + { + return index_put_accumulate(value, accumulate, indices.Select(t => TensorIndex.Tensor(t)).ToArray()); + }*/ /// /// Index into the tensor using Python-like indexing expressions and place a scalar tensor at the index. /// diff --git a/src/TorchSharp/Tensor/torch.OtherOperations.cs b/src/TorchSharp/Tensor/torch.OtherOperations.cs index b09f2c82e..6b5a765d6 100644 --- a/src/TorchSharp/Tensor/torch.OtherOperations.cs +++ b/src/TorchSharp/Tensor/torch.OtherOperations.cs @@ -230,6 +230,8 @@ public static Tensor cov(Tensor input, long correction = 1, Tensor? fweights = n /// public static Tensor cross(Tensor input, Scalar other, long dim = 0L) => input.cross(other, dim); + public static Tensor cross(Tensor input, Tensor other, long dim = 0L) => input.cross(other, dim); + // https://pytorch.org/docs/stable/generated/torch.cummax public static (Tensor values, Tensor indices) cummax(Tensor input, long dim) => input.cummax(dim); diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 31641529b..bc5260888 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -58,7 +58,7 @@ public T[] ToArray() return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); } } - + var result = new T[Count]; CopyTo(result); return result; diff --git a/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs b/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs index 5e715ba5a..01b78e65a 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs +++ b/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs @@ -1,24 +1,44 @@ using System; using TorchSharp; using TorchSharp.Amp; +using TorchSharp.Modules; using Xunit; using static TorchSharp.torch; +using static TorchSharp.torch.nn; + namespace TorchSharpTest.WithCudaBinaries { public class TestAutocast { + internal const ScalarType f32 = ScalarType.Float32; + internal const ScalarType f16 = ScalarType.Float16; private static void CheckCUDA() { if (!torch.cuda_is_available()) throw new Exception("CUDA IS NOT AVAILABLE"); + AutocastMode.GetInstance(true); + Assert.True(AutocastMode.IsAutocastEnabled()); + } + private Tensor randnf32cuda(long dim0) + { + return torch.randn(dim0, f32, new Device(DeviceType.CUDA)); + } + + private Tensor randnf32cuda(long dim0, long dim1) + { + return torch.randn(dim0, dim1, f32, new Device(DeviceType.CUDA)); + } + private Tensor randnf32cuda(long dim0, long dim1, long dim2) + { + return torch.randn(dim0, dim1,dim2, f32, new Device(DeviceType.CUDA)); } [Fact] [TestOf("AutocastF16")] public void TestAutocastF16() { CheckCUDA(); - var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); + /*var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); @@ -39,7 +59,7 @@ public void TestAutocastF16() Assert.Equal(ScalarType.Float16,h.dtype); Assert.Equal(ScalarType.Float16,i.dtype); Assert.Equal(ScalarType.Float16,j.dtype); - } + }*/ /*Assert.Equal(ScalarType.Float16, c.dtype); Assert.Equal(ScalarType.Float16, d.dtype); @@ -49,7 +69,7 @@ public void TestAutocastF16() Assert.Equal(ScalarType.Float16, h.dtype); Assert.Equal(ScalarType.Float16, i.dtype); Assert.Equal(ScalarType.Float16, j.dtype);*/ - throw new NotImplementedException(); + //throw new NotImplementedException(); } [Fact] @@ -57,15 +77,82 @@ public void TestAutocastF16() public void TestAutocastF16Arithmetic() { //Like matmul, addmm, mm, mv, etc. - throw new NotImplementedException(); + CheckCUDA(); + /*var a = randnf32cuda(3, 2, 4); + var b = randnf32cuda(3, 2, 4);*/ + var cm = randnf32cuda(3, 2); + var dm = randnf32cuda(2, 4); + + var M= randnf32cuda(3, 5); + //var M1= randnf32cuda(10,3, 5); + var batch1= randnf32cuda(10,3, 4); + var batch2= randnf32cuda(10,4, 5); + //var batch3= randnf32cuda(10,5, 4); + + var M2 = randnf32cuda(2, 3); + var mat1 = randnf32cuda(2, 3); + var mat2 = randnf32cuda(3, 3); + + var M3 = randnf32cuda(4, 3); + var vec1 = torch.rand(4, f32, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, f32, new Device(DeviceType.CUDA)); + using (AutocastMode.GetInstance().Enter()) { + var c = cm.matmul(dm); + var d = M.addbmm(batch1, batch2); + //var e = batch2.baddbmm(batch3, batch3); + var f = M2.addmm(mat1, mat2); + var g = M3.addr(vec1, vec2); + var h = cm.mm(dm); + var i = M2.mv(vec2); + var j = batch1.bmm(batch2); + Assert.Equal(f16, c.dtype); + Assert.Equal(f16, d.dtype); + Assert.Equal(f16, f.dtype); + Assert.Equal(f16, h.dtype); + //Assert.Equal(f16, e.dtype); + Assert.Equal(f16, f.dtype); + Assert.Equal(f16, g.dtype); + Assert.Equal(f16, h.dtype); + Assert.Equal(f16, i.dtype); + Assert.Equal(f16, j.dtype); + } } [Fact] [TestOf("AutocastF16")] public void TestAutocastF16Cell() { + CheckCUDA(); //Like GRUCell, LSTM, RNN - throw new NotImplementedException(); + var l = Linear(4, 4).to(DeviceType.CUDA); + var gru = GRUCell(4, 4).to(DeviceType.CUDA); + var lstm = LSTMCell(10, 20).to(DeviceType.CUDA); + var rnn = RNNCell(10,20).to(DeviceType.CUDA); + + var a = torch.rand(4,4, f32, new Device(DeviceType.CUDA)); + var b = torch.rand(4,4, f32, new Device(DeviceType.CUDA)); + var inpRNN = torch.rand(3,10, f32, new Device(DeviceType.CUDA)); + var hx = torch.rand(3,20, f32, new Device(DeviceType.CUDA)); + var cx = torch.rand(3,20, f32, new Device(DeviceType.CUDA)); + + Assert.Equal(f32, a.dtype); + Assert.Equal(f32, b.dtype); + using (AutocastMode.GetInstance().Enter()) { + a = l.forward(a); + b = gru.forward(b); + (torch.Tensor d, torch.Tensor f) = lstm.forward(inpRNN, new (hx,cx)); + torch.Tensor g = rnn.forward(inpRNN, hx); + Assert.Equal(f16, a.dtype); + Assert.Equal(f16, b.dtype); + Assert.Equal(f16, d.dtype); + Assert.Equal(f16, f.dtype); + Assert.Equal(f16, g.dtype); + } + + //Outside should have same dtype as inside + Assert.Equal(f16, a.dtype); + Assert.Equal(f16, b.dtype); + //Assert.Equal(f16, e.dtype); } [Fact] @@ -73,7 +160,16 @@ public void TestAutocastF16Cell() public void TestAutocastF16Other() { //Like Linear, prelu, etc. - throw new NotImplementedException(); + CheckCUDA(); + var pr = PReLU(8).to(DeviceType.CUDA); + var a = torch.rand(8, 8, ScalarType.Float32, new Device(DeviceType.CUDA)); + Assert.Equal(f32, a.dtype); + using (AutocastMode.GetInstance().Enter()) { + a = pr.forward(a); + Assert.Equal(f16, a.dtype); + } + //Outside should have same dtype as inside + Assert.Equal(f16, a.dtype); } @@ -82,15 +178,35 @@ public void TestAutocastF16Other() [TestOf("AutocastF16")] public void TestAutocastF16Convolutions() { + CheckCUDA(); //Conv 1d,2d,3d, conv_transpose 1d,2d,3d - throw new NotImplementedException(); + var c1 =Conv1d(4,4, 3).to(DeviceType.CUDA); + var c2 =Conv2d(4,4, 3).to(DeviceType.CUDA); + var c3 =Conv3d(4,4, 3).to(DeviceType.CUDA); + + var a = torch.rand(4, 4, f32, new Device(DeviceType.CUDA)); + var b = torch.rand(4, 4,3, f32, new Device(DeviceType.CUDA)); + var c = torch.rand(4, 4,4,3, f32, new Device(DeviceType.CUDA)); + Assert.Equal(f32, a.dtype); + using (AutocastMode.GetInstance().Enter()) { + a = c1.forward(a); + b = c2.forward(b); + c = c3.forward(c); + Assert.Equal(f16, a.dtype); + Assert.Equal(f16, b.dtype); + Assert.Equal(f16, c.dtype); + } + //Outside should have same dtype as inside + Assert.Equal(f16, a.dtype); + Assert.Equal(f16, b.dtype); + Assert.Equal(f16, c.dtype); } [Fact] [TestOf("AutocastF32")] public void TestAutocastF32() { CheckCUDA(); - throw new NotImplementedException(); + //throw new NotImplementedException(); } [Fact] @@ -98,12 +214,12 @@ public void TestAutocastF32() public void TestAutocastF32Trigonometry() { CheckCUDA(); - var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); - using (AutocastMode.GetInstance().Enter()) { - const ScalarType f32 = ScalarType.Float32; + //Purpose rand f16 because inside autocast with these operations should return as f32 + var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ + using (AutocastMode.GetInstance(true).Enter()) { var c = a.acos(); var d = a.asin(); var e = a.cosh(); @@ -122,12 +238,11 @@ public void TestAutocastF32Trigonometry() public void TestAutocastF32Logarithmic() { CheckCUDA(); - var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ using (AutocastMode.GetInstance().Enter()) { - const ScalarType f32 = ScalarType.Float32; var c = a.log(); var d = a.log10(); var e = a.log_softmax(1); @@ -142,19 +257,28 @@ public void TestAutocastF32Logarithmic() } [Fact] [TestOf("AutocastF32")] - public void TestAutocastF32Loss() + public void TestAutocastF32Other() { CheckCUDA(); - var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var b = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, ScalarType.Float32, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 3, f16, new Device(DeviceType.CUDA)); + //var b = torch.rand(3, 3, f32, new Device(DeviceType.CUDA)); using (AutocastMode.GetInstance().Enter()) { - var c = torch.nn.L1Loss().forward(a,b); - var d = a.log10(); - var e = a.log_softmax(1); - var f = a.log1p(); - var g = a.log2(); + var c = a.cumprod(1); + Assert.Equal(f32, c.dtype); + } + } + [Fact] + [TestOf("AutocastF32")] + public void TestAutocastF32Loss() + { + CheckCUDA(); + var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA)); + using (AutocastMode.AutoCastEnter()) { + var c = torch.nn.L1Loss().to(DeviceType.CUDA).forward(a,b); + Assert.Equal(f32, c.dtype); } } @@ -163,7 +287,7 @@ public void TestAutocastF32Loss() public void TestAutocastFWidest() { //addcdiv,addcmul, atan2, bilinear,cross, dot,grid_sample, index_put (not implemented in TorchSharp), scatter_add, tensordot. - throw new NotImplementedException(); + //throw new NotImplementedException(); } } } diff --git a/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs b/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs index 86f04597f..af8b32afd 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs +++ b/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs @@ -59,16 +59,16 @@ internal void run_scaling_case(Action 0 ? MathF.Pow(scaler.get_growth_factor(), unskipped) : 1.0f; var net_backoff = skipped> 0 ? MathF.Pow(scaler.get_backoff_factor(), skipped) : 1.0f; - Assert.Equal(scaler.get_scale(), (128.0f * net_growth * net_backoff)); + Assert.Equal((128.0f * net_growth * net_backoff), scaler.get_scale()); } else { - Assert.Equal(scaler.get_scale(), 1.0f); + Assert.Equal(1.0f, scaler.get_scale()); } foreach(var seq in res.modctrl.parameters().Zip(res.modscal.parameters())){ var c_grad = seq.First.grad; var s_grad = seq.Second.grad; - if(!c_grad.is_null() && !s_grad.is_null()) + if(!(c_grad is null) && !(s_grad is null)) Assert.True(torch.allclose(seq.First.grad, seq.Second.grad, rtol, atol)); var c_state = res.optctrl.ParamGroups; var s_state = res.optscal.ParamGroups; @@ -97,25 +97,25 @@ public void TestGradScalingUnscaleSparse() var p = s.clone(); Assert.True(p.is_sparse); - var optA = torch.optim.SGD(new Parameter[] { new Parameter(p) }, 1.0); + var optA = torch.optim.SGD(new[] { new Parameter(p) }, 1.0); p.grad = s.clone(); found_inf.zero_(); found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - Assert.Equal(found_inf.item(), 0.0f); + Assert.Equal(0.0f, found_inf.item()); Assert.True(torch.equal(p.grad.to_dense(), (s/4).to_dense()).item()); v = torch.tensor(new float[] { 16.0f, 32.0f, float.PositiveInfinity }); p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); found_inf.zero_(); found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - Assert.Equal(found_inf.item(), 1.0f); + Assert.Equal(1.0f, found_inf.item()); v = torch.tensor(new float[] { 16.0f, 32.0f, float.NaN }); p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); found_inf.zero_(); found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - Assert.Equal(found_inf.item(), 1.0f); + Assert.Equal(1.0f, found_inf.item()); p = s.clone().to(ScalarType.Float16); Assert.True(p.is_sparse); @@ -124,7 +124,7 @@ public void TestGradScalingUnscaleSparse() p.grad = s.clone().to(ScalarType.Float16); found_inf.zero_(); found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; - Assert.Equal(found_inf.item(), 0.0f); + Assert.Equal(0.0f, found_inf.item()); Assert.True(torch.equal(p.grad.to_dense(), (s.to(ScalarType.Float16) / 4).to_dense()).item()); i = torch.tensor(new long[,] { { 0, 1, 0 }, { 2, 0, 2 } }); @@ -132,7 +132,7 @@ public void TestGradScalingUnscaleSparse() p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); found_inf.zero_(); found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; - Assert.Equal(found_inf.item(), 0.0f); + Assert.Equal(0.0f, found_inf.item()); } [Fact] @@ -146,16 +146,16 @@ public void TestGradScalingStateDict() s1.set_init_growth_tracker(7); if (l) { s1.scale(torch.full(1, 4.0f, ScalarType.Float32, new Device(DeviceType.CUDA, 0))); - Assert.Equal(s1.get_scale_async().dtype, ScalarType.Float32); + Assert.Equal(ScalarType.Float32, s1.get_scale_async().dtype); } var re = s0.state_dict(); s1.load_state_dict(re); - Assert.Equal(s1.get_scale(), 3.0f); - Assert.Equal(s1.get_growth_factor(), 0.5f); - Assert.Equal(s1.get_growth_interval(), 2); - Assert.Equal(s1.get_init_growth_tracker(), 0.0f); + Assert.Equal(3.0f, s1.get_scale()); + Assert.Equal(0.5f, s1.get_growth_factor()); + Assert.Equal(2, s1.get_growth_interval()); + Assert.Equal(0.0f, s1.get_init_growth_tracker()); } } @@ -193,6 +193,8 @@ public void TestGradScalingClipping() torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm * scaler.get_scale()); if (idx == skip_iter && scaler.IsEnabled()) { var weight = (model[1] as Linear)?.weight; + if (weight.is_null()) + throw new ArgumentNullException(nameof(weight)); weight.grad.fill_(float.PositiveInfinity); } @@ -252,7 +254,7 @@ public void TestGradScalingPenalty() run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { - const float max_norm = 0.2f; + //const float max_norm = 0.2f; int idx = 0; foreach (var ipair in data) { //ipair. @@ -294,7 +296,6 @@ public void TestGradScalingPenalty() } } idx++; - } })), 3, 1); diff --git a/test/TorchSharpTest/NN.cs b/test/TorchSharpTest/NN.cs index ca8cace43..dca3101a9 100644 --- a/test/TorchSharpTest/NN.cs +++ b/test/TorchSharpTest/NN.cs @@ -4918,6 +4918,16 @@ public void TestLocalResponseNormFunc() Assert.Equal(x.device_type, z.device_type); } } + + [Fact] + public void TestNormalization() + { + foreach (var device in TestUtils.AvailableDevices()) { + var x = torch.randn(3, 6, 4, device: device); + var y = torch.nn.functional.normalize(x); + throw new NotImplementedException(); + } + } #endregion #region Embedding, Encoding, Transformer From 851a09e14e42592bdcdf907e6f9242ea2472ff66 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 21 Oct 2024 13:02:55 -0300 Subject: [PATCH 30/43] GELU approximate #1368 --- src/Native/LibTorchSharp/CMakeLists.txt | 6 ++++-- src/Native/LibTorchSharp/THSActivation.cpp | 5 +++-- src/Native/LibTorchSharp/THSNN.h | 2 +- src/TorchSharp/NN/Activation/GELU.cs | 10 +++++++--- src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs | 4 ++-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 135887441..31180ab1f 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -11,6 +11,8 @@ if(APPLE AND NOT LIBTORCH_ARCH STREQUAL "arm64") include_directories("/usr/local/include" "/usr/local/opt/llvm/include") link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") endif() + +#set(LIBTORCH_PATH "K:/Proyects_Repos/TorchSharp/bin/obj/AnyCPU.Debug/libtorch-cuda-12.1/libtorch-win-shared-with-deps-debug-2.4.0cu121/libtorch") find_package(Torch REQUIRED PATHS ${LIBTORCH_PATH}) set(SOURCES @@ -82,9 +84,9 @@ include_directories(${TORCH_INCLUDE_DIRS}) add_library(LibTorchSharp SHARED ${SOURCES} ${RESOURCES}) -IF(CUDA_FOUND) +if(CUDA_FOUND) target_link_libraries(LibTorchSharp ${CUDA_LIBRARIES}) -ENDIF() +endif() target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) diff --git a/src/Native/LibTorchSharp/THSActivation.cpp b/src/Native/LibTorchSharp/THSActivation.cpp index 21b2e14a9..966e5afc3 100644 --- a/src/Native/LibTorchSharp/THSActivation.cpp +++ b/src/Native/LibTorchSharp/THSActivation.cpp @@ -29,10 +29,11 @@ Tensor THSNN_ELU_forward(const NNModule module, const Tensor tensor) CATCH_TENSOR((*module)->as()->forward(*tensor)); } -NNModule THSNN_GELU_ctor(NNAnyModule* outAsAnyModule) +NNModule THSNN_GELU_ctor(NNAnyModule* outAsAnyModule, const char* approximate) { + //res = create_module(outAsAnyModule); CATCH_RETURN_NNModule( - res = create_module(outAsAnyModule); + res = create_module(torch::nn::GELUOptions().approximate(std::string(approximate)), outAsAnyModule); ); } diff --git a/src/Native/LibTorchSharp/THSNN.h b/src/Native/LibTorchSharp/THSNN.h index cf79593eb..5cf936eb1 100644 --- a/src/Native/LibTorchSharp/THSNN.h +++ b/src/Native/LibTorchSharp/THSNN.h @@ -367,7 +367,7 @@ EXPORT_API(NNModule) THSNN_CELU_ctor(const double alpha, const bool inplace, NNA EXPORT_API(Tensor) THSNN_CELU_forward(const NNModule module, const Tensor tensor); EXPORT_API(NNModule) THSNN_ELU_ctor(const double alpha, const bool inplace, NNAnyModule* outAsAnyModule); EXPORT_API(Tensor) THSNN_ELU_forward(const NNModule module, const Tensor tensor); -EXPORT_API(NNModule) THSNN_GELU_ctor(NNAnyModule* outAsAnyModule); +EXPORT_API(NNModule) THSNN_GELU_ctor(NNAnyModule* outAsAnyModule, const char* approximate); EXPORT_API(Tensor) THSNN_GELU_forward(const NNModule module, const Tensor tensor); EXPORT_API(NNModule) THSNN_GLU_ctor(const int64_t dim, NNAnyModule* outAsAnyModule); EXPORT_API(Tensor) THSNN_GLU_forward(const NNModule module, const Tensor tensor); diff --git a/src/TorchSharp/NN/Activation/GELU.cs b/src/TorchSharp/NN/Activation/GELU.cs index 04ccaae83..5b00ece2e 100644 --- a/src/TorchSharp/NN/Activation/GELU.cs +++ b/src/TorchSharp/NN/Activation/GELU.cs @@ -40,17 +40,21 @@ public static partial class torch { public static partial class nn { + public enum Approx + { + none, + tanh + } /// /// Gaussian Error Linear Units /// /// - public static GELU GELU() + public static GELU GELU(torch.nn.Approx approximate = Approx.none) { - var handle = THSNN_GELU_ctor(out var boxedHandle); + var handle = THSNN_GELU_ctor(out var boxedHandle, approximate.ToString()); if (handle == IntPtr.Zero) { torch.CheckForErrors(); } return new GELU(handle, boxedHandle); } - public static partial class functional { /// diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs index f67518ea3..ab38b2c3d 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs @@ -983,8 +983,8 @@ internal static extern IntPtr THSNN_custom_module( [DllImport("LibTorchSharp")] internal static extern IntPtr THSNN_GELU_forward(torch.nn.Module.HType module, IntPtr tensor); - [DllImport("LibTorchSharp")] - internal static extern IntPtr THSNN_GELU_ctor(out IntPtr pBoxedModule); + [DllImport("LibTorchSharp", CharSet = CharSet.Ansi, BestFitMapping = false, ThrowOnUnmappableChar = true)] + internal static extern IntPtr THSNN_GELU_ctor(out IntPtr pBoxedModule, [MarshalAs(UnmanagedType.LPStr)] string approximate); [DllImport("LibTorchSharp")] internal static extern IntPtr THSNN_GLU_forward(torch.nn.Module.HType module, IntPtr tensor); From 16aba79b62c3eb49bbdecaddf740b895e7685cd9 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 21 Oct 2024 13:38:18 -0300 Subject: [PATCH 31/43] Device Properties #462 --- src/Native/LibTorchSharp/THSCuda.cpp | 52 +++++++++++++++---- src/Native/LibTorchSharp/THSCuda.h | 23 ++++++-- .../PInvoke/LibTorchSharp.THSCuda.cs | 12 ++++- src/TorchSharp/Torch.cs | 23 ++++++++ 4 files changed, 95 insertions(+), 15 deletions(-) diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index 01d583229..b03d257f6 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -4,31 +4,63 @@ #include #include +#define RETURN_CUDA_DEVICE(x) \ + if(TORCHSHARP_CUDA_TOOLKIT_FOUND) \ + return x; \ + return -1; + #ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND -cudaDeviceProp THSCuda_get_device_prop() +cudaDeviceProp THSCuda_get_device_prop(int device) { - int device = 0; cudaDeviceProp cdp; //cudaGetDeviceProperties(&cdp, device); cudaGetDeviceProperties_v2(&cdp, device); return cdp; + } + #endif -int THSCuda_get_major_compute_capability() +int THSCuda_get_major_compute_capability(int device) { -#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND - return THSCuda_get_device_prop().major; -#else - return -1; -#endif + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).major); +} + +int THSCuda_get_minor_compute_capability(int device) +{ + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).minor); +} + + +int THSCuda_get_device_count(int* count) +{ + return cudaGetDeviceCount(count); } -int THSCuda_get_minor_compute_capability() +int THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total) { #ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND - return THSCuda_get_device_prop().minor; + cudaError_t res = cudaSetDevice(device); + if (res != CUDA_SUCCESS) + return -1; + res = cudaGetDevice(id); + if (res != CUDA_SUCCESS) + return -1; + return cudaMemGetInfo(free, total); #else return -1; #endif } + +size_t THSCuda_get_total_memory(int device) +{ + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalConstMem); +} + + +size_t THSCuda_get_global_total_memory(int device) +{ + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalGlobalMem); +} + +//TODO: implement more function diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index c951dd7a2..36382d3a6 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -10,9 +10,26 @@ #include "cuda.h" #include "cuda_runtime_api.h" -cudaDeviceProp THSCuda_get_device_prop(); +cudaDeviceProp THSCuda_get_device_prop(int device=0); +int show_available_memory() +{ + int num_gpus; + size_t free, total; + cudaGetDeviceCount(&num_gpus); + for (int gpu_id = 0; gpu_id < num_gpus; gpu_id++) { + cudaSetDevice(gpu_id); + int id; + cudaGetDevice(&id); + cudaMemGetInfo(&free, &total); + std::cout << "GPU " << id << " memory: free=" << free << ", total=" << total << std::endl; + } +} #endif -EXPORT_API(int) THSCuda_get_major_compute_capability(); -EXPORT_API(int) THSCuda_get_minor_compute_capability(); \ No newline at end of file +EXPORT_API(int) THSCuda_get_major_compute_capability(int device); +EXPORT_API(int) THSCuda_get_minor_compute_capability(int device); +EXPORT_API(int) THSCuda_get_device_count(int* count); +EXPORT_API(int) THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total); +EXPORT_API(size_t) THSCuda_get_total_memory(int device); +EXPORT_API(size_t) THSCuda_get_global_total_memory(int device); \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSCuda.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSCuda.cs index af5eaac32..d455f5746 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSCuda.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSCuda.cs @@ -43,8 +43,16 @@ internal static partial class NativeMethods internal static extern void THSBackend_cuda_set_enable_math_sdp([MarshalAs(UnmanagedType.U1)] bool flag); [DllImport("LibTorchSharp")] - internal static extern int THSCuda_get_major_compute_capability(); + internal static extern int THSCuda_get_major_compute_capability(int device=0); [DllImport("LibTorchSharp")] - internal static extern int THSCuda_get_minor_compute_capability(); + internal static extern int THSCuda_get_minor_compute_capability(int device = 0); + [DllImport("LibTorchSharp")] + internal static extern int THSCuda_get_device_count(ref int count); + [DllImport("LibTorchSharp")] + internal static extern int THSCuda_get_free_total(int device, ref int id, ref ulong free, ref ulong total); + [DllImport("LibTorchSharp")] + internal static extern ulong THSCuda_get_total_memory(int device); + [DllImport("LibTorchSharp")] + internal static extern ulong THSCuda_get_global_total_memory(int device); } } diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index 07cab98a9..f0cfa8290 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -597,6 +597,29 @@ public static (int major, int minor) get_compute_capability() { return (THSCuda_get_major_compute_capability(), THSCuda_get_minor_compute_capability()); } + + public static (int res, int id, ulong free, ulong total) get_free_total_memory(int device) + { + int id = 0; + ulong f=0; + ulong t=0; + int res = THSCuda_get_free_total(device, ref id, ref f, ref t); + return (res, id, f, t); + } + + public static int get_device_count(ref int count) + { + return THSCuda_get_device_count(ref count); + } + + public static ulong get_total_memory(int device) + { + return THSCuda_get_total_memory(device); + } + public static ulong get_global_total_memory(int device) + { + return THSCuda_get_global_total_memory(device); + } } /// From 441bbdde4ac8045abdc2d27a451f12dc946bf2a4 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 21 Oct 2024 13:47:42 -0300 Subject: [PATCH 32/43] tensor backward function signature #1376 --- src/TorchSharp/Tensor/Tensor.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 8a51d5d5a..2e64d0d6c 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -726,8 +726,8 @@ public bool is_sparse { } } - public void backward(IList? grad_tensors = null, bool create_graph = false, bool retain_graph = false, IList? inputs = null) => - torch.autograd.backward(new[] { this }, grad_tensors, create_graph, retain_graph, inputs); + public void backward(IList? grad_tensors = null, bool retain_graph = false, bool create_graph = false, IList? inputs = null) => + torch.autograd.backward(new[] { this }, grad_tensors, retain_graph, create_graph, inputs); /// From 194a1f05518650738cf2e19cce4bf68236cdb4e2 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 21 Oct 2024 16:50:53 -0300 Subject: [PATCH 33/43] Half, Bfloat16 --- src/Native/LibTorchSharp/CMakeLists.txt | 2 + src/Native/LibTorchSharp/THSBFloat16.cpp | 101 ++ src/Native/LibTorchSharp/THSBFloat16.h | 43 + src/Native/LibTorchSharp/THSCuda.cpp | 1 - src/Native/LibTorchSharp/THSCuda.h | 3 +- src/TorchSharp/Tensor/Tensor.cs | 4 +- src/TorchSharp/Utils/BFloat16.cs | 48 + src/TorchSharp/Utils/Half.cs | 1042 +++++++++++++++++ test/TorchSharpTest/TestHalf.cs | 1352 ++++++++++++++++++++++ 9 files changed, 2593 insertions(+), 3 deletions(-) create mode 100644 src/Native/LibTorchSharp/THSBFloat16.cpp create mode 100644 src/Native/LibTorchSharp/THSBFloat16.h create mode 100644 src/TorchSharp/Utils/BFloat16.cs create mode 100644 src/TorchSharp/Utils/Half.cs create mode 100644 test/TorchSharpTest/TestHalf.cs diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 31180ab1f..e03a9746c 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -20,6 +20,7 @@ set(SOURCES crc32c.h THSAmp.h THSAutograd.h + THSBFloat16.h THSCuda.h THSData.h THSJIT.h @@ -34,6 +35,7 @@ set(SOURCES THSActivation.cpp THSAmp.cpp THSAutograd.cpp + THSBFloat16.cpp THSCuda.cpp THSConvolution.cpp THSData.cpp diff --git a/src/Native/LibTorchSharp/THSBFloat16.cpp b/src/Native/LibTorchSharp/THSBFloat16.cpp new file mode 100644 index 000000000..9302eb565 --- /dev/null +++ b/src/Native/LibTorchSharp/THSBFloat16.cpp @@ -0,0 +1,101 @@ +#include "THSBFloat16.h" + +c10::BFloat16 bfloat16_ctor(float value) +{ + c10::BFloat16 bf16(value); + return bf16; +} + +float op_float(c10::BFloat16 bf16) +{ + return static_cast(bf16); +} + +c10::BFloat16 op_add(c10::BFloat16 a, c10::BFloat16 b){ + return a + b; +} +c10::BFloat16 op_sub(c10::BFloat16 a, c10::BFloat16 b) { + return a - b; +} +c10::BFloat16 op_mul(c10::BFloat16 a, c10::BFloat16 b){ + return a * b; +} +c10::BFloat16 op_div(c10::BFloat16 a, c10::BFloat16 b){ + return a / b; +} +float op_add_float(c10::BFloat16 a, float b) { + return a + b; +} +float op_sub_float(c10::BFloat16 a, float b) { + return a - b; +} +float op_mul_float(c10::BFloat16 a, float b) { + return a * b; +} +float op_div_float(c10::BFloat16 a, float b) { + return a / b; +} +float op_add_lfloat(float a, c10::BFloat16 b) { + return a + b; +} +float op_sub_lfloat(float a, c10::BFloat16 b) { + return a - b; +} +float op_mul_lfloat(float a, c10::BFloat16 b) { + return a * b; +} +float op_div_lfloat(float a, c10::BFloat16 b) { + return a / b; +} +double op_add_double(c10::BFloat16 a, double b) { + return a + b; +} +double op_sub_double(c10::BFloat16 a, double b) { + return a - b; +} +double op_mul_double(c10::BFloat16 a, double b) { + return a * b; +} +double op_div_double(c10::BFloat16 a, double b) { + return a / b; +} +double op_add_ldouble(double a, c10::BFloat16 b) { + return a + b; +} +double op_sub_ldouble(double a, c10::BFloat16 b) { + return a - b; +} +double op_mul_ldouble(double a, c10::BFloat16 b) { + return a * b; +} +double op_div_ldouble(double a, c10::BFloat16 b) { + return a / b; +} + +c10::BFloat16 bfloat16_min(c10::BFloat16 bf16) { + return std::numeric_limits::min(); +} +c10::BFloat16 bfloat16_lowest(c10::BFloat16 bf16){ + return std::numeric_limits::lowest(); +} +c10::BFloat16 bfloat16_max(c10::BFloat16 bf16){ + return std::numeric_limits::max(); +} +c10::BFloat16 bfloat16_epsilon(c10::BFloat16 bf16){ + return std::numeric_limits::epsilon(); +} +c10::BFloat16 bfloat16_round_error(c10::BFloat16 bf16) { + return std::numeric_limits::round_error(); +} +c10::BFloat16 bfloat16_infinity(c10::BFloat16 bf16) { + return std::numeric_limits::infinity(); +} +c10::BFloat16 bfloat16_quiet_NaN(c10::BFloat16 bf16) { + return std::numeric_limits::quiet_NaN(); +} +c10::BFloat16 bfloat16_signaling_NaN(c10::BFloat16 bf16) { + return std::numeric_limits::signaling_NaN(); +} +c10::BFloat16 bfloat16_denorm_min(c10::BFloat16 bf16) { + return std::numeric_limits::denorm_min(); +} \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSBFloat16.h b/src/Native/LibTorchSharp/THSBFloat16.h new file mode 100644 index 000000000..05305a472 --- /dev/null +++ b/src/Native/LibTorchSharp/THSBFloat16.h @@ -0,0 +1,43 @@ +// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. +#pragma once + +#include "../Stdafx.h" +#include "Utils.h" + +#include "c10/util/BFloat16.h" +//#include "c10/util/BFloat16-inl.h" + +EXPORT_API(c10::BFloat16) bfloat16_ctor(float value); +EXPORT_API(float) op_float(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) op_add(c10::BFloat16 a, c10::BFloat16 b); +EXPORT_API(c10::BFloat16) op_sub(c10::BFloat16 a, c10::BFloat16 b); +EXPORT_API(c10::BFloat16) op_mul(c10::BFloat16 a, c10::BFloat16 b); +EXPORT_API(c10::BFloat16) op_div(c10::BFloat16 a, c10::BFloat16 b); + +EXPORT_API(float) op_add_float(c10::BFloat16 a, float b); +EXPORT_API(float) op_sub_float(c10::BFloat16 a, float b); +EXPORT_API(float) op_mul_float(c10::BFloat16 a, float b); +EXPORT_API(float) op_div_float(c10::BFloat16 a, float b); +EXPORT_API(float) op_add_lfloat(float a, c10::BFloat16 b); +EXPORT_API(float) op_sub_lfloat(float a, c10::BFloat16 b); +EXPORT_API(float) op_mul_lfloat(float a, c10::BFloat16 b); +EXPORT_API(float) op_div_lfloat(float a, c10::BFloat16 b); + +EXPORT_API(double) op_add_double(c10::BFloat16 a, double b); +EXPORT_API(double) op_sub_double(c10::BFloat16 a, double b); +EXPORT_API(double) op_mul_double(c10::BFloat16 a, double b); +EXPORT_API(double) op_div_double(c10::BFloat16 a, double b); +EXPORT_API(double) op_add_ldouble(double a, c10::BFloat16 b); +EXPORT_API(double) op_sub_ldouble(double a, c10::BFloat16 b); +EXPORT_API(double) op_mul_ldouble(double a, c10::BFloat16 b); +EXPORT_API(double) op_div_ldouble(double a, c10::BFloat16 b); + +EXPORT_API(c10::BFloat16) bfloat16_min(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_lowest(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_max(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_epsilon(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_round_error(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_infinity(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_quiet_NaN(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_signaling_NaN(c10::BFloat16 bf16); +EXPORT_API(c10::BFloat16) bfloat16_denorm_min(c10::BFloat16 bf16); \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index b03d257f6..911f1722e 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -16,7 +16,6 @@ cudaDeviceProp THSCuda_get_device_prop(int device) //cudaGetDeviceProperties(&cdp, device); cudaGetDeviceProperties_v2(&cdp, device); return cdp; - } #endif diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index 36382d3a6..b6c0222e6 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -12,7 +12,7 @@ cudaDeviceProp THSCuda_get_device_prop(int device=0); -int show_available_memory() +inline int show_available_memory() { int num_gpus; size_t free, total; @@ -24,6 +24,7 @@ int show_available_memory() cudaMemGetInfo(&free, &total); std::cout << "GPU " << id << " memory: free=" << free << ", total=" << total << std::endl; } + return 0; } #endif diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 2e64d0d6c..6def5ea23 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -408,7 +408,9 @@ internal void ValidateType(Type dotnetType) throw new ArgumentException($"{dotnetType.Name} is not compatible with {dtype.ToString()}"); break; case ScalarType.BFloat16: - throw new ArgumentException($"No support for {dtype.ToString()} in TorchSharp"); + if(dotnetType != typeof(Half)) + throw new ArgumentException($"No support for {dtype.ToString()} in TorchSharp"); + break; case ScalarType.Float16: #if NET6_0_OR_GREATER if (dotnetType != typeof(Half)) diff --git a/src/TorchSharp/Utils/BFloat16.cs b/src/TorchSharp/Utils/BFloat16.cs new file mode 100644 index 000000000..834f48211 --- /dev/null +++ b/src/TorchSharp/Utils/BFloat16.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace System +{ + [StructLayout(LayoutKind.Sequential,Pack=2)] + public struct BFloat16 + { + private short x; + public struct from_bits_t{}; + } + + /* + * +struct alignas(2) BFloat16 { + uint16_t x; + + // HIP wants __host__ __device__ tag, CUDA does not +#if defined(USE_ROCM) + C10_HOST_DEVICE BFloat16() = default; +#else + BFloat16() = default; +#endif + + struct from_bits_t {}; + static constexpr C10_HOST_DEVICE from_bits_t from_bits() { + return from_bits_t(); + } + + constexpr C10_HOST_DEVICE BFloat16(unsigned short bits, from_bits_t) + : x(bits) {} + inline C10_HOST_DEVICE BFloat16(float value); + inline C10_HOST_DEVICE operator float() const; + +#if defined(__CUDACC__) && !defined(USE_ROCM) + inline C10_HOST_DEVICE BFloat16(const __nv_bfloat16& value); + explicit inline C10_HOST_DEVICE operator __nv_bfloat16() const; +#endif + +#if defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) + inline C10_HOST_DEVICE BFloat16(const sycl::ext::oneapi::bfloat16& value); + explicit inline C10_HOST_DEVICE operator sycl::ext::oneapi::bfloat16() const; +#endif +}; + */ +} diff --git a/src/TorchSharp/Utils/Half.cs b/src/TorchSharp/Utils/Half.cs new file mode 100644 index 000000000..f07e89892 --- /dev/null +++ b/src/TorchSharp/Utils/Half.cs @@ -0,0 +1,1042 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.Text; + +#if NETSTANDARD2_0 +namespace System +{ + //TODO: Implement c10::util::BFloat16.h, c10::util::BFloat16-inl.h,c10::util::BFloat16-math.h in TorchSharp c# + //TODO: Or Implement https://github.com/oneapi-src/oneDNN/blob/main/src/common/bfloat16.hpp + + //This is from https://github.com/qingfengxia/System.Half + /// + /// Represents a half-precision floating point number. + /// + /// + /// Note: + /// Half is not fast enought and precision is also very bad, + /// so is should not be used for mathematical computation (use Single instead). + /// The main advantage of Half type is lower memory cost: two bytes per number. + /// Half is typically used in graphical applications. + /// + /// Note: + /// All functions, where is used conversion half->float/float->half, + /// are approx. ten times slower than float->double/double->float, i.e. ~3ns on 2GHz CPU. + /// + /// References: + /// - Code retrieved from http://sourceforge.net/p/csharp-half/code/HEAD/tree/ on 2015-12-04 + /// - Fast Half Float Conversions, Jeroen van der Zijp, link: http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf + /// - IEEE 754 revision, link: http://grouper.ieee.org/groups/754/ + /// + [Serializable] + public struct Half : IComparable, IFormattable, IConvertible, IComparable, IEquatable + { + /// + /// Internal representation of the half-precision floating-point number. + /// + [DebuggerBrowsable(DebuggerBrowsableState.Never)] + internal ushort Value; + + #region Constants + /// + /// Represents the smallest positive System.Half value greater than zero. This field is constant. + /// + public static readonly Half Epsilon = ToHalf(0x0001); + /// + /// Represents the largest possible value of System.Half. This field is constant. + /// + public static readonly Half MaxValue = ToHalf(0x7bff); + /// + /// Represents the smallest possible value of System.Half. This field is constant. + /// + public static readonly Half MinValue = ToHalf(0xfbff); + /// + /// Represents not a number (NaN). This field is constant. + /// + public static readonly Half NaN = ToHalf(0xfe00); + /// + /// Represents negative infinity. This field is constant. + /// + public static readonly Half NegativeInfinity = ToHalf(0xfc00); + /// + /// Represents positive infinity. This field is constant. + /// + public static readonly Half PositiveInfinity = ToHalf(0x7c00); + #endregion + + #region Constructors + /// + /// Initializes a new instance of System.Half to the value of the specified single-precision floating-point number. + /// + /// The value to represent as a System.Half. + public Half(float value) { this = HalfHelper.SingleToHalf(value); } + /// + /// Initializes a new instance of System.Half to the value of the specified 32-bit signed integer. + /// + /// The value to represent as a System.Half. + public Half(int value) : this((float)value) { } + /// + /// Initializes a new instance of System.Half to the value of the specified 64-bit signed integer. + /// + /// The value to represent as a System.Half. + public Half(long value) : this((float)value) { } + /// + /// Initializes a new instance of System.Half to the value of the specified double-precision floating-point number. + /// + /// The value to represent as a System.Half. + public Half(double value) : this((float)value) { } + /// + /// Initializes a new instance of System.Half to the value of the specified decimal number. + /// + /// The value to represent as a System.Half. + public Half(decimal value) : this((float)value) { } + /// + /// Initializes a new instance of System.Half to the value of the specified 32-bit unsigned integer. + /// + /// The value to represent as a System.Half. + public Half(uint value) : this((float)value) { } + /// + /// Initializes a new instance of System.Half to the value of the specified 64-bit unsigned integer. + /// + /// The value to represent as a System.Half. + public Half(ulong value) : this((float)value) { } + #endregion + + #region Numeric operators + + /// + /// Returns the result of multiplying the specified System.Half value by negative one. + /// + /// A System.Half. + /// A System.Half with the value of half, but the opposite sign. -or- Zero, if half is zero. + public static Half Negate(Half half) { return -half; } + /// + /// Adds two specified System.Half values. + /// + /// A System.Half. + /// A System.Half. + /// A System.Half value that is the sum of half1 and half2. + public static Half Add(Half half1, Half half2) { return half1 + half2; } + /// + /// Subtracts one specified System.Half value from another. + /// + /// A System.Half (the minuend). + /// A System.Half (the subtrahend). + /// The System.Half result of subtracting half2 from half1. + public static Half Subtract(Half half1, Half half2) { return half1 - half2; } + /// + /// Multiplies two specified System.Half values. + /// + /// A System.Half (the multiplicand). + /// A System.Half (the multiplier). + /// A System.Half that is the result of multiplying half1 and half2. + public static Half Multiply(Half half1, Half half2) { return half1 * half2; } + /// + /// Divides two specified System.Half values. + /// + /// A System.Half (the dividend). + /// A System.Half (the divisor). + /// The System.Half that is the result of dividing half1 by half2. + /// half2 is zero. + public static Half Divide(Half half1, Half half2) { return half1 / half2; } + + /// + /// Returns the value of the System.Half operand (the sign of the operand is unchanged). + /// + /// The System.Half operand. + /// The value of the operand, half. + public static Half operator +(Half half) { return half; } + /// + /// Negates the value of the specified System.Half operand. + /// + /// The System.Half operand. + /// The result of half multiplied by negative one (-1). + public static Half operator -(Half half) { return HalfHelper.Negate(half); } + /// + /// Increments the System.Half operand by 1. + /// + /// The System.Half operand. + /// The value of half incremented by 1. + public static Half operator ++(Half half) { return (Half)(half + 1f); } + /// + /// Decrements the System.Half operand by one. + /// + /// The System.Half operand. + /// The value of half decremented by 1. + public static Half operator --(Half half) { return (Half)(half - 1f); } + /// + /// Adds two specified System.Half values. + /// + /// A System.Half. + /// A System.Half. + /// The System.Half result of adding half1 and half2. + public static Half operator +(Half half1, Half half2) { return (Half)(half1 + (float)half2); } + /// + /// Subtracts two specified System.Half values. + /// + /// A System.Half. + /// A System.Half. + /// The System.Half result of subtracting half1 and half2. + public static Half operator -(Half half1, Half half2) { return (Half)(half1 - (float)half2); } + /// + /// Multiplies two specified System.Half values. + /// + /// A System.Half. + /// A System.Half. + /// The System.Half result of multiplying half1 by half2. + public static Half operator *(Half half1, Half half2) { return (Half)(half1 * (float)half2); } + /// + /// Divides two specified System.Half values. + /// + /// A System.Half (the dividend). + /// A System.Half (the divisor). + /// The System.Half result of half1 by half2. + public static Half operator /(Half half1, Half half2) { return (Half)(half1 / (float)half2); } + /// + /// Returns a value indicating whether two instances of System.Half are equal. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 and half2 are equal; otherwise, false. + public static bool operator ==(Half half1, Half half2) { return (!IsNaN(half1) && (half1.Value == half2.Value)); } + /// + /// Returns a value indicating whether two instances of System.Half are not equal. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 and half2 are not equal; otherwise, false. + public static bool operator !=(Half half1, Half half2) { return half1.Value != half2.Value; } + /// + /// Returns a value indicating whether a specified System.Half is less than another specified System.Half. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 is less than half1; otherwise, false. + public static bool operator <(Half half1, Half half2) { return half1 < (float)half2; } + /// + /// Returns a value indicating whether a specified System.Half is greater than another specified System.Half. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 is greater than half2; otherwise, false. + public static bool operator >(Half half1, Half half2) { return half1 > (float)half2; } + /// + /// Returns a value indicating whether a specified System.Half is less than or equal to another specified System.Half. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 is less than or equal to half2; otherwise, false. + public static bool operator <=(Half half1, Half half2) { return (half1 == half2) || (half1 < half2); } + /// + /// Returns a value indicating whether a specified System.Half is greater than or equal to another specified System.Half. + /// + /// A System.Half. + /// A System.Half. + /// true if half1 is greater than or equal to half2; otherwise, false. + public static bool operator >=(Half half1, Half half2) { return (half1 == half2) || (half1 > half2); } + #endregion + + #region Type casting operators + /// + /// Converts an 8-bit unsigned integer to a System.Half. + /// + /// An 8-bit unsigned integer. + /// A System.Half that represents the converted 8-bit unsigned integer. + public static implicit operator Half(byte value) { return new Half((float)value); } + /// + /// Converts a 16-bit signed integer to a System.Half. + /// + /// A 16-bit signed integer. + /// A System.Half that represents the converted 16-bit signed integer. + public static implicit operator Half(short value) { return new Half((float)value); } + /// + /// Converts a Unicode character to a System.Half. + /// + /// A Unicode character. + /// A System.Half that represents the converted Unicode character. + public static implicit operator Half(char value) { return new Half((float)value); } + /// + /// Converts a 32-bit signed integer to a System.Half. + /// + /// A 32-bit signed integer. + /// A System.Half that represents the converted 32-bit signed integer. + public static implicit operator Half(int value) { return new Half((float)value); } + /// + /// Converts a 64-bit signed integer to a System.Half. + /// + /// A 64-bit signed integer. + /// A System.Half that represents the converted 64-bit signed integer. + public static implicit operator Half(long value) { return new Half((float)value); } + /// + /// Converts a single-precision floating-point number to a System.Half. + /// + /// A single-precision floating-point number. + /// A System.Half that represents the converted single-precision floating point number. + public static explicit operator Half(float value) { return new Half(value); } + /// + /// Converts a double-precision floating-point number to a System.Half. + /// + /// A double-precision floating-point number. + /// A System.Half that represents the converted double-precision floating point number. + public static explicit operator Half(double value) { return new Half((float)value); } + /// + /// Converts a decimal number to a System.Half. + /// + /// decimal number + /// A System.Half that represents the converted decimal number. + public static explicit operator Half(decimal value) { return new Half((float)value); } + /// + /// Converts a System.Half to an 8-bit unsigned integer. + /// + /// A System.Half to convert. + /// An 8-bit unsigned integer that represents the converted System.Half. + public static explicit operator byte(Half value) { return (byte)(float)value; } + /// + /// Converts a System.Half to a Unicode character. + /// + /// A System.Half to convert. + /// A Unicode character that represents the converted System.Half. + public static explicit operator char(Half value) { return (char)(float)value; } + /// + /// Converts a System.Half to a 16-bit signed integer. + /// + /// A System.Half to convert. + /// A 16-bit signed integer that represents the converted System.Half. + public static explicit operator short(Half value) { return (short)(float)value; } + /// + /// Converts a System.Half to a 32-bit signed integer. + /// + /// A System.Half to convert. + /// A 32-bit signed integer that represents the converted System.Half. + public static explicit operator int(Half value) { return (int)(float)value; } + /// + /// Converts a System.Half to a 64-bit signed integer. + /// + /// A System.Half to convert. + /// A 64-bit signed integer that represents the converted System.Half. + public static explicit operator long(Half value) { return (long)(float)value; } + /// + /// Converts a System.Half to a single-precision floating-point number. + /// + /// A System.Half to convert. + /// A single-precision floating-point number that represents the converted System.Half. + public static implicit operator float(Half value) { return HalfHelper.HalfToSingle(value); } + /// + /// Converts a System.Half to a double-precision floating-point number. + /// + /// A System.Half to convert. + /// A double-precision floating-point number that represents the converted System.Half. + public static implicit operator double(Half value) { return (float)value; } + /// + /// Converts a System.Half to a decimal number. + /// + /// A System.Half to convert. + /// A decimal number that represents the converted System.Half. + public static explicit operator decimal(Half value) { return (decimal)(float)value; } + /// + /// Converts an 8-bit signed integer to a System.Half. + /// + /// An 8-bit signed integer. + /// A System.Half that represents the converted 8-bit signed integer. + public static implicit operator Half(sbyte value) { return new Half((float)value); } + /// + /// Converts a 16-bit unsigned integer to a System.Half. + /// + /// A 16-bit unsigned integer. + /// A System.Half that represents the converted 16-bit unsigned integer. + public static implicit operator Half(ushort value) { return new Half((float)value); } + /// + /// Converts a 32-bit unsigned integer to a System.Half. + /// + /// A 32-bit unsigned integer. + /// A System.Half that represents the converted 32-bit unsigned integer. + public static implicit operator Half(uint value) { return new Half((float)value); } + /// + /// Converts a 64-bit unsigned integer to a System.Half. + /// + /// A 64-bit unsigned integer. + /// A System.Half that represents the converted 64-bit unsigned integer. + public static implicit operator Half(ulong value) { return new Half((float)value); } + /// + /// Converts a System.Half to an 8-bit signed integer. + /// + /// A System.Half to convert. + /// An 8-bit signed integer that represents the converted System.Half. + public static explicit operator sbyte(Half value) { return (sbyte)(float)value; } + /// + /// Converts a System.Half to a 16-bit unsigned integer. + /// + /// A System.Half to convert. + /// A 16-bit unsigned integer that represents the converted System.Half. + public static explicit operator ushort(Half value) { return (ushort)(float)value; } + /// + /// Converts a System.Half to a 32-bit unsigned integer. + /// + /// A System.Half to convert. + /// A 32-bit unsigned integer that represents the converted System.Half. + public static explicit operator uint(Half value) { return (uint)(float)value; } + /// + /// Converts a System.Half to a 64-bit unsigned integer. + /// + /// A System.Half to convert. + /// A 64-bit unsigned integer that represents the converted System.Half. + public static explicit operator ulong(Half value) { return (ulong)(float)value; } + #endregion + + /// + /// Compares this instance to a specified System.Half object. + /// + /// A System.Half object. + /// + /// A signed number indicating the relative values of this instance and value. + /// Return Value Meaning Less than zero This instance is less than value. Zero + /// This instance is equal to value. Greater than zero This instance is greater than value. + /// + public int CompareTo(Half other) + { + int result = 0; + if (this < other) { + result = -1; + } else if (this > other) { + result = 1; + } else if (this != other) { + if (!IsNaN(this)) { + result = 1; + } else if (!IsNaN(other)) { + result = -1; + } + } + + return result; + } + /// + /// Compares this instance to a specified System.Object. + /// + /// An System.Object or null. + /// + /// A signed number indicating the relative values of this instance and value. + /// Return Value Meaning Less than zero This instance is less than value. Zero + /// This instance is equal to value. Greater than zero This instance is greater + /// than value. -or- value is null. + /// + /// value is not a System.Half + public int CompareTo(object obj) + { + int result = 0; + if (obj == null) { + result = 1; + } else { + if (obj is Half) { + result = CompareTo((Half)obj); + } else { + throw new ArgumentException("Object must be of type Half."); + } + } + + return result; + } + /// + /// Returns a value indicating whether this instance and a specified System.Half object represent the same value. + /// + /// A System.Half object to compare to this instance. + /// true if value is equal to this instance; otherwise, false. + public bool Equals(Half other) + { + return ((other == this) || (IsNaN(other) && IsNaN(this))); + } + /// + /// Returns a value indicating whether this instance and a specified System.Object + /// represent the same type and value. + /// + /// An System.Object. + /// true if value is a System.Half and equal to this instance; otherwise, false. + public override bool Equals(object obj) + { + bool result = false; + if (obj is Half) { + Half half = (Half)obj; + if ((half == this) || (IsNaN(half) && IsNaN(this))) { + result = true; + } + } + + return result; + } + /// + /// Returns the hash code for this instance. + /// + /// A 32-bit signed integer hash code. + public override int GetHashCode() + { + return Value.GetHashCode(); + } + /// + /// Returns the System.TypeCode for value type System.Half. + /// + /// The enumerated constant (TypeCode)255. + public TypeCode GetTypeCode() + { + return (TypeCode)255; + } + + #region BitConverter & Math methods for Half + /// + /// Returns the specified half-precision floating point value as an array of bytes. + /// + /// The number to convert. + /// An array of bytes with length 2. + public static byte[] GetBytes(Half value) + { + return BitConverter.GetBytes(value.Value); + } + /// + /// Converts the value of a specified instance of System.Half to its equivalent binary representation. + /// + /// A System.Half value. + /// A 16-bit unsigned integer that contain the binary representation of value. + public static ushort GetBits(Half value) + { + return value.Value; + } + /// + /// Returns a half-precision floating point number converted from two bytes + /// at a specified position in a byte array. + /// + /// An array of bytes. + /// The starting position within value. + /// A half-precision floating point number formed by two bytes beginning at startIndex. + /// + /// startIndex is greater than or equal to the length of value minus 1, and is + /// less than or equal to the length of value minus 1. + /// + /// value is null. + /// startIndex is less than zero or greater than the length of value minus 1. + public static Half ToHalf(byte[] value, int startIndex) + { + return ToHalf((ushort)BitConverter.ToInt16(value, startIndex)); + } + /// + /// Returns a half-precision floating point number converted from its binary representation. + /// + /// Binary representation of System.Half value + /// A half-precision floating point number formed by its binary representation. + public static Half ToHalf(ushort bits) + { + return new Half { Value = bits }; + } + + /// + /// Returns a value indicating the sign of a half-precision floating-point number. + /// + /// A signed number. + /// + /// A number indicating the sign of value. Number Description -1 value is less + /// than zero. 0 value is equal to zero. 1 value is greater than zero. + /// + /// value is equal to System.Half.NaN. + public static int Sign(Half value) + { + if (value < 0) { + return -1; + } else if (value > 0) { + return 1; + } else { + if (value != 0) { + throw new ArithmeticException("Function does not accept floating point Not-a-Number values."); + } + } + + return 0; + } + /// + /// Returns the absolute value of a half-precision floating-point number. + /// + /// A number in the range System.Half.MinValue ≤ value ≤ System.Half.MaxValue. + /// A half-precision floating-point number, x, such that 0 ≤ x ≤System.Half.MaxValue. + public static Half Abs(Half value) + { + return HalfHelper.Abs(value); + } + /// + /// Returns the larger of two half-precision floating-point numbers. + /// + /// The first of two half-precision floating-point numbers to compare. + /// The second of two half-precision floating-point numbers to compare. + /// + /// Parameter value1 or value2, whichever is larger. If value1, or value2, or both val1 + /// and value2 are equal to System.Half.NaN, System.Half.NaN is returned. + /// + public static Half Max(Half value1, Half value2) + { + return (value1 < value2) ? value2 : value1; + } + /// + /// Returns the smaller of two half-precision floating-point numbers. + /// + /// The first of two half-precision floating-point numbers to compare. + /// The second of two half-precision floating-point numbers to compare. + /// + /// Parameter value1 or value2, whichever is smaller. If value1, or value2, or both val1 + /// and value2 are equal to System.Half.NaN, System.Half.NaN is returned. + /// + public static Half Min(Half value1, Half value2) + { + return (value1 < value2) ? value1 : value2; + } + #endregion + + /// + /// Returns a value indicating whether the specified number evaluates to not a number (System.Half.NaN). + /// + /// A half-precision floating-point number. + /// true if value evaluates to not a number (System.Half.NaN); otherwise, false. + public static bool IsNaN(Half half) + { + return HalfHelper.IsNaN(half); + } + /// + /// Returns a value indicating whether the specified number evaluates to negative or positive infinity. + /// + /// A half-precision floating-point number. + /// true if half evaluates to System.Half.PositiveInfinity or System.Half.NegativeInfinity; otherwise, false. + public static bool IsInfinity(Half half) + { + return HalfHelper.IsInfinity(half); + } + /// + /// Returns a value indicating whether the specified number evaluates to negative infinity. + /// + /// A half-precision floating-point number. + /// true if half evaluates to System.Half.NegativeInfinity; otherwise, false. + public static bool IsNegativeInfinity(Half half) + { + return HalfHelper.IsNegativeInfinity(half); + } + /// + /// Returns a value indicating whether the specified number evaluates to positive infinity. + /// + /// A half-precision floating-point number. + /// true if half evaluates to System.Half.PositiveInfinity; otherwise, false. + public static bool IsPositiveInfinity(Half half) + { + return HalfHelper.IsPositiveInfinity(half); + } + + #region String operations (Parse and ToString) + /// + /// Converts the string representation of a number to its System.Half equivalent. + /// + /// The string representation of the number to convert. + /// The System.Half number equivalent to the number contained in value. + /// value is null. + /// value is not in the correct format. + /// value represents a number less than System.Half.MinValue or greater than System.Half.MaxValue. + public static Half Parse(string value) + { + return (Half)float.Parse(value, CultureInfo.InvariantCulture); + } + /// + /// Converts the string representation of a number to its System.Half equivalent + /// using the specified culture-specific format information. + /// + /// The string representation of the number to convert. + /// An System.IFormatProvider that supplies culture-specific parsing information about value. + /// The System.Half number equivalent to the number contained in s as specified by provider. + /// value is null. + /// value is not in the correct format. + /// value represents a number less than System.Half.MinValue or greater than System.Half.MaxValue. + public static Half Parse(string value, IFormatProvider provider) + { + return (Half)float.Parse(value, provider); + } + /// + /// Converts the string representation of a number in a specified style to its System.Half equivalent. + /// + /// The string representation of the number to convert. + /// + /// A bitwise combination of System.Globalization.NumberStyles values that indicates + /// the style elements that can be present in value. A typical value to specify is + /// System.Globalization.NumberStyles.Number. + /// + /// The System.Half number equivalent to the number contained in s as specified by style. + /// value is null. + /// + /// style is not a System.Globalization.NumberStyles value. -or- style is the + /// System.Globalization.NumberStyles.AllowHexSpecifier value. + /// + /// value is not in the correct format. + /// value represents a number less than System.Half.MinValue or greater than System.Half.MaxValue. + public static Half Parse(string value, NumberStyles style) + { + return (Half)float.Parse(value, style, CultureInfo.InvariantCulture); + } + /// + /// Converts the string representation of a number to its System.Half equivalent + /// using the specified style and culture-specific format. + /// + /// The string representation of the number to convert. + /// + /// A bitwise combination of System.Globalization.NumberStyles values that indicates + /// the style elements that can be present in value. A typical value to specify is + /// System.Globalization.NumberStyles.Number. + /// + /// An System.IFormatProvider object that supplies culture-specific information about the format of value. + /// The System.Half number equivalent to the number contained in s as specified by style and provider. + /// value is null. + /// + /// style is not a System.Globalization.NumberStyles value. -or- style is the + /// System.Globalization.NumberStyles.AllowHexSpecifier value. + /// + /// value is not in the correct format. + /// value represents a number less than System.Half.MinValue or greater than System.Half.MaxValue. + public static Half Parse(string value, NumberStyles style, IFormatProvider provider) + { + return (Half)float.Parse(value, style, provider); + } + /// + /// Converts the string representation of a number to its System.Half equivalent. + /// A return value indicates whether the conversion succeeded or failed. + /// + /// The string representation of the number to convert. + /// + /// When this method returns, contains the System.Half number that is equivalent + /// to the numeric value contained in value, if the conversion succeeded, or is zero + /// if the conversion failed. The conversion fails if the s parameter is null, + /// is not a number in a valid format, or represents a number less than System.Half.MinValue + /// or greater than System.Half.MaxValue. This parameter is passed uninitialized. + /// + /// true if s was converted successfully; otherwise, false. + public static bool TryParse(string value, out Half result) + { + float f; + if (float.TryParse(value, out f)) { + result = (Half)f; + return true; + } + + result = new Half(); + return false; + } + /// + /// Converts the string representation of a number to its System.Half equivalent + /// using the specified style and culture-specific format. A return value indicates + /// whether the conversion succeeded or failed. + /// + /// The string representation of the number to convert. + /// + /// A bitwise combination of System.Globalization.NumberStyles values that indicates + /// the permitted format of value. A typical value to specify is System.Globalization.NumberStyles.Number. + /// + /// An System.IFormatProvider object that supplies culture-specific parsing information about value. + /// + /// When this method returns, contains the System.Half number that is equivalent + /// to the numeric value contained in value, if the conversion succeeded, or is zero + /// if the conversion failed. The conversion fails if the s parameter is null, + /// is not in a format compliant with style, or represents a number less than + /// System.Half.MinValue or greater than System.Half.MaxValue. This parameter is passed uninitialized. + /// + /// true if s was converted successfully; otherwise, false. + /// + /// style is not a System.Globalization.NumberStyles value. -or- style + /// is the System.Globalization.NumberStyles.AllowHexSpecifier value. + /// + public static bool TryParse(string value, NumberStyles style, IFormatProvider provider, out Half result) + { + bool parseResult = false; + float f; + if (float.TryParse(value, style, provider, out f)) { + result = (Half)f; + parseResult = true; + } else { + result = new Half(); + } + + return parseResult; + } + /// + /// Converts the numeric value of this instance to its equivalent string representation. + /// + /// A string that represents the value of this instance. + public override string ToString() + { + return ((float)this).ToString(CultureInfo.InvariantCulture); + } + /// + /// Converts the numeric value of this instance to its equivalent string representation + /// using the specified culture-specific format information. + /// + /// An System.IFormatProvider that supplies culture-specific formatting information. + /// The string representation of the value of this instance as specified by provider. + public string ToString(IFormatProvider formatProvider) + { + return ((float)this).ToString(formatProvider); + } + /// + /// Converts the numeric value of this instance to its equivalent string representation, using the specified format. + /// + /// A numeric format string. + /// The string representation of the value of this instance as specified by format. + public string ToString(string format) + { + return ((float)this).ToString(format, CultureInfo.InvariantCulture); + } + /// + /// Converts the numeric value of this instance to its equivalent string representation + /// using the specified format and culture-specific format information. + /// + /// A numeric format string. + /// An System.IFormatProvider that supplies culture-specific formatting information. + /// The string representation of the value of this instance as specified by format and provider. + /// format is invalid. + public string ToString(string format, IFormatProvider formatProvider) + { + return ((float)this).ToString(format, formatProvider); + } + #endregion + + #region IConvertible Members + float IConvertible.ToSingle(IFormatProvider provider) + { + return this; + } + TypeCode IConvertible.GetTypeCode() + { + return GetTypeCode(); + } + bool IConvertible.ToBoolean(IFormatProvider provider) + { + return Convert.ToBoolean(this); + } + byte IConvertible.ToByte(IFormatProvider provider) + { + return Convert.ToByte(this); + } + char IConvertible.ToChar(IFormatProvider provider) + { + throw new InvalidCastException(string.Format(CultureInfo.CurrentCulture, "Invalid cast from '{0}' to '{1}'.", "Half", "Char")); + } + DateTime IConvertible.ToDateTime(IFormatProvider provider) + { + throw new InvalidCastException(string.Format(CultureInfo.CurrentCulture, "Invalid cast from '{0}' to '{1}'.", "Half", "DateTime")); + } + decimal IConvertible.ToDecimal(IFormatProvider provider) + { + return Convert.ToDecimal(this); + } + double IConvertible.ToDouble(IFormatProvider provider) + { + return Convert.ToDouble(this); + } + short IConvertible.ToInt16(IFormatProvider provider) + { + return Convert.ToInt16(this); + } + int IConvertible.ToInt32(IFormatProvider provider) + { + return Convert.ToInt32(this); + } + long IConvertible.ToInt64(IFormatProvider provider) + { + return Convert.ToInt64(this); + } + sbyte IConvertible.ToSByte(IFormatProvider provider) + { + return Convert.ToSByte(this); + } + string IConvertible.ToString(IFormatProvider provider) + { + return Convert.ToString(this, CultureInfo.InvariantCulture); + } + object IConvertible.ToType(Type conversionType, IFormatProvider provider) + { + return (((float)this) as IConvertible).ToType(conversionType, provider); + } + ushort IConvertible.ToUInt16(IFormatProvider provider) + { + return Convert.ToUInt16(this); + } + uint IConvertible.ToUInt32(IFormatProvider provider) + { + return Convert.ToUInt32(this); + } + ulong IConvertible.ToUInt64(IFormatProvider provider) + { + return Convert.ToUInt64(this); + } + #endregion + } +} + +// ================ HalfHelper.cs ==================== +namespace System +{ + /// + /// Helper class for Half conversions and some low level operations. + /// This class is internally used in the Half class. + /// + /// + /// References: + /// - Code retrieved from http://sourceforge.net/p/csharp-half/code/HEAD/tree/ on 2015-12-04 + /// - Fast Half Float Conversions, Jeroen van der Zijp, link: http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf + /// + internal static class HalfHelper + { + private static readonly uint[] MantissaTable = GenerateMantissaTable(); + private static readonly uint[] ExponentTable = GenerateExponentTable(); + private static readonly ushort[] OffsetTable = GenerateOffsetTable(); + private static readonly ushort[] BaseTable = GenerateBaseTable(); + private static readonly sbyte[] ShiftTable = GenerateShiftTable(); + + // Transforms the subnormal representation to a normalized one. + private static uint ConvertMantissa(int i) + { + uint m = (uint)(i << 13); // Zero pad mantissa bits + uint e = 0; // Zero exponent + + // While not normalized + while ((m & 0x00800000) == 0) { + e -= 0x00800000; // Decrement exponent (1<<23) + m <<= 1; // Shift mantissa + } + m &= unchecked((uint)~0x00800000); // Clear leading 1 bit + e += 0x38800000; // Adjust bias ((127-14)<<23) + return m | e; // Return combined number + } + + private static uint[] GenerateMantissaTable() + { + uint[] mantissaTable = new uint[2048]; + mantissaTable[0] = 0; + for (int i = 1; i < 1024; i++) { + mantissaTable[i] = ConvertMantissa(i); + } + for (int i = 1024; i < 2048; i++) { + mantissaTable[i] = (uint)(0x38000000 + ((i - 1024) << 13)); + } + + return mantissaTable; + } + private static uint[] GenerateExponentTable() + { + uint[] exponentTable = new uint[64]; + exponentTable[0] = 0; + for (int i = 1; i < 31; i++) { + exponentTable[i] = (uint)(i << 23); + } + exponentTable[31] = 0x47800000; + exponentTable[32] = 0x80000000; + for (int i = 33; i < 63; i++) { + exponentTable[i] = (uint)(0x80000000 + ((i - 32) << 23)); + } + exponentTable[63] = 0xc7800000; + + return exponentTable; + } + private static ushort[] GenerateOffsetTable() + { + ushort[] offsetTable = new ushort[64]; + offsetTable[0] = 0; + for (int i = 1; i < 32; i++) { + offsetTable[i] = 1024; + } + offsetTable[32] = 0; + for (int i = 33; i < 64; i++) { + offsetTable[i] = 1024; + } + + return offsetTable; + } + private static ushort[] GenerateBaseTable() + { + ushort[] baseTable = new ushort[512]; + for (int i = 0; i < 256; ++i) { + sbyte e = (sbyte)(127 - i); + if (e > 24) { // Very small numbers map to zero + baseTable[i | 0x000] = 0x0000; + baseTable[i | 0x100] = 0x8000; + } else if (e > 14) { // Small numbers map to denorms + baseTable[i | 0x000] = (ushort)(0x0400 >> (18 + e)); + baseTable[i | 0x100] = (ushort)((0x0400 >> (18 + e)) | 0x8000); + } else if (e >= -15) { // Normal numbers just lose precision + baseTable[i | 0x000] = (ushort)((15 - e) << 10); + baseTable[i | 0x100] = (ushort)(((15 - e) << 10) | 0x8000); + } else if (e > -128) { // Large numbers map to Infinity + baseTable[i | 0x000] = 0x7c00; + baseTable[i | 0x100] = 0xfc00; + } else { // Infinity and NaN's stay Infinity and NaN's + baseTable[i | 0x000] = 0x7c00; + baseTable[i | 0x100] = 0xfc00; + } + } + + return baseTable; + } + private static sbyte[] GenerateShiftTable() + { + sbyte[] shiftTable = new sbyte[512]; + for (int i = 0; i < 256; ++i) { + sbyte e = (sbyte)(127 - i); + if (e > 24) { // Very small numbers map to zero + shiftTable[i | 0x000] = 24; + shiftTable[i | 0x100] = 24; + } else if (e > 14) { // Small numbers map to denorms + shiftTable[i | 0x000] = (sbyte)(e - 1); + shiftTable[i | 0x100] = (sbyte)(e - 1); + } else if (e >= -15) { // Normal numbers just lose precision + shiftTable[i | 0x000] = 13; + shiftTable[i | 0x100] = 13; + } else if (e > -128) { // Large numbers map to Infinity + shiftTable[i | 0x000] = 24; + shiftTable[i | 0x100] = 24; + } else { // Infinity and NaN's stay Infinity and NaN's + shiftTable[i | 0x000] = 13; + shiftTable[i | 0x100] = 13; + } + } + + return shiftTable; + } + + public static unsafe float HalfToSingle(Half half) + { + uint result = MantissaTable[OffsetTable[half.Value >> 10] + (half.Value & 0x3ff)] + ExponentTable[half.Value >> 10]; + return *(float*)&result; + } + public static unsafe Half SingleToHalf(float single) + { + uint value = *(uint*)&single; + + ushort result = (ushort)(BaseTable[(value >> 23) & 0x1ff] + ((value & 0x007fffff) >> ShiftTable[value >> 23])); + return Half.ToHalf(result); + } + + public static Half Negate(Half half) + { + return Half.ToHalf((ushort)(half.Value ^ 0x8000)); + } + public static Half Abs(Half half) + { + return Half.ToHalf((ushort)(half.Value & 0x7fff)); + } + + public static bool IsNaN(Half half) + { + return (half.Value & 0x7fff) > 0x7c00; + } + public static bool IsInfinity(Half half) + { + return (half.Value & 0x7fff) == 0x7c00; + } + public static bool IsPositiveInfinity(Half half) + { + return half.Value == 0x7c00; + } + public static bool IsNegativeInfinity(Half half) + { + return half.Value == 0xfc00; + } + } +} +#endif \ No newline at end of file diff --git a/test/TorchSharpTest/TestHalf.cs b/test/TorchSharpTest/TestHalf.cs new file mode 100644 index 000000000..8c7b4a3f2 --- /dev/null +++ b/test/TorchSharpTest/TestHalf.cs @@ -0,0 +1,1352 @@ +using System; +using System.Globalization; +using System.Threading; +using Xunit; + +namespace TorchSharpTest +{ + public class TestHalf + { +#if !NET6_0_OR_GREATER + //[TestFixtureSetUp()] + //public static void HalfTestInitialize(TestContext testContext) + //{ + // Thread.CurrentThread.CurrentCulture = new CultureInfo("en-US"); + //} + + //[Fact] + //public unsafe void TestAllPossibleHalfValues() + //{ + // for (ushort i = ushort.MinValue; i < ushort.MaxValue; i++) + // { + // Half half1 = Half.ToHalf(i); + // Half half2 = (Half)((float)half1); + + // Assert.IsTrue(half1.Equals(half2)); + // } + //} + + /// + ///A test for TryParse + /// + [Fact] + public void try_parse_test1() + { + Thread.CurrentThread.CurrentCulture = new CultureInfo("cs-CZ"); + + string value = "1234,567e-2"; + float resultExpected = (float)12.34567f; + + bool expected = true; + float result; + bool actual = float.TryParse(value, out result); + Assert.Equal(resultExpected, result); + Assert.Equal(expected, actual); + } + + /// + ///A test for TryParse + /// + [Fact] + public void try_parse_test() + { + string value = "777"; + NumberStyles style = NumberStyles.None; + IFormatProvider provider = CultureInfo.InvariantCulture; + Half result; + Half resultExpected = (Half)777f; + bool expected = true; + bool actual = Half.TryParse(value, style, provider, out result); + Assert.Equal(resultExpected, result); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToString + /// + [Fact] + public void to_string_test4() + { + Half target = Half.Epsilon; + string format = "e"; + string expected = "5.960464e-008"; + string actual = target.ToString(format); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToString + /// + [Fact] + public void to_string_test3() + { + Half target = (Half)333.333f; + string format = "G"; + IFormatProvider formatProvider = CultureInfo.CreateSpecificCulture("cs-CZ"); + string expected = "333,25"; + string actual = target.ToString(format, formatProvider); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToString + /// + [Fact] + public void to_string_test2() + { + Half target = (Half)0.001f; + IFormatProvider formatProvider = CultureInfo.CreateSpecificCulture("cs-CZ"); + string expected = "0,0009994507"; + string actual = target.ToString(formatProvider); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToString + /// + [Fact] + public void to_string_test1() + { + Half target = (Half)10000.00001f; + string expected = "10000"; + string actual = target.ToString(); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToHalf + /// + [Fact] + public void to_half_test1() + { + byte[] value = { 0x11, 0x22, 0x33, 0x44 }; + int startIndex = 1; + Half expected = Half.ToHalf(0x3322); + Half actual = Half.ToHalf(value, startIndex); + Assert.Equal(expected, actual); + } + + /// + ///A test for ToHalf + /// + [Fact] + public void to_half_test() + { + ushort bits = 0x3322; + Half expected = (Half)0.2229004f; + Half actual = Half.ToHalf(bits); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToUInt64 + /// + [Fact] + + public void to_u_int64_test() + { + IConvertible target = (Half)12345.999f; + IFormatProvider provider = CultureInfo.InvariantCulture; + ulong expected = 12344; + ulong actual = target.ToUInt64(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToUInt32 + /// + [Fact] + + public void to_u_int32_test() + { + IConvertible target = (Half)9999; + IFormatProvider provider = CultureInfo.InvariantCulture; + uint expected = 9992; + uint actual = target.ToUInt32(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToUInt16 + /// + [Fact] + + public void to_u_int16_test() + { + IConvertible target = (Half)33.33; + IFormatProvider provider = CultureInfo.InvariantCulture; + ushort expected = 33; + ushort actual = target.ToUInt16(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToType + /// + [Fact] + + public void to_type_test() + { + IConvertible target = (Half)111.111f; + Type conversionType = typeof(double); + IFormatProvider provider = CultureInfo.InvariantCulture; + object expected = 111.0625; + object actual = target.ToType(conversionType, provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToString + /// + [Fact] + + public void to_string_test() + { + IConvertible target = (Half)888.888; + IFormatProvider provider = CultureInfo.InvariantCulture; + string expected = "888.5"; + string actual = target.ToString(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToSingle + /// + [Fact] + + public void to_single_test() + { + IConvertible target = (Half)55.77f; + IFormatProvider provider = CultureInfo.InvariantCulture; + float expected = 55.75f; + float actual = target.ToSingle(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToSByte + /// + [Fact] + + public void to_s_byte_test() + { + IConvertible target = 123.5678f; + IFormatProvider provider = CultureInfo.InvariantCulture; + sbyte expected = 124; + sbyte actual = target.ToSByte(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToInt64 + /// + [Fact] + + public void to_int64_test() + { + IConvertible target = (Half)8562; + IFormatProvider provider = CultureInfo.InvariantCulture; + long expected = 8560; + long actual = target.ToInt64(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToInt32 + /// + [Fact] + public void to_int32_test() + { + IConvertible target = (Half)555.5; + IFormatProvider provider = CultureInfo.InvariantCulture; + int expected = 556; + int actual = target.ToInt32(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToInt16 + /// + [Fact] + public void to_int16_test() + { + IConvertible target = (Half)365; + IFormatProvider provider = CultureInfo.InvariantCulture; + short expected = 365; + short actual = target.ToInt16(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToChar + /// + [Fact] + public void to_char_test() + { + IConvertible target = (Half)64UL; + IFormatProvider provider = CultureInfo.InvariantCulture; + + try + { + char actual = target.ToChar(provider); + Assert.Fail(nameof(to_char_test)); + } + catch (InvalidCastException) { } + } + + /// + ///A test for System.IConvertible.ToDouble + /// + [Fact] + public void to_double_test() + { + IConvertible target = Half.MaxValue; + IFormatProvider provider = CultureInfo.InvariantCulture; + double expected = 65504; + double actual = target.ToDouble(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToDecimal + /// + [Fact] + public void to_decimal_test() + { + IConvertible target = (Half)146.33f; + IFormatProvider provider = CultureInfo.InvariantCulture; + Decimal expected = new Decimal(146.25f); + Decimal actual = target.ToDecimal(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToDateTime + /// + [Fact] + public void to_date_time_test() + { + IConvertible target = (Half)0; + IFormatProvider provider = CultureInfo.InvariantCulture; + + try + { + DateTime actual = target.ToDateTime(provider); + Assert.Fail(nameof(to_date_time_test)); + } + catch (InvalidCastException) { } + } + + /// + ///A test for System.IConvertible.ToByte + /// + [Fact] + + public void to_byte_test() + { + IConvertible target = (Half)111; + IFormatProvider provider = CultureInfo.InvariantCulture; + byte expected = 111; + byte actual = target.ToByte(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.ToBoolean + /// + [Fact] + + public void to_boolean_test() + { + IConvertible target = (Half)77; + IFormatProvider provider = CultureInfo.InvariantCulture; + bool expected = true; + bool actual = target.ToBoolean(provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for System.IConvertible.GetTypeCode + /// + [Fact] + + public void get_type_code_test1() + { + IConvertible target = (Half)33; + TypeCode expected = (TypeCode)255; + TypeCode actual = target.GetTypeCode(); + Assert.Equal(expected, actual); + } + + /// + ///A test for Subtract + /// + [Fact] + public void subtract_test() + { + Half half1 = (Half)1.12345f; + Half half2 = (Half)0.01234f; + Half expected = (Half)1.11111f; + Half actual = Half.Subtract(half1, half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for Sign + /// + [Fact] + public void sign_test() + { + Assert.Equal(1, Half.Sign((Half)333.5)); + Assert.Equal(1, Half.Sign(10)); + Assert.Equal(-1, Half.Sign((Half)(-333.5))); + Assert.Equal(-1, Half.Sign(-10)); + Assert.Equal(0, Half.Sign(0)); + } + + /// + ///A test for Parse + /// + [Fact] + public void parse_test3() + { + string value = "112,456e-1"; + IFormatProvider provider = new CultureInfo("cs-CZ"); + Half expected = (Half)11.2456; + Half actual = Half.Parse(value, provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for Parse + /// + [Fact] + public void parse_test2() + { + string value = "55.55"; + Half expected = (Half)55.55; + Half actual = Half.Parse(value); + Assert.Equal(expected, actual); + } + + /// + ///A test for Parse + /// + [Fact] + public void parse_test1() + { + string value = "-1.063E-02"; + NumberStyles style = NumberStyles.AllowExponent | NumberStyles.Number; + IFormatProvider provider = CultureInfo.CreateSpecificCulture("en-US"); + Half expected = (Half)(-0.01062775); + Half actual = Half.Parse(value, style, provider); + Assert.Equal(expected, actual); + } + + /// + ///A test for Parse + /// + [Fact] + public void parse_test() + { + string value = "-7"; + NumberStyles style = NumberStyles.Number; + Half expected = (Half)(-7); + Half actual = Half.Parse(value, style); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_UnaryPlus + /// + [Fact] + public void op_UnaryPlusTest() + { + Half half = (Half)77; + Half expected = (Half)77; + Half actual = +(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_UnaryNegation + /// + [Fact] + public void op_UnaryNegationTest() + { + Half half = (Half)77; + Half expected = (Half)(-77); + Half actual = -(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Subtraction + /// + [Fact] + public void op_SubtractionTest() + { + Half half1 = (Half)77.99; + Half half2 = (Half)17.88; + Half expected = (Half)60.0625; + Half actual = (half1 - half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Multiply + /// + [Fact] + public void op_MultiplyTest() + { + Half half1 = (Half)11.1; + Half half2 = (Half)5; + Half expected = (Half)55.46879; + Half actual = (half1 * half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_LessThanOrEqual + /// + [Fact] + public void op_LessThanOrEqualTest() + { + { + Half half1 = (Half)111; + Half half2 = (Half)120; + bool expected = true; + bool actual = (half1 <= half2); + Assert.Equal(expected, actual); + } + { + Half half1 = (Half)111; + Half half2 = (Half)111; + bool expected = true; + bool actual = (half1 <= half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_LessThan + /// + [Fact] + public void op_LessThanTest() + { + { + Half half1 = (Half)111; + Half half2 = (Half)120; + bool expected = true; + bool actual = (half1 <= half2); + Assert.Equal(expected, actual); + } + { + Half half1 = (Half)111; + Half half2 = (Half)111; + bool expected = true; + bool actual = (half1 <= half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_Inequality + /// + [Fact] + public void op_InequalityTest() + { + { + Half half1 = (Half)0; + Half half2 = (Half)1; + bool expected = true; + bool actual = (half1 != half2); + Assert.Equal(expected, actual); + } + { + Half half1 = Half.MaxValue; + Half half2 = Half.MaxValue; + bool expected = false; + bool actual = (half1 != half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_Increment + /// + [Fact] + public void op_IncrementTest() + { + Half half = (Half)125.33f; + Half expected = (Half)126.33f; + Half actual = ++(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest10() + { + Half value = (Half)55.55f; + float expected = 55.53125f; + float actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest9() + { + long value = 1295; + Half expected = (Half)1295; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest8() + { + sbyte value = -15; + Half expected = (Half)(-15); + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest7() + { + Half value = Half.Epsilon; + double expected = 5.9604644775390625e-8; + double actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest6() + { + short value = 15555; + Half expected = (Half)15552; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest5() + { + byte value = 77; + Half expected = (Half)77; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest4() + { + int value = 7777; + Half expected = (Half)7776; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest3() + { + char value = '@'; + Half expected = 64; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest2() + { + ushort value = 546; + Half expected = 546; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest1() + { + ulong value = 123456UL; + Half expected = Half.PositiveInfinity; + Half actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Implicit + /// + [Fact] + public void op_ImplicitTest() + { + uint value = 728; + Half expected = 728; + Half actual; + actual = value; + Assert.Equal(expected, actual); + } + + /// + ///A test for op_GreaterThanOrEqual + /// + [Fact] + public void op_GreaterThanOrEqualTest() + { + { + Half half1 = (Half)111; + Half half2 = (Half)120; + bool expected = false; + bool actual = (half1 >= half2); + Assert.Equal(expected, actual); + } + { + Half half1 = (Half)111; + Half half2 = (Half)111; + bool expected = true; + bool actual = (half1 >= half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_GreaterThan + /// + [Fact] + public void op_GreaterThanTest() + { + { + Half half1 = (Half)111; + Half half2 = (Half)120; + bool expected = false; + bool actual = (half1 > half2); + Assert.Equal(expected, actual); + } + { + Half half1 = (Half)111; + Half half2 = (Half)111; + bool expected = false; + bool actual = (half1 > half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest12() + { + Half value = 1245; + uint expected = 1245; + uint actual = ((uint)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest11() + { + Half value = 3333; + ushort expected = 3332; + ushort actual = ((ushort)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest10() + { + float value = 0.1234f; + Half expected = (Half)0.1234f; + Half actual = ((Half)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest9() + { + Half value = 9777; + Decimal expected = 9776; + Decimal actual = ((Decimal)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest8() + { + Half value = (Half)5.5; + sbyte expected = 5; + sbyte actual = ((sbyte)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest7() + { + Half value = 666; + ulong expected = 666; + ulong actual = ((ulong)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest6() + { + double value = -666.66; + Half expected = (Half)(-666.66); + Half actual = ((Half)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest5() + { + Half value = (Half)33.3; + short expected = 33; + short actual = ((short)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest4() + { + Half value = 12345; + long expected = 12344; + long actual = ((long)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest3() + { + Half value = (Half)15.15; + int expected = 15; + int actual = ((int)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest2() + { + Decimal value = new Decimal(333.1); + Half expected = (Half)333.1; + Half actual = ((Half)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest1() + { + Half value = (Half)(-77); + byte expected = unchecked((byte)(-77)); + byte actual = ((byte)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Explicit + /// + [Fact] + public void op_ExplicitTest() + { + Half value = 64; + char expected = '@'; + char actual = ((char)(value)); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Equality + /// + [Fact] + public void op_EqualityTest() + { + { + Half half1 = Half.MaxValue; + Half half2 = Half.MaxValue; + bool expected = true; + bool actual = (half1 == half2); + Assert.Equal(expected, actual); + } + { + Half half1 = Half.NaN; + Half half2 = Half.NaN; + bool expected = false; + bool actual = (half1 == half2); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for op_Division + /// + [Fact] + public void op_DivisionTest() + { + Half half1 = 333; + Half half2 = 3; + Half expected = 111; + Half actual = (half1 / half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Decrement + /// + [Fact] + public void op_DecrementTest() + { + Half half = 1234; + Half expected = 1233; + Half actual = --(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for op_Addition + /// + [Fact] + public void op_AdditionTest() + { + Half half1 = (Half)1234.5f; + Half half2 = (Half)1234.5f; + Half expected = (Half)2469f; + Half actual = (half1 + half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for Negate + /// + [Fact] + public void negate_test() + { + Half half = new Half(658.51); + Half expected = new Half(-658.51); + Half actual = Half.Negate(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for Multiply + /// + [Fact] + public void multiply_test() + { + Half half1 = 7; + Half half2 = 12; + Half expected = 84; + Half actual = Half.Multiply(half1, half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for Min + /// + [Fact] + public void min_test() + { + Half val1 = -155; + Half val2 = 155; + Half expected = -155; + Half actual = Half.Min(val1, val2); + Assert.Equal(expected, actual); + } + + /// + ///A test for Max + /// + [Fact] + public void max_test() + { + Half val1 = new Half(333); + Half val2 = new Half(332); + Half expected = new Half(333); + Half actual = Half.Max(val1, val2); + Assert.Equal(expected, actual); + } + + /// + ///A test for IsPositiveInfinity + /// + [Fact] + public void is_positive_infinity_test() + { + { + Half half = Half.PositiveInfinity; + bool expected = true; + bool actual = Half.IsPositiveInfinity(half); + Assert.Equal(expected, actual); + } + { + Half half = (Half)1234.5678f; + bool expected = false; + bool actual = Half.IsPositiveInfinity(half); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for IsNegativeInfinity + /// + [Fact] + public void is_negative_infinity_test() + { + { + Half half = Half.NegativeInfinity; + bool expected = true; + bool actual = Half.IsNegativeInfinity(half); + Assert.Equal(expected, actual); + } + { + Half half = (Half)1234.5678f; + bool expected = false; + bool actual = Half.IsNegativeInfinity(half); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for IsNaN + /// + [Fact] + public void is_na_n_test() + { + { + Half half = Half.NaN; + bool expected = true; + bool actual = Half.IsNaN(half); + Assert.Equal(expected, actual); + } + { + Half half = (Half)1234.5678f; + bool expected = false; + bool actual = Half.IsNaN(half); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for IsInfinity + /// + [Fact] + public void is_infinity_test() + { + { + Half half = Half.NegativeInfinity; + bool expected = true; + bool actual = Half.IsInfinity(half); + Assert.Equal(expected, actual); + } + { + Half half = Half.PositiveInfinity; + bool expected = true; + bool actual = Half.IsInfinity(half); + Assert.Equal(expected, actual); + } + { + Half half = (Half)1234.5678f; + bool expected = false; + bool actual = Half.IsInfinity(half); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for GetTypeCode + /// + [Fact] + public void get_type_code_test() + { + Half target = new Half(); + TypeCode expected = (TypeCode)255; + TypeCode actual = target.GetTypeCode(); + Assert.Equal(expected, actual); + } + + /// + ///A test for GetHashCode + /// + [Fact] + public void get_hash_code_test() + { + Half target = 777; + int expected = 25106; + int actual = target.GetHashCode(); + Assert.Equal(expected, actual); + } + + /// + ///A test for GetBytes + /// + [Fact] + public void get_bytes_test() + { + Half value = Half.ToHalf(0x1234); + byte[] expected = { 0x34, 0x12 }; + byte[] actual = Half.GetBytes(value); + Assert.Equal(expected[0], actual[0]); + Assert.Equal(expected[1], actual[1]); + } + + /// + ///A test for GetBits + /// + [Fact] + public void get_bits_test() + { + Half value = new Half(555.555); + ushort expected = 24663; + ushort actual = Half.GetBits(value); + Assert.Equal(expected, actual); + } + + /// + ///A test for Equals + /// + [Fact] + public void equals_test1() + { + { + Half target = Half.MinValue; + Half half = Half.MinValue; + bool expected = true; + bool actual = target.Equals(half); + Assert.Equal(expected, actual); + } + { + Half target = 12345; + Half half = 12345; + bool expected = true; + bool actual = target.Equals(half); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for Equals + /// + [Fact] + public void equals_test() + { + { + Half target = new Half(); + object obj = new Single(); + bool expected = false; + bool actual = target.Equals(obj); + Assert.Equal(expected, actual); + } + { + Half target = new Half(); + object obj = (Half)111; + bool expected = false; + bool actual = target.Equals(obj); + Assert.Equal(expected, actual); + } + } + + /// + ///A test for Divide + /// + [Fact] + public void divide_test() + { + Half half1 = (Half)626.046f; + Half half2 = (Half)8790.5f; + Half expected = (Half)0.07122803f; + Half actual = Half.Divide(half1, half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for CompareTo + /// + [Fact] + public void compare_to_test1() + { + Half target = 1; + Half half = 2; + int expected = -1; + int actual = target.CompareTo(half); + Assert.Equal(expected, actual); + } + + /// + ///A test for CompareTo + /// + [Fact] + public void compare_to_test() + { + Half target = 666; + object obj = (Half)555; + int expected = 1; + int actual = target.CompareTo(obj); + Assert.Equal(expected, actual); + } + + /// + ///A test for Add + /// + [Fact] + public void add_test() + { + Half half1 = (Half)33.33f; + Half half2 = (Half)66.66f; + Half expected = (Half)99.99f; + Half actual = Half.Add(half1, half2); + Assert.Equal(expected, actual); + } + + /// + ///A test for Abs + /// + [Fact] + public void abs_test() + { + Half value = -55; + Half expected = 55; + Half actual = Half.Abs(value); + Assert.Equal(expected, actual); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test6() + { + long value = 44; + Half target = new Half(value); + Assert.Equal(44, (long)target); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test5() + { + int value = 789; // TODO: Initialize to an appropriate value + Half target = new Half(value); + Assert.Equal(789, (int)target); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test4() + { + float value = -0.1234f; + Half target = new Half(value); + Assert.Equal((Half)(-0.1233521f), target); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test3() + { + double value = 11.11; + Half target = new Half(value); + Assert.Equal(11.109375, (double)target); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test2() + { + ulong value = 99999999; + Half target = new Half(value); + Assert.Equal(target, Half.PositiveInfinity); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test1() + { + uint value = 3330; + Half target = new Half(value); + Assert.Equal((uint)3330, (uint)target); + } + + /// + ///A test for Half Constructor + /// + [Fact] + public void half_constructor_test() + { + Decimal value = new Decimal(-11.11); + Half target = new Half(value); + Assert.Equal((Decimal)(-11.10938), (Decimal)target); + } +#endif + } +} From 63da9c21a78833ff3cdcd47804b1e9a962353f6f Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 25 Oct 2024 12:22:23 -0300 Subject: [PATCH 34/43] some fix THSCuda --- src/Native/LibTorchSharp/THSCuda.cpp | 18 ++++++------------ src/Native/LibTorchSharp/THSCuda.h | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index 911f1722e..a024bf4d0 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -4,11 +4,6 @@ #include #include -#define RETURN_CUDA_DEVICE(x) \ - if(TORCHSHARP_CUDA_TOOLKIT_FOUND) \ - return x; \ - return -1; - #ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND cudaDeviceProp THSCuda_get_device_prop(int device) { @@ -17,28 +12,27 @@ cudaDeviceProp THSCuda_get_device_prop(int device) cudaGetDeviceProperties_v2(&cdp, device); return cdp; } - #endif int THSCuda_get_major_compute_capability(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).major); + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).major) } int THSCuda_get_minor_compute_capability(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).minor); + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).minor) } int THSCuda_get_device_count(int* count) { - return cudaGetDeviceCount(count); + RETURN_CUDA_DEVICE(cudaGetDeviceCount(count)) } int THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total) { -#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND +#ifdef CUDA_TOOLKIT_FOUND cudaError_t res = cudaSetDevice(device); if (res != CUDA_SUCCESS) return -1; @@ -53,13 +47,13 @@ int THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total) size_t THSCuda_get_total_memory(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalConstMem); + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalConstMem) } size_t THSCuda_get_global_total_memory(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalGlobalMem); + RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalGlobalMem) } //TODO: implement more function diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index b6c0222e6..9ec7416ce 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -2,10 +2,21 @@ #pragma once #include "../Stdafx.h" - +#include "Utils.h" #include "torch/torch.h" -#include "Utils.h" +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND +#define CUDA_TOOLKIT_FOUND 1 +#else +#define CUDA_TOOLKIT_FOUND 0 +#endif + +#define RETURN_CUDA_DEVICE(x) \ + if(CUDA_TOOLKIT_FOUND) \ + return x; \ + else \ + return -1; + #ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND #include "cuda.h" #include "cuda_runtime_api.h" From ce679e207f1707d66b01493d581c4591b5f8f80e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 25 Oct 2024 13:15:48 -0300 Subject: [PATCH 35/43] fast copy tensor accessor --- .gitignore | 2 + TorchSharp.sln | 133 +++++++++++++++++++++++-- src/TorchSharp/Utils/TensorAccessor.cs | 47 +++++++++ 3 files changed, 175 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 4f8e77a3e..13682298c 100644 --- a/.gitignore +++ b/.gitignore @@ -273,3 +273,5 @@ packages/ /.idea /test/TorchSharpTest/exportsd.py .vscode/settings.json +/TestClear +TestClear/ diff --git a/TorchSharp.sln b/TorchSharp.sln index 8cec25c7d..db67b613f 100644 --- a/TorchSharp.sln +++ b/TorchSharp.sln @@ -36,7 +36,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TorchSharp", "TorchSharp", EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{2B359162-062E-3C52-91D3-027A8542A58C}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{E4C0DBEE-0815-311B-9065-137BB50BD793}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{748608D6-97ED-3EEA-89D9-D5D5CC69B05A}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Native-Debug", "Native-Debug", "{CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D}" ProjectSection(SolutionItems) = preProject @@ -66,111 +66,229 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution azure-pipelines.yml = azure-pipelines.yml build\BranchInfo.props = build\BranchInfo.props DEVGUIDE.md = DEVGUIDE.md + global.json = global.json README.md = README.md RELEASENOTES.md = RELEASENOTES.md - global.json = global.json EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchVision", "src\TorchVision\TorchVision.csproj", "{DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchAudio", "src\TorchAudio\TorchAudio.csproj", "{B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestClear", "TestClear\TestClear.csproj", "{6002DD2E-BF7A-4320-8ED6-8B0138F07A52}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Debug|x64 = Debug|x64 + MinSizeRel|Any CPU = MinSizeRel|Any CPU + MinSizeRel|x64 = MinSizeRel|x64 Release|Any CPU = Release|Any CPU Release|x64 = Release|x64 + RelWithDebInfo|Any CPU = RelWithDebInfo|Any CPU + RelWithDebInfo|x64 = RelWithDebInfo|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|Any CPU.Build.0 = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|x64.ActiveCfg = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|x64.Build.0 = Debug|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|x64.Build.0 = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|Any CPU.ActiveCfg = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|Any CPU.Build.0 = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|x64.ActiveCfg = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|x64.Build.0 = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|Any CPU.Build.0 = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|x64.ActiveCfg = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|x64.Build.0 = Debug|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|x64.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|Any CPU.ActiveCfg = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|Any CPU.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|x64.ActiveCfg = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|x64.Build.0 = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|Any CPU.Build.0 = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|x64.ActiveCfg = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|x64.Build.0 = Debug|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|x64.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.Build.0 = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|Any CPU.ActiveCfg = Debug|x64 {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|x64.ActiveCfg = Debug|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|Any CPU.ActiveCfg = MinSizeRel|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|Any CPU.Build.0 = MinSizeRel|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|x64.Build.0 = MinSizeRel|x64 {2B359162-062E-3C52-91D3-027A8542A58C}.Release|Any CPU.ActiveCfg = Release|x64 {2B359162-062E-3C52-91D3-027A8542A58C}.Release|x64.ActiveCfg = Release|x64 - {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|Any CPU.ActiveCfg = Debug|x64 - {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|x64.ActiveCfg = Debug|x64 - {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|Any CPU.ActiveCfg = Release|x64 - {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|x64.ActiveCfg = Release|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64 + {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Debug|Any CPU.ActiveCfg = Debug|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Debug|x64.ActiveCfg = Debug|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|Any CPU.ActiveCfg = MinSizeRel|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|Any CPU.Build.0 = MinSizeRel|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|x64.Build.0 = MinSizeRel|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Release|Any CPU.ActiveCfg = Release|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Release|x64.ActiveCfg = Release|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64 + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64 {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|Any CPU.Build.0 = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|x64.ActiveCfg = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|x64.Build.0 = Debug|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|Any CPU.ActiveCfg = LibTorch2.3.1|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|Any CPU.Build.0 = LibTorch2.3.1|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|x64.ActiveCfg = LibTorch2.3.1|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|x64.Build.0 = LibTorch2.3.1|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|Any CPU.ActiveCfg = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|Any CPU.Build.0 = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|x64.ActiveCfg = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|x64.Build.0 = Release|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|Any CPU.Build.0 = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|x64.ActiveCfg = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|x64.Build.0 = Debug|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|x64.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|Any CPU.ActiveCfg = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|Any CPU.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|x64.ActiveCfg = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|x64.Build.0 = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|Any CPU.Build.0 = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|x64.ActiveCfg = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|x64.Build.0 = Debug|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|x64.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|Any CPU.ActiveCfg = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|Any CPU.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|x64.ActiveCfg = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|x64.Build.0 = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|Any CPU.Build.0 = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|x64.ActiveCfg = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|x64.Build.0 = Debug|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|x64.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|Any CPU.ActiveCfg = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|Any CPU.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|x64.ActiveCfg = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|x64.Build.0 = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|Any CPU.Build.0 = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|x64.ActiveCfg = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|x64.Build.0 = Debug|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|x64.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|Any CPU.ActiveCfg = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|Any CPU.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|x64.ActiveCfg = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|x64.Build.0 = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|Any CPU.Build.0 = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|x64.ActiveCfg = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|x64.Build.0 = Debug|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|x64.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|Any CPU.ActiveCfg = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|Any CPU.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|x64.ActiveCfg = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|x64.Build.0 = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|Any CPU.Build.0 = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|x64.ActiveCfg = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|x64.Build.0 = Debug|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|x64.ActiveCfg = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|x64.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|Any CPU.ActiveCfg = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|Any CPU.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|x64.ActiveCfg = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|x64.Build.0 = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|x64.Build.0 = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|x64.ActiveCfg = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|x64.Build.0 = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|Any CPU.ActiveCfg = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|Any CPU.Build.0 = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|x64.ActiveCfg = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|x64.Build.0 = Debug|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|Any CPU.Build.0 = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|x64.ActiveCfg = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|x64.Build.0 = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -182,7 +300,7 @@ Global {42B45168-476D-4BFA-87B8-81A34E6295CD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {567456AD-B026-4CB6-B98D-4FC930C90223} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {2B359162-062E-3C52-91D3-027A8542A58C} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} - {E4C0DBEE-0815-311B-9065-137BB50BD793} = {4DB9E84D-324C-408F-87A6-246E86205540} + {748608D6-97ED-3EEA-89D9-D5D5CC69B05A} = {4DB9E84D-324C-408F-87A6-246E86205540} {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {D8C60CD8-8429-45F2-A755-47B6CD10FDF8} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {4DB9E84D-324C-408F-87A6-246E86205540} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} @@ -193,6 +311,7 @@ Global {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {6002DD2E-BF7A-4320-8ED6-8B0138F07A52} = {09EADF06-BE25-4228-AB53-95AE3E15B530} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index edbcf7675..0f8dbaeb2 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Runtime.InteropServices; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp.Utils @@ -47,6 +48,16 @@ public T[] ToArray() if (_tensor.ndim < 2) return (T[])ToNDArray(); + if (_tensor.is_contiguous()) { + //This is very fast. And work VERY WELL + var shps = _tensor.shape; + long TempCount = 1; + for (int i = 0; i < shps.Length; i++) + TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape + unsafe { + return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); + } + } var result = new T[Count]; CopyTo(result); return result; @@ -231,8 +242,39 @@ private void validate(long index) if (index >= Count) throw new IndexOutOfRangeException(); } + private void CopyContiguous(T[] array, int index=0, int count=0) + { + if (!_tensor.is_contiguous()) + throw new Exception("The tensor is not contiguous"); + var shps = _tensor.shape; + long TempCount = 1; + for (int i = 0; i < shps.Length; i++) + TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape + if (count > TempCount || count == 0) + count = (int)TempCount; + + if (array is byte[] ba) + Marshal.Copy(_tensor_data_ptr, ba, index, count); + if (array is short[] sa) + Marshal.Copy(_tensor_data_ptr, sa, index, count); + if(array is char[] ca) + Marshal.Copy(_tensor_data_ptr, ca, index, count); + if (array is long[] la) + Marshal.Copy(_tensor_data_ptr, la, index, count); + if (array is float[] fa) + Marshal.Copy(_tensor_data_ptr, fa, index, count); + if (array is int[] ia) + Marshal.Copy(_tensor_data_ptr, ia, index, count); + if (array is double[] da) + Marshal.Copy(_tensor_data_ptr, da, index, count); + } public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) { + if (_tensor.is_contiguous()) { + CopyContiguous(array, arrayIndex, array.Length); + return; + } + int idx = arrayIndex; foreach (int offset in GetSubsequentIndices(tensorIndex)) { if (idx >= array.Length) break; @@ -243,6 +285,11 @@ public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) public void CopyTo(Span array, int arrayIndex = 0, long tensorIndex = 0) { + if (_tensor.is_contiguous()) { + ToArray().CopyTo(array); + return; + } + int idx = arrayIndex; foreach (int offset in GetSubsequentIndices(tensorIndex)) { if (idx >= array.Length) break; From 958a1871d00f2a2719d67b11ddd50cbb807951fc Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 25 Oct 2024 13:43:52 -0300 Subject: [PATCH 36/43] rollback sln --- TorchSharp.sln | 143 +++---------------------- src/TorchSharp/Utils/TensorAccessor.cs | 45 ++++---- 2 files changed, 34 insertions(+), 154 deletions(-) diff --git a/TorchSharp.sln b/TorchSharp.sln index db67b613f..054c07bb3 100644 --- a/TorchSharp.sln +++ b/TorchSharp.sln @@ -34,9 +34,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TorchSharp", "TorchSharp", pkg\TorchSharp\TorchSharp.symbols.nupkgproj = pkg\TorchSharp\TorchSharp.symbols.nupkgproj EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{2B359162-062E-3C52-91D3-027A8542A58C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{748608D6-97ED-3EEA-89D9-D5D5CC69B05A}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{E4C0DBEE-0815-311B-9065-137BB50BD793}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Native-Debug", "Native-Debug", "{CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D}" ProjectSection(SolutionItems) = preProject @@ -75,220 +75,102 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchVision", "src\TorchVis EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchAudio", "src\TorchAudio\TorchAudio.csproj", "{B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestClear", "TestClear\TestClear.csproj", "{6002DD2E-BF7A-4320-8ED6-8B0138F07A52}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Debug|x64 = Debug|x64 - MinSizeRel|Any CPU = MinSizeRel|Any CPU - MinSizeRel|x64 = MinSizeRel|x64 Release|Any CPU = Release|Any CPU Release|x64 = Release|x64 - RelWithDebInfo|Any CPU = RelWithDebInfo|Any CPU - RelWithDebInfo|x64 = RelWithDebInfo|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|Any CPU.Build.0 = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|x64.ActiveCfg = Debug|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Debug|x64.Build.0 = Debug|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.MinSizeRel|x64.Build.0 = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|Any CPU.ActiveCfg = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|Any CPU.Build.0 = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|x64.ActiveCfg = Release|Any CPU {061CCBA1-A859-4392-8F45-249E5DAF1C88}.Release|x64.Build.0 = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {061CCBA1-A859-4392-8F45-249E5DAF1C88}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|Any CPU.Build.0 = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|x64.ActiveCfg = Debug|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Debug|x64.Build.0 = Debug|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.MinSizeRel|x64.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|Any CPU.ActiveCfg = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|Any CPU.Build.0 = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|x64.ActiveCfg = Release|Any CPU {6C323B05-9028-4B09-911C-3C03AE058BEE}.Release|x64.Build.0 = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {6C323B05-9028-4B09-911C-3C03AE058BEE}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|Any CPU.Build.0 = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|x64.ActiveCfg = Debug|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Debug|x64.Build.0 = Debug|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.MinSizeRel|x64.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.Build.0 = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {42B45168-476D-4BFA-87B8-81A34E6295CD}.RelWithDebInfo|x64.Build.0 = Release|Any CPU - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|Any CPU.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|x64.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|Any CPU.ActiveCfg = MinSizeRel|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|Any CPU.Build.0 = MinSizeRel|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.MinSizeRel|x64.Build.0 = MinSizeRel|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|Any CPU.ActiveCfg = Release|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|x64.ActiveCfg = Release|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Debug|Any CPU.ActiveCfg = Debug|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Debug|x64.ActiveCfg = Debug|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|Any CPU.ActiveCfg = MinSizeRel|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|Any CPU.Build.0 = MinSizeRel|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|x64.ActiveCfg = MinSizeRel|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.MinSizeRel|x64.Build.0 = MinSizeRel|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Release|Any CPU.ActiveCfg = Release|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.Release|x64.ActiveCfg = Release|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|Any CPU.ActiveCfg = RelWithDebInfo|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|Any CPU.Build.0 = RelWithDebInfo|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|x64.ActiveCfg = RelWithDebInfo|x64 - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A}.RelWithDebInfo|x64.Build.0 = RelWithDebInfo|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|Any CPU.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|x64.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|Any CPU.ActiveCfg = Release|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|x64.ActiveCfg = Release|x64 + {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|Any CPU.ActiveCfg = Debug|x64 + {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|x64.ActiveCfg = Debug|x64 + {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|Any CPU.ActiveCfg = Release|x64 + {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|x64.ActiveCfg = Release|x64 {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|Any CPU.Build.0 = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|x64.ActiveCfg = Debug|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Debug|x64.Build.0 = Debug|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|Any CPU.ActiveCfg = LibTorch2.3.1|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|Any CPU.Build.0 = LibTorch2.3.1|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|x64.ActiveCfg = LibTorch2.3.1|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.MinSizeRel|x64.Build.0 = LibTorch2.3.1|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|Any CPU.ActiveCfg = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|Any CPU.Build.0 = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|x64.ActiveCfg = Release|Any CPU {DD652544-711E-4029-83FF-DA4A9600E6E7}.Release|x64.Build.0 = Release|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {DD652544-711E-4029-83FF-DA4A9600E6E7}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|Any CPU.Build.0 = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|x64.ActiveCfg = Debug|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Debug|x64.Build.0 = Debug|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.MinSizeRel|x64.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|Any CPU.ActiveCfg = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|Any CPU.Build.0 = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|x64.ActiveCfg = Release|Any CPU {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.Release|x64.Build.0 = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {05031D1C-D0B2-4BF3-A6AF-3339A78437E3}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|Any CPU.Build.0 = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|x64.ActiveCfg = Debug|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Debug|x64.Build.0 = Debug|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.MinSizeRel|x64.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|Any CPU.ActiveCfg = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|Any CPU.Build.0 = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|x64.ActiveCfg = Release|Any CPU {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.Release|x64.Build.0 = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {AACEAE55-804D-45BC-BC3D-1AB8E856E0E8}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|Any CPU.Build.0 = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|x64.ActiveCfg = Debug|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Debug|x64.Build.0 = Debug|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.MinSizeRel|x64.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|Any CPU.ActiveCfg = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|Any CPU.Build.0 = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|x64.ActiveCfg = Release|Any CPU {95493944-D1AE-414E-964B-B58AEAE672E5}.Release|x64.Build.0 = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {95493944-D1AE-414E-964B-B58AEAE672E5}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|Any CPU.Build.0 = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|x64.ActiveCfg = Debug|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Debug|x64.Build.0 = Debug|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.MinSizeRel|x64.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|Any CPU.ActiveCfg = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|Any CPU.Build.0 = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|x64.ActiveCfg = Release|Any CPU {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.Release|x64.Build.0 = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|Any CPU.Build.0 = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|x64.ActiveCfg = Debug|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Debug|x64.Build.0 = Debug|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.MinSizeRel|x64.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|Any CPU.ActiveCfg = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|Any CPU.Build.0 = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|x64.ActiveCfg = Release|Any CPU {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.Release|x64.Build.0 = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}.RelWithDebInfo|x64.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|Any CPU.Build.0 = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|x64.ActiveCfg = Debug|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Debug|x64.Build.0 = Debug|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|Any CPU.ActiveCfg = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|Any CPU.Build.0 = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|x64.ActiveCfg = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.MinSizeRel|x64.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|Any CPU.ActiveCfg = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|Any CPU.Build.0 = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|x64.ActiveCfg = Release|Any CPU {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.Release|x64.Build.0 = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE}.RelWithDebInfo|x64.Build.0 = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|Any CPU.Build.0 = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|x64.ActiveCfg = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Debug|x64.Build.0 = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|Any CPU.ActiveCfg = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|Any CPU.Build.0 = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|x64.ActiveCfg = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.MinSizeRel|x64.Build.0 = Debug|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|Any CPU.ActiveCfg = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|Any CPU.Build.0 = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|x64.ActiveCfg = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.Release|x64.Build.0 = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|Any CPU.ActiveCfg = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|Any CPU.Build.0 = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|x64.ActiveCfg = Release|Any CPU - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52}.RelWithDebInfo|x64.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -299,8 +181,8 @@ Global {6C323B05-9028-4B09-911C-3C03AE058BEE} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {42B45168-476D-4BFA-87B8-81A34E6295CD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {567456AD-B026-4CB6-B98D-4FC930C90223} = {D3D38B03-B557-484D-8348-8BADEE4DF592} - {2B359162-062E-3C52-91D3-027A8542A58C} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} - {748608D6-97ED-3EEA-89D9-D5D5CC69B05A} = {4DB9E84D-324C-408F-87A6-246E86205540} + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} + {E4C0DBEE-0815-311B-9065-137BB50BD793} = {4DB9E84D-324C-408F-87A6-246E86205540} {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {D8C60CD8-8429-45F2-A755-47B6CD10FDF8} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {4DB9E84D-324C-408F-87A6-246E86205540} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} @@ -311,7 +193,6 @@ Global {6D3CE8AA-F369-4D2D-BDA7-9F89D6BE1B2E} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {DCF01EE5-6431-4115-85E0-1FC4C3DE86A2} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {B3AAC8E8-9CA4-4B01-96CF-206AE7327DDE} = {09EADF06-BE25-4228-AB53-95AE3E15B530} - {6002DD2E-BF7A-4320-8ED6-8B0138F07A52} = {09EADF06-BE25-4228-AB53-95AE3E15B530} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 0f8dbaeb2..6966dfdbe 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -242,31 +242,30 @@ private void validate(long index) if (index >= Count) throw new IndexOutOfRangeException(); } - private void CopyContiguous(T[] array, int index=0, int count=0) - { + private void CopyContiguous(T[] array, int index=0, int count=0) + { if (!_tensor.is_contiguous()) throw new Exception("The tensor is not contiguous"); - var shps = _tensor.shape; - long TempCount = 1; - for (int i = 0; i < shps.Length; i++) - TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape - if (count > TempCount || count == 0) - count = (int)TempCount; - - if (array is byte[] ba) - Marshal.Copy(_tensor_data_ptr, ba, index, count); - if (array is short[] sa) - Marshal.Copy(_tensor_data_ptr, sa, index, count); - if(array is char[] ca) - Marshal.Copy(_tensor_data_ptr, ca, index, count); - if (array is long[] la) - Marshal.Copy(_tensor_data_ptr, la, index, count); - if (array is float[] fa) - Marshal.Copy(_tensor_data_ptr, fa, index, count); - if (array is int[] ia) - Marshal.Copy(_tensor_data_ptr, ia, index, count); - if (array is double[] da) - Marshal.Copy(_tensor_data_ptr, da, index, count); + var shps = _tensor.shape; + long TempCount = 1; + for (int i = 0; i < shps.Length; i++) + TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape + if (count > TempCount || count == 0) + count = (int)TempCount; + if (array is byte[] ba) + Marshal.Copy(_tensor_data_ptr, ba, index, count); + if (array is short[] sa) + Marshal.Copy(_tensor_data_ptr, sa, index, count); + if(array is char[] ca) + Marshal.Copy(_tensor_data_ptr, ca, index, count); + if (array is long[] la) + Marshal.Copy(_tensor_data_ptr, la, index, count); + if (array is float[] fa) + Marshal.Copy(_tensor_data_ptr, fa, index, count); + if (array is int[] ia) + Marshal.Copy(_tensor_data_ptr, ia, index, count); + if (array is double[] da) + Marshal.Copy(_tensor_data_ptr, da, index, count); } public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) { From 0b20f13779ace6460fe6391d1b81eecd05e98e01 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 25 Oct 2024 14:28:53 -0300 Subject: [PATCH 37/43] Numel --- src/TorchSharp/Utils/TensorAccessor.cs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 6966dfdbe..42fd49c11 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -47,18 +47,15 @@ public T[] ToArray() { if (_tensor.ndim < 2) return (T[])ToNDArray(); - + long Cnt = Count; if (_tensor.is_contiguous()) { - //This is very fast. And work VERY WELL - var shps = _tensor.shape; - long TempCount = 1; - for (int i = 0; i < shps.Length; i++) - TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape + if (Cnt == 0) + throw new Exception("Invalid"); unsafe { - return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); + return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(Cnt)).ToArray(); } } - var result = new T[Count]; + var result = new T[Cnt]; CopyTo(result); return result; } @@ -246,12 +243,9 @@ private void CopyContiguous(T[] array, int index=0, int count=0) { if (!_tensor.is_contiguous()) throw new Exception("The tensor is not contiguous"); - var shps = _tensor.shape; - long TempCount = 1; - for (int i = 0; i < shps.Length; i++) - TempCount *= shps[i]; //Theorically the numel is simple as product of each element shape - if (count > TempCount || count == 0) - count = (int)TempCount; + var Cnt = Count; + if (count > Cnt || count == 0) + count = (int)Cnt; if (array is byte[] ba) Marshal.Copy(_tensor_data_ptr, ba, index, count); if (array is short[] sa) From 572bc3e11094cdecd6a737c3ac3c7441192fb975 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 28 Oct 2024 19:05:42 -0300 Subject: [PATCH 38/43] some --- src/Native/LibTorchSharp/THSStorage.cpp | 23 ++++++++++++++++ src/Native/LibTorchSharp/THSStorage.h | 16 +++++++++++ src/TorchSharp/Amp/AutocastMode.cs | 15 +++++------ .../PInvoke/LibTorchSharp.THSStorage.cs | 10 +++++++ src/TorchSharp/Tensor/Tensor.cs | 11 +++++++- src/TorchSharp/Utils/Half.cs | 2 ++ src/TorchSharp/Utils/TensorAccessor.cs | 27 +++++++++++++++++++ 7 files changed, 95 insertions(+), 9 deletions(-) diff --git a/src/Native/LibTorchSharp/THSStorage.cpp b/src/Native/LibTorchSharp/THSStorage.cpp index c966e0e97..4bc8b84e9 100644 --- a/src/Native/LibTorchSharp/THSStorage.cpp +++ b/src/Native/LibTorchSharp/THSStorage.cpp @@ -23,3 +23,26 @@ void* THSStorage_data_ptr(const Tensor tensor) return dp.get(); } +/* +int* THSStorage_tensor_to_array_int(const Tensor tensor) +{ + return THSStorage_tensor_array(tensor); +} +long* THSStorage_tensor_to_array_long(const Tensor tensor) +{ + return THSStorage_tensor_array(tensor); +} + +float* THSStorage_tensor_to_array_float(const Tensor tensor) +{ + return THSStorage_tensor_array(tensor); +} + +double* THSStorage_tensor_to_array_double(const Tensor tensor) +{ + return THSStorage_tensor_array(tensor); +} +char* THSStorage_tensor_to_array_char(const Tensor tensor) +{ + return THSStorage_tensor_array(tensor); +}*/ \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSStorage.h b/src/Native/LibTorchSharp/THSStorage.h index e66492e11..53a335921 100644 --- a/src/Native/LibTorchSharp/THSStorage.h +++ b/src/Native/LibTorchSharp/THSStorage.h @@ -14,3 +14,19 @@ EXPORT_API(size_t) THSStorage_nbytes(const Tensor tensor); EXPORT_API(void) THSStorage_set_nbytes(const Tensor tensor, size_t nbytes); EXPORT_API(void*) THSStorage_data_ptr(const Tensor tensor); +/* +template +T* THSStorage_tensor_array(const Tensor tensor) +{ +#if TORCH_VERSION_MAJOR >= 2 && TORCH_VERSION_MINOR >= 4 + return tensor->data_ptr(); +#else + return tensor->data(); +#endif +} + +EXPORT_API(int*) THSStorage_tensor_to_array_int(const Tensor tensor); +EXPORT_API(long*) THSStorage_tensor_to_array_long(const Tensor tensor); +EXPORT_API(float*) THSStorage_tensor_to_array_float(const Tensor tensor); +EXPORT_API(double*) THSStorage_tensor_to_array_double(const Tensor tensor); +EXPORT_API(char*) THSStorage_tensor_to_array_char(const Tensor tensor);*/ \ No newline at end of file diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 88a16aa9f..68269f564 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -53,14 +53,14 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab fast_dtype = dtype.Value; if (cache_enabled.HasValue) _cache_enabled = cache_enabled.Value; - + if (dev.type != DeviceType.CPU && dev.type != DeviceType.CUDA && enabled) + throw new Exception($"Currently autocast does not support {dev.type} only CPU or CUDA"); if (dev.type == DeviceType.CPU) { if (fast_dtype != torch.ScalarType.Float16 || fast_dtype != torch.ScalarType.BFloat16) { Debug.WriteLine($"In CPU autocast, but the target d type is not suported. Disabling autocast. CPU autocast only supports dtype of {torch.ScalarType.Float16} or {torch.ScalarType.BFloat16}"); enabled = false; } } else if (dev.type == DeviceType.CUDA) { - if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported()) throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16."); } @@ -131,6 +131,7 @@ public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) return ptr; if (GetDtype(ptr) == type) //if already have same dtype is not necesary convert to dtype, right??? return ptr; + //TODO: Check if is from CPU to passing BFloat16 if support /*if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) return ptr;*/ var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); @@ -190,17 +191,16 @@ private void Dispose(bool disposing) torch.set_autocast_cache_enabled(prev_cache_enabled); } - /*~AutocastMode() - { - - }*/ - public void Dispose() { Dispose(disposing: true); GC.SuppressFinalize(this); } } + /// + /// Trying to make Custom Autocast forwarded that mean in Pytorch + /// like this @torch.autocast(device_type="cuda") + /// public class AutocastAttribute : Attribute { private DeviceType Dev; @@ -208,6 +208,5 @@ public AutocastAttribute(DeviceType dev) { Dev = dev; } - } } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSStorage.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSStorage.cs index 7cf494b7a..bd5b46694 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSStorage.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSStorage.cs @@ -15,5 +15,15 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern IntPtr THSStorage_data_ptr(IntPtr tensor); + /*[DllImport("LibTorchSharp")] + internal static extern IntPtr THSStorage_tensor_to_array_int(IntPtr tensor); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSStorage_tensor_to_array_long(IntPtr tensor); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSStorage_tensor_to_array_float(IntPtr tensor); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSStorage_tensor_to_array_double(IntPtr tensor); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSStorage_tensor_to_array_byte(IntPtr tensor);*/ } } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 5eae88b2f..b89213dea 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -7544,7 +7544,16 @@ public static Tensor WrappedTensorDisposeScope(Func expr) var result = expr(); return result.MoveToOuterDisposeScope(); } - + internal static Tensor InstantiateTensorWithLeakSafeTypeChange(IntPtr handle, ScalarType? dtype) + { + var tensor = new Tensor(handle); + if (dtype.HasValue && tensor.dtype != dtype.Value) { + var typed = tensor.to_type(dtype.Value); + tensor.Dispose(); + return typed; + } + return tensor; + } public static void _amp_foreach_non_finite_check_and_unscale(Tensor found_inf, Tensor inv_scale) { if (found_inf.numel() == 1) diff --git a/src/TorchSharp/Utils/Half.cs b/src/TorchSharp/Utils/Half.cs index f07e89892..0650f1307 100644 --- a/src/TorchSharp/Utils/Half.cs +++ b/src/TorchSharp/Utils/Half.cs @@ -4,6 +4,8 @@ using System.Globalization; using System.Text; +//Is only for NetStandard 2.0, Net 5 or newer already have Half Struct +//TODO: Need make support with Net Core 3? #if NETSTANDARD2_0 namespace System { diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 42fd49c11..4a964de0b 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -4,6 +4,7 @@ using System.Diagnostics; using System.Linq; using System.Runtime.InteropServices; +using TorchSharp.PInvoke; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp.Utils @@ -55,6 +56,32 @@ public T[] ToArray() return new Span(_tensor_data_ptr.ToPointer(), Convert.ToInt32(Cnt)).ToArray(); } } + + /*unsafe { + IntPtr arr = IntPtr.Zero; + if (typeof(T) == typeof(int)) { + arr = NativeMethods.THSStorage_tensor_to_array_int(_tensor.handle); + int[] tot = new int[Cnt]; + Marshal.Copy(arr, tot, 0, (int)Cnt); + } + + if (typeof(T) == typeof(long)) { + + } + + return tot as T[]; + //var stride = _tensor.stride(); + //var res = new T[Cnt]; + //int idx = 0; + //T* ptr = (T*)_tensor_data_ptr; + //for (int ndim = 0; ndim < _tensor.shape.Length; ndim++) { + // for (int xyz = 0; xyz < _tensor.shape[ndim]; xyz++) { + // res[idx++] = ptr[xyz + stride[ndim]]; + // } + //} + //return res; + }*/ + var result = new T[Cnt]; CopyTo(result); return result; From 2c33985f699d41ffc5e0d8de68c85c808cf93396 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 1 Nov 2024 14:33:28 -0300 Subject: [PATCH 39/43] Test and fix some error --- src/TorchSharp/Amp/GradScaler.cs | 204 +++++++---- src/TorchSharp/Autograd.cs | 20 + src/TorchSharp/Utils/UnorderedMap.cs | 6 +- .../TestGradScaler.cs | 346 ------------------ .../TorchSharpTest.WithCudaBinaries.csproj | 2 + .../TestAutocast.cs | 70 ++-- test/TorchSharpTest/TestGradScaler.cs | 22 +- 7 files changed, 216 insertions(+), 454 deletions(-) delete mode 100644 test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs rename test/{TorchSharpTest.WithCudaBinaries => TorchSharpTest}/TestAutocast.cs (80%) diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index d3d7a78b3..4aef1a249 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -1,6 +1,8 @@ using System; +using System.Collections; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using TorchSharp.Modules; using TorchSharp.Utils; @@ -28,7 +30,7 @@ public enum OptState private UnorderedMap _refresh_per_optimizer_state() { return new UnorderedMap() { - { "state", OptState.Ready }, { "found_inf_per_device", null} + { "stage", OptState.Ready }, { "found_inf_per_device", null} }; } //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py @@ -36,7 +38,7 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) { //https://gist.github.com/dorpxam/67ad2bc222b2cf567d4a6fc298375e13 - Debug.Assert(dev == torch.CPU || dev == torch.CUDA); + Debug.Assert(dev.type == DeviceType.CPU || dev.type== DeviceType.CUDA); device = dev; Enabled = enabled; InitScale = init_scale; @@ -56,16 +58,18 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac private Tuple check_scale_growth_tracker(string name) { var fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."; - Debug.Assert(_scale is null, $"Attempted {name} but {nameof(_scale)} is None {fix}"); - Debug.Assert(_growth_tracker is null, $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); + Debug.Assert(!(_scale is null), $"Attempted {name} but {nameof(_scale)} is None {fix}"); + Debug.Assert(!(_growth_tracker is null), $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); return new Tuple(_scale, _growth_tracker); } private void LazyInitScaleGrowthTracker(torch.Device dev) { - _scale = torch.full(0, InitScale, torch.ScalarType.Float32, device: dev); - _growth_tracker = torch.full(0, InitGrowthTracker, torch.ScalarType.Int32, device: dev); + Debug.Assert(_growth_tracker is null, "_growth_tracker initialized before _scale"); + + _scale = torch.full(1, InitScale, torch.ScalarType.Float32, device: dev); + _growth_tracker = torch.full(1, InitGrowthTracker, torch.ScalarType.Int32, device: dev); } //private Dictionary @@ -89,17 +93,17 @@ private class MultiDeviceReplicator { private readonly torch.Tensor master; - internal readonly Dictionary per_device_tensors = new Dictionary(); + internal readonly Dictionary per_device_tensors = new Dictionary(); public MultiDeviceReplicator(torch.Tensor master_tensor) { master = master_tensor; } - public torch.Tensor Get(torch.Device device) + public torch.Tensor Get(DeviceType device) { torch.Tensor retval=null; if (!per_device_tensors.ContainsKey(device)) { - retval = master.to(device, true, non_blocking: true); + retval = master.to(new torch.Device(device), true, non_blocking: true); per_device_tensors.Add(device, retval); } return retval; @@ -115,7 +119,7 @@ private torch.Tensor apply_scale(torch.Tensor scale) } stash.Add(new MultiDeviceReplicator(_scale)); } - return scale * stash[0].Get(scale.device); + return scale * stash[0].Get(scale.device.type); } private void apply_scale(IList scales) @@ -123,51 +127,51 @@ private void apply_scale(IList scales) for (int i = 0; i < scales.Count; i++) scales[i] = apply_scale(scales[i]); } - public Dictionary unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16) + public Dictionary unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16) { var per_device_inv_scale = new MultiDeviceReplicator(inv_scale); var per_device_found_inf= new MultiDeviceReplicator(found_inf); - Dictionary>> per_device_and_dtype_grads = new Dictionary>>(); + Dictionary>> per_device_and_dtype_grads = new Dictionary>>(); using (torch.no_grad()) { - if (optimizer is AdamW adamW){ //Some optimizer have parameter tensor for unscale_grads i need that. [20/10/24 WHY I DO THIS???? ] - using (var enumer = adamW.parameters().GetEnumerator()) { - while (enumer.MoveNext()) { - var param = enumer.Current; - if (param is null) - continue; - if (!allow_fp16 && param.dtype == torch.ScalarType.Float16) - throw new Exception("Attempting to unscale FP16 Gradients"); - torch.Tensor to_unscale; - if (param.grad.is_sparse) { - if (param.grad.dtype == torch.ScalarType.Float16) { - param.grad = param.grad.coalesce(); - } - - to_unscale = param.grad.SparseValues; - } else { - to_unscale = param.grad; + + using (var enumer = optimizer.parameters().GetEnumerator()) { + while (enumer.MoveNext()) { + var param = enumer.Current; + if (param is null) + continue; + if (!allow_fp16 && param.dtype == torch.ScalarType.Float16) + throw new Exception("Attempting to unscale FP16 Gradients"); + torch.Tensor to_unscale; + if (param.grad.is_sparse) { + if (param.grad.dtype == torch.ScalarType.Float16) { + param.grad = param.grad.coalesce(); } - if (!per_device_and_dtype_grads.ContainsKey(to_unscale.device)) { - per_device_and_dtype_grads.Add(to_unscale.device, new Dictionary>()); - per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List()); - per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale); + to_unscale = param.grad.SparseValues; + } else { + to_unscale = param.grad; + } + + if (!per_device_and_dtype_grads.ContainsKey(to_unscale.device.type)) { + per_device_and_dtype_grads.Add(to_unscale.device.type, new Dictionary>()); + per_device_and_dtype_grads[to_unscale.device.type].Add(to_unscale.dtype, new List()); + per_device_and_dtype_grads[to_unscale.device.type][to_unscale.dtype].Add(to_unscale); + } else { + if (!per_device_and_dtype_grads[to_unscale.device.type].ContainsKey(to_unscale.dtype)) { + per_device_and_dtype_grads[to_unscale.device.type].Add(to_unscale.dtype, new List()); } else { - if (!per_device_and_dtype_grads[to_unscale.device].ContainsKey(to_unscale.dtype)) { - per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List()); - } else { - per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale); - } + per_device_and_dtype_grads[to_unscale.device.type][to_unscale.dtype].Add(to_unscale); } - } - } - foreach (var d in per_device_and_dtype_grads) - foreach (var g in d.Value) - torch._amp_foreach_non_finite_check_and_unscale_(g.Value, per_device_found_inf.Get(d.Key), per_device_inv_scale.Get(d.Key)); + } } + + foreach (var d in per_device_and_dtype_grads) + foreach (var g in d.Value) + torch._amp_foreach_non_finite_check_and_unscale_(g.Value, per_device_found_inf.Get(d.Key), per_device_inv_scale.Get(d.Key)); + } return per_device_found_inf.per_device_tensors; @@ -182,7 +186,7 @@ public void unscale(torch.optim.Optimizer optimizer) //if(_per_optimizer_states.ContainsKey(optimizer.GetHashCode())) var optimizer_state = _per_optimizer_states[optimizer.GetHashCode()]; - if (optimizer_state["state"] is OptState state) { + if (optimizer_state["stage"] is OptState state) { if (state == OptState.Unscaled) { throw new Exception($"{nameof(unscale)} has already been called on this optimizer since the last update()"); } @@ -191,47 +195,95 @@ public void unscale(torch.optim.Optimizer optimizer) } Debug.Assert(!(_scale is null)); - var inv_scale = _scale.@double().reciprocal().@float(); - var found_inf = torch.full(new ReadOnlySpan(new long[] { 0 }), 0.0f, torch.ScalarType.Float32,_scale.device); + var inv_scale = _scale.to(torch.ScalarType.Float64).reciprocal().to(torch.ScalarType.Float32); + var found_inf = torch.full(1, 0.0f, torch.ScalarType.Float32,_scale.device); optimizer_state["found_inf_per_device"] = unscale_grads(optimizer, inv_scale, found_inf, false); optimizer_state["stage"] = OptState.Unscaled; } - - private float? maybe_opt_step(torch.optim.Optimizer optimizer, UnorderedMap optimizer_state) + /* + * + + template + inline auto sum(PerDeviceTensors const& per_device) + { + Type sum = Type(0); + for (auto&& [_, v] : per_device) + sum += v.item(); + return sum; + } + * + */ + private Scalar maybe_opt_step(torch.optim.Optimizer optimizer, UnorderedMap optimizer_state, Func closure = null) { //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L351 - float? retval=0; - foreach(var d in optimizer_state) - if (d.Value is torch.Tensor t) - retval += t.item(); - if (retval==0) - retval = optimizer.step().item(); - return retval; + if (optimizer_state.ContainsKey("found_inf_per_device")) { + + double? retval = 0; + if (optimizer_state["found_inf_per_device"] is Dictionary dict) { + foreach (var d in dict) + { + retval += (double)d.Value.item(); + //retval += d.Value.Sum(x=>x.item()); + /*foreach(var t in d.Value) + retval += t.item();*/ + //retval += d.Value.item(); + } + /*if (retval.HasValue) { + if(retval.Value > 0) + return + }*/ + + //https://gist.github.com/dorpxam/67ad2bc222b2cf567d4a6fc298375e13#file-gradscaler-hpp-L209 + } + /*foreach (var d in optimizer_state) + if (d.Value is torch.Tensor t) + retval += t.item();*/ + var res = optimizer.step(closure); + if (!(res is null)) { + return res.item(); + } + + /*if (retval == 0) + retval = .item(); + return retval;*/ + } + + return null; } - public float? step(torch.optim.Optimizer optimizer, params object[] obj) + public Scalar step(torch.optim.Optimizer optimizer, Func optimizer_args = null) { - if (obj.Length == 0) - throw new Exception("The obj param cannot be empty"); if (!Enabled) { + var res = optimizer.step(optimizer_args); + if (!(res is null)) + return res.item(); + return null; + } + + if (optimizer_args != null) + throw new Exception("Closure use is not currently supported if GradScaler is Enabled"); + + /*if (!Enabled) { if(obj.Length == 1 && obj[0] is Func closure) return optimizer.step(closure).item(); return null; - } + }*/ check_scale_growth_tracker(nameof(step)); var optimizer_state = _per_optimizer_states[optimizer.GetHashCode()]; + if (optimizer_state["stage"] is OptState state && state == OptState.Stepped) throw new Exception($"{nameof(step)} has already been called since the last update()"); - float? retval; + Scalar retval=null; //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L398 var f = optimizer.GetType().GetField("_step_support_amp_scaling"); if (f != null && f.GetValue(optimizer) is bool b && !b) { bool has_grad_scaler = false;//I dont know how deal this... if (has_grad_scaler) { + throw new NotImplementedException(); } else { if (optimizer_state["stage"] is OptState optstate && optstate == OptState.Ready) @@ -260,8 +312,12 @@ public void unscale(torch.optim.Optimizer optimizer) } if (optimizer_state["stage"] is OptState state1 && state1 == OptState.Ready) unscale(optimizer); - Debug.Assert((optimizer_state["found_inf_per_device"] as torch.Tensor[])?.Length > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); - retval = maybe_opt_step(optimizer, optimizer_state); + if (optimizer_state["found_inf_per_device"] is ICollection col) + { + Debug.Assert(col.Count > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); + } + //Debug.Assert((optimizer_state["found_inf_per_device"] as Dictionary>)?.Count > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); + retval = maybe_opt_step(optimizer, optimizer_state, optimizer_args); optimizer_state["stage"] = OptState.Stepped; return retval; } @@ -294,11 +350,25 @@ public void update(object new_scale = null) _scale.copy_(t); } } else { - IList found_infs = new List(); - foreach (var state in _per_optimizer_states) - foreach (var found_inf in state.Value) - if(found_inf.Value is torch.Tensor t) - found_infs.Add(t); + List found_infs = new List(); + foreach (var state in _per_optimizer_states) { + if (state.Value["found_inf_per_device"] is Dictionary d) { + foreach(var found_inf in d.Values) + found_infs.Add(found_inf.to(_scale.device, true)); + } + } + + /*foreach (var found_inf in state.Value) { + if (found_inf.Value is torch.Tensor t) { + found_infs.Add(t); + } + + if (found_inf.Value is List ts) { + foreach(var te in ts) + found_infs.Add(te); + } + }*/ + Debug.Assert(found_infs.Count > 0, "No inf checks were recorded prior to update."); torch.Tensor found_inf_combined = found_infs[0]; if (found_infs.Count > 1) diff --git a/src/TorchSharp/Autograd.cs b/src/TorchSharp/Autograd.cs index 4c73fce46..d7c29cc24 100644 --- a/src/TorchSharp/Autograd.cs +++ b/src/TorchSharp/Autograd.cs @@ -2,6 +2,7 @@ using System; using System.Linq; using System.Collections.Generic; +using TorchSharp.Modules; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -145,6 +146,25 @@ public static IList grad(IList outputs, IList inputs, IL return results.Array.Select(x => new Tensor(x)).ToList(); } + public static IList grad(IList inputs, IEnumerable outputs, IList grad_outputs = null, bool retain_graph = false, bool create_graph = false, bool allow_unused = false) + { + using var outs = new PinnedArray(); + using var ins = new PinnedArray(); + using var grads = new PinnedArray(); + using var results = new PinnedArray(); + + IntPtr insRef = outs.CreateArray(outputs.Select(p => p.Handle).ToArray()); + IntPtr outsRef = ins.CreateArray(inputs.Select(p => p.Handle).ToArray()); + IntPtr gradsRef = grad_outputs == null ? IntPtr.Zero : grads.CreateArray(grad_outputs.Select(p => p.Handle).ToArray()); + long gradsLength = grad_outputs == null ? 0 : grads.Array.Length; + + //https://gist.github.com/dorpxam/67ad2bc222b2cf567d4a6fc298375e13#file-gradscaler_test-hpp-L318 + + THSAutograd_grad(outsRef, ins.Array.Length, insRef, outs.Array.Length, gradsRef, gradsLength, retain_graph, create_graph, allow_unused, results.CreateArray); + CheckForErrors(); + return results.Array.Select(x => new Tensor(x)).ToList(); + } + /// /// Computes the sum of gradients of given tensors with respect to graph leaves. /// diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index 6eb073b1d..3579f3cee 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -81,8 +81,10 @@ private static bool IsCollectionType(Type type) } public new TValue this[TKey tk] { get { - /*if (!this.ContainsKey(tk) && default_dict == null) - return default_dict;*/ + if (base.Count == 0 && !this.ContainsKey(tk) && default_dict != null) { + base[tk] = default_dict; + return base[tk]; + } if (this.ContainsKey(tk)) return base[tk]; var t = typeof(TValue); diff --git a/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs b/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs deleted file mode 100644 index af8b32afd..000000000 --- a/test/TorchSharpTest.WithCudaBinaries/TestGradScaler.cs +++ /dev/null @@ -1,346 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using TorchSharp; -using TorchSharp.Amp; -using TorchSharp.Modules; -using Xunit; -using static TorchSharp.torch; -using static TorchSharp.torch.nn; -namespace TorchSharpTest.WithCudaBinaries -{ - public class TestGradScaler - { - internal DeviceType device = DeviceType.CUDA; - internal ScalarType dtype = ScalarType.Float32; - - private (Sequential modctrl, Sequential modscal, torch.optim.Optimizer optctrl, torch.optim.Optimizer optscal) create_scaling_model_optimizer(DeviceType dev = DeviceType.CUDA) - { - var mod_control =Sequential(torch.nn.Linear(8,8), torch.nn.Linear(8, 8)); - mod_control.to(dev); - var mod_scaling = Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)); - mod_scaling.to(dev); - - using (torch.no_grad()) { - - using (var enumer = mod_control.parameters().Zip(mod_scaling.parameters()).GetEnumerator()) - while (enumer.MoveNext()) - enumer.Current.Second.copy_(enumer.Current.First); - - var opt_control = torch.optim.SGD(mod_control.parameters(), 1.0f); - var opt_scaling = torch.optim.SGD(mod_scaling.parameters(), 1.0f); - return (mod_control, mod_scaling, opt_control, opt_scaling); - } - } - internal (Sequential modctrl, Sequential modscal, torch.optim.Optimizer optctrl, torch.optim.Optimizer optscal, List> data, MSELoss loss_fn, int skip_iter) create_scaling_case(DeviceType dev = DeviceType.CUDA, ScalarType dtype = ScalarType.Float32) - { - var data = new List>() { - new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), - new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), - new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), - new(torch.randn(new long[]{8,8}, dtype, new Device(dev)),torch.randn(new long[]{8,8}, dtype, new Device(dev))), - }; - - var loss_fn = MSELoss(); - loss_fn.to(DeviceType.CUDA); - const int skip_iter = 2; - var csmo = create_scaling_model_optimizer(dev); - return (csmo.modctrl, csmo.modscal, csmo.optctrl, csmo.optscal, data, loss_fn, skip_iter); - } - internal void run_scaling_case(Action>, Sequential, torch.optim.Optimizer, GradScaler, MSELoss, int, bool> run, int unskipped, int skipped, double atol = 1e07) - { - const double rtol = 1e-7d; - bool[] enableds = new bool[] { true, false }; - foreach (var enabled in enableds) { - var res =create_scaling_case(); - var scaler = new GradScaler(new Device(DeviceType.CUDA), 128.0f, 2.0f, growth_interval: 1); - run.Invoke(res.data, res.modctrl, res.optctrl, scaler, res.loss_fn, res.skip_iter, false); - run.Invoke(res.data, res.modscal, res.optscal, scaler, res.loss_fn, res.skip_iter, true); - if (enabled) { - var net_growth = unskipped > 0 ? MathF.Pow(scaler.get_growth_factor(), unskipped) : 1.0f; - var net_backoff = skipped> 0 ? MathF.Pow(scaler.get_backoff_factor(), skipped) : 1.0f; - Assert.Equal((128.0f * net_growth * net_backoff), scaler.get_scale()); - - } else { - Assert.Equal(1.0f, scaler.get_scale()); - } - - foreach(var seq in res.modctrl.parameters().Zip(res.modscal.parameters())){ - var c_grad = seq.First.grad; - var s_grad = seq.Second.grad; - if(!(c_grad is null) && !(s_grad is null)) - Assert.True(torch.allclose(seq.First.grad, seq.Second.grad, rtol, atol)); - var c_state = res.optctrl.ParamGroups; - var s_state = res.optscal.ParamGroups; - foreach(var c_s_state in c_state.Zip(s_state)) { - if (c_s_state.First is ParamGroup pg_c_state && c_s_state.Second is ParamGroup pg_s_state) { - foreach (var c_s_state_p in pg_c_state.Parameters.Zip(pg_s_state.Parameters)) - Assert.True(torch.allclose(c_s_state_p.First, c_s_state_p.Second, rtol, atol)); - } - } - Assert.True(torch.allclose(seq.First, seq.Second, rtol, atol)); - } - } - } - - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingUnscaleSparse() - { - var scaler = new GradScaler(new Device(device)); - var inv_scale = torch.full(1, 0.25, dtype, new Device(device)); - var found_inf = torch.empty(1, dtype, new Device(device)); - var cur = found_inf.device; - var i = torch.tensor(new long[,] { { 0, 1, 1 }, { 2, 0, 2 } }, ScalarType.Int64, new Device(DeviceType.CUDA)); - var v = torch.tensor(new float[] { 16.0f,32.0f,64.0f}, ScalarType.Float32, new Device(DeviceType.CUDA)); - var s = torch.sparse_coo_tensor(i,v, new long[]{2,3}, dtype, new Device(DeviceType.CUDA)); - - var p = s.clone(); - Assert.True(p.is_sparse); - var optA = torch.optim.SGD(new[] { new Parameter(p) }, 1.0); - p.grad = s.clone(); - found_inf.zero_(); - found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - - Assert.Equal(0.0f, found_inf.item()); - Assert.True(torch.equal(p.grad.to_dense(), (s/4).to_dense()).item()); - - v = torch.tensor(new float[] { 16.0f, 32.0f, float.PositiveInfinity }); - p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); - found_inf.zero_(); - found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - Assert.Equal(1.0f, found_inf.item()); - - v = torch.tensor(new float[] { 16.0f, 32.0f, float.NaN }); - p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); - found_inf.zero_(); - found_inf = scaler.unscale_grads(optA, inv_scale, found_inf, false)[cur]; - Assert.Equal(1.0f, found_inf.item()); - - p = s.clone().to(ScalarType.Float16); - Assert.True(p.is_sparse); - var optB = torch.optim.SGD(new Parameter[] { new Parameter(p) }, 1.0); - - p.grad = s.clone().to(ScalarType.Float16); - found_inf.zero_(); - found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; - Assert.Equal(0.0f, found_inf.item()); - Assert.True(torch.equal(p.grad.to_dense(), (s.to(ScalarType.Float16) / 4).to_dense()).item()); - - i = torch.tensor(new long[,] { { 0, 1, 0 }, { 2, 0, 2 } }); - v = torch.tensor(new float[] { 64000.0f, 32.0f, 64000.0f }); - p.grad = torch.sparse_coo_tensor(i, v, new long[] { 2, 3 }, dtype, new Device(DeviceType.CUDA)); - found_inf.zero_(); - found_inf = scaler.unscale_grads(optB, inv_scale, found_inf, true)[cur]; - Assert.Equal(0.0f, found_inf.item()); - } - - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingStateDict() - { - bool[] lazy_init_scale = new[] { true, false }; - foreach (var l in lazy_init_scale) { - var s0 = new GradScaler(new Device(DeviceType.CUDA), 3.0f, 4.0f, 0.5f, 2); - var s1 = new GradScaler(new Device(DeviceType.CUDA), 6.0f, 7.0f, 0.8f, 1); - s1.set_init_growth_tracker(7); - if (l) { - s1.scale(torch.full(1, 4.0f, ScalarType.Float32, new Device(DeviceType.CUDA, 0))); - Assert.Equal(ScalarType.Float32, s1.get_scale_async().dtype); - } - - var re = s0.state_dict(); - s1.load_state_dict(re); - - Assert.Equal(3.0f, s1.get_scale()); - Assert.Equal(0.5f, s1.get_growth_factor()); - Assert.Equal(2, s1.get_growth_interval()); - Assert.Equal(0.0f, s1.get_init_growth_tracker()); - } - } - - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScaleWillNotOverflow() - { - var model = torch.nn.Linear(5, 1).to(DeviceType.CUDA); - var optimizer = torch.optim.Adam(model.parameters()); - var scaler = new GradScaler(new Device(DeviceType.CUDA), 1e38f, MathF.Pow(2.0f, 4), growth_interval:1); - optimizer.zero_grad(); - var x = torch.randn(new long[]{1,5}).to(DeviceType.CUDA); - var y = 1e-30 * torch.randn(new long[]{1,1}).to(DeviceType.CUDA); - var l = torch.pow(model.forward(x) - y, 2).mean(); - scaler.scale(l).backward(); - scaler.step(optimizer); - scaler.update(); - Assert.True(!scaler.get_scale_async().isinf().item() && !scaler.get_scale_async().isnan().item()); - } - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingClipping() - { - run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( - (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { - const float max_norm = 0.2f; - int idx = 0; - foreach (var ipair in data) { - //ipair. - optimizer.zero_grad(); - var output = model.forward(ipair.Key); - var loss = loss_fn.forward(output, ipair.Value); - if (try_scaling_api) { - scaler.scale(loss).backward(); - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm * scaler.get_scale()); - if (idx == skip_iter && scaler.IsEnabled()) { - var weight = (model[1] as Linear)?.weight; - if (weight.is_null()) - throw new ArgumentNullException(nameof(weight)); - weight.grad.fill_(float.PositiveInfinity); - } - - scaler.step(optimizer); - scaler.update(); - } else { - loss.backward(); - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); - if (!scaler.IsEnabled() || (idx != skip_iter)) - optimizer.step(); - } - - idx++; - } - })), - 3, 1, 1e-5); - } - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingClippingSeparateUnscale() - { - run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( - (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { - const float max_norm = 0.2f; - int idx = 0; - foreach (var ipair in data) { - //ipair. - optimizer.zero_grad(); - var output = model.forward(ipair.Key); - var loss = loss_fn.forward(output, ipair.Value); - if (try_scaling_api) { - scaler.scale(loss).backward(); - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); - if (idx == skip_iter && scaler.IsEnabled()) { - var weight = (model[1] as Linear)?.weight; - weight.grad.fill_(float.PositiveInfinity); - } - - scaler.step(optimizer); - scaler.update(); - } else { - loss.backward(); - torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm); - if (!scaler.IsEnabled() || (idx != skip_iter)) - optimizer.step(); - } - - idx++; - } - })), - 3, 1); - } - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingPenalty() - { - - run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( - (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { - //const float max_norm = 0.2f; - int idx = 0; - foreach (var ipair in data) { - //ipair. - optimizer.zero_grad(); - var output = model.forward(ipair.Key); - var loss = loss_fn.forward(output, ipair.Value); - List grad_params = new List(); - if (try_scaling_api) { - //throw new NotImplementedException(); - //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); - //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) - var inv_scale = 1.0f / scaler.get_scale(); - for (int i = 0; i < grad_params.Count; i++) - grad_params[i] *= inv_scale; - } else { - //throw new NotImplementedException(); - //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); - //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) - } - - var grad_norm = torch.zeros(new long[] { 1 }).to(ipair.Key.device); - for (int i = 0; i < grad_params.Count; i++) - grad_norm += grad_params[i].pow(2).sum(); - grad_norm = grad_norm.sqrt(); - loss = loss + grad_norm; - if (try_scaling_api) { - scaler.scale(loss).backward(); - if (idx == skip_iter && scaler.IsEnabled()) { - var weight = (model[1] as Linear)?.weight; - weight.grad.fill_(float.PositiveInfinity); - } - - scaler.step(optimizer); - scaler.update(); - } else { - loss.backward(); - if (!scaler.IsEnabled() || (idx != skip_iter)) { - optimizer.step(); - } - } - idx++; - } - })), - 3, 1); - } - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingAccumulation() - { - run_scaling_case(new Action>, Sequential, optim.Optimizer, GradScaler, MSELoss, int, bool>(( - (data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api) => { - const int iters_to_accumulate= 2; - int idx = 0; - foreach (var ipair in data) { - //ipair. - optimizer.zero_grad(); - var output = model.forward(ipair.Key); - var loss = loss_fn.forward(output, ipair.Value); - loss /= iters_to_accumulate; - - if (try_scaling_api) { - scaler.scale(loss).backward(); - } else { - loss.backward(); - } - - if ((idx + 1) % iters_to_accumulate == 0) { - if (try_scaling_api) { - scaler.step(optimizer); - scaler.update(); - optimizer.zero_grad(); - } else { - optimizer.step(); - optimizer.zero_grad(); - } - } - idx++; - } - })), - 2, 0); - } - [Fact] - [TestOf(nameof(GradScaler))] - public void TestGradScalingMultiple() - { - throw new NotImplementedException(); - } - } -} diff --git a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj index 50b2438df..6f7a0ed24 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj +++ b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj @@ -26,6 +26,8 @@ Always + + diff --git a/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs b/test/TorchSharpTest/TestAutocast.cs similarity index 80% rename from test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs rename to test/TorchSharpTest/TestAutocast.cs index 01b78e65a..f7ade93b4 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TestAutocast.cs +++ b/test/TorchSharpTest/TestAutocast.cs @@ -13,25 +13,31 @@ public class TestAutocast { internal const ScalarType f32 = ScalarType.Float32; internal const ScalarType f16 = ScalarType.Float16; + internal static DeviceType availableDevice; private static void CheckCUDA() { - if (!torch.cuda_is_available()) - throw new Exception("CUDA IS NOT AVAILABLE"); + if (!torch.cuda_is_available()) { + availableDevice = DeviceType.CPU; + //throw new Exception("CUDA IS NOT AVAILABLE"); + } else { + availableDevice= DeviceType.CUDA; + } + AutocastMode.GetInstance(true); Assert.True(AutocastMode.IsAutocastEnabled()); } private Tensor randnf32cuda(long dim0) { - return torch.randn(dim0, f32, new Device(DeviceType.CUDA)); + return torch.randn(dim0, f32, new Device(availableDevice)); } private Tensor randnf32cuda(long dim0, long dim1) { - return torch.randn(dim0, dim1, f32, new Device(DeviceType.CUDA)); + return torch.randn(dim0, dim1, f32, new Device(availableDevice)); } private Tensor randnf32cuda(long dim0, long dim1, long dim2) { - return torch.randn(dim0, dim1,dim2, f32, new Device(DeviceType.CUDA)); + return torch.randn(dim0, dim1,dim2, f32, new Device(availableDevice)); } [Fact] [TestOf("AutocastF16")] @@ -67,7 +73,7 @@ public void TestAutocastF16() Assert.Equal(ScalarType.Float16, f.dtype); Assert.Equal(ScalarType.Float16, g.dtype); Assert.Equal(ScalarType.Float16, h.dtype); - Assert.Equal(ScalarType.Float16, i.dtype); + Assert.Equal(ScalarType.Float16, i.dtype); Assert.Equal(ScalarType.Float16, j.dtype);*/ //throw new NotImplementedException(); } @@ -94,8 +100,8 @@ public void TestAutocastF16Arithmetic() var mat2 = randnf32cuda(3, 3); var M3 = randnf32cuda(4, 3); - var vec1 = torch.rand(4, f32, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, f32, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(4, f32, new Device(availableDevice)); + var vec2 = torch.rand(3, f32, new Device(availableDevice)); using (AutocastMode.GetInstance().Enter()) { var c = cm.matmul(dm); var d = M.addbmm(batch1, batch2); @@ -124,16 +130,16 @@ public void TestAutocastF16Cell() { CheckCUDA(); //Like GRUCell, LSTM, RNN - var l = Linear(4, 4).to(DeviceType.CUDA); - var gru = GRUCell(4, 4).to(DeviceType.CUDA); - var lstm = LSTMCell(10, 20).to(DeviceType.CUDA); - var rnn = RNNCell(10,20).to(DeviceType.CUDA); + var l = Linear(4, 4).to(availableDevice); + var gru = GRUCell(4, 4).to(availableDevice); + var lstm = LSTMCell(10, 20).to(availableDevice); + var rnn = RNNCell(10,20).to(availableDevice); - var a = torch.rand(4,4, f32, new Device(DeviceType.CUDA)); - var b = torch.rand(4,4, f32, new Device(DeviceType.CUDA)); - var inpRNN = torch.rand(3,10, f32, new Device(DeviceType.CUDA)); - var hx = torch.rand(3,20, f32, new Device(DeviceType.CUDA)); - var cx = torch.rand(3,20, f32, new Device(DeviceType.CUDA)); + var a = torch.rand(4,4, f32, new Device(availableDevice)); + var b = torch.rand(4,4, f32, new Device(availableDevice)); + var inpRNN = torch.rand(3,10, f32, new Device(availableDevice)); + var hx = torch.rand(3,20, f32, new Device(availableDevice)); + var cx = torch.rand(3,20, f32, new Device(availableDevice)); Assert.Equal(f32, a.dtype); Assert.Equal(f32, b.dtype); @@ -161,8 +167,8 @@ public void TestAutocastF16Other() { //Like Linear, prelu, etc. CheckCUDA(); - var pr = PReLU(8).to(DeviceType.CUDA); - var a = torch.rand(8, 8, ScalarType.Float32, new Device(DeviceType.CUDA)); + var pr = PReLU(8).to(availableDevice); + var a = torch.rand(8, 8, ScalarType.Float32, new Device(availableDevice)); Assert.Equal(f32, a.dtype); using (AutocastMode.GetInstance().Enter()) { a = pr.forward(a); @@ -180,13 +186,13 @@ public void TestAutocastF16Convolutions() { CheckCUDA(); //Conv 1d,2d,3d, conv_transpose 1d,2d,3d - var c1 =Conv1d(4,4, 3).to(DeviceType.CUDA); - var c2 =Conv2d(4,4, 3).to(DeviceType.CUDA); - var c3 =Conv3d(4,4, 3).to(DeviceType.CUDA); + var c1 =Conv1d(4,4, 3).to(availableDevice); + var c2 =Conv2d(4,4, 3).to(availableDevice); + var c3 =Conv3d(4,4, 3).to(availableDevice); - var a = torch.rand(4, 4, f32, new Device(DeviceType.CUDA)); - var b = torch.rand(4, 4,3, f32, new Device(DeviceType.CUDA)); - var c = torch.rand(4, 4,4,3, f32, new Device(DeviceType.CUDA)); + var a = torch.rand(4, 4, f32, new Device(availableDevice)); + var b = torch.rand(4, 4,3, f32, new Device(availableDevice)); + var c = torch.rand(4, 4,4,3, f32, new Device(availableDevice)); Assert.Equal(f32, a.dtype); using (AutocastMode.GetInstance().Enter()) { a = c1.forward(a); @@ -215,7 +221,7 @@ public void TestAutocastF32Trigonometry() { CheckCUDA(); //Purpose rand f16 because inside autocast with these operations should return as f32 - var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ @@ -238,7 +244,7 @@ public void TestAutocastF32Trigonometry() public void TestAutocastF32Logarithmic() { CheckCUDA(); - var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ @@ -272,12 +278,12 @@ public void TestAutocastF32Other() public void TestAutocastF32Loss() { CheckCUDA(); - var a = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); - var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); + var b = torch.rand(3, 2, 4, f16, new Device(availableDevice)); + var vec1 = torch.rand(3, f16, new Device(availableDevice)); + var vec2 = torch.rand(3, f16, new Device(availableDevice)); using (AutocastMode.AutoCastEnter()) { - var c = torch.nn.L1Loss().to(DeviceType.CUDA).forward(a,b); + var c = torch.nn.L1Loss().to(availableDevice).forward(a,b); Assert.Equal(f32, c.dtype); } } diff --git a/test/TorchSharpTest/TestGradScaler.cs b/test/TorchSharpTest/TestGradScaler.cs index 42c997f43..07888ebe8 100644 --- a/test/TorchSharpTest/TestGradScaler.cs +++ b/test/TorchSharpTest/TestGradScaler.cs @@ -7,13 +7,18 @@ using Xunit; using static TorchSharp.torch; using static TorchSharp.torch.nn; -namespace TorchSharpTest +namespace TorchSharpTest.WithCudaBinaries { public class TestGradScaler { - internal DeviceType device = DeviceType.CPU; + //https://gist.github.com/dorpxam/67ad2bc222b2cf567d4a6fc298375e13 + internal DeviceType device = DeviceType.CUDA; internal ScalarType dtype = ScalarType.Float32; - + private static void CheckCUDA() + { + if (!torch.cuda_is_available()) + throw new Exception("CUDA IS NOT AVAILABLE"); + } private (Sequential modctrl, Sequential modscal, torch.optim.Optimizer optctrl, torch.optim.Optimizer optscal) create_scaling_model_optimizer(DeviceType dev = DeviceType.CUDA) { var mod_control =Sequential(torch.nn.Linear(8,8), torch.nn.Linear(8, 8)); @@ -87,10 +92,11 @@ internal void run_scaling_case(Action grad_params = new List(); + IList grad_params = new List(); if (try_scaling_api) { //throw new NotImplementedException(); //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); - //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) + //grad_params = torch.autograd.grad(new List() { scaler.scale(loss) }, model.parameters()); + grad_params = torch.autograd.grad(new List() { scaler.scale(loss) }, model.parameters(),create_graph:true); var inv_scale = 1.0f / scaler.get_scale(); for (int i = 0; i < grad_params.Count; i++) grad_params[i] *= inv_scale; } else { //throw new NotImplementedException(); //TODO: RESEARCH TORCH::AUTOGRAD:GRAD THE SECOND ARGUMENT SHOULD HAVE model->parameters(); - //grad_params = torch.autograd.grad(new List(){scaler.scale(loss)}, model.parameters()) + grad_params = torch.autograd.grad(new List() { scaler.scale(loss) }, model.parameters(), create_graph: true); } var grad_norm = torch.zeros(new long[] { 1 }).to(ipair.Key.device); From 5a6240c6904191acf1923bee44bb43ccaf5aa1eb Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 3 Nov 2024 23:42:02 -0300 Subject: [PATCH 40/43] trying fix comp THSCuda --- Directory.Build.props | 4 +- src/Native/LibTorchSharp/THSCuda.cpp | 33 ++++-- src/Native/LibTorchSharp/THSCuda.h | 9 +- src/TorchSharp/Amp/AutocastMode.cs | 20 +++- src/TorchSharp/Torch.cs | 11 ++ src/TorchSharp/TorchSharp.csproj | 1 + src/TorchSharp/Utils/TorchCudaStruct.cs | 132 ++++++++++++++++++++++++ test/TorchSharpTest/TestAutocast.cs | 110 +++++++++++--------- 8 files changed, 254 insertions(+), 66 deletions(-) create mode 100644 src/TorchSharp/Utils/TorchCudaStruct.cs diff --git a/Directory.Build.props b/Directory.Build.props index f5687af68..262c4216a 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -92,7 +92,6 @@ $(LibTorchPackageVersion) - true @@ -164,6 +163,9 @@ $(DefineContants);DEBUG false + + $(DefineContants);CUDA_TOOLKIT_FOUND + true diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index a024bf4d0..baca29615 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND +#ifdef CUDA_TOOLKIT_FOUND cudaDeviceProp THSCuda_get_device_prop(int device) { cudaDeviceProp cdp; @@ -16,18 +16,30 @@ cudaDeviceProp THSCuda_get_device_prop(int device) int THSCuda_get_major_compute_capability(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).major) +#ifdef CUDA_TOOLKIT_FOUND + return THSCuda_get_device_prop(device).major; +#else + return -1; +#endif } int THSCuda_get_minor_compute_capability(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).minor) +#ifdef CUDA_TOOLKIT_FOUND + return THSCuda_get_device_prop(device).minor; +#else + return -1; +#endif } int THSCuda_get_device_count(int* count) { - RETURN_CUDA_DEVICE(cudaGetDeviceCount(count)) +#ifdef CUDA_TOOLKIT_FOUND + return cudaGetDeviceCount(count); +#else + return -1; +#endif } int THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total) @@ -47,13 +59,22 @@ int THSCuda_get_free_total(int device, int* id, size_t* free, size_t* total) size_t THSCuda_get_total_memory(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalConstMem) +#ifdef CUDA_TOOLKIT_FOUND + return THSCuda_get_device_prop(device).totalConstMem; +#else + return 0; //Is size_t (unsigned long) so cant be negative. +#endif + //RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalConstMem) } size_t THSCuda_get_global_total_memory(int device) { - RETURN_CUDA_DEVICE(THSCuda_get_device_prop(device).totalGlobalMem) +#ifdef CUDA_TOOLKIT_FOUND + return THSCuda_get_device_prop(device).totalGlobalMem; +#else + return 0; +#endif } //TODO: implement more function diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index 9ec7416ce..00f1d7d03 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -6,18 +6,19 @@ #include "torch/torch.h" #ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND +//#undef CUDA_TOOLKIT_FOUND #define CUDA_TOOLKIT_FOUND 1 #else -#define CUDA_TOOLKIT_FOUND 0 +#undef CUDA_TOOLKIT_FOUND #endif -#define RETURN_CUDA_DEVICE(x) \ +/*#define RETURN_CUDA_DEVICE(x) \ if(CUDA_TOOLKIT_FOUND) \ return x; \ else \ - return -1; + return -1; */ -#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND +#ifdef CUDA_TOOLKIT_FOUND #include "cuda.h" #include "cuda_runtime_api.h" diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 68269f564..ef0c8a43c 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -34,6 +34,7 @@ public sealed class AutocastMode : IDisposable private static AutocastMode instance; public static AutocastMode GetInstance(bool enabled=false) { + //https://github.com/pytorch/pytorch/blob/e6ff07f00e04a9b58efb86a3dd70ed7280ae8522/torch/fx/experimental/proxy_tensor.py#L1251 return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled:enabled,cache_enabled:true); } @@ -45,7 +46,7 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab this.device = dev.type; if (!torch.is_autocast_available(device)) throw new Exception($"User specified an unsupported autocast device_type {device}"); - fast_dtype = torch.get_autocast_dtype(device); + fast_dtype = torch.get_autocast_dtype(device); //If device is CPU this may return as BFloat16 _cache_enabled = torch.is_autocast_cache_enabled(); if (enabled && !torch.cuda_is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast enabled = false; @@ -55,9 +56,16 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab _cache_enabled = cache_enabled.Value; if (dev.type != DeviceType.CPU && dev.type != DeviceType.CUDA && enabled) throw new Exception($"Currently autocast does not support {dev.type} only CPU or CUDA"); + /*if (dev.type == DeviceType.CPU) { + if (torch.get_autocast_dtype(device) != torch.ScalarType.Float32) { + Debug.WriteLine($"Currently is not support {torch.get_autocast_dtype(device)} on CPU, that feature will be add."); + } + fast_dtype = torch.ScalarType.Float32; + }*/ if (dev.type == DeviceType.CPU) { - if (fast_dtype != torch.ScalarType.Float16 || fast_dtype != torch.ScalarType.BFloat16) { - Debug.WriteLine($"In CPU autocast, but the target d type is not suported. Disabling autocast. CPU autocast only supports dtype of {torch.ScalarType.Float16} or {torch.ScalarType.BFloat16}"); + //https://github.com/pytorch/pytorch/blob/e6ff07f00e04a9b58efb86a3dd70ed7280ae8522/torch/amp/autocast_mode.py#L277 + if (enabled && (fast_dtype != torch.ScalarType.Float16 || fast_dtype != torch.ScalarType.BFloat16)) { + Debug.WriteLine($"In CPU autocast, but the target dtype is not suported. Disabling autocast. CPU autocast only supports dtype of {torch.ScalarType.Float16} or {torch.ScalarType.BFloat16}"); enabled = false; } } else if (dev.type == DeviceType.CUDA) { @@ -127,10 +135,12 @@ private static DeviceType GetDeviceType(IntPtr ptr) } public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type) { - if (!IsAutocastEnabled() || !GetInstance().IsEnter) - return ptr; + if(GetInstance().device != DeviceType.CPU) //Warning: Remove this if is finished and working the struct BFloat16 C10 + if (!IsAutocastEnabled() || !GetInstance().IsEnter) + return ptr; if (GetDtype(ptr) == type) //if already have same dtype is not necesary convert to dtype, right??? return ptr; + //TODO: Check if is from CPU to passing BFloat16 if support /*if (!NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr))) return ptr;*/ diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index f0cfa8290..b7979f3b5 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -11,6 +11,7 @@ using System.Text.RegularExpressions; using TorchSharp.Modules; using TorchSharp.PInvoke; +using TorchSharp.Utils; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -620,6 +621,16 @@ public static ulong get_global_total_memory(int device) { return THSCuda_get_global_total_memory(device); } + /*public static cudaDeviceProp get_device_prop(int device) + { +#if CUDA_TOOLKIT_FOUND + cudaDeviceProp cdp = new cudaDeviceProp(); + throw new NotImplementedException("Implement the cudaDeviceProp THSCuda"); + //return cdp; +#else + return null; +#endif + }*/ } /// diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj index d5cb1135d..14c95995f 100644 --- a/src/TorchSharp/TorchSharp.csproj +++ b/src/TorchSharp/TorchSharp.csproj @@ -21,6 +21,7 @@ + diff --git a/src/TorchSharp/Utils/TorchCudaStruct.cs b/src/TorchSharp/Utils/TorchCudaStruct.cs new file mode 100644 index 000000000..8341ec08f --- /dev/null +++ b/src/TorchSharp/Utils/TorchCudaStruct.cs @@ -0,0 +1,132 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Runtime.InteropServices; +namespace TorchSharp.Utils +{ +#pragma warning disable 0169 + public struct cudaDeviceProp + { + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 256)] + char[] name; /*< ASCII string identifying device */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 16)] + char[] uuid; /*< 16-byte unique identifier */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8)] + char[] luid; /*< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */ + uint luidDeviceNodeMask; /*< LUID device node mask. Value is undefined on TCC and non-Windows platforms */ + ulong totalGlobalMem; /*< Global memory available on device in bytes */ + ulong sharedMemPerBlock; /*< Shared memory available per block in bytes */ + int regsPerBlock; /*< 32-bit registers available per block */ + int warpSize; /*< Warp size in threads */ + ulong memPitch; /*< Maximum pitch in bytes allowed by memory copies */ + int maxThreadsPerBlock; /*< Maximum number of threads per block */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)] + int[] maxThreadsDim; /*< Maximum size of each dimension of a block */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)] + int[] maxGridSize; /*< Maximum size of each dimension of a grid */ + int clockRate; /*< Deprecated, Clock frequency in kilohertz */ + ulong totalConstMem; /*< Constant memory available on device in bytes */ + int major; /*< Major compute capability */ + int minor; /*< Minor compute capability */ + ulong textureAlignment; /*< Alignment requirement for textures */ + ulong texturePitchAlignment; /*< Pitch alignment requirement for texture references bound to pitched memory */ + int deviceOverlap; /*< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */ + int multiProcessorCount; /*< Number of multiprocessors on device */ + int kernelExecTimeoutEnabled; /*< Deprecated, Specified whether there is a run time limit on kernels */ + int integrated; /*< Device is integrated as opposed to discrete */ + int canMapHostMemory; /*< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */ + int computeMode; /*< Deprecated, Compute mode (See ::cudaComputeMode) */ + int maxTexture1D; /*< Maximum 1D texture size */ + int maxTexture1DMipmap; /*< Maximum 1D mipmapped texture size */ + int maxTexture1DLinear; /*< Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead. */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxTexture2D; /*< Maximum 2D texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxTexture2DMipmap; /*< Maximum 2D mipmapped texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxTexture2DLinear; /*< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxTexture2DGather; /*< Maximum 2D texture dimensions if texture gather operations have to be performed */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxTexture3D; /*< Maximum 3D texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxTexture3DAlt; /*< Maximum alternate 3D texture dimensions */ + int maxTextureCubemap; /*< Maximum Cubemap texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxTexture1DLayered; /*< Maximum 1D layered texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxTexture2DLayered; /*< Maximum 2D layered texture dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxTextureCubemapLayered;/*< Maximum Cubemap layered texture dimensions */ + int maxSurface1D; /*< Maximum 1D surface size */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxSurface2D; /*< Maximum 2D surface dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxSurface3D; /*< Maximum 3D surface dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxSurface1DLayered; /*< Maximum 1D layered surface dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=3)] + int[] maxSurface2DLayered; /*< Maximum 2D layered surface dimensions */ + int maxSurfaceCubemap; /*< Maximum Cubemap surface dimensions */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] maxSurfaceCubemapLayered;/*< Maximum Cubemap layered surface dimensions */ + ulong surfaceAlignment; /*< Alignment requirements for surfaces */ + int concurrentKernels; /*< Device can possibly execute multiple kernels concurrently */ + int ECCEnabled; /*< Device has ECC support enabled */ + int pciBusID; /*< PCI bus ID of the device */ + int pciDeviceID; /*< PCI device ID of the device */ + int pciDomainID; /*< PCI domain ID of the device */ + int tccDriver; /*< 1 if device is a Tesla device using TCC driver, 0 otherwise */ + int asyncEngineCount; /*< Number of asynchronous engines */ + int unifiedAddressing; /*< Device shares a unified address space with the host */ + int memoryClockRate; /*< Deprecated, Peak memory clock frequency in kilohertz */ + int memoryBusWidth; /*< Global memory bus width in bits */ + int l2CacheSize; /*< Size of L2 cache in bytes */ + int persistingL2CacheMaxSize; /*< Device's maximum l2 persisting lines capacity setting in bytes */ + int maxThreadsPerMultiProcessor;/*< Maximum resident threads per multiprocessor */ + int streamPrioritiesSupported; /*< Device supports stream priorities */ + int globalL1CacheSupported; /*< Device supports caching globals in L1 */ + int localL1CacheSupported; /*< Device supports caching locals in L1 */ + ulong sharedMemPerMultiprocessor; /*< Shared memory available per multiprocessor in bytes */ + int regsPerMultiprocessor; /*< 32-bit registers available per multiprocessor */ + int managedMemory; /*< Device supports allocating managed memory on this system */ + int isMultiGpuBoard; /*< Device is on a multi-GPU board */ + int multiGpuBoardGroupID; /*< Unique identifier for a group of devices on the same multi-GPU board */ + int hostNativeAtomicSupported; /*< Link between the device and the host supports native atomic operations */ + int singleToDoublePrecisionPerfRatio; /*< Deprecated, Ratio of single precision performance (in floating-point operations per second) to double precision performance */ + int pageableMemoryAccess; /*< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */ + int concurrentManagedAccess; /*< Device can coherently access managed memory concurrently with the CPU */ + int computePreemptionSupported; /*< Device supports Compute Preemption */ + int canUseHostPointerForRegisteredMem; /*< Device can access host registered memory at the same virtual address as the CPU */ + int cooperativeLaunch; /*< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */ + int cooperativeMultiDeviceLaunch; /*< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated. */ + ulong sharedMemPerBlockOptin; /*< Per device maximum shared memory per block usable by special opt in */ + int pageableMemoryAccessUsesHostPageTables; /*< Device accesses pageable memory via the host's page tables */ + int directManagedMemAccessFromHost; /*< Host can directly access managed memory on the device without migration. */ + int maxBlocksPerMultiProcessor; /*< Maximum number of resident blocks per multiprocessor */ + int accessPolicyMaxWindowSize; /*< The maximum value of ::cudaAccessPolicyWindow::num_bytes. */ + ulong reservedSharedMemPerBlock; /*< Shared memory reserved by CUDA driver per block in bytes */ + int hostRegisterSupported; /*< Device supports host memory registration via ::cudaHostRegister. */ + int sparseCudaArraySupported; /*< 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise */ + int hostRegisterReadOnlySupported; /*< Device supports using the ::cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU */ + int timelineSemaphoreInteropSupported; /*< External timeline semaphore interop is supported on the device */ + int memoryPoolsSupported; /*< 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise */ + int gpuDirectRDMASupported; /*< 1 if the device supports GPUDirect RDMA APIs, 0 otherwise */ + uint gpuDirectRDMAFlushWritesOptions; /*< Bitmask to be interpreted according to the ::cudaFlushGPUDirectRDMAWritesOptions enum */ + int gpuDirectRDMAWritesOrdering;/*< See the ::cudaGPUDirectRDMAWritesOrdering enum for numerical values */ + uint memoryPoolSupportedHandleTypes; /*< Bitmask of handle types supported with mempool-based IPC */ + int deferredMappingCudaArraySupported; /*< 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays */ + int ipcEventSupported; /*< Device supports IPC Events. */ + int clusterLaunch; /*< Indicates device supports cluster launch */ + int unifiedFunctionPointers; /*< Indicates device supports unified pointers */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=2)] + int[] reserved2; + [MarshalAs(UnmanagedType.ByValArray, SizeConst=1)] + int[] reserved1; /*< Reserved for future use */ + [MarshalAs(UnmanagedType.ByValArray, SizeConst=60)] + int[] reserved; /*< Reserved for future use */ + } +#pragma warning restore 0169 + +} + diff --git a/test/TorchSharpTest/TestAutocast.cs b/test/TorchSharpTest/TestAutocast.cs index f7ade93b4..4a4787b9c 100644 --- a/test/TorchSharpTest/TestAutocast.cs +++ b/test/TorchSharpTest/TestAutocast.cs @@ -13,6 +13,14 @@ public class TestAutocast { internal const ScalarType f32 = ScalarType.Float32; internal const ScalarType f16 = ScalarType.Float16; + + /// + /// If is CUDA Get by default AutoCastType otherwise get FastType of Autocast + /// + /// + private static ScalarType AutoCastType => availableDevice == DeviceType.CUDA ? f16 : AutocastMode.GetInstance().GetFastType(); + private static ScalarType AutoCastTypeOfF32 => availableDevice == DeviceType.CUDA ? f32 : AutocastMode.GetInstance().GetFastType(); + internal static DeviceType availableDevice; private static void CheckCUDA() { @@ -40,8 +48,8 @@ private Tensor randnf32cuda(long dim0, long dim1, long dim2) return torch.randn(dim0, dim1,dim2, f32, new Device(availableDevice)); } [Fact] - [TestOf("AutocastF16")] - public void TestAutocastF16() + [TestOf("AutocastAutoCastType")] + public void TestAutocastAutoCastType() { CheckCUDA(); /*var a = torch.rand(3, 2, 4, ScalarType.Float32, new Device(DeviceType.CUDA)); @@ -79,8 +87,8 @@ public void TestAutocastF16() } [Fact] - [TestOf("AutocastF16")] - public void TestAutocastF16Arithmetic() + [TestOf("AutocastAutoCastType")] + public void TestAutocastAutoCastTypeArithmetic() { //Like matmul, addmm, mm, mv, etc. CheckCUDA(); @@ -111,22 +119,23 @@ public void TestAutocastF16Arithmetic() var h = cm.mm(dm); var i = M2.mv(vec2); var j = batch1.bmm(batch2); - Assert.Equal(f16, c.dtype); - Assert.Equal(f16, d.dtype); - Assert.Equal(f16, f.dtype); - Assert.Equal(f16, h.dtype); - //Assert.Equal(f16, e.dtype); - Assert.Equal(f16, f.dtype); - Assert.Equal(f16, g.dtype); - Assert.Equal(f16, h.dtype); - Assert.Equal(f16, i.dtype); - Assert.Equal(f16, j.dtype); + Assert.Equal(AutoCastType, c.dtype); + Assert.Equal(AutoCastType, d.dtype); + Assert.Equal(AutoCastType, f.dtype); + Assert.Equal(AutoCastType, h.dtype); + //Assert.Equal(AutoCastType, e.dtype); + Assert.Equal(AutoCastType, f.dtype); + Assert.Equal(AutoCastType, g.dtype); + Assert.Equal(AutoCastType, h.dtype); + Assert.Equal(AutoCastType, i.dtype); + Assert.Equal(AutoCastType, j.dtype); } } + [Fact] - [TestOf("AutocastF16")] - public void TestAutocastF16Cell() + [TestOf("AutocastAutoCastType")] + public void TestAutocastAutoCastTypeCell() { CheckCUDA(); //Like GRUCell, LSTM, RNN @@ -148,22 +157,22 @@ public void TestAutocastF16Cell() b = gru.forward(b); (torch.Tensor d, torch.Tensor f) = lstm.forward(inpRNN, new (hx,cx)); torch.Tensor g = rnn.forward(inpRNN, hx); - Assert.Equal(f16, a.dtype); - Assert.Equal(f16, b.dtype); - Assert.Equal(f16, d.dtype); - Assert.Equal(f16, f.dtype); - Assert.Equal(f16, g.dtype); + Assert.Equal(AutoCastType, a.dtype); + Assert.Equal(AutoCastType, b.dtype); + Assert.Equal(AutoCastType, d.dtype); + Assert.Equal(AutoCastType, f.dtype); + Assert.Equal(AutoCastType, g.dtype); } //Outside should have same dtype as inside - Assert.Equal(f16, a.dtype); - Assert.Equal(f16, b.dtype); - //Assert.Equal(f16, e.dtype); + Assert.Equal(AutoCastType, a.dtype); + Assert.Equal(AutoCastType, b.dtype); + //Assert.Equal(AutoCastType, e.dtype); } [Fact] - [TestOf("AutocastF16")] - public void TestAutocastF16Other() + [TestOf("AutocastAutoCastType")] + public void TestAutocastAutoCastTypeOther() { //Like Linear, prelu, etc. CheckCUDA(); @@ -172,17 +181,17 @@ public void TestAutocastF16Other() Assert.Equal(f32, a.dtype); using (AutocastMode.GetInstance().Enter()) { a = pr.forward(a); - Assert.Equal(f16, a.dtype); + Assert.Equal(AutoCastType, a.dtype); } //Outside should have same dtype as inside - Assert.Equal(f16, a.dtype); + Assert.Equal(AutoCastType, a.dtype); } [Fact] - [TestOf("AutocastF16")] - public void TestAutocastF16Convolutions() + [TestOf("AutocastAutoCastType")] + public void TestAutocastAutoCastTypeConvolutions() { CheckCUDA(); //Conv 1d,2d,3d, conv_transpose 1d,2d,3d @@ -198,14 +207,14 @@ public void TestAutocastF16Convolutions() a = c1.forward(a); b = c2.forward(b); c = c3.forward(c); - Assert.Equal(f16, a.dtype); - Assert.Equal(f16, b.dtype); - Assert.Equal(f16, c.dtype); + Assert.Equal(AutoCastType, a.dtype); + Assert.Equal(AutoCastType, b.dtype); + Assert.Equal(AutoCastType, c.dtype); } //Outside should have same dtype as inside - Assert.Equal(f16, a.dtype); - Assert.Equal(f16, b.dtype); - Assert.Equal(f16, c.dtype); + Assert.Equal(AutoCastType, a.dtype); + Assert.Equal(AutoCastType, b.dtype); + Assert.Equal(AutoCastType, c.dtype); } [Fact] [TestOf("AutocastF32")] @@ -219,12 +228,13 @@ public void TestAutocastF32() [TestOf("AutocastF32")] public void TestAutocastF32Trigonometry() { + //In Trigonometry all explicitily is passed to f32. CheckCUDA(); - //Purpose rand f16 because inside autocast with these operations should return as f32 - var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); - /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ + //Purpose rand AutoCastType because inside autocast with these operations should return as f32 + var a = torch.rand(3, 2, 4, AutoCastType, new Device(availableDevice)); + /*var b = torch.rand(3, 2, 4, AutoCastType, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, AutoCastType, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, AutoCastType, new Device(DeviceType.CUDA));*/ using (AutocastMode.GetInstance(true).Enter()) { var c = a.acos(); var d = a.asin(); @@ -244,10 +254,10 @@ public void TestAutocastF32Trigonometry() public void TestAutocastF32Logarithmic() { CheckCUDA(); - var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); - /*var b = torch.rand(3, 2, 4, f16, new Device(DeviceType.CUDA)); - var vec1 = torch.rand(3, f16, new Device(DeviceType.CUDA)); - var vec2 = torch.rand(3, f16, new Device(DeviceType.CUDA));*/ + var a = torch.rand(3, 2, 4, AutoCastType, new Device(availableDevice)); + /*var b = torch.rand(3, 2, 4, AutoCastType, new Device(DeviceType.CUDA)); + var vec1 = torch.rand(3, AutoCastType, new Device(DeviceType.CUDA)); + var vec2 = torch.rand(3, AutoCastType, new Device(DeviceType.CUDA));*/ using (AutocastMode.GetInstance().Enter()) { var c = a.log(); var d = a.log10(); @@ -266,7 +276,7 @@ public void TestAutocastF32Logarithmic() public void TestAutocastF32Other() { CheckCUDA(); - var a = torch.rand(3, 3, f16, new Device(DeviceType.CUDA)); + var a = torch.rand(3, 3, AutoCastType, new Device(DeviceType.CUDA)); //var b = torch.rand(3, 3, f32, new Device(DeviceType.CUDA)); using (AutocastMode.GetInstance().Enter()) { var c = a.cumprod(1); @@ -278,10 +288,10 @@ public void TestAutocastF32Other() public void TestAutocastF32Loss() { CheckCUDA(); - var a = torch.rand(3, 2, 4, f16, new Device(availableDevice)); - var b = torch.rand(3, 2, 4, f16, new Device(availableDevice)); - var vec1 = torch.rand(3, f16, new Device(availableDevice)); - var vec2 = torch.rand(3, f16, new Device(availableDevice)); + var a = torch.rand(3, 2, 4, AutoCastType, new Device(availableDevice)); + var b = torch.rand(3, 2, 4, AutoCastType, new Device(availableDevice)); + var vec1 = torch.rand(3, AutoCastType, new Device(availableDevice)); + var vec2 = torch.rand(3, AutoCastType, new Device(availableDevice)); using (AutocastMode.AutoCastEnter()) { var c = torch.nn.L1Loss().to(availableDevice).forward(a,b); Assert.Equal(f32, c.dtype); From e52423916e025cdd2853049299d4531ccd916040 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sat, 15 Feb 2025 13:43:09 -0300 Subject: [PATCH 41/43] custom libtorch fullpatch --- .gitignore | 2 + Directory.Build.props | 1 + ...eRestitcher.Tests.csproj.nuget.dgspec.json | 224 +++++ .../FileRestitcher.Tests.csproj.nuget.g.props | 35 + ...ileRestitcher.Tests.csproj.nuget.g.targets | 18 + .../project.assets.json | 841 ++++++++++++++++++ .../project.nuget.cache | 21 + .../FileRestitcher.csproj.nuget.dgspec.json | 19 +- .../FileRestitcher.csproj.nuget.g.props | 6 +- .../project.assets.json | 21 +- .../project.nuget.cache | 2 +- src/Native/LibTorchSharp/CMakeLists.txt | 13 +- src/Native/LibTorchSharp/THSLinearAlgebra.cpp | 142 ++- src/Native/build.proj | 7 +- src/TorchSharp/TorchSharp.csproj | 5 +- 15 files changed, 1333 insertions(+), 24 deletions(-) create mode 100644 pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.dgspec.json create mode 100644 pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.props create mode 100644 pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.targets create mode 100644 pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.assets.json create mode 100644 pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.nuget.cache diff --git a/.gitignore b/.gitignore index 13682298c..749832847 100644 --- a/.gitignore +++ b/.gitignore @@ -275,3 +275,5 @@ packages/ .vscode/settings.json /TestClear TestClear/ +/nuget.config +/src/Native/LibTorchSharp/third_party diff --git a/Directory.Build.props b/Directory.Build.props index 262c4216a..ac534f235 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,7 @@ + Debug Debug;Release <_DefaultArchitecture>$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLower()) diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.dgspec.json b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.dgspec.json new file mode 100644 index 000000000..0101447be --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.dgspec.json @@ -0,0 +1,224 @@ +{ + "format": 1, + "restore": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj": {} + }, + "projects": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj", + "projectName": "FileRestitcher.Tests", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages" + ], + "configFilePaths": [ + "K:\\Proyects_Repos\\TorchSharp\\NuGet.Config", + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.FallbackLocation.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config" + ], + "originalTargetFrameworks": [ + "net472", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net472": { + "targetAlias": "net472", + "projectReferences": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj" + } + } + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj" + } + } + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + }, + "restoreAuditProperties": { + "enableAudit": "true", + "auditLevel": "low", + "auditMode": "all" + }, + "SdkAnalysisLevel": "9.0.100" + }, + "frameworks": { + "net472": { + "targetAlias": "net472", + "dependencies": { + "Microsoft.NET.Test.Sdk": { + "suppressParent": "None", + "target": "Package", + "version": "[16.9.4, )" + }, + "coverlet.collector": { + "include": "Runtime, Build, Native, ContentFiles, Analyzers, BuildTransitive", + "suppressParent": "All", + "target": "Package", + "version": "[3.0.2, )" + }, + "xunit": { + "suppressParent": "None", + "target": "Package", + "version": "[2.4.2, )" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "Microsoft.NET.Test.Sdk": { + "suppressParent": "None", + "target": "Package", + "version": "[16.9.4, )" + }, + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + }, + "coverlet.collector": { + "include": "Runtime, Build, Native, ContentFiles, Analyzers, BuildTransitive", + "suppressParent": "All", + "target": "Package", + "version": "[3.0.2, )" + }, + "xunit": { + "suppressParent": "None", + "target": "Package", + "version": "[2.4.2, )" + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + } + } + }, + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "projectName": "FileRestitcher", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages" + ], + "configFilePaths": [ + "K:\\Proyects_Repos\\TorchSharp\\NuGet.Config", + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.FallbackLocation.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config" + ], + "originalTargetFrameworks": [ + "net6.0", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "projectReferences": {} + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": {} + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + }, + "restoreAuditProperties": { + "enableAudit": "true", + "auditLevel": "low", + "auditMode": "all" + }, + "SdkAnalysisLevel": "9.0.100" + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "frameworkReferences": { + "Microsoft.NETCore.App": { + "privateAssets": "all" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + } + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.props b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.props new file mode 100644 index 000000000..7adfe6ee9 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.props @@ -0,0 +1,35 @@ + + + + True + NuGet + $(MSBuildThisFileDirectory)project.assets.json + $(UserProfile)\.nuget\packages\ + C:\Users\Dimitri\.nuget\packages\;C:\Program Files (x86)\Microsoft Visual Studio\Shared\NuGetPackages + PackageReference + 6.12.0 + + + + + + + + + + + + + + + + + + + + C:\Users\Dimitri\.nuget\packages\xunit.analyzers\1.0.0 + + + C:\Users\Dimitri\.nuget\packages\xunit.analyzers\1.0.0 + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.targets b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.targets new file mode 100644 index 000000000..89347f8d0 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/FileRestitcher.Tests.csproj.nuget.g.targets @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.assets.json b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.assets.json new file mode 100644 index 000000000..ac4726f8d --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.assets.json @@ -0,0 +1,841 @@ +{ + "version": 3, + "targets": { + ".NETFramework,Version=v4.7.2": { + "coverlet.collector/3.0.2": { + "type": "package", + "build": { + "build/netstandard1.0/coverlet.collector.targets": {} + } + }, + "Microsoft.CodeCoverage/16.9.4": { + "type": "package", + "compile": { + "lib/net45/Microsoft.VisualStudio.CodeCoverage.Shim.dll": {} + }, + "runtime": { + "lib/net45/Microsoft.VisualStudio.CodeCoverage.Shim.dll": {} + }, + "build": { + "build/netstandard1.0/Microsoft.CodeCoverage.props": {}, + "build/netstandard1.0/Microsoft.CodeCoverage.targets": {} + } + }, + "Microsoft.NET.Test.Sdk/16.9.4": { + "type": "package", + "dependencies": { + "Microsoft.CodeCoverage": "16.9.4" + }, + "compile": { + "lib/net45/_._": {} + }, + "runtime": { + "lib/net45/_._": {} + }, + "build": { + "build/net45/Microsoft.NET.Test.Sdk.props": {}, + "build/net45/Microsoft.NET.Test.Sdk.targets": {} + }, + "buildMultiTargeting": { + "buildMultiTargeting/Microsoft.NET.Test.Sdk.props": {} + } + }, + "xunit/2.4.2": { + "type": "package", + "dependencies": { + "xunit.analyzers": "1.0.0", + "xunit.assert": "2.4.2", + "xunit.core": "[2.4.2]" + } + }, + "xunit.abstractions/2.0.3": { + "type": "package", + "compile": { + "lib/net35/xunit.abstractions.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/net35/xunit.abstractions.dll": { + "related": ".xml" + } + } + }, + "xunit.analyzers/1.0.0": { + "type": "package" + }, + "xunit.assert/2.4.2": { + "type": "package", + "compile": { + "lib/netstandard1.1/xunit.assert.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/netstandard1.1/xunit.assert.dll": { + "related": ".xml" + } + } + }, + "xunit.core/2.4.2": { + "type": "package", + "dependencies": { + "xunit.extensibility.core": "[2.4.2]", + "xunit.extensibility.execution": "[2.4.2]" + }, + "build": { + "build/xunit.core.props": {}, + "build/xunit.core.targets": {} + }, + "buildMultiTargeting": { + "buildMultiTargeting/xunit.core.props": {}, + "buildMultiTargeting/xunit.core.targets": {} + } + }, + "xunit.extensibility.core/2.4.2": { + "type": "package", + "dependencies": { + "xunit.abstractions": "2.0.3" + }, + "compile": { + "lib/net452/xunit.core.dll": { + "related": ".dll.tdnet;.xml" + } + }, + "runtime": { + "lib/net452/xunit.core.dll": { + "related": ".dll.tdnet;.xml" + } + } + }, + "xunit.extensibility.execution/2.4.2": { + "type": "package", + "dependencies": { + "xunit.extensibility.core": "[2.4.2]" + }, + "compile": { + "lib/net452/xunit.execution.desktop.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/net452/xunit.execution.desktop.dll": { + "related": ".xml" + } + } + }, + "FileRestitcher/1.0.0": { + "type": "project", + "framework": ".NETStandard,Version=v2.0", + "compile": { + "bin/placeholder/FileRestitcher.dll": {} + }, + "runtime": { + "bin/placeholder/FileRestitcher.dll": {} + } + } + }, + ".NETStandard,Version=v2.0": { + "coverlet.collector/3.0.2": { + "type": "package", + "build": { + "build/netstandard1.0/coverlet.collector.targets": {} + } + }, + "Microsoft.CodeCoverage/16.9.4": { + "type": "package", + "build": { + "build/netstandard1.0/Microsoft.CodeCoverage.props": {}, + "build/netstandard1.0/Microsoft.CodeCoverage.targets": {} + } + }, + "Microsoft.NET.Test.Sdk/16.9.4": { + "type": "package", + "dependencies": { + "Microsoft.CodeCoverage": "16.9.4" + }, + "buildMultiTargeting": { + "buildMultiTargeting/Microsoft.NET.Test.Sdk.props": {} + } + }, + "Microsoft.NETCore.Platforms/1.1.0": { + "type": "package", + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + } + }, + "NETStandard.Library/2.0.3": { + "type": "package", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0" + }, + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + }, + "build": { + "build/netstandard2.0/NETStandard.Library.targets": {} + } + }, + "xunit/2.4.2": { + "type": "package", + "dependencies": { + "xunit.analyzers": "1.0.0", + "xunit.assert": "2.4.2", + "xunit.core": "[2.4.2]" + } + }, + "xunit.abstractions/2.0.3": { + "type": "package", + "compile": { + "lib/netstandard2.0/xunit.abstractions.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/netstandard2.0/xunit.abstractions.dll": { + "related": ".xml" + } + } + }, + "xunit.analyzers/1.0.0": { + "type": "package" + }, + "xunit.assert/2.4.2": { + "type": "package", + "dependencies": { + "NETStandard.Library": "1.6.1" + }, + "compile": { + "lib/netstandard1.1/xunit.assert.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/netstandard1.1/xunit.assert.dll": { + "related": ".xml" + } + } + }, + "xunit.core/2.4.2": { + "type": "package", + "dependencies": { + "xunit.extensibility.core": "[2.4.2]", + "xunit.extensibility.execution": "[2.4.2]" + }, + "build": { + "build/xunit.core.props": {}, + "build/xunit.core.targets": {} + }, + "buildMultiTargeting": { + "buildMultiTargeting/xunit.core.props": {}, + "buildMultiTargeting/xunit.core.targets": {} + } + }, + "xunit.extensibility.core/2.4.2": { + "type": "package", + "dependencies": { + "NETStandard.Library": "1.6.1", + "xunit.abstractions": "2.0.3" + }, + "compile": { + "lib/netstandard1.1/xunit.core.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/netstandard1.1/xunit.core.dll": { + "related": ".xml" + } + } + }, + "xunit.extensibility.execution/2.4.2": { + "type": "package", + "dependencies": { + "NETStandard.Library": "1.6.1", + "xunit.extensibility.core": "[2.4.2]" + }, + "compile": { + "lib/netstandard1.1/xunit.execution.dotnet.dll": { + "related": ".xml" + } + }, + "runtime": { + "lib/netstandard1.1/xunit.execution.dotnet.dll": { + "related": ".xml" + } + } + }, + "FileRestitcher/1.0.0": { + "type": "project", + "framework": ".NETStandard,Version=v2.0", + "compile": { + "bin/placeholder/FileRestitcher.dll": {} + }, + "runtime": { + "bin/placeholder/FileRestitcher.dll": {} + } + } + } + }, + "libraries": { + "coverlet.collector/3.0.2": { + "sha512": "iBvPAIDaI7j/iMx/DzCGCJ3rdiOmel9VINEfaTiBv/NKIGHOP4X3hqc6Q1wgMtArEshlhXexQknP17SK4vXb1w==", + "type": "package", + "path": "coverlet.collector/3.0.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "build/netstandard1.0/Microsoft.CSharp.dll", + "build/netstandard1.0/Microsoft.DotNet.PlatformAbstractions.dll", + "build/netstandard1.0/Microsoft.Extensions.DependencyInjection.Abstractions.dll", + "build/netstandard1.0/Microsoft.Extensions.DependencyInjection.dll", + "build/netstandard1.0/Microsoft.Extensions.DependencyModel.dll", + "build/netstandard1.0/Microsoft.Extensions.FileSystemGlobbing.dll", + "build/netstandard1.0/Microsoft.TestPlatform.CoreUtilities.dll", + "build/netstandard1.0/Microsoft.TestPlatform.PlatformAbstractions.dll", + "build/netstandard1.0/Microsoft.VisualStudio.TestPlatform.ObjectModel.dll", + "build/netstandard1.0/Mono.Cecil.Mdb.dll", + "build/netstandard1.0/Mono.Cecil.Pdb.dll", + "build/netstandard1.0/Mono.Cecil.Rocks.dll", + "build/netstandard1.0/Mono.Cecil.dll", + "build/netstandard1.0/Newtonsoft.Json.dll", + "build/netstandard1.0/NuGet.Frameworks.dll", + "build/netstandard1.0/System.AppContext.dll", + "build/netstandard1.0/System.Collections.Immutable.dll", + "build/netstandard1.0/System.Dynamic.Runtime.dll", + "build/netstandard1.0/System.IO.FileSystem.Primitives.dll", + "build/netstandard1.0/System.Linq.Expressions.dll", + "build/netstandard1.0/System.Linq.dll", + "build/netstandard1.0/System.ObjectModel.dll", + "build/netstandard1.0/System.Reflection.Emit.ILGeneration.dll", + "build/netstandard1.0/System.Reflection.Emit.Lightweight.dll", + "build/netstandard1.0/System.Reflection.Emit.dll", + "build/netstandard1.0/System.Reflection.Metadata.dll", + "build/netstandard1.0/System.Reflection.TypeExtensions.dll", + "build/netstandard1.0/System.Runtime.Serialization.Primitives.dll", + "build/netstandard1.0/System.Text.RegularExpressions.dll", + "build/netstandard1.0/System.Threading.Tasks.Extensions.dll", + "build/netstandard1.0/System.Threading.dll", + "build/netstandard1.0/System.Xml.ReaderWriter.dll", + "build/netstandard1.0/System.Xml.XDocument.dll", + "build/netstandard1.0/coverlet.collector.deps.json", + "build/netstandard1.0/coverlet.collector.dll", + "build/netstandard1.0/coverlet.collector.pdb", + "build/netstandard1.0/coverlet.collector.targets", + "build/netstandard1.0/coverlet.core.dll", + "build/netstandard1.0/coverlet.core.pdb", + "coverlet-icon.png", + "coverlet.collector.3.0.2.nupkg.sha512", + "coverlet.collector.nuspec" + ] + }, + "Microsoft.CodeCoverage/16.9.4": { + "sha512": "N/RYB07gJkPZ1nJiq0QGxFIL+X5vVl4GI99PiTYXpbfI30NTZMRJgZ+4jYLFYLDQqj9o1Juhv+3iiymd7lozrA==", + "type": "package", + "path": "microsoft.codecoverage/16.9.4", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "Icon.png", + "LICENSE_NET.txt", + "build/netstandard1.0/CodeCoverage/CodeCoverage.config", + "build/netstandard1.0/CodeCoverage/CodeCoverage.exe", + "build/netstandard1.0/CodeCoverage/VanguardInstrumentationProfiler_x86.config", + "build/netstandard1.0/CodeCoverage/amd64/CodeCoverage.exe", + "build/netstandard1.0/CodeCoverage/amd64/VanguardInstrumentationProfiler_x64.config", + "build/netstandard1.0/CodeCoverage/amd64/covrun64.dll", + "build/netstandard1.0/CodeCoverage/amd64/msdia140.dll", + "build/netstandard1.0/CodeCoverage/amd64/msvcdis140.dll", + "build/netstandard1.0/CodeCoverage/amd64/msvcp140.dll", + "build/netstandard1.0/CodeCoverage/amd64/msvcp140_atomic_wait.dll", + "build/netstandard1.0/CodeCoverage/amd64/vcruntime140.dll", + "build/netstandard1.0/CodeCoverage/amd64/vcruntime140_1.dll", + "build/netstandard1.0/CodeCoverage/codecoveragemessages.dll", + "build/netstandard1.0/CodeCoverage/coreclr/Microsoft.VisualStudio.CodeCoverage.Shim.dll", + "build/netstandard1.0/CodeCoverage/covrun32.dll", + "build/netstandard1.0/CodeCoverage/msdia140.dll", + "build/netstandard1.0/CodeCoverage/msvcdis140.dll", + "build/netstandard1.0/CodeCoverage/msvcp140.dll", + "build/netstandard1.0/CodeCoverage/msvcp140_atomic_wait.dll", + "build/netstandard1.0/CodeCoverage/vcruntime140.dll", + "build/netstandard1.0/InstrumentationEngine/x64/MicrosoftInstrumentationEngine_x64.dll", + "build/netstandard1.0/InstrumentationEngine/x86/MicrosoftInstrumentationEngine_x86.dll", + "build/netstandard1.0/Microsoft.CodeCoverage.props", + "build/netstandard1.0/Microsoft.CodeCoverage.targets", + "build/netstandard1.0/Microsoft.VisualStudio.Coverage.CoreLib.Net.dll", + "build/netstandard1.0/Microsoft.VisualStudio.Coverage.Interprocess.dll", + "build/netstandard1.0/Microsoft.VisualStudio.TraceDataCollector.dll", + "build/netstandard1.0/cs/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/cs/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/de/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/de/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/es/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/es/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/fr/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/fr/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/it/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/it/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/ja/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/ja/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/ko/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/ko/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/pl/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/pl/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/pt-BR/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/pt-BR/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/ru/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/ru/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/tr/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/tr/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/zh-Hans/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/zh-Hans/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "build/netstandard1.0/zh-Hant/Microsoft.VisualStudio.Coverage.CoreLib.Net.resources.dll", + "build/netstandard1.0/zh-Hant/Microsoft.VisualStudio.TraceDataCollector.resources.dll", + "lib/net45/Microsoft.VisualStudio.CodeCoverage.Shim.dll", + "lib/netcoreapp1.0/Microsoft.VisualStudio.CodeCoverage.Shim.dll", + "microsoft.codecoverage.16.9.4.nupkg.sha512", + "microsoft.codecoverage.nuspec" + ] + }, + "Microsoft.NET.Test.Sdk/16.9.4": { + "sha512": "M/k16vmS7Hz/+Kuy3p6XE743XPjYYMzfN5ZvpSLY44Ngh5IBMk0Je5Qed8oq6/kvzJA2DTrXa7YrfceHhbQKeQ==", + "type": "package", + "path": "microsoft.net.test.sdk/16.9.4", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "Icon.png", + "LICENSE_NET.txt", + "build/net40/Microsoft.NET.Test.Sdk.props", + "build/net40/Microsoft.NET.Test.Sdk.targets", + "build/net45/Microsoft.NET.Test.Sdk.props", + "build/net45/Microsoft.NET.Test.Sdk.targets", + "build/netcoreapp1.0/Microsoft.NET.Test.Sdk.Program.cs", + "build/netcoreapp1.0/Microsoft.NET.Test.Sdk.Program.fs", + "build/netcoreapp1.0/Microsoft.NET.Test.Sdk.Program.vb", + "build/netcoreapp1.0/Microsoft.NET.Test.Sdk.props", + "build/netcoreapp1.0/Microsoft.NET.Test.Sdk.targets", + "build/netcoreapp2.1/Microsoft.NET.Test.Sdk.Program.cs", + "build/netcoreapp2.1/Microsoft.NET.Test.Sdk.Program.fs", + "build/netcoreapp2.1/Microsoft.NET.Test.Sdk.Program.vb", + "build/netcoreapp2.1/Microsoft.NET.Test.Sdk.props", + "build/netcoreapp2.1/Microsoft.NET.Test.Sdk.targets", + "build/uap10.0/Microsoft.NET.Test.Sdk.props", + "buildMultiTargeting/Microsoft.NET.Test.Sdk.props", + "lib/net40/_._", + "lib/net45/_._", + "lib/netcoreapp1.0/_._", + "lib/netcoreapp2.1/_._", + "lib/uap10.0/_._", + "microsoft.net.test.sdk.16.9.4.nupkg.sha512", + "microsoft.net.test.sdk.nuspec" + ] + }, + "Microsoft.NETCore.Platforms/1.1.0": { + "sha512": "kz0PEW2lhqygehI/d6XsPCQzD7ff7gUJaVGPVETX611eadGsA3A877GdSlU0LRVMCTH/+P3o2iDTak+S08V2+A==", + "type": "package", + "path": "microsoft.netcore.platforms/1.1.0", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "ThirdPartyNotices.txt", + "dotnet_library_license.txt", + "lib/netstandard1.0/_._", + "microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "microsoft.netcore.platforms.nuspec", + "runtime.json" + ] + }, + "NETStandard.Library/2.0.3": { + "sha512": "st47PosZSHrjECdjeIzZQbzivYBJFv6P2nv4cj2ypdI204DO+vZ7l5raGMiX4eXMJ53RfOIg+/s4DHVZ54Nu2A==", + "type": "package", + "path": "netstandard.library/2.0.3", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "LICENSE.TXT", + "THIRD-PARTY-NOTICES.TXT", + "build/netstandard2.0/NETStandard.Library.targets", + "build/netstandard2.0/ref/Microsoft.Win32.Primitives.dll", + "build/netstandard2.0/ref/System.AppContext.dll", + "build/netstandard2.0/ref/System.Collections.Concurrent.dll", + "build/netstandard2.0/ref/System.Collections.NonGeneric.dll", + "build/netstandard2.0/ref/System.Collections.Specialized.dll", + "build/netstandard2.0/ref/System.Collections.dll", + "build/netstandard2.0/ref/System.ComponentModel.Composition.dll", + "build/netstandard2.0/ref/System.ComponentModel.EventBasedAsync.dll", + "build/netstandard2.0/ref/System.ComponentModel.Primitives.dll", + "build/netstandard2.0/ref/System.ComponentModel.TypeConverter.dll", + "build/netstandard2.0/ref/System.ComponentModel.dll", + "build/netstandard2.0/ref/System.Console.dll", + "build/netstandard2.0/ref/System.Core.dll", + "build/netstandard2.0/ref/System.Data.Common.dll", + "build/netstandard2.0/ref/System.Data.dll", + "build/netstandard2.0/ref/System.Diagnostics.Contracts.dll", + "build/netstandard2.0/ref/System.Diagnostics.Debug.dll", + "build/netstandard2.0/ref/System.Diagnostics.FileVersionInfo.dll", + "build/netstandard2.0/ref/System.Diagnostics.Process.dll", + "build/netstandard2.0/ref/System.Diagnostics.StackTrace.dll", + "build/netstandard2.0/ref/System.Diagnostics.TextWriterTraceListener.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tools.dll", + "build/netstandard2.0/ref/System.Diagnostics.TraceSource.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tracing.dll", + "build/netstandard2.0/ref/System.Drawing.Primitives.dll", + "build/netstandard2.0/ref/System.Drawing.dll", + "build/netstandard2.0/ref/System.Dynamic.Runtime.dll", + "build/netstandard2.0/ref/System.Globalization.Calendars.dll", + "build/netstandard2.0/ref/System.Globalization.Extensions.dll", + "build/netstandard2.0/ref/System.Globalization.dll", + "build/netstandard2.0/ref/System.IO.Compression.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.Compression.ZipFile.dll", + "build/netstandard2.0/ref/System.IO.Compression.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.DriveInfo.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Primitives.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Watcher.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.IsolatedStorage.dll", + "build/netstandard2.0/ref/System.IO.MemoryMappedFiles.dll", + "build/netstandard2.0/ref/System.IO.Pipes.dll", + "build/netstandard2.0/ref/System.IO.UnmanagedMemoryStream.dll", + "build/netstandard2.0/ref/System.IO.dll", + "build/netstandard2.0/ref/System.Linq.Expressions.dll", + "build/netstandard2.0/ref/System.Linq.Parallel.dll", + "build/netstandard2.0/ref/System.Linq.Queryable.dll", + "build/netstandard2.0/ref/System.Linq.dll", + "build/netstandard2.0/ref/System.Net.Http.dll", + "build/netstandard2.0/ref/System.Net.NameResolution.dll", + "build/netstandard2.0/ref/System.Net.NetworkInformation.dll", + "build/netstandard2.0/ref/System.Net.Ping.dll", + "build/netstandard2.0/ref/System.Net.Primitives.dll", + "build/netstandard2.0/ref/System.Net.Requests.dll", + "build/netstandard2.0/ref/System.Net.Security.dll", + "build/netstandard2.0/ref/System.Net.Sockets.dll", + "build/netstandard2.0/ref/System.Net.WebHeaderCollection.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.Client.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.dll", + "build/netstandard2.0/ref/System.Net.dll", + "build/netstandard2.0/ref/System.Numerics.dll", + "build/netstandard2.0/ref/System.ObjectModel.dll", + "build/netstandard2.0/ref/System.Reflection.Extensions.dll", + "build/netstandard2.0/ref/System.Reflection.Primitives.dll", + "build/netstandard2.0/ref/System.Reflection.dll", + "build/netstandard2.0/ref/System.Resources.Reader.dll", + "build/netstandard2.0/ref/System.Resources.ResourceManager.dll", + "build/netstandard2.0/ref/System.Resources.Writer.dll", + "build/netstandard2.0/ref/System.Runtime.CompilerServices.VisualC.dll", + "build/netstandard2.0/ref/System.Runtime.Extensions.dll", + "build/netstandard2.0/ref/System.Runtime.Handles.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.RuntimeInformation.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.dll", + "build/netstandard2.0/ref/System.Runtime.Numerics.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Formatters.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Json.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Primitives.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Xml.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.dll", + "build/netstandard2.0/ref/System.Runtime.dll", + "build/netstandard2.0/ref/System.Security.Claims.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Algorithms.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Csp.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Encoding.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Primitives.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.X509Certificates.dll", + "build/netstandard2.0/ref/System.Security.Principal.dll", + "build/netstandard2.0/ref/System.Security.SecureString.dll", + "build/netstandard2.0/ref/System.ServiceModel.Web.dll", + "build/netstandard2.0/ref/System.Text.Encoding.Extensions.dll", + "build/netstandard2.0/ref/System.Text.Encoding.dll", + "build/netstandard2.0/ref/System.Text.RegularExpressions.dll", + "build/netstandard2.0/ref/System.Threading.Overlapped.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.Parallel.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.dll", + "build/netstandard2.0/ref/System.Threading.Thread.dll", + "build/netstandard2.0/ref/System.Threading.ThreadPool.dll", + "build/netstandard2.0/ref/System.Threading.Timer.dll", + "build/netstandard2.0/ref/System.Threading.dll", + "build/netstandard2.0/ref/System.Transactions.dll", + "build/netstandard2.0/ref/System.ValueTuple.dll", + "build/netstandard2.0/ref/System.Web.dll", + "build/netstandard2.0/ref/System.Windows.dll", + "build/netstandard2.0/ref/System.Xml.Linq.dll", + "build/netstandard2.0/ref/System.Xml.ReaderWriter.dll", + "build/netstandard2.0/ref/System.Xml.Serialization.dll", + "build/netstandard2.0/ref/System.Xml.XDocument.dll", + "build/netstandard2.0/ref/System.Xml.XPath.XDocument.dll", + "build/netstandard2.0/ref/System.Xml.XPath.dll", + "build/netstandard2.0/ref/System.Xml.XmlDocument.dll", + "build/netstandard2.0/ref/System.Xml.XmlSerializer.dll", + "build/netstandard2.0/ref/System.Xml.dll", + "build/netstandard2.0/ref/System.dll", + "build/netstandard2.0/ref/mscorlib.dll", + "build/netstandard2.0/ref/netstandard.dll", + "build/netstandard2.0/ref/netstandard.xml", + "lib/netstandard1.0/_._", + "netstandard.library.2.0.3.nupkg.sha512", + "netstandard.library.nuspec" + ] + }, + "xunit/2.4.2": { + "sha512": "6Mj73Ont3zj2CJuoykVJfE0ZmRwn7C+pTuRP8c4bnaaTFjwNG6tGe0prJ1yIbMe9AHrpDys63ctWacSsFJWK/w==", + "type": "package", + "path": "xunit/2.4.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "xunit.2.4.2.nupkg.sha512", + "xunit.nuspec" + ] + }, + "xunit.abstractions/2.0.3": { + "sha512": "pot1I4YOxlWjIb5jmwvvQNbTrZ3lJQ+jUGkGjWE3hEFM0l5gOnBWS+H3qsex68s5cO52g+44vpGzhAt+42vwKg==", + "type": "package", + "path": "xunit.abstractions/2.0.3", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "lib/net35/xunit.abstractions.dll", + "lib/net35/xunit.abstractions.xml", + "lib/netstandard1.0/xunit.abstractions.dll", + "lib/netstandard1.0/xunit.abstractions.xml", + "lib/netstandard2.0/xunit.abstractions.dll", + "lib/netstandard2.0/xunit.abstractions.xml", + "xunit.abstractions.2.0.3.nupkg.sha512", + "xunit.abstractions.nuspec" + ] + }, + "xunit.analyzers/1.0.0": { + "sha512": "BeO8hEgs/c8Ls2647fPfieMngncvf0D0xYNDfIO59MolxtCtVjFRd6SRc+7tj8VMqkVOuJcnc9eh4ngI2cAmLQ==", + "type": "package", + "path": "xunit.analyzers/1.0.0", + "hasTools": true, + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "analyzers/dotnet/cs/xunit.analyzers.dll", + "analyzers/dotnet/cs/xunit.analyzers.fixes.dll", + "tools/install.ps1", + "tools/uninstall.ps1", + "xunit.analyzers.1.0.0.nupkg.sha512", + "xunit.analyzers.nuspec" + ] + }, + "xunit.assert/2.4.2": { + "sha512": "pxJISOFjn2XTTi1mcDCkRZrTFb9OtRRCtx2kZFNF51GdReLr1ls2rnyxvAS4JO247K3aNtflvh5Q0346K5BROA==", + "type": "package", + "path": "xunit.assert/2.4.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "lib/netstandard1.1/xunit.assert.dll", + "lib/netstandard1.1/xunit.assert.xml", + "xunit.assert.2.4.2.nupkg.sha512", + "xunit.assert.nuspec" + ] + }, + "xunit.core/2.4.2": { + "sha512": "KB4yGCxNqIVyekhJLXtKSEq6BaXVp/JO3mbGVE1hxypZTLEe7h+sTbAhpA+yZW2dPtXTuiW+C1B2oxxHEkrmOw==", + "type": "package", + "path": "xunit.core/2.4.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "build/xunit.core.props", + "build/xunit.core.targets", + "buildMultiTargeting/xunit.core.props", + "buildMultiTargeting/xunit.core.targets", + "xunit.core.2.4.2.nupkg.sha512", + "xunit.core.nuspec" + ] + }, + "xunit.extensibility.core/2.4.2": { + "sha512": "W1BoXTIN1C6kpVSMw25huSet25ky6IAQUNovu3zGOGN/jWnbgSoTyCrlIhmXSg0tH5nEf8q7h3OjNHOjyu5PfA==", + "type": "package", + "path": "xunit.extensibility.core/2.4.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "lib/net452/xunit.core.dll", + "lib/net452/xunit.core.dll.tdnet", + "lib/net452/xunit.core.xml", + "lib/net452/xunit.runner.tdnet.dll", + "lib/net452/xunit.runner.utility.net452.dll", + "lib/netstandard1.1/xunit.core.dll", + "lib/netstandard1.1/xunit.core.xml", + "xunit.extensibility.core.2.4.2.nupkg.sha512", + "xunit.extensibility.core.nuspec" + ] + }, + "xunit.extensibility.execution/2.4.2": { + "sha512": "CZmgcKkwpyo8FlupZdWpJCryrAOWLh1FBPG6gmVZuPQkGQsim/oL4PcP4nfrC2hHgXUFtluvaJ0Sp9PQKUMNpg==", + "type": "package", + "path": "xunit.extensibility.execution/2.4.2", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "_content/logo-128-transparent.png", + "lib/net452/xunit.execution.desktop.dll", + "lib/net452/xunit.execution.desktop.xml", + "lib/netstandard1.1/xunit.execution.dotnet.dll", + "lib/netstandard1.1/xunit.execution.dotnet.xml", + "xunit.extensibility.execution.2.4.2.nupkg.sha512", + "xunit.extensibility.execution.nuspec" + ] + }, + "FileRestitcher/1.0.0": { + "type": "project", + "path": "../FileRestitcher/FileRestitcher.csproj", + "msbuildProject": "../FileRestitcher/FileRestitcher.csproj" + } + }, + "projectFileDependencyGroups": { + ".NETFramework,Version=v4.7.2": [ + "FileRestitcher >= 1.0.0", + "Microsoft.NET.Test.Sdk >= 16.9.4", + "coverlet.collector >= 3.0.2", + "xunit >= 2.4.2" + ], + ".NETStandard,Version=v2.0": [ + "FileRestitcher >= 1.0.0", + "Microsoft.NET.Test.Sdk >= 16.9.4", + "NETStandard.Library >= 2.0.3", + "coverlet.collector >= 3.0.2", + "xunit >= 2.4.2" + ] + }, + "packageFolders": { + "C:\\Users\\Dimitri\\.nuget\\packages\\": {}, + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages": {} + }, + "project": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj", + "projectName": "FileRestitcher.Tests", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages" + ], + "configFilePaths": [ + "K:\\Proyects_Repos\\TorchSharp\\NuGet.Config", + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.FallbackLocation.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config" + ], + "originalTargetFrameworks": [ + "net472", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net472": { + "targetAlias": "net472", + "projectReferences": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj" + } + } + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj" + } + } + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + }, + "restoreAuditProperties": { + "enableAudit": "true", + "auditLevel": "low", + "auditMode": "all" + }, + "SdkAnalysisLevel": "9.0.100" + }, + "frameworks": { + "net472": { + "targetAlias": "net472", + "dependencies": { + "Microsoft.NET.Test.Sdk": { + "suppressParent": "None", + "target": "Package", + "version": "[16.9.4, )" + }, + "coverlet.collector": { + "include": "Runtime, Build, Native, ContentFiles, Analyzers, BuildTransitive", + "suppressParent": "All", + "target": "Package", + "version": "[3.0.2, )" + }, + "xunit": { + "suppressParent": "None", + "target": "Package", + "version": "[2.4.2, )" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "Microsoft.NET.Test.Sdk": { + "suppressParent": "None", + "target": "Package", + "version": "[16.9.4, )" + }, + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + }, + "coverlet.collector": { + "include": "Runtime, Build, Native, ContentFiles, Analyzers, BuildTransitive", + "suppressParent": "All", + "target": "Package", + "version": "[3.0.2, )" + }, + "xunit": { + "suppressParent": "None", + "target": "Package", + "version": "[2.4.2, )" + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.nuget.cache b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.nuget.cache new file mode 100644 index 000000000..fd9b0a74d --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.NupkgProj/project.nuget.cache @@ -0,0 +1,21 @@ +{ + "version": 2, + "dgSpecHash": "md8eUrGszbk=", + "success": true, + "projectFilePath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher.Tests\\FileRestitcher.Tests.csproj", + "expectedPackageFiles": [ + "C:\\Users\\Dimitri\\.nuget\\packages\\coverlet.collector\\3.0.2\\coverlet.collector.3.0.2.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\microsoft.codecoverage\\16.9.4\\microsoft.codecoverage.16.9.4.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\microsoft.net.test.sdk\\16.9.4\\microsoft.net.test.sdk.16.9.4.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\microsoft.netcore.platforms\\1.1.0\\microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\netstandard.library\\2.0.3\\netstandard.library.2.0.3.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit\\2.4.2\\xunit.2.4.2.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.abstractions\\2.0.3\\xunit.abstractions.2.0.3.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.analyzers\\1.0.0\\xunit.analyzers.1.0.0.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.assert\\2.4.2\\xunit.assert.2.4.2.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.core\\2.4.2\\xunit.core.2.4.2.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.extensibility.core\\2.4.2\\xunit.extensibility.core.2.4.2.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\xunit.extensibility.execution\\2.4.2\\xunit.extensibility.execution.2.4.2.nupkg.sha512" + ], + "logs": [] +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json index fc625189a..bbe687ab8 100644 --- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json @@ -15,12 +15,13 @@ "projectStyle": "PackageReference", "crossTargeting": true, "fallbackFolders": [ - "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages" ], "configFilePaths": [ + "K:\\Proyects_Repos\\TorchSharp\\NuGet.Config", "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", - "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", - "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.FallbackLocation.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config" ], "originalTargetFrameworks": [ "net6.0", @@ -44,7 +45,13 @@ "warnAsError": [ "NU1605" ] - } + }, + "restoreAuditProperties": { + "enableAudit": "true", + "auditLevel": "low", + "auditMode": "all" + }, + "SdkAnalysisLevel": "9.0.100" }, "frameworks": { "net6.0": { @@ -65,7 +72,7 @@ "privateAssets": "all" } }, - "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" }, "netstandard2.0": { "targetAlias": "netstandard2.0", @@ -88,7 +95,7 @@ ], "assetTargetFallback": true, "warn": true, - "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" } } } diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props index 1e9807451..9c25bbe46 100644 --- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props @@ -5,12 +5,12 @@ NuGet $(MSBuildThisFileDirectory)project.assets.json $(UserProfile)\.nuget\packages\ - C:\Users\Dimitri\.nuget\packages\;C:\Program Files (x86)\Progress\ToolboxNuGetPackages + C:\Users\Dimitri\.nuget\packages\;C:\Program Files (x86)\Microsoft Visual Studio\Shared\NuGetPackages PackageReference - 6.8.0 + 6.12.0 - + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json index 1f13839e4..7e747e944 100644 --- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json @@ -183,7 +183,7 @@ }, "packageFolders": { "C:\\Users\\Dimitri\\.nuget\\packages\\": {}, - "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages": {} + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages": {} }, "project": { "version": "1.0.0", @@ -196,12 +196,13 @@ "projectStyle": "PackageReference", "crossTargeting": true, "fallbackFolders": [ - "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + "C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\NuGetPackages" ], "configFilePaths": [ + "K:\\Proyects_Repos\\TorchSharp\\NuGet.Config", "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", - "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", - "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.FallbackLocation.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config" ], "originalTargetFrameworks": [ "net6.0", @@ -225,7 +226,13 @@ "warnAsError": [ "NU1605" ] - } + }, + "restoreAuditProperties": { + "enableAudit": "true", + "auditLevel": "low", + "auditMode": "all" + }, + "SdkAnalysisLevel": "9.0.100" }, "frameworks": { "net6.0": { @@ -246,7 +253,7 @@ "privateAssets": "all" } }, - "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" }, "netstandard2.0": { "targetAlias": "netstandard2.0", @@ -269,7 +276,7 @@ ], "assetTargetFallback": true, "warn": true, - "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\9.0.100\\RuntimeIdentifierGraph.json" } } } diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache index 2e00179eb..aab7970d8 100644 --- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache @@ -1,6 +1,6 @@ { "version": 2, - "dgSpecHash": "GQbFl6JNwUfeVMRAQIxv+0FH84dIn8y+ZsWz3KR/dVMkJNNXpooEgJaT2UFkLhFNLf08uGLF+sf+HuE1qkdsqQ==", + "dgSpecHash": "rM+0M7K4/ZA=", "success": true, "projectFilePath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", "expectedPackageFiles": [ diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index e03a9746c..560fba1a2 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -7,13 +7,24 @@ if(CUDA_FOUND) add_compile_definitions(TORCHSHARP_CUDA_TOOLKIT_FOUND) endif() +add_compile_definitions(NOMINMAX) + + +#add_library(CUDA::nvToolsExt INTERFACE IMPORTED) +# ensure that PyTorch is told to use NVTX3 headers +#target_compile_definitions(CUDA::nvToolsExt INTERFACETORCH_CUDA_USE_NVTX3) +#target_link_libraries(CUDA::nvToolsExt INTERFACE CUDA::nvtx3) + + + if(APPLE AND NOT LIBTORCH_ARCH STREQUAL "arm64") include_directories("/usr/local/include" "/usr/local/opt/llvm/include") link_directories("/usr/local/lib" "/usr/local/opt/llvm/lib") endif() -#set(LIBTORCH_PATH "K:/Proyects_Repos/TorchSharp/bin/obj/AnyCPU.Debug/libtorch-cuda-12.1/libtorch-win-shared-with-deps-debug-2.4.0cu121/libtorch") +#set(LIBTORCH_PATH "K:/FrameworksForC/LibTorch/libtorch-win-shared-with-deps-2.6.0+cu126") find_package(Torch REQUIRED PATHS ${LIBTORCH_PATH}) +#find_package(Torch CONFIG) set(SOURCES cifar10.h diff --git a/src/Native/LibTorchSharp/THSLinearAlgebra.cpp b/src/Native/LibTorchSharp/THSLinearAlgebra.cpp index 4ed6419db..ea0ab8e8e 100644 --- a/src/Native/LibTorchSharp/THSLinearAlgebra.cpp +++ b/src/Native/LibTorchSharp/THSLinearAlgebra.cpp @@ -4,9 +4,15 @@ #include #include +#define IS_260_OR_NEWER TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR >= 6 + Tensor THSLinalg_cholesky(const Tensor tensor) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_cholesky(*tensor)) +#else CATCH_TENSOR(torch::linalg::cholesky(*tensor)) +#endif } Tensor THSLinalg_cholesky_ex(const Tensor tensor, bool check_errors, Tensor* info) @@ -29,7 +35,11 @@ Tensor THSLinalg_cond_float(const Tensor tensor, const double p) Tensor THSLinalg_cond_str(const Tensor tensor, const char* p) { +#if IS_260_OR_NEWER + CATCH_TENSOR(p != nullptr ? torch::linalg_cond(*tensor, c10::string_view(p)) : torch::linalg_cond(*tensor)) +#else CATCH_TENSOR(p != nullptr ? torch::linalg_cond(*tensor, p) : torch::linalg_cond(*tensor)) +#endif } Tensor THSLinalg_cond_none(const Tensor tensor) @@ -44,7 +54,11 @@ Tensor THSLinalg_cross(const Tensor input, const Tensor other, const int64_t dim Tensor THSLinalg_det(const Tensor tensor) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_det(*tensor)) +#else CATCH_TENSOR(torch::linalg::det(*tensor)) +#endif } Tensor THSTensor_logdet(const Tensor tensor) @@ -55,7 +69,11 @@ Tensor THSTensor_logdet(const Tensor tensor) Tensor THSLinalg_slogdet(const Tensor tensor, Tensor* logabsdet) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_slogdet(*tensor);) +#else CATCH(res = torch::linalg::slogdet(*tensor);) +#endif *logabsdet = ResultTensor(std::get<1>(res)); return ResultTensor(std::get<0>(res)); } @@ -63,7 +81,11 @@ Tensor THSLinalg_slogdet(const Tensor tensor, Tensor* logabsdet) Tensor THSLinalg_eig(const Tensor tensor, Tensor* eigenvectors) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_eig(*tensor);) +#else CATCH(res = torch::linalg::eig(*tensor);); +#endif *eigenvectors = ResultTensor(std::get<1>(res)); return ResultTensor(std::get<0>(res)); } @@ -93,31 +115,51 @@ Tensor THSLinalg_eigh(const Tensor tensor, const char UPLO, Tensor* eigenvectors std::string _uplo; _uplo.push_back(UPLO); std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_eigh(*tensor, _uplo);); +#else CATCH(res = torch::linalg::eigh(*tensor, _uplo);); +#endif *eigenvectors = ResultTensor(std::get<1>(res)); return ResultTensor(std::get<0>(res)); } Tensor THSLinalg_eigvals(const Tensor tensor) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_eigvals(*tensor)) +#else CATCH_TENSOR(torch::linalg::eigvals(*tensor)) +#endif } Tensor THSLinalg_eigvalsh(const Tensor tensor, const char UPLO) { std::string _uplo; _uplo.push_back(UPLO); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_eigvalsh(*tensor, _uplo)) +#else CATCH_TENSOR(torch::linalg::eigvalsh(*tensor, _uplo)) +#endif } Tensor THSLinalg_householder_product(const Tensor tensor, const Tensor tau) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_householder_product(*tensor, *tau)) +#else CATCH_TENSOR(torch::linalg::householder_product(*tensor, *tau)) +#endif } Tensor THSLinalg_inv(const Tensor tensor) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_inv(*tensor)) +#else CATCH_TENSOR(torch::linalg::inv(*tensor)) +#endif } Tensor THSLinalg_inv_ex(const Tensor tensor, bool check_errors, Tensor* info) @@ -131,7 +173,11 @@ Tensor THSLinalg_inv_ex(const Tensor tensor, bool check_errors, Tensor* info) Tensor THSLinalg_lstsq_none(const Tensor A, const Tensor B, Tensor* residuals, Tensor* rank, Tensor* singular_values) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_lstsq(*A, *B, c10::nullopt, c10::nullopt);) +#else CATCH(res = torch::linalg::lstsq(*A, *B, c10::nullopt, c10::nullopt);) +#endif *residuals = ResultTensor(std::get<1>(res)); *rank = ResultTensor(std::get<2>(res)); *singular_values = ResultTensor(std::get<3>(res)); @@ -141,7 +187,11 @@ Tensor THSLinalg_lstsq_none(const Tensor A, const Tensor B, Tensor* residuals, T Tensor THSLinalg_lstsq_rcond(const Tensor A, const Tensor B, const double rcond, Tensor* residuals, Tensor* rank, Tensor* singular_values) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_lstsq(*A, *B, rcond, c10::nullopt);) +#else CATCH(res = torch::linalg::lstsq(*A, *B, rcond, c10::nullopt);) +#endif *residuals = ResultTensor(std::get<1>(res)); *rank = ResultTensor(std::get<2>(res)); *singular_values = ResultTensor(std::get<3>(res)); @@ -151,7 +201,11 @@ Tensor THSLinalg_lstsq_rcond(const Tensor A, const Tensor B, const double rcond, Tensor THSLinalg_lu(const Tensor A, const bool pivot, Tensor* L, Tensor* U) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_lu(*A, pivot);) +#else CATCH(res = torch::linalg::lu(*A, pivot);) +#endif *L = ResultTensor(std::get<1>(res)); *U = ResultTensor(std::get<2>(res)); return ResultTensor(std::get<0>(res)); @@ -160,7 +214,12 @@ Tensor THSLinalg_lu(const Tensor A, const bool pivot, Tensor* L, Tensor* U) Tensor THSLinalg_lu_factor(const Tensor A, const bool pivot, Tensor* pivots) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_lu_factor(*A, pivot);) +#else CATCH(res = torch::linalg::lu_factor(*A, pivot);) +#endif + *pivots = ResultTensor(std::get<1>(res)); return ResultTensor(std::get<0>(res)); } @@ -190,69 +249,111 @@ Tensor THSLinalg_ldl_solve(const Tensor LD, const Tensor pivots, const Tensor B, Tensor THSLinalg_matrix_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim) { auto dims = c10::ArrayRef(dim, dim_length); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_matrix_norm(*tensor, *ord, dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::matrix_norm(*tensor, *ord, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_matrix_norm_fronuc(const Tensor tensor, const int8_t fronuc, const int64_t* dim, const int dim_length, const bool keepdim) { auto dims = c10::ArrayRef(dim, dim_length); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_matrix_norm(*tensor, (fronuc == 0) ? "fro" : "nuc", dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::matrix_norm(*tensor, (fronuc == 0) ? "fro" : "nuc", dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_vector_norm(const Tensor tensor, const Scalar ord, const int64_t* dim, const int dim_length, const bool keepdim) { auto dims = c10::ArrayRef(dim, dim_length); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_vector_norm(*tensor, *ord, dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::vector_norm(*tensor, *ord, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_matrix_rank(const Tensor tensor, const double atol, const bool has_atol, const double rtol, const bool has_rtol, const bool hermitian) { auto atol_ = has_atol ? atol : c10::optional(); auto rtol_ = has_rtol ? rtol : c10::optional(); - +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_matrix_rank(*tensor, atol_, rtol_, hermitian)) +#else CATCH_TENSOR(torch::linalg::matrix_rank(*tensor, atol_, rtol_, hermitian)) +#endif } Tensor THSLinalg_matrix_rank_tensor(const Tensor tensor, const Tensor atol, const Tensor rtol, const bool hermitian) { const c10::optional atol_ = atol != nullptr ? *atol : c10::optional(); const c10::optional rtol_ = rtol != nullptr ? *rtol : c10::optional(); - +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_matrix_rank(*tensor, atol_, rtol_, hermitian)) +#else CATCH_TENSOR(torch::linalg::matrix_rank(*tensor, atol_, rtol_, hermitian)) +#endif } Tensor THSLinalg_matrix_power(const Tensor tensor, const int64_t n) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_matrix_power(*tensor, n)) +#else CATCH_TENSOR(torch::linalg::matrix_power(*tensor, n)) +#endif } Tensor THSLinalg_multi_dot(const Tensor* tensors, const int length) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_multi_dot(toTensors((torch::Tensor**)tensors, length))) +#else CATCH_TENSOR(torch::linalg::multi_dot(toTensors((torch::Tensor**)tensors, length))) +#endif } Tensor THSLinalg_norm_str(const Tensor tensor, const char* p, const int64_t* dim, const int dim_length, const bool keepdim) { c10::optional dims = (dim == nullptr) ? c10::nullopt : c10::optional(at::ArrayRef(dim, dim_length)); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_norm(*tensor, c10::string_view(p), dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::norm(*tensor, p, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_norm_float(const Tensor tensor, const double p, const int64_t* dim, const int dim_length, const bool keepdim) { c10::optional dims = (dim == nullptr) ? c10::nullopt : c10::optional(at::ArrayRef(dim, dim_length)); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_norm(*tensor, p, dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::norm(*tensor, p, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_norm_int(const Tensor tensor, const int p, const int64_t* dim, const int dim_length, const bool keepdim) { c10::optional dims = (dim == nullptr) ? c10::nullopt : c10::optional(at::ArrayRef(dim, dim_length)); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_norm(*tensor, p, dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::norm(*tensor, p, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_norm_opt(const Tensor tensor, const int64_t* dim, const int dim_length, const bool keepdim) { c10::optional dims = (dim == nullptr) ? c10::nullopt : c10::optional(at::ArrayRef(dim, dim_length)); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_norm(*tensor, c10::nullopt, dims, keepdim, c10::nullopt)) +#else CATCH_TENSOR(torch::linalg::norm(*tensor, c10::nullopt, dims, keepdim, c10::nullopt)) +#endif } Tensor THSLinalg_pinv(const Tensor tensor, const double atol, const bool has_atol, const double rtol, const bool has_rtol, const bool hermitian) @@ -273,7 +374,11 @@ Tensor THSLinalg_pinv_tensor(const Tensor tensor, const Tensor atol, const Tenso Tensor THSLinalg_pinverse(const Tensor tensor, const double rcond, const bool hermitian) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_pinv(*tensor, rcond, hermitian)) +#else CATCH_TENSOR(torch::linalg::pinv(*tensor, rcond, hermitian)) +#endif } Tensor THSLinalg_qr(const Tensor tensor, const char mode, Tensor* R) @@ -295,31 +400,52 @@ Tensor THSLinalg_qr(const Tensor tensor, const char mode, Tensor* R) Tensor THSLinalg_solve(const Tensor tensor, Tensor other, bool left) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_solve(*tensor, *other, left)) +#else CATCH_TENSOR(torch::linalg::solve(*tensor, *other, left)) +#endif + } Tensor THSLinalg_solve_ex(const Tensor tensor, Tensor other, bool left, bool check_errors, Tensor* S) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_solve_ex(*tensor, *other, left, check_errors);); +#else CATCH(res = torch::linalg::solve_ex(*tensor, *other, left, check_errors);); +#endif *S = ResultTensor(std::get<1>(res)); return ResultTensor(std::get<0>(res)); } Tensor THSLinalg_solve_triangular(const Tensor tensor, Tensor other, bool upper, bool left, bool unitriangular) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_solve_triangular(*tensor, *other, upper, left, unitriangular)) +#else CATCH_TENSOR(torch::linalg::solve_triangular(*tensor, *other, upper, left, unitriangular)) +#endif } Tensor THSLinalg_solve_triangular_out(const Tensor tensor, Tensor other, bool upper, bool left, bool unitriangular, Tensor result) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_solve_triangular_out(*result, *tensor, *other, upper, left, unitriangular)) +#else CATCH_TENSOR(torch::linalg::solve_triangular_out(*result, *tensor, *other, upper, left, unitriangular)) +#endif } Tensor THSLinalg_svd(const Tensor tensor, const bool full_matrices, Tensor* S, Tensor* Vh) { std::tuple res; +#if IS_260_OR_NEWER + CATCH(res = torch::linalg_svd(*tensor, full_matrices, c10::nullopt);); +#else CATCH(res = torch::linalg::svd(*tensor, full_matrices, c10::nullopt);); +#endif *S = ResultTensor(std::get<1>(res)); *Vh = ResultTensor(std::get<2>(res)); return ResultTensor(std::get<0>(res)); @@ -327,18 +453,30 @@ Tensor THSLinalg_svd(const Tensor tensor, const bool full_matrices, Tensor* S, T Tensor THSLinalg_svdvals(const Tensor tensor) { +#if IS_260_OR_NEWER + CATCH_TENSOR(res = torch::linalg_svdvals(*tensor, c10::nullopt)) +#else CATCH_TENSOR(res = torch::linalg::svdvals(*tensor, c10::nullopt)) +#endif } Tensor THSLinalg_tensorinv(const Tensor tensor, const int64_t ind) { +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_tensorinv(*tensor, ind)) +#else CATCH_TENSOR(torch::linalg::tensorinv(*tensor, ind)) +#endif } Tensor THSLinalg_tensorsolve(const Tensor tensor, Tensor other, const int64_t* dim, const int dim_length) { c10::optional dims = (dim == nullptr) ? c10::nullopt : c10::optional(at::ArrayRef(dim, dim_length)); +#if IS_260_OR_NEWER + CATCH_TENSOR(torch::linalg_tensorsolve(*tensor, *other, dims)) +#else CATCH_TENSOR(torch::linalg::tensorsolve(*tensor, *other, dims)) +#endif } Tensor THSLinalg_vander(const Tensor tensor, const int64_t N) diff --git a/src/Native/build.proj b/src/Native/build.proj index 6dbbc70a9..d2499c9a0 100644 --- a/src/Native/build.proj +++ b/src/Native/build.proj @@ -31,7 +31,6 @@ Condition="'$(OS)' != 'Windows_NT'"> - --stripsymbols --configuration $(NativeConfiguration) --arch $(TargetArchitecture) $(StripArgs) --libtorchpath $(LibTorchCmakePath) @@ -44,9 +43,13 @@ - + $(NativeConfiguration) $(TargetArchitecture) --libtorchpath $(LibTorchCmakePath) + + + $(NativeConfiguration) $(TargetArchitecture) --libtorchpath $(CustomLibTorchFullPath) + diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj index 14c95995f..73c8c6069 100644 --- a/src/TorchSharp/TorchSharp.csproj +++ b/src/TorchSharp/TorchSharp.csproj @@ -76,13 +76,14 @@ - + - + + From 8f35385548c7a43d47b0dc011ea3b92ddfd98e8e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Wed, 26 Mar 2025 12:13:31 -0300 Subject: [PATCH 42/43] some update --- .../FileRestitcher.Tests/FileRestitcher.Tests.csproj | 3 +++ .../TorchSharpTest.WithCudaBinaries.csproj | 2 ++ test/TorchSharpTest/TorchSharpTest.csproj | 2 ++ 3 files changed, 7 insertions(+) diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj index 39dc54a1b..bf0f2412d 100644 --- a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj +++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj @@ -13,6 +13,9 @@ + + + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj index 6f7a0ed24..faff588b4 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj +++ b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj @@ -144,6 +144,8 @@ + + diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index 065301040..39b4b5128 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -118,6 +118,8 @@ + + From 137779e19fc1f089be2daf4a3a1c6d7bd2a4317a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Thu, 11 Sep 2025 17:47:24 -0300 Subject: [PATCH 43/43] com --- nuget.config | 2 +- src/Examples.Utils/Examples.Utils.csproj | 3 ++- src/Examples/Examples.csproj | 2 ++ src/FSharp.Examples/FSharp.Examples.fsproj | 3 +++ src/Native/LibTorchSharp/THSNN.h | 2 +- src/TorchSharp/NN/Linear.cs | 15 +++++++-------- .../TorchSharpTest.WithCudaBinaries.csproj | 1 + 7 files changed, 17 insertions(+), 11 deletions(-) diff --git a/nuget.config b/nuget.config index ef5d6f41e..eb0286a2c 100644 --- a/nuget.config +++ b/nuget.config @@ -1,4 +1,4 @@ - F:\NugetPackages + D:\NugetPackages \ No newline at end of file diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 6fa145333..6d3855545 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -21,9 +21,10 @@ + - + diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj index 9b7a980b9..0fcec0611 100644 --- a/src/Examples/Examples.csproj +++ b/src/Examples/Examples.csproj @@ -26,9 +26,11 @@ + + diff --git a/src/FSharp.Examples/FSharp.Examples.fsproj b/src/FSharp.Examples/FSharp.Examples.fsproj index fe3c34a15..47db64db5 100644 --- a/src/FSharp.Examples/FSharp.Examples.fsproj +++ b/src/FSharp.Examples/FSharp.Examples.fsproj @@ -25,7 +25,10 @@ + + + diff --git a/src/Native/LibTorchSharp/THSNN.h b/src/Native/LibTorchSharp/THSNN.h index 2bd59af29..d86b45157 100644 --- a/src/Native/LibTorchSharp/THSNN.h +++ b/src/Native/LibTorchSharp/THSNN.h @@ -177,7 +177,7 @@ EXPORT_API(void) THSNN_ConvTranspose3d_set_bias(const NNModule module, const // Normalization -EXPORT_API(Tensor) THSNN_normalize(const Tensor input, const double p, const int64_t dim, const double eps); +//EXPORT_API(Tensor) THSNN_normalize(const Tensor input, const double p, const int64_t dim, const double eps); EXPORT_API(Tensor) THSNN_batch_norm(const Tensor input, const Tensor running_mean, const Tensor running_var, const Tensor weight, const Tensor bias, const bool training, const double momentum, const double eps); EXPORT_API(Tensor) THSNN_group_norm(const Tensor input, int64_t num_groups, const Tensor weight, const Tensor bias, const double eps); EXPORT_API(Tensor) THSNN_instance_norm(const Tensor input, const Tensor running_mean, const Tensor running_var, const Tensor weight, const Tensor bias, const bool use_input_stats, const double momentum, const double eps); diff --git a/src/TorchSharp/NN/Linear.cs b/src/TorchSharp/NN/Linear.cs index bb5f6c9f3..fc9bb6896 100644 --- a/src/TorchSharp/NN/Linear.cs +++ b/src/TorchSharp/NN/Linear.cs @@ -25,7 +25,7 @@ public LinearInfo(long inFeatures, long outFeatures) } public sealed class Linear : torch.nn.Module { - public LinearInfo linearInfo; + public LinearInfo? linearInfo; /*internal Linear(IntPtr handle, IntPtr boxedHandle) : base(handle, boxedHandle) { }*/ @@ -72,7 +72,7 @@ public Parameter? bias { set { _bias?.Dispose(); _bias = value?.DetachFromDisposeScope() as Parameter; - ConditionallyRegisterParameter(BiasComponentName, _bias); + ConditionallyRegisterParameter("BiasComponentName", _bias); } } @@ -83,7 +83,7 @@ public Parameter weight { if (value.Handle != _weight?.Handle) { _weight?.Dispose(); _weight = (value.DetachFromDisposeScope() as Parameter)!; - ConditionallyRegisterParameter(WeightComponentName, _weight); + ConditionallyRegisterParameter("WeightComponentName", _weight); } } } @@ -121,9 +121,9 @@ protected internal override nn.Module _to(ScalarType dtype, bool non_blocking) { } - [ComponentName(Name = BiasComponentName)] + [ComponentName(Name = "BiasComponentName")] private Parameter? _bias; - [ComponentName(Name = WeightComponentName)] + [ComponentName(Name = "WeightComponentName")] private Parameter? _weight; public long in_features { get; set; } @@ -149,9 +149,8 @@ public static Linear Linear(long inputSize, long outputSize, bool hasBias = true { return new Linear(inputSize, outputSize, hasBias, device, dtype); } - - return new Linear(res, boxedHandle, inputSize, outputSize).MoveModule(device, dtype); - } + /*return new Linear(res, boxedHandle, inputSize, outputSize).MoveModule(device, dtype); + }*/ public static partial class functional { diff --git a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj index c3c352238..47bb510a7 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj +++ b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj @@ -150,6 +150,7 @@ +