Skip to content

Commit 34d970f

Browse files
harshithapvcodemzs
authored and committed
Buffer re-use using ArrayPool and a few more checks (#4293)
* commit b468adb Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 21:19:57 2019 -0700 Fixed a bug in the unit test for image classification commit 30aa4d1 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 20:43:17 2019 -0700 addressed Zeeshan's comments commit 3d4f5fe Merge: 0fbd3d2 718a238 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 20:41:21 2019 -0700 Merge branch 'master' of https://github.com/dotnet/machinelearning into ImageClassificationVBuf commit 0fbd3d2 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 17:10:49 2019 -0700 Changed type to useImageType in LoadImages(). Changed appropriate variable names in ImageClassificationTransform.cs commit 2417888 Merge: 3ad26b4 4944be7 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 16:55:25 2019 -0700 Merge branch 'master' of https://github.com/dotnet/machinelearning into ImageClassificationVBuf commit 3ad26b4 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 15:59:06 2019 -0700 Added buffer re-use while reading the image in netstandard 2.0. Addressed Eric's comments. Changed ImageLoadingTransformer to take a bool type instead of a DataViewType to make it user friendly. (type = true means we are using VBuffer<byte> , type = false means we are using ImageDataViewType) commit c67dd08 Author: Harshitha Parnandi Venkata <[email protected]> Date: Tue Oct 1 09:50:52 2019 -0700 Added functionality to load images as VBuffer<byte> in ImageLoader. If no DataViewType options are provided it defaults to loading images as ImageDataViewType. Made LoadImages a part of the sample in ResnetV2101TransferLearningTrainTestSplit.cs. Addressed some of the comments from Zeeshan and Yael. Added a unit test for testing the API. Added TargetFrameworks to get cross platform functionality for System.IO.Stream.Read(Span<Byte>) which doesn't work for netstandard2.0. 
commit ae2ac0d Author: Harshitha Parnandi Venkata <[email protected]> Date: Wed Sep 25 14:49:41 2019 -0700 Added some edits to address Yael's comments commit b1e5739 Author: Harshitha Parnandi Venkata <[email protected]> Date: Wed Sep 25 13:24:03 2019 -0700 Added unit test for the change commit acf985d Author: Harshitha Parnandi Venkata <[email protected]> Date: Mon Sep 23 10:39:07 2019 -0700 Changed the calling function back to how it was in master commit b80f7ad Author: Harshitha Parnandi Venkata <[email protected]> Date: Mon Sep 23 10:20:31 2019 -0700 Added a few optimizations to re-use buffers and thereby improve performance. commit b106ae0 Author: Harshitha Parnandi Venkata <[email protected]> Date: Thu Sep 19 14:07:15 2019 -0700 Changed Image Classification API to take in a VBuffer<byte> type instead of ImagePath. * fixed merge conflicts * Fixed some unit tests that were failing after the merge. Addressed a few comments. * Fixed TensorFlow unit tests * Changed the buffer re-use logic for ReadToEnd * Changed ReadToEnd function to read using span instead of unsafe blocks * removed unnecessary commits * Added version check with backward compatibility. Addressed Zeeshan's comments. * Fixed tab and synced to master * Addressed comments. Checkpoint commit * changed the solution files and version check in ImageLoader.cs * Added changes for StableApi.csproj * Added ArrayPool for buffer re-use * Handled the case when MakeGetter src is empty: we need to send an empty VBuffer. Another check for handling empty images. * Addressed comments
1 parent ee743f7 commit 34d970f

File tree

6 files changed

+53
-36
lines changed

6 files changed

+53
-36
lines changed

build/Dependencies.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
<!-- Core Product Dependencies -->
44
<PropertyGroup>
55
<NewtonsoftJsonPackageVersion>10.0.3</NewtonsoftJsonPackageVersion>
6+
<SystemBuffersVersion>4.4.0</SystemBuffersVersion>
67
<SystemCodeDomPackageVersion>4.4.0</SystemCodeDomPackageVersion>
78
<SystemCollectionsImmutableVersion>1.5.0</SystemCollectionsImmutableVersion>
89
<SystemMemoryVersion>4.5.1</SystemMemoryVersion>

docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
using System;
33
using System.Collections.Generic;
44
using System.IO;
5+
using System.IO.Compression;
6+
using System.Linq;
7+
using System.Net;
8+
using System.Threading;
59
using System.Threading.Tasks;
610
using Microsoft.ML;
11+
using Microsoft.ML.Data;
712
using Microsoft.ML.Transforms;
813
using static Microsoft.ML.DataOperationsCatalog;
9-
using System.Linq;
10-
using Microsoft.ML.Data;
11-
using System.IO.Compression;
12-
using System.Threading;
13-
using System.Net;
1414

1515
namespace Samples.Dynamic
1616
{
@@ -131,9 +131,6 @@ private static void TrySinglePrediction(string imagesForPredictions,
131131
IEnumerable<ImageData> testImages = LoadImagesFromDirectory(
132132
imagesForPredictions, false);
133133

134-
byte[] imgBytes = File.ReadAllBytes(testImages.First().ImagePath);
135-
VBuffer<Byte> imgData = new VBuffer<byte>(imgBytes.Length, imgBytes);
136-
137134
ImageData imageToPredict = new ImageData
138135
{
139136
ImagePath = testImages.First().ImagePath
@@ -169,7 +166,7 @@ private static void EvaluateModel(MLContext mlContext,
169166
Console.WriteLine("Predicting and Evaluation took: " +
170167
(elapsed2Ms / 1000).ToString() + " seconds");
171168
}
172-
169+
173170
public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
174171
bool useFolderNameAsLabel = true)
175172
{
@@ -194,7 +191,7 @@ public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
194191
}
195192
}
196193
}
197-
194+
198195
yield return new ImageData()
199196
{
200197
ImagePath = file,

pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
<ItemGroup>
99
<ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" />
10+
<PackageReference Include="System.Buffers" Version="$(SystemBuffersVersion)" />
1011
<PackageReference Include="System.Drawing.Common" Version="$(SystemDrawingCommonPackageVersion)" />
1112
</ItemGroup>
1213
</Project>

src/Microsoft.ML.Dnn/ImageClassificationTransform.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
275275
{
276276
labelGetter(ref label);
277277
imageGetter(ref image);
278+
if (image.Length <= 0)
279+
continue; //Empty Image
280+
278281
var imageTensor = imageProcessor.ProcessImage(image);
279282
runner.AddInput(imageTensor, 0);
280283
var featurizedImage = runner.Run()[0]; // Reuse memory

src/Microsoft.ML.ImageAnalytics/ImageLoader.cs

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Buffers;
67
using System.Collections.Concurrent;
78
using System.Collections.Generic;
89
using System.Diagnostics.Contracts;
@@ -189,14 +190,12 @@ private sealed class Mapper : OneToOneMapperBase
189190
{
190191
private readonly ImageLoadingTransformer _parent;
191192
private readonly bool _type;
192-
private readonly ConcurrentBag<byte[]> _bufferPool;
193193

194194
public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, bool type)
195195
: base(parent.Host.Register(nameof(Mapper)), parent, inputSchema)
196196
{
197197
_type = type;
198198
_parent = parent;
199-
_bufferPool = new ConcurrentBag<byte[]>();
200199
}
201200

202201
protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func<int, bool> activeOutput, out Action disposer)
@@ -255,31 +254,30 @@ private Delegate MakeGetterVectorDataViewByteType(DataViewRow input, int iinfo,
255254
ValueGetter<VBuffer<byte>> del =
256255
(ref VBuffer<byte> dst) =>
257256
{
258-
byte[] buffer = null;
259-
if (!_bufferPool.TryTake(out buffer))
260-
{
261-
buffer = new byte[4096];
262-
}
263-
264257
getSrc(ref src);
265-
266258
if (src.Length > 0)
267259
{
268260
string path = src.ToString();
269261
if (!string.IsNullOrWhiteSpace(_parent.ImageFolder))
270262
path = Path.Combine(_parent.ImageFolder, path);
271-
if (!TryLoadDataIntoBuffer(path, ref dst, buffer))
272-
throw Host.Except($"Failed to load image {src.ToString()}.");
263+
if (!TryLoadDataIntoBuffer(path, ref dst))
264+
{
265+
var editor = VBufferEditor.Create(ref dst, 0); //Empty Image
266+
dst = editor.Commit();
267+
}
268+
}
269+
else
270+
{
271+
var editor = VBufferEditor.Create(ref dst, 0 );
272+
dst = editor.Commit();
273273
}
274274

275-
Contract.Assert(buffer != null);
276-
_bufferPool.Add(buffer);
277275
};
278276

279277
return del;
280278
}
281279

282-
private static bool TryLoadDataIntoBuffer(string path, ref VBuffer<byte> imgData, byte[] readBuffer)
280+
private static bool TryLoadDataIntoBuffer(string path, ref VBuffer<byte> imgData)
283281
{
284282
int count = -1;
285283
int bytesread = -1;
@@ -302,28 +300,43 @@ private static bool TryLoadDataIntoBuffer(string path, ref VBuffer<byte> imgData
302300

303301
count = (int)fileLength;
304302
var editor = VBufferEditor.Create(ref imgData, count);
305-
bytesread = ReadToEnd(fs, editor.Values, readBuffer);
303+
bytesread = ReadToEnd(fs, editor.Values);
306304
imgData = editor.Commit();
307305
return (count > 0);
308306
}
309307

310308
}
311309

312-
private static int ReadToEnd(System.IO.Stream stream, Span<byte> bufferSpan, byte[] readBuffer)
310+
private static int ReadToEnd(System.IO.Stream stream, Span<byte> bufferSpan)
313311
{
314312
int totalBytesRead = 0;
315313
int bytesRead;
316-
var readBufferSpan = readBuffer.AsSpan();
317-
var srcSpan = readBufferSpan;
318-
while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0)
314+
315+
int chunksize = 4096; // Most optimal size for buffer, friendly to CPU's L1 cache
316+
var bufferPool = ArrayPool<byte>.Shared;
317+
byte[] readBuffer = bufferPool.Rent(chunksize);
318+
319+
try
319320
{
320-
if (bytesRead != srcSpan.Length)
321-
srcSpan = readBufferSpan.Slice(0, bytesRead);
322-
var dstSpan = bufferSpan.Slice(totalBytesRead, bytesRead);
323-
Contract.Assert(srcSpan.Length == dstSpan.Length);
324-
srcSpan.CopyTo(dstSpan);
325-
totalBytesRead += bytesRead;
321+
var readBufferSpan = readBuffer.AsSpan();
322+
var srcSpan = readBufferSpan;
323+
while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0)
324+
{
325+
if (bytesRead != srcSpan.Length)
326+
srcSpan = readBufferSpan.Slice(0, bytesRead);
327+
var dstSpan = bufferSpan.Slice(totalBytesRead, bytesRead);
328+
Contract.Assert(srcSpan.Length == dstSpan.Length);
329+
srcSpan.CopyTo(dstSpan);
330+
totalBytesRead += bytesRead;
331+
}
326332
}
333+
finally
334+
{
335+
336+
// don't use the reference to the buffer after returning it!
337+
bufferPool.Return(readBuffer);
338+
}
339+
327340
return totalBytesRead;
328341
}
329342

src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
</PropertyGroup>
77

88
<ItemGroup>
9+
<PackageReference Include="System.Buffers" Version="$(SystemBuffersVersion)" />
910
<PackageReference Include="System.Drawing.Common" Version="$(SystemDrawingCommonPackageVersion)" />
1011
</ItemGroup>
1112

1213
<ItemGroup>
1314
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
1415
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
15-
</ItemGroup>
16+
</ItemGroup>
17+
1618
</Project>

0 commit comments

Comments
 (0)