Skip to content

Commit 9d5743a

Browse files
author
Shahab Moradi
authored
Added samples for loading text (#3793)
* Added samples for loading text * Simplification
1 parent 2ddb9ee commit 9d5743a

File tree

3 files changed

+91
-0
lines changed

3 files changed

+91
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Text;
5+
using Microsoft.ML;
6+
using Microsoft.ML.Data;
7+
8+
namespace Samples.Dynamic.DataOperations
9+
{
10+
/// <summary>
/// Sample showing the different ways to load text data with
/// <see cref="TextLoader"/>: a single path, several explicit paths, and a
/// wildcard path.
/// </summary>
public static class LoadingText
{
    // This example shows all the ways to load data with TextLoader.
    public static void Example()
    {
        // Create 5 data files to illustrate different loading methods.
        var dataFiles = new List<string>();
        var random = new Random();
        var dataDirectoryName = "DataDir";
        Directory.CreateDirectory(dataDirectoryName);
        for (int i = 0; i < 5; i++)
        {
            var fileName = Path.Combine(dataDirectoryName, $"Data_{i}.csv");
            dataFiles.Add(fileName);
            using (var fs = File.CreateText(fileName))
            {
                // Write random lines without header.
                for (int line = 0; line < 10; line++)
                {
                    // Format with the invariant culture so the decimal
                    // separator is always '.', regardless of the regional
                    // settings of the machine running the sample.
                    fs.WriteLine(random.NextDouble().ToString(
                        System.Globalization.CultureInfo.InvariantCulture));
                }
            }
        }

        // Create a TextLoader that reads column 0 of each line as a Single
        // named "RandomFeature"; the files have no header row.
        var mlContext = new MLContext();
        var loader = mlContext.Data.CreateTextLoader(
            columns: new[]
            {
                new TextLoader.Column("RandomFeature", DataKind.Single, 0)
            },
            hasHeader: false
        );

        // Load a single file from path.
        var singleFileData = loader.Load(dataFiles[0]);
        PrintRowCount(singleFileData);

        // Expected Output:
        // 10


        // Load all 5 files from path.
        var multipleFilesData = loader.Load(dataFiles.ToArray());
        PrintRowCount(multipleFilesData);

        // Expected Output:
        // 50


        // Load all files using path wildcard.
        // NOTE(review): the expected count below presumably assumes the
        // directory holds only the five files created above - confirm that
        // no other files are left in "DataDir" from other runs.
        var multipleFilesWildcardData =
            loader.Load(Path.Combine(dataDirectoryName, "*"));
        PrintRowCount(multipleFilesWildcardData);

        // Expected Output:
        // 50
    }

    /// <summary>
    /// Writes the number of rows in <paramref name="idv"/> to the console.
    /// </summary>
    /// <param name="idv">The data view whose rows are counted.</param>
    private static void PrintRowCount(IDataView idv)
    {
        // IDataView is lazy so we need to iterate through it
        // to get the number of rows.
        long rowCount = 0;
        using (var cursor = idv.GetRowCursor(idv.Schema))
        {
            while (cursor.MoveNext())
            {
                rowCount++;
            }
        }

        Console.WriteLine(rowCount);
    }
}
77+
}

src/Microsoft.ML.Data/DataLoadSave/DataLoaderExtensions.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ public static class DataLoaderExtensions
1717
/// </summary>
1818
/// <param name="loader">The loader to use.</param>
1919
/// <param name="path">One or more paths from which to load data.</param>
20+
/// <example>
21+
/// <format type="text/markdown">
22+
/// <![CDATA[
23+
/// [!code-csharp[Load](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadingText.cs)]
24+
/// ]]>
25+
/// </format>
26+
/// </example>
2027
public static IDataView Load(this IDataLoader<IMultiStreamSource> loader, params string[] path)
2128
=> loader.Load(new MultiFileSource(path));
2229
}

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,6 +1432,13 @@ void ICanSaveModel.Save(ModelSaveContext ctx)
14321432
/// Loads data from <paramref name="source"/> into an <see cref="IDataView"/>.
14331433
/// </summary>
14341434
/// <param name="source">The source from which to load data.</param>
1435+
/// <example>
1436+
/// <format type="text/markdown">
1437+
/// <![CDATA[
1438+
/// [!code-csharp[Load](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadingText.cs)]
1439+
/// ]]>
1440+
/// </format>
1441+
/// </example>
14351442
public IDataView Load(IMultiStreamSource source) => new BoundLoader(this, source);
14361443

14371444
internal static TextLoader CreateTextLoader<TInput>(IHostEnvironment host,

0 commit comments

Comments
 (0)