Skip to content

Commit af38932

Browse files
committed
Adds support for entity hierarchies in compare
This change allows users to declaratively specify hierarchical entities in their expected utterance results. For example, a user may declare the following:

```json
{
  "text": "Order a pepperoni pizza",
  "intent": "OrderFood",
  "entities": [
    {
      "entity": "FoodItem",
      "startPos": 8,
      "endPos": 22,
      "children": [
        { "entity": "Topping", "startPos": 8, "endPos": 16 },
        { "entity": "FoodType", "startPos": 18, "endPos": 22 }
      ]
    }
  ]
}
```

This would result in 3 test cases: one for the parent entity (the "FoodItem" entity), and one additional test case for each of the two nested entities ("FoodItem::Topping" and "FoodItem::FoodType"). Child entity type names are prefixed by their parent entity type names in the format `parentType::childType`. As such, the recursive entity parsing for the LUIS V3 provider has been updated to use this convention. Fixes #335
1 parent 89f296e commit af38932

File tree

11 files changed

+255
-83
lines changed

11 files changed

+255
-83
lines changed

docs/Analyze.md

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -125,42 +125,6 @@ thresholds:
125125
threshold: 0.1
126126
```
127127
128-
#### Example
129-
130-
While it's useful to set up the performance regression testing in a CI environment, you can also run the tools locally. Here's an end-to-end example for running a performance regression test.
131-
132-
The assumptions are that you have the following:
133-
1. An existing NLU endpoint (in this case, for LUIS).
134-
2. Environment variables or app settings pointing to the correct LUIS application to query and update.
135-
3. A set of changes to the NLU training utterances to evaluate (`utterances.json`).
136-
4. A test set that can be used to evaluate the endpoint (`tests.json`).
137-
138-
Here is the end-to-end example:
139-
```sh
140-
# Get predictions from the current endpoint
141-
dotnet nlu test -s luis -u tests.json -o baselineResults.json
142-
# Generate the confusion matrix statistics for the results
143-
dotnet nlu compare -e tests.json -a baselineResults.json -o baseline
144-
# Train a new version of the model
145-
dotnet nlu train -s luis -u utterances.json -a
146-
# Get predictions from the new endpoint
147-
dotnet nlu test -s luis -u tests.json -o latestResults.json
148-
# Create a regression threshold for the overall intent F1 score
149-
echo -e "thresholds:\n\
150-
- type: intent\n\
151-
- threshold: 0.1\n" > \
152-
thresholds.yml
153-
# Generate the confusion matrix statistics for the results and validate regression thresholds
154-
dotnet nlu compare \
155-
-e tests.json \
156-
-a latestResults.json \
157-
-o latest \
158-
-b baseline/statistics.json \
159-
-t thresholds.yml
160-
```
161-
162-
If the F<sub>1</sub> score for overall intents has not dropped more than 0.1, the exit code for the final command will be 0, otherwise it will be 1 (or, more generally, the number of regression threshold tests failed).
163-
164128
### Unit Test Mode
165129
166130
Unit test mode can be enabled using the [`--unit-test`](#-u---unit-test) flag. This flag configures the command to return a non-zero exit code if any false positive or false negative results are detected. When in unit test mode, false positive results for entities are only generated for entity types included in the `strictEntities` configuration from `--test-settings` or the labeled test utterance. Similarly, false positive results will only be generated for intents when an explicit negative intent (e.g., "None") is included in the expected results. For example:

src/NLU.DevOps.Core.Tests/JsonLabeledUtteranceConverterTests.cs

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
namespace NLU.DevOps.Core.Tests
55
{
66
using System;
7-
using System.Collections.Generic;
87
using System.Linq;
98
using FluentAssertions;
9+
using FluentAssertions.Json;
1010
using Newtonsoft.Json;
1111
using Newtonsoft.Json.Linq;
1212
using Newtonsoft.Json.Serialization;
@@ -87,6 +87,63 @@ public static void ConvertsUtteranceWithStartPosAndEndPosEntity()
8787
actual.Entities[0].MatchIndex.Should().Be(2);
8888
}
8989

90+
[Test]
91+
public static void ConvertsUtteranceWithNestedEntities()
92+
{
93+
var text = "foo bar baz";
94+
95+
var leafEntity = new JObject
96+
{
97+
{ "entity", "baz" },
98+
{ "startPos", 8 },
99+
{ "endPos", 10 },
100+
{ "foo", new JArray(42) },
101+
{ "bar", null },
102+
{ "baz", 42 },
103+
{ "qux", JValue.CreateUndefined() },
104+
};
105+
106+
var midEntity = new JObject
107+
{
108+
{ "entityType", "bar" },
109+
{ "matchText", "bar baz" },
110+
{ "children", new JArray { leafEntity } },
111+
{ "entityValue", new JObject { { "bar", "qux" } } },
112+
};
113+
114+
var entity = new JObject
115+
{
116+
{ "entity", "foo" },
117+
{ "startPos", 0 },
118+
{ "endPos", 10 },
119+
{ "children", new JArray { midEntity } },
120+
};
121+
122+
var json = new JObject
123+
{
124+
{ "text", text },
125+
{ "entities", new JArray { entity } },
126+
};
127+
128+
var serializer = CreateSerializer();
129+
var actual = json.ToObject<JsonLabeledUtterance>(serializer);
130+
actual.Text.Should().Be(text);
131+
actual.Entities.Count.Should().Be(3);
132+
actual.Entities[0].EntityType.Should().Be("foo");
133+
actual.Entities[0].MatchText.Should().Be(text);
134+
actual.Entities[1].EntityType.Should().Be("foo::bar");
135+
actual.Entities[1].MatchText.Should().Be("bar baz");
136+
actual.Entities[1].EntityValue.Should().BeEquivalentTo(new JObject { { "bar", "qux" } });
137+
actual.Entities[2].EntityType.Should().Be("foo::bar::baz");
138+
actual.Entities[2].MatchText.Should().Be("baz");
139+
140+
var additionalProperties = actual.Entities[2].As<Entity>().AdditionalProperties;
141+
additionalProperties["foo"].As<JToken>().Should().BeEquivalentTo(new JArray(42));
142+
additionalProperties["bar"].Should().BeNull();
143+
additionalProperties["baz"].Should().Be(42);
144+
additionalProperties["qux"].Should().BeNull();
145+
}
146+
90147
private static JsonSerializer CreateSerializer()
91148
{
92149
var serializer = JsonSerializer.CreateDefault();

src/NLU.DevOps.Core.Tests/NLU.DevOps.Core.Tests.csproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
<PackageReference Include="nunit" Version="3.12.0" />
1616
<PackageReference Include="NUnit3TestAdapter" Version="3.13.0" />
1717
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.2.0" />
18-
<PackageReference Include="FluentAssertions" Version="5.7.0" />
18+
<PackageReference Include="FluentAssertions" Version="5.5.3" />
19+
<PackageReference Include="FluentAssertions.Json" Version="5.0.0" />
1920
</ItemGroup>
2021

2122
<ItemGroup>

src/NLU.DevOps.Core/EntityConverter.cs

Lines changed: 100 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
namespace NLU.DevOps.Core
55
{
66
using System;
7+
using System.Collections.Generic;
8+
using System.Diagnostics;
79
using Newtonsoft.Json;
810
using Newtonsoft.Json.Linq;
911

@@ -16,42 +18,33 @@ public EntityConverter(string utterance)
1618

1719
private string Utterance { get; }
1820

21+
private string Prefix { get; set; } = string.Empty;
22+
1923
public override Entity ReadJson(JsonReader reader, Type objectType, Entity existingValue, bool hasExistingValue, JsonSerializer serializer)
2024
{
25+
Debug.Assert(!hasExistingValue, "Entity instance can only be constructor initialized.");
26+
2127
var jsonObject = JObject.Load(reader);
28+
return typeof(HierarchicalEntity).IsAssignableFrom(objectType)
29+
? this.ReadHierarchicalEntity(jsonObject, serializer)
30+
: this.ReadEntity(jsonObject, objectType, serializer);
31+
}
32+
33+
public override void WriteJson(JsonWriter writer, Entity value, JsonSerializer serializer)
34+
{
35+
throw new NotImplementedException();
36+
}
37+
38+
private Entity ReadEntity(JObject jsonObject, Type objectType, JsonSerializer serializer)
39+
{
2240
var matchText = jsonObject.Value<string>("matchText");
41+
var matchIndex = jsonObject.Value<int>("matchIndex");
2342
var startPosOrNull = jsonObject.Value<int?>("startPos");
2443
var endPosOrNull = jsonObject.Value<int?>("endPos");
25-
if (matchText == null && startPosOrNull != null && endPosOrNull != null)
44+
if (matchText == null && startPosOrNull.HasValue && endPosOrNull.HasValue)
2645
{
27-
var startPos = startPosOrNull.Value;
28-
var endPos = endPosOrNull.Value;
29-
var length = endPos - startPos + 1;
30-
if (!this.IsValid(startPos, endPos))
31-
{
32-
throw new InvalidOperationException(
33-
$"Invalid start position '{startPos}' or end position '{endPos}' for utterance '{this.Utterance}'.");
34-
}
35-
36-
matchText = this.Utterance.Substring(startPos, length);
46+
(matchText, matchIndex) = this.GetMatchInfo(startPosOrNull.Value, endPosOrNull.Value);
3747
jsonObject.Add("matchText", matchText);
38-
var matchIndex = 0;
39-
var currentPos = 0;
40-
while (true)
41-
{
42-
currentPos = this.Utterance.IndexOf(matchText, currentPos, StringComparison.InvariantCulture);
43-
44-
// Because 'matchText' is derived from the utterance from 'startPos' and 'endPos',
45-
// we are guaranteed to find a match at index 'startPos'.
46-
if (currentPos == startPos)
47-
{
48-
break;
49-
}
50-
51-
currentPos += length;
52-
matchIndex++;
53-
}
54-
5548
jsonObject.Add("matchIndex", matchIndex);
5649
jsonObject.Remove("startPos");
5750
jsonObject.Remove("endPos");
@@ -76,9 +69,86 @@ public override Entity ReadJson(JsonReader reader, Type objectType, Entity exist
7669
}
7770
}
7871

79-
public override void WriteJson(JsonWriter writer, Entity value, JsonSerializer serializer)
72+
private HierarchicalEntity ReadHierarchicalEntity(JObject jsonObject, JsonSerializer serializer)
8073
{
81-
throw new NotImplementedException();
74+
var matchText = jsonObject.Value<string>("matchText");
75+
var matchIndex = jsonObject.Value<int>("matchIndex");
76+
var startPosOrNull = jsonObject.Value<int?>("startPos");
77+
var endPosOrNull = jsonObject.Value<int?>("endPos");
78+
if (matchText == null && startPosOrNull.HasValue && endPosOrNull.HasValue)
79+
{
80+
(matchText, matchIndex) = this.GetMatchInfo(startPosOrNull.Value, endPosOrNull.Value);
81+
}
82+
83+
var entityType = jsonObject.Value<string>("entityType") ?? jsonObject.Value<string>("entity");
84+
var childrenJson = jsonObject["children"];
85+
var children = default(IEnumerable<HierarchicalEntity>);
86+
if (childrenJson != null)
87+
{
88+
var prefix = $"{entityType}::";
89+
this.Prefix += prefix;
90+
try
91+
{
92+
children = childrenJson.ToObject<IEnumerable<HierarchicalEntity>>(serializer);
93+
}
94+
finally
95+
{
96+
this.Prefix = this.Prefix.Substring(0, this.Prefix.Length - prefix.Length);
97+
}
98+
}
99+
100+
var entity = new HierarchicalEntity($"{this.Prefix}{entityType}", jsonObject["entityValue"], matchText, matchIndex, children);
101+
foreach (var property in jsonObject)
102+
{
103+
switch (property.Key)
104+
{
105+
case "children":
106+
case "endPos":
107+
case "entity":
108+
case "entityType":
109+
case "entityValue":
110+
case "matchText":
111+
case "matchIndex":
112+
case "startPos":
113+
break;
114+
default:
115+
var value = property.Value is JValue jsonValue ? jsonValue.Value : property.Value;
116+
entity.AdditionalProperties.Add(property.Key, value);
117+
break;
118+
}
119+
}
120+
121+
return entity;
122+
}
123+
124+
private Tuple<string, int> GetMatchInfo(int startPos, int endPos)
125+
{
126+
if (!this.IsValid(startPos, endPos))
127+
{
128+
throw new InvalidOperationException(
129+
$"Invalid start position '{startPos}' or end position '{endPos}' for utterance '{this.Utterance}'.");
130+
}
131+
132+
var length = endPos - startPos + 1;
133+
var matchText = this.Utterance.Substring(startPos, length);
134+
var matchIndex = 0;
135+
var currentPos = 0;
136+
while (true)
137+
{
138+
currentPos = this.Utterance.IndexOf(matchText, currentPos, StringComparison.InvariantCulture);
139+
140+
// Because 'matchText' is derived from the utterance from 'startPos' and 'endPos',
141+
// we are guaranteed to find a match at index 'startPos'.
142+
if (currentPos == startPos)
143+
{
144+
break;
145+
}
146+
147+
currentPos += length;
148+
matchIndex++;
149+
}
150+
151+
return Tuple.Create(matchText, matchIndex);
82152
}
83153

84154
private bool IsValid(int startPos, int endPos)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
namespace NLU.DevOps.Core
5+
{
6+
using System.Collections.Generic;
7+
using Newtonsoft.Json.Linq;
8+
9+
/// <summary>
10+
/// Entity appearing in utterance that may contain nested child entities.
11+
/// </summary>
12+
public sealed class HierarchicalEntity : Entity, IHierarchicalEntity
13+
{
14+
/// <summary>
15+
/// Initializes a new instance of the <see cref="HierarchicalEntity"/> class.
16+
/// </summary>
17+
/// <param name="entityType">Entity type name.</param>
18+
/// <param name="entityValue">Entity value, generally a canonical form of the entity.</param>
19+
/// <param name="matchText">Matching text in the utterance.</param>
20+
/// <param name="matchIndex">Occurrence index of matching token in the utterance.</param>
21+
/// <param name="children">Children entities.</param>
22+
public HierarchicalEntity(string entityType, JToken entityValue, string matchText, int matchIndex, IEnumerable<HierarchicalEntity> children)
23+
: base(entityType, entityValue, matchText, matchIndex)
24+
{
25+
this.Children = children;
26+
}
27+
28+
/// <inheritdoc />
29+
public IEnumerable<IHierarchicalEntity> Children { get; }
30+
}
31+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
namespace NLU.DevOps.Core
5+
{
6+
using System.Collections.Generic;
7+
using Models;
8+
9+
/// <summary>
10+
/// Entity with nested children.
11+
/// </summary>
12+
public interface IHierarchicalEntity : IEntity
13+
{
14+
/// <summary>
15+
/// Gets the child entities.
16+
/// </summary>
17+
IEnumerable<IHierarchicalEntity> Children { get; }
18+
}
19+
}

src/NLU.DevOps.Core/JsonEntities.cs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33

44
namespace NLU.DevOps.Core
55
{
6+
using System;
67
using System.Collections.Generic;
8+
using System.Linq;
9+
using Models;
710
using Newtonsoft.Json;
811

912
/// <summary>
@@ -15,20 +18,44 @@ public class JsonEntities
1518
/// Initializes a new instance of the <see cref="JsonEntities"/> class.
1619
/// </summary>
1720
/// <param name="entities">Entities referenced in the utterance.</param>
18-
public JsonEntities(IReadOnlyList<Entity> entities)
21+
public JsonEntities(IEnumerable<HierarchicalEntity> entities)
1922
{
20-
this.Entities = entities;
23+
this.Entities = FlattenChildren(entities)?.ToArray();
2124
}
2225

2326
/// <summary>
2427
/// Gets the entities referenced in the utterance.
2528
/// </summary>
26-
public IReadOnlyList<Entity> Entities { get; }
29+
public IReadOnlyList<IEntity> Entities { get; }
2730

2831
/// <summary>
2932
/// Gets the additional properties.
3033
/// </summary>
3134
[JsonExtensionData]
3235
public IDictionary<string, object> AdditionalProperties { get; } = new Dictionary<string, object>();
36+
37+
private static IEnumerable<IEntity> FlattenChildren(IEnumerable<IHierarchicalEntity> entities, string prefix = "")
38+
{
39+
if (entities == null)
40+
{
41+
return null;
42+
}
43+
44+
IEnumerable<IEntity> getChildren(IHierarchicalEntity entity)
45+
{
46+
yield return entity;
47+
48+
var children = FlattenChildren(entity.Children, $"{prefix}{entity.EntityType}::");
49+
if (children != null)
50+
{
51+
foreach (var child in children)
52+
{
53+
yield return child;
54+
}
55+
}
56+
}
57+
58+
return entities.SelectMany(getChildren);
59+
}
3360
}
3461
}

0 commit comments

Comments
 (0)