Skip to content

Commit 241d578

Browse files
authored
Merge pull request #1598 from antony-liu/poi/v3.17-patch14
Bug 61431 - Conditional formatting evaluation ignores undefined cells
2 parents b5c14ad + 5814e29 commit 241d578

11 files changed

+440
-21
lines changed

main/SS/Formula/ConditionalFormattingEvaluator.cs

Lines changed: 169 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,53 @@
22
using NPOI.SS.Util;
33
using System;
44
using System.Collections.Generic;
5+
using System.Linq;
56
using System.Text;
67

78
namespace NPOI.SS.Formula
89
{
10+
/// <summary>
11+
/// <para>
12+
/// Evaluates Conditional Formatting constraints.
13+
/// </para>
14+
/// <para>
15+
/// For performance reasons, this class keeps a cache of all previously evaluated rules and cells.
16+
/// Be sure to call <see cref="clearAllCachedFormats()" /> if any conditional formats are modified, added, or deleted,
17+
/// and <see cref="clearAllCachedValues()" /> whenever cell values change.
18+
/// </para>
19+
/// </summary>
920
public class ConditionalFormattingEvaluator
1021
{
1122
private WorkbookEvaluator workbookEvaluator;
1223
private IWorkbook workbook;
13-
24+
/// <summary>
25+
/// <para>
26+
/// All the underlying structures, for both HSSF and XSSF, repeatedly go to the raw bytes/XML for the
27+
/// different pieces used in the ConditionalFormatting* structures. That's highly inefficient,
28+
/// and can cause significant lag when checking formats for large workbooks.
29+
/// </para>
30+
/// <para>
31+
/// Instead we need a cached version that is discarded when definitions change.
32+
/// </para>
33+
/// <para>
34+
/// Sheets don't implement equals, and since its an interface,
35+
/// there's no guarantee instances won't be recreated on the fly by some implementation.
36+
/// So we use sheet name.
37+
/// </para>
38+
/// </summary>
1439
private Dictionary<String, List<EvaluationConditionalFormatRule>> formats = new Dictionary<string, List<EvaluationConditionalFormatRule>>();
40+
/// <summary>
41+
/// <para>
42+
/// Evaluating rules for cells in their region(s) is expensive, so we want to cache them,
43+
/// and empty/reevaluate the cache when values change.
44+
/// </para>
45+
/// <para>
46+
/// Rule lists are in priority order, as evaluated by Excel (smallest priority # for XSSF, definition order for HSSF)
47+
/// </para>
48+
/// <para>
49+
/// CellReference : equals().
50+
/// </para>
51+
/// </summary>
1552
private SortedDictionary<CellReference, List<EvaluationConditionalFormatRule>> values = new SortedDictionary<CellReference, List<EvaluationConditionalFormatRule>>();
1653

1754
public ConditionalFormattingEvaluator(IWorkbook wb, IWorkbookEvaluatorProvider provider)
@@ -26,6 +63,28 @@ protected WorkbookEvaluator WorkbookEvaluator
2663
return workbookEvaluator;
2764
}
2865
}
66+
67+
/// <summary>
68+
/// <para>
69+
/// This checks all applicable <see cref="ConditionalFormattingRule"/>s for the cell's sheet,
70+
/// in defined "priority" order, returning the matches if any. This is a property currently
71+
/// not exposed from <c>CTCfRule</c> in <c>XSSFConditionalFormattingRule</c>.
72+
/// </para>
73+
/// <para>
74+
/// Most cells will have zero or one applied rule, but it is possible to define multiple rules
75+
/// that apply at the same time to the same cell, thus the List result.
76+
/// </para>
77+
/// <para>
78+
/// Note that to properly apply conditional rules, care must be taken to offset the base
79+
/// formula by the relative position of the current cell, or the wrong value is checked.
80+
/// This is handled by <see cref="WorkbookEvaluator.Evaluate(String, CellReference, CellRangeAddressBase)" />.
81+
/// </para>
82+
/// </summary>
83+
/// <param name="cellRef">NOTE: if no sheet name is specified, this uses the workbook active sheet</param>
84+
/// <returns>Unmodifiable List of <see cref="EvaluationConditionalFormatRule"/>s that apply to the current cell value,
85+
/// in priority order, as evaluated by Excel (smallest priority # for XSSF, definition order for HSSF),
86+
/// or null if none apply
87+
/// </returns>
2988
public List<EvaluationConditionalFormatRule> GetConditionalFormattingForCell(CellReference cellRef)
3089
{
3190
List<EvaluationConditionalFormatRule> rules = values.TryGetValue(cellRef, out List<EvaluationConditionalFormatRule> value) ? value : null;
@@ -71,6 +130,28 @@ public List<EvaluationConditionalFormatRule> GetConditionalFormattingForCell(Cel
71130

72131
return rules;
73132
}
133+
134+
/// <summary>
135+
/// <para>
136+
/// This checks all applicable <see cref="ConditionalFormattingRule"/>s for the cell's sheet,
137+
/// in defined "priority" order, returning the matches if any. This is a property currently
138+
/// not exposed from <c>CTCfRule</c> in <c>XSSFConditionalFormattingRule</c>.
139+
/// </para>
140+
/// <para>
141+
/// Most cells will have zero or one applied rule, but it is possible to define multiple rules
142+
/// that apply at the same time to the same cell, thus the List result.
143+
/// </para>
144+
/// <para>
145+
/// Note that to properly apply conditional rules, care must be taken to offset the base
146+
/// formula by the relative position of the current cell, or the wrong value is checked.
147+
/// This is handled by <see cref="WorkbookEvaluator.Evaluate(String, CellReference, CellRangeAddressBase)" />.
148+
/// </para>
149+
/// </summary>
150+
/// <param name="cell"></param>
151+
/// <returns>Unmodifiable List of <see cref="EvaluationConditionalFormatRule"/>s that apply to the current cell value,
152+
/// in priority order, as evaluated by Excel (smallest priority # for XSSF, definition order for HSSF),
153+
/// or null if none apply
154+
/// </returns>
74155
public List<EvaluationConditionalFormatRule> GetConditionalFormattingForCell(ICell cell)
75156
{
76157
return GetConditionalFormattingForCell(GetRef(cell));
@@ -80,6 +161,11 @@ public static CellReference GetRef(ICell cell)
80161
return new CellReference(cell.Sheet.SheetName, cell.RowIndex, cell.ColumnIndex, false, false);
81162
}
82163

164+
/// <summary>
165+
/// lazy load by sheet since reading can be expensive
166+
/// </summary>
167+
/// <param name="sheet"></param>
168+
/// <returns>unmodifiable list of rules</returns>
83169
protected List<EvaluationConditionalFormatRule> GetRules(ISheet sheet)
84170
{
85171
String sheetName = sheet.SheetName;
@@ -110,13 +196,95 @@ protected List<EvaluationConditionalFormatRule> GetRules(ISheet sheet)
110196
}
111197
return rules;
112198
}
199+
/// <summary>
200+
/// Call this whenever rules are added, reordered, or removed, or a rule formula is changed
201+
/// (not the formula inputs but the formula expression itself)
202+
/// </summary>
113203
public void ClearAllCachedFormats()
114204
{
115205
formats.Clear();
116206
}
207+
/// <summary>
208+
/// <para>
209+
/// Call this whenever cell values change in the workbook, so condional formats are re-evaluated
210+
/// for all cells.
211+
/// </para>
212+
/// <para>
213+
/// TODO: eventually this should work like <see cref="EvaluationCache.notifyUpdateCell(int, int, EvaluationCell)" />
214+
/// and only clear values that need recalculation based on the formula dependency tree.
215+
/// </para>
216+
/// </summary>
117217
public void ClearAllCachedValues()
118218
{
119219
values.Clear();
120220
}
221+
222+
/// <summary>
223+
/// </summary>
224+
/// <param name="sheetName"></param>
225+
/// <returns>unmodifiable list of all Conditional format rules for the given sheet, if any</returns>
226+
public List<EvaluationConditionalFormatRule> GetFormatRulesForSheet(string sheetName) {
227+
return GetFormatRulesForSheet(workbook.GetSheet(sheetName));
228+
}
229+
230+
/// <summary>
231+
/// </summary>
232+
/// <param name="sheet"></param>
233+
/// <returns>unmodifiable list of all Conditional format rules for the given sheet, if any</returns>
234+
public List<EvaluationConditionalFormatRule> GetFormatRulesForSheet(ISheet sheet) {
235+
return GetRules(sheet);
236+
}
237+
238+
/// <summary>
239+
/// <para>
240+
/// Conditional formatting rules can apply only to cells in the sheet to which they are attached.
241+
/// The POI data model does not have a back-reference to the owning sheet, so it must be passed in separately.
242+
/// </para>
243+
/// <para>
244+
/// We could overload this with convenience methods taking a sheet name and sheet index as well.
245+
/// </para>
246+
/// </summary>
247+
/// <param name="sheet">containing the rule</param>
248+
/// <param name="conditionalFormattingIndex">of the <see cref="ConditionalFormatting"/> instance in the sheet's array</param>
249+
/// <param name="ruleIndex">of the <see cref="ConditionalFormattingRule"/> instance within the <see cref="ConditionalFormatting"/></param>
250+
/// <returns>unmodifiable List of all cells in the rule's region matching the rule's condition</returns>
251+
public List<ICell> GetMatchingCells(ISheet sheet, int conditionalFormattingIndex, int ruleIndex) {
252+
foreach (EvaluationConditionalFormatRule rule in GetRules(sheet)) {
253+
if (rule.Sheet.Equals(sheet) && rule.FormattingIndex == conditionalFormattingIndex && rule.RuleIndex == ruleIndex) {
254+
return GetMatchingCells(rule);
255+
}
256+
}
257+
return [];
258+
}
259+
260+
/// <summary>
261+
/// </summary>
262+
/// <param name="rule"></param>
263+
/// <returns>unmodifiable List of all cells in the rule's region matching the rule's condition</returns>
264+
public List<ICell> GetMatchingCells(EvaluationConditionalFormatRule rule) {
265+
List<ICell> cells = new List<ICell>();
266+
ISheet sheet = rule.Sheet;
267+
268+
foreach (CellRangeAddress region in rule.Regions) {
269+
for (int r = region.FirstRow; r <= region.LastRow; r++) {
270+
IRow row = sheet.GetRow(r);
271+
if (row == null) {
272+
continue; // no cells to check
273+
}
274+
for (int c = region.FirstColumn; c <= region.LastColumn; c++) {
275+
ICell cell = row.GetCell(c);
276+
if (cell == null) {
277+
continue;
278+
}
279+
280+
List<EvaluationConditionalFormatRule> cellRules = GetConditionalFormattingForCell(cell);
281+
if (cellRules.Contains(rule)) {
282+
cells.Add(cell);
283+
}
284+
}
285+
}
286+
}
287+
return cells;
288+
}
121289
}
122290
}

main/SS/Formula/EvaluationConditionalFormatRule.cs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,10 @@ private bool CheckFilter(ICell cell, CellReference reference, CellRangeAddress r
562562
case ConditionFilterType.TOP_10:
563563
// from testing, Excel only operates on numbers and dates (which are stored as numbers) in the range.
564564
// numbers stored as text are ignored, but numbers formatted as text are treated as numbers.
565-
565+
if (! cv.IsNumber)
566+
{
567+
return false;
568+
}
566569
return GetMeaningfulValues(region, true, (allValues)=> {
567570
IConditionFilterData fc = rule.FilterConfiguration;
568571

@@ -696,16 +699,20 @@ private bool CheckFilter(ICell cell, CellReference reference, CellRangeAddress r
696699
return OperatorEnumHelper.IsValid(op, val, comp, null);
697700
case ConditionFilterType.CONTAINS_TEXT:
698701
// implemented both by a cfRule "text" attribute and a formula. Use the text.
699-
return text == null ? false : cv.ToString().ToLowerInvariant().Contains(lowerText);
702+
//return text == null ? false : cv.ToString().ToLowerInvariant().Contains(lowerText);
703+
return CheckFormula(reference, region);
700704
case ConditionFilterType.NOT_CONTAINS_TEXT:
701705
// implemented both by a cfRule "text" attribute and a formula. Use the text.
702-
return text == null ? true : !cv.ToString().ToLowerInvariant().Contains(lowerText);
706+
//return text == null ? true : !cv.ToString().ToLowerInvariant().Contains(lowerText);
707+
return CheckFormula(reference, region);
703708
case ConditionFilterType.BEGINS_WITH:
704709
// implemented both by a cfRule "text" attribute and a formula. Use the text.
705-
return cv.ToString().ToLowerInvariant().StartsWith(lowerText);
710+
//return cv.ToString().ToLowerInvariant().StartsWith(lowerText);
711+
return CheckFormula(reference, region);
706712
case ConditionFilterType.ENDS_WITH:
707713
// implemented both by a cfRule "text" attribute and a formula. Use the text.
708-
return cv.ToString().ToLowerInvariant().EndsWith(lowerText);
714+
//return cv.ToString().ToLowerInvariant().EndsWith(lowerText);
715+
return CheckFormula(reference, region);
709716
case ConditionFilterType.CONTAINS_BLANKS:
710717
try
711718
{

main/SS/Formula/WorkbookEvaluator.cs

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,17 +327,40 @@ private ValueEval Evaluate(String formula, CellReference target, CellRangeAddres
327327
formulaType.GetAttributes().Get<SingleValueAttribute>().IsSingleValue);
328328
return EvaluateNameFormula(ptgs, ec);
329329
}
330+
/// <summary>
331+
/// Adjust formula relative references by the offset between the start of the given region and the given target cell.
332+
/// </summary>
333+
/// <param name="ptgs"></param>
334+
/// <param name="target">cell within the region to use.</param>
335+
/// <param name="region">containing the cell</param>
336+
/// <returns>true if any Ptg references were shifted</returns>
337+
/// <exception cref="IndexOutOfRangeException">if the resulting shifted row/column indexes are over the document format limits</exception>
338+
/// <exception cref="ArgumentException">if target is not within region.</exception>
330339
protected bool AdjustRegionRelativeReference(Ptg[] ptgs, CellReference target, CellRangeAddressBase region)
331340
{
332341
if (!region.IsInRange(target))
333342
{
334343
throw new ArgumentException(target + " is not within " + region);
335344
}
336345

337-
//return adjustRegionRelativeReference(ptgs, target.getRow() - region.getFirstRow(), target.getCol() - region.getFirstColumn());
338-
339-
int deltaRow = target.Row;
340-
int deltaColumn = target.Col;
346+
return AdjustRegionRelativeReference(ptgs, target.Row - region.FirstRow, target.Col - region.FirstColumn);
347+
}
348+
349+
/// <summary>
350+
/// Adjust the formula relative cell references by a given delta
351+
/// </summary>
352+
/// <param name="ptgs"></param>
353+
/// <param name="deltaRow">target row offset from the top left cell of a region</param>
354+
/// <param name="deltaColumn">target column offset from the top left cell of a region</param>
355+
/// <returns>true if any Ptg references were shifted</returns>
356+
/// <exception cref="IndexOutOfRangeException">if the resulting shifted row/column indexes are over the document format limits</exception>
357+
/// <exception cref="ArgumentException">if either of the deltas are negative, as the assumption is we are shifting formulas
358+
/// relative to the top left cell of a region.
359+
/// </exception>
360+
protected bool AdjustRegionRelativeReference(Ptg[] ptgs, int deltaRow, int deltaColumn)
361+
{
362+
if (deltaRow < 0) throw new ArgumentException("offset row must be positive");
363+
if (deltaColumn < 0) throw new ArgumentException("offset column must be positive");
341364

342365
bool shifted = false;
343366
foreach(Ptg ptg in ptgs)

ooxml/POIXMLTextExtractor.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ limitations under the License.
1818
namespace NPOI
1919
{
2020
using System;
21+
using System.Text;
2122
using NPOI.OpenXml4Net.OPC;
23+
using NPOI.OpenXml4Net.Util;
2224

2325
public abstract class POIXMLTextExtractor : POITextExtractor
2426
{
@@ -102,6 +104,23 @@ public override void Close()
102104
}
103105
base.Close();
104106
}
107+
108+
protected void CheckMaxTextSize(StringBuilder text, String string1)
109+
{
110+
if(string1 == null)
111+
{
112+
return;
113+
}
114+
115+
int size = text.Length + string1.Length;
116+
if(size > ZipSecureFile.GetMaxTextSize())
117+
{
118+
throw new InvalidOperationException("The text would exceed the max allowed overall size of extracted text. "
119+
+ "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
120+
+ "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
121+
+ "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.GetMaxTextSize());
122+
}
123+
}
105124
}
106125

107126
}

0 commit comments

Comments
 (0)