Skip to content

Commit bb60f30

Browse files
committed
Fix XMP metadata parser so that it can parse unfinished ASCII control character sequences
DEVSIX-9085 Autoported commit. Original commit hash: [1f333ee6d] Manual files: commons/src/sharpenconfig/java/com/itextpdf/commons/SharpenConfigMapping.java pom.xml
1 parent 8c71ecc commit bb60f30

File tree

11 files changed

+57
-59
lines changed

11 files changed

+57
-59
lines changed

itext.tests/itext.kernel.tests/itext/kernel/pdf/XMPMetadataTest.cs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ namespace iText.Kernel.Pdf {
3232
[NUnit.Framework.Category("IntegrationTest")]
3333
public class XMPMetadataTest : ExtendedITextTest {
3434
public static readonly String SOURCE_FOLDER = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
35-
.CurrentContext.TestDirectory) + "/resources/itext/kernel/pdf/XmpWriterTest/";
35+
.CurrentContext.TestDirectory) + "/resources/itext/kernel/pdf/XMPMetadataTest/";
3636

37-
public static readonly String DESTINATION_FOLDER = TestUtil.GetOutputPath() + "/kernel/pdf/XmpWriterTest/";
37+
public static readonly String DESTINATION_FOLDER = TestUtil.GetOutputPath() + "/kernel/pdf/XMPMetadataTest/";
3838

3939
[NUnit.Framework.OneTimeSetUp]
4040
public static void BeforeClass() {
@@ -289,5 +289,42 @@ public virtual void ListParsingTest() {
289289
NUnit.Framework.Assert.Catch(typeof(XMPException), () => XMPMetaFactory.ParseFromBuffer(xmp.GetBytes(System.Text.Encoding
290290
.UTF8)));
291291
}
292+
293+
[NUnit.Framework.Test]
294+
public virtual void ReadDocumentWithControlCharactersInXMPMetadata() {
295+
String src = SOURCE_FOLDER + "docWithControlCharactersInXmp.pdf";
296+
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
297+
())) {
298+
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_A_3A, document.GetConformance());
299+
}
300+
}
301+
302+
[NUnit.Framework.Test]
303+
public virtual void ReadDocumentWithBrokenControlCharactersInXMPMetadata() {
304+
String src = SOURCE_FOLDER + "docWithBrokenControlCharactersInXmp.pdf";
305+
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
306+
())) {
307+
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_A_3A, document.GetConformance());
308+
}
309+
}
310+
311+
[NUnit.Framework.Test]
312+
public virtual void ReadDocumentWithInvalidConformance() {
313+
String src = SOURCE_FOLDER + "docWithInvalidConformance.pdf";
314+
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
315+
())) {
316+
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_NONE_CONFORMANCE, document.GetConformance());
317+
}
318+
}
319+
320+
[LogMessage(iText.IO.Logs.IoLogMessageConstant.EXCEPTION_WHILE_UPDATING_XMPMETADATA)]
321+
[NUnit.Framework.Test]
322+
public virtual void ReadDocumentWithInvalidXMPMetadata() {
323+
String src = SOURCE_FOLDER + "docWithInvalidMetadata.pdf";
324+
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
325+
())) {
326+
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_NONE_CONFORMANCE, document.GetConformance());
327+
}
328+
}
292329
}
293330
}

itext.tests/itext.pdfa.tests/itext/pdfa/PdfAXmpTest.cs

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ You should have received a copy of the GNU Affero General Public License
2929
using iText.Kernel.XMP;
3030
using iText.Kernel.XMP.Options;
3131
using iText.Test;
32-
using iText.Test.Attributes;
3332

3433
namespace iText.Pdfa {
3534
[NUnit.Framework.Category("IntegrationTest")]
@@ -113,34 +112,6 @@ public virtual void SaveAndReadDocumentWithCanonicalXmpMetadata() {
113112
}
114113
}
115114

116-
[NUnit.Framework.Test]
117-
public virtual void ReadDocumentWithControlCharactersInXMPMetadata() {
118-
String src = sourceFolder + "pdfs/docWithControlCharactersInXmp.pdf";
119-
using (PdfADocument document = new PdfADocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new
120-
StampingProperties())) {
121-
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_A_3A, document.GetConformance());
122-
}
123-
}
124-
125-
[NUnit.Framework.Test]
126-
public virtual void ReadDocumentWithInvalidConformance() {
127-
String src = sourceFolder + "pdfs/docWithInvalidConformance.pdf";
128-
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
129-
())) {
130-
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_NONE_CONFORMANCE, document.GetConformance());
131-
}
132-
}
133-
134-
[LogMessage(iText.IO.Logs.IoLogMessageConstant.EXCEPTION_WHILE_UPDATING_XMPMETADATA)]
135-
[NUnit.Framework.Test]
136-
public virtual void ReadDocumentWithInvalidXMPMetadata() {
137-
String src = sourceFolder + "pdfs/docWithInvalidMetadata.pdf";
138-
using (PdfDocument document = new PdfDocument(new PdfReader(src), new PdfWriter(new MemoryStream()), new StampingProperties
139-
())) {
140-
NUnit.Framework.Assert.AreEqual(PdfConformance.PDF_NONE_CONFORMANCE, document.GetConformance());
141-
}
142-
}
143-
144115
[NUnit.Framework.Test]
145116
public virtual void TestPdfUAExtensionMetadata() {
146117
String outFile = destinationFolder + "testPdfUAExtensionMetadata.pdf";

itext/itext.kernel/itext/kernel/xmp/impl/FixASCIIControlsReader.cs

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
namespace iText.Kernel.XMP.Impl {
3434
/// <since>22.08.2006</since>
35-
public class FixASCIIControlsReader : PushbackReader {
35+
public class FixASCIIControlsReader : TextReader {
3636
private const int STATE_START = 0;
3737

3838
private const int STATE_AMP = 1;
@@ -45,8 +45,6 @@ public class FixASCIIControlsReader : PushbackReader {
4545

4646
private const int STATE_ERROR = 5;
4747

48-
private const int BUFFER_SIZE = 8;
49-
5048
/// <summary>the state of the automaton</summary>
5149
private int state = STATE_START;
5250

@@ -56,56 +54,48 @@ public class FixASCIIControlsReader : PushbackReader {
5654
/// <summary>count the digits of the sequence</summary>
5755
private int digits = 0;
5856

59-
/// <summary>The look-ahead size is 6 at maximum (&amp;#xAB;)</summary>
60-
/// <seealso cref="iText.Commons.Utils.PushbackReader.PushbackReader(System.IO.TextReader, int)"/>
57+
private TextReader @in;
58+
59+
/// <summary>A wrapper XMP reader that handles escaped control characters (e.g. &amp;#xAB;)</summary>
6160
/// <param name="input">a Reader</param>
62-
public FixASCIIControlsReader(TextReader input)
63-
: base(input, BUFFER_SIZE) {
61+
public FixASCIIControlsReader(TextReader input) {
62+
@in = input;
6463
}
6564

6665
/// <seealso cref="System.IO.TextReader.Read(char[], int, int)"/>
6766
public override int Read(char[] cbuf, int off, int len) {
68-
int readAhead = 0;
6967
int read = 0;
7068
int pos = off;
71-
char[] readAheadBuffer = new char[BUFFER_SIZE];
69+
char[] readAheadBuffer = new char[1];
7270
bool available = true;
7371
while (available && read < len) {
74-
available = base.Read(readAheadBuffer, readAhead, 1) == 1;
72+
available = @in.Read(readAheadBuffer, 0, 1) == 1;
7573
if (available) {
76-
char c = ProcessChar(readAheadBuffer[readAhead]);
74+
char c = ProcessChar(readAheadBuffer[0]);
7775
if (state == STATE_START) {
7876
// replace control chars with space
79-
if (iText.Kernel.XMP.Impl.Utils.IsControlChar(c)) {
77+
if (Utils.IsControlChar(c)) {
8078
c = ' ';
8179
}
8280
cbuf[pos++] = c;
83-
readAhead = 0;
8481
read++;
8582
}
8683
else {
8784
if (state == STATE_ERROR) {
88-
Unread(readAheadBuffer, 0, readAhead + 1);
89-
readAhead = 0;
90-
}
91-
else {
92-
readAhead++;
9385
}
9486
}
9587
}
96-
else {
97-
if (readAhead > 0) {
98-
// handles case when file ends within excaped sequence
99-
Unread(readAheadBuffer, 0, readAhead);
100-
state = STATE_ERROR;
101-
readAhead = 0;
102-
available = true;
103-
}
104-
}
10588
}
89+
// This is a broken ASCII control character sequence, so let's just skip its characters
90+
// If we try to preserve them, the SAX parser will throw later on anyway
10691
return read > 0 || available ? read : XMPUtilsImpl.EofReadBytesValue();
10792
}
10893

94+
/// <inheritdoc/>
95+
public override void Close() {
96+
@in.Close();
97+
}
98+
10999
/// <summary>Processes numeric escaped chars to find out if they are a control character.</summary>
110100
/// <param name="ch">a char</param>
111101
/// <returns>Returns the char directly or as replacement for the escaped sequence.</returns>

0 commit comments

Comments
 (0)