Skip to content

Commit b136b82

Browse files
committed
MM-13248
Resolve issue with the encoding extraction regex. Bumped the version number to prepare for a follow-up release. While here, also noticed that a standalone HTML::TreeBuilder was instantiated within this method. It is now replaced with a CSS::Inliner::TreeBuilder, configured identically to the other instances within the class for consistency.
1 parent e7a1270 commit b136b82

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

ChangeLog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,9 @@
231231
* Update MANIFEST to reference all added tests/assets
232232
* Fix minor formatting issues within some tests/assets
233233
* Address concerns raised by CPAN RT96414, conditionally test for connectivity instead of outright failing
234+
235+
4003 2015-12-16 Kevin Kamel <[email protected]>
236+
* Resolve charset sniffing issue
237+
- invalid charset present within the document would cause charset sniffing to end prematurely
238+
- invalid charset present within the document would cause Inliner to die during the decode phase
239+
* Resolve issue whereby a TreeBuilder instance was not configured as expected

lib/CSS/Inliner.pm

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package CSS::Inliner;
22
use strict;
33
use warnings;
44

5-
our $VERSION = '4002';
5+
our $VERSION = '4003';
66

77
use Carp;
88
use Encode;
@@ -964,10 +964,12 @@ sub _extract_meta_charset {
964964
local $SIG{__WARN__} = sub { my $warning = shift; warn $warning unless $warning =~ /^Parsing of undecoded UTF-8/ };
965965

966966
# parse document and pull out key header elements
967-
my $doc = HTML::TreeBuilder->new();
968-
$doc->parse_content($$params{content});
967+
my $extract_tree = new CSS::Inliner::TreeBuilder();
968+
$self->_configure_tree({ tree => $extract_tree });
969969

970-
my $head = $doc->look_down("_tag", "head"); # there should only be one
970+
$extract_tree->parse_content($$params{content});
971+
972+
my $head = $extract_tree->look_down("_tag", "head"); # there should only be one
971973

972974
my $meta_charset;
973975
if ($head) {
@@ -979,12 +981,14 @@ sub _extract_meta_charset {
979981
if ($meta_equiv_charset_elem) {
980982
my $meta_equiv_content = $meta_equiv_charset_elem->attr('content');
981983

982-
if ($meta_equiv_content =~ /charset=(.*)(?:[";,]?)/i) {
983-
$meta_charset = $1;
984+
# leverage charset allowable chars from https://tools.ietf.org/html/rfc2978
985+
if ($meta_equiv_content =~ /charset(?:\s*)=(?:\s*)([\w!#$%&'\-+^`{}~]+)/i) {
986+
$meta_charset = find_encoding($1);
984987
}
985988
}
986-
elsif ($meta_charset_elem) {
987-
$meta_charset = $meta_charset_elem->attr('charset');
989+
990+
if (!defined($meta_charset) && $meta_charset_elem) {
991+
$meta_charset = find_encoding($meta_charset_elem->attr('charset'));
988992
}
989993
}
990994

0 commit comments

Comments
 (0)