Skip to content

Commit f0f953f

Browse files
committed
MM-12116
Add support for a constructor argument to override the default user-agent provided by the module when handling remote documents. Add patch provided by Dave Gray ([email protected]) for properly setting request headers when handling remote documents. Update associated documentation, fix some minor POD issues, update Changelog with changes, Prepare to tag version 3958
1 parent bde162c commit f0f953f

File tree

3 files changed

+28
-22
lines changed

3 files changed

+28
-22
lines changed

ChangeLog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,9 @@
204204
- Resolves improper !important rule handling
205205
- Adds test for validating proper !important rule handling
206206
* Update copyright to 2015 throughout project
207+
208+
3958 2015-05-29 Kevin Kamel <[email protected]>
209+
* Add support for an agent string argument for the remote fetching of documents
210+
* Add patch provided by Dave Gray ([email protected])
211+
- Adds proper headers for remote fetching of files
212+
* Fix issues within pod documentation

README

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ METHODS
4343

4444
NOTE: This argument is not compatible with passing an html_tree.
4545

46+
agent - (optional) Pass in a string containing a preferred
47+
user-agent; this overrides the internal default provided by the module
48+
for handling remote documents
49+
4650
fetch_file
4751
Fetches a remote HTML file that supposedly contains both HTML and a
4852
style declaration, properly tags the data with the proper
@@ -57,6 +61,11 @@ METHODS
5761

5862
$self->fetch_file({ url => 'http://www.example.com' });
5963

64+
Note that you can specify a user-agent to override the default
65+
user-agent of 'Mozilla/4.0' within the constructor. Doing so may
66+
avoid certain issues with agent filtering related to quirky
67+
webserver configs.
68+
6069
read_file
6170
Opens and reads an HTML file that supposedly contains both HTML and
6271
a style declaration. It subsequently calls the read() method

lib/CSS/Inliner.pm

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package CSS::Inliner;
22
use strict;
33
use warnings;
44

5-
our $VERSION = '3957';
5+
our $VERSION = '3958';
66

77
use Carp;
88

@@ -41,7 +41,7 @@ support top level <style> declarations.
4141
=cut
4242

4343
BEGIN {
44-
my $members = ['stylesheet','css','html','html_tree','entities','query','strip_attrs','relaxed','leave_style','warns_as_errors','content_warnings'];
44+
my $members = ['stylesheet','css','html','html_tree','entities','query','strip_attrs','relaxed','leave_style','warns_as_errors','content_warnings','agent'];
4545

4646
#generate all the getter/setter we need
4747
foreach my $member (@{$members}) {
@@ -59,8 +59,6 @@ BEGIN {
5959
}
6060
}
6161

62-
=pod
63-
6462
=head1 METHODS
6563
6664
=over
@@ -84,6 +82,8 @@ relaxed - (optional) Relaxed HTML parsing which will attempt to interpret non-HT
8482
8583
NOTE: This argument is not compatible with passing an html_tree.
8684
85+
agent - (optional) Pass in a string containing a preferred user-agent; this overrides the internal default provided by the module for handling remote documents.
86+
8787
=cut
8888

8989
sub new {
@@ -118,7 +118,8 @@ sub new {
118118
strip_attrs => (defined($$params{strip_attrs}) && $$params{strip_attrs}) ? 1 : 0,
119119
relaxed => (defined($$params{relaxed}) && $$params{relaxed}) ? 1 : 0,
120120
leave_style => (defined($$params{leave_style}) && $$params{leave_style}) ? 1 : 0,
121-
warns_as_errors => (defined($$params{warns_as_errors}) && $$params{warns_as_errors}) ? 1 : 0
121+
warns_as_errors => (defined($$params{warns_as_errors}) && $$params{warns_as_errors}) ? 1 : 0,
122+
agent => (defined($$params{agent}) && $$params{agent}) ? $$params{agent} : 'Mozilla/4.0'
122123
};
123124

124125
bless $self, $class;
@@ -133,8 +134,6 @@ sub new {
133134
return $self;
134135
}
135136

136-
=pod
137-
138137
=item fetch_file
139138
140139
Fetches a remote HTML file that supposedly contains both HTML and a
@@ -150,6 +149,10 @@ url argument for the requested document. For example:
150149
151150
$self->fetch_file({ url => 'http://www.example.com' });
152151
152+
Note that you can specify a user-agent to override the default user-agent
153+
of 'Mozilla/4.0' within the constructor. Doing so may avoid certain issues
154+
with agent filtering related to quirky webserver configs.
155+
153156
=cut
154157

155158
sub fetch_file {
@@ -161,7 +164,6 @@ sub fetch_file {
161164
croak 'You must pass in hash params that contain a url argument';
162165
}
163166

164-
#fetch a absolutized version of the html
165167
my $html = $self->_fetch_html({ url => $$params{url} });
166168

167169
$self->read({ html => $html });
@@ -212,8 +214,6 @@ sub read_file {
212214
return();
213215
}
214216

215-
=pod
216-
217217
=item read
218218
219219
Reads passed html data and parses it. The intermediate data is stored in
@@ -256,8 +256,6 @@ sub read {
256256
return();
257257
}
258258

259-
=pod
260-
261259
=item inlinify
262260
263261
Processes the html data that was entered through either 'read' or
@@ -392,8 +390,6 @@ sub inlinify {
392390
return $html . "\n";
393391
}
394392

395-
=pod
396-
397393
=item query
398394
399395
Given a particular selector, return the applicable styles
@@ -412,8 +408,6 @@ sub query {
412408
return $self->_query->query($$params{selector});
413409
}
414410

415-
=pod
416-
417411
=item specificity
418412
419413
Given a particular selector, return the associated specificity
@@ -432,8 +426,6 @@ sub specificity {
432426
return $self->_query->get_specificity($$params{selector});
433427
}
434428

435-
=pod
436-
437429
=item content_warnings
438430
439431
Return any warnings thrown while inlining a given block of content.
@@ -496,13 +488,14 @@ sub _fetch_url {
496488

497489
# Create a user agent object
498490
my $ua = LWP::UserAgent->new;
499-
$ua->agent('Mozilla/4.0'); # masquerade as Mozilla/4.0
491+
492+
$ua->agent($self->_agent()); # masquerade as Mozilla/4.0 unless otherwise specified in the constructor
500493
$ua->protocols_allowed( ['http','https'] );
501494

502495
# Create a request
503496
my $uri = URI->new($$params{url});
504497

505-
my $req = HTTP::Request->new('GET',$uri);
498+
my $req = HTTP::Request->new('GET', $uri, [ 'Accept' => 'text/html, */*' ]);
506499

507500
# Pass request to the user agent and get a response back
508501
my $res = $ua->request($req);
@@ -887,8 +880,6 @@ sub _grep_important_declarations {
887880

888881
1;
889882

890-
=pod
891-
892883
=head1 Sponsor
893884
894885
This code has been developed under sponsorship of MailerMailer LLC,

0 commit comments

Comments
 (0)