Skip to content

Commit bfa6c2f

Browse files
committed
Add attribute for XSLT, closes #37
As a consequence, rss_file will now use rss_str internally. This means that the rss result is loaded into memory even when writing to files, which could mean much poorer performance for huge feeds in particular. If this is too much, then we might reverse this commit, but I don't think it will be much of a problem.
1 parent bec9859 commit bfa6c2f

File tree

4 files changed

+145
-11
lines changed

4 files changed

+145
-11
lines changed

doc/user/basic_usage_guide/part_1.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Commonly used
5656
p.feed_url = "https://example.com/feeds/podcast.rss" # URL of this feed
5757
p.category = Category("Technology", "Podcasting")
5858
p.owner = p.authors[0]
59+
p.xslt = "https://example.com/feed/stylesheet.xsl" # URL of XSLT stylesheet
5960

6061
Read more:
6162

@@ -65,6 +66,7 @@ Read more:
6566
* :attr:`~podgen.Podcast.feed_url`
6667
* :attr:`~podgen.Podcast.category`
6768
* :attr:`~podgen.Podcast.owner`
69+
* :attr:`~podgen.Podcast.xslt`
6870

6971

7072
Less commonly used

podgen/__main__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def main():
5757
p.complete = False
5858
p.new_feed_url = 'http://example.com/new-feed.rss'
5959
p.owner = Person('John Doe', '[email protected]')
60+
p.xslt = "http://example.com/stylesheet.xsl"
6061

6162
e1 = p.add_episode()
6263
e1.id = 'http://lernfunk.de/_MEDIAID_123#1'

podgen/podcast.py

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
import dateutil.parser
1616
import dateutil.tz
1717
from podgen.episode import Episode
18-
from podgen.util import ensure_format, formatRFC2822, listToHumanreadableStr
18+
from podgen.util import ensure_format, formatRFC2822, listToHumanreadableStr, \
19+
htmlencode
1920
from podgen.person import Person
2021
import podgen.version
2122
import sys
@@ -306,6 +307,37 @@ def __init__(self, **kwargs):
306307
.. _latest version of the standard: http://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbub-core-0.4.html#rfc.section.4
307308
"""
308309

310+
self.xslt = None
311+
"""
312+
Absolute URL to the XSLT file which web browsers should use with this
313+
feed.
314+
315+
`XSLT`_ stands for Extensible Stylesheet Language Transformations and
316+
can be regarded as a template language made for transforming XML into
317+
XHTML (among other things). You can use it to avoid giving users an
318+
ugly XML listing when trying to subscribe to your podcast; this
319+
technique is in fact employed by most podcast publishers today.
320+
In a web browser, it looks like a web page, and to the
321+
podcatchers, it looks like a normal podcast feed. To put it another
322+
way, the very same URL can be used as an information web page about the
323+
podcast as well as the URL you subscribe to in podcatchers.
324+
325+
:type: :obj:`str`
326+
:RSS: Processing instruction placed right after the XML declaration, called
327+
``xml-stylesheet``, with type set to ``text/xsl`` and href set to
328+
this attribute.
329+
330+
.. note::
331+
332+
Firefox will use its own stylesheet for RSS feeds, so you
333+
must test using another browser and possibly a `simple web server`_
334+
(``python -m http.server 8000 -b 127.0.0.1``).
335+
336+
.. _XSLT: https://en.wikipedia.org/wiki/XSLT
337+
.. _simple web server:
338+
https://docs.python.org/3/library/http.server.html
339+
"""
340+
309341
# Populate the podcast with the keyword arguments
310342
for attribute, value in iteritems(kwargs):
311343
if hasattr(self, attribute):
@@ -584,6 +616,34 @@ def _create_rss(self):
584616

585617
return feed
586618

619+
def _add_xslt_pi(self, rss, xml_declaration):
    """Return ``rss`` with the ``xml-stylesheet`` processing instruction added.

    The instruction is spliced into the already serialized feed, because
    (as noted when this method was introduced) lxml does not support
    processing instructions outside the root element.

    :param rss: The serialized RSS feed.
    :type rss: str
    :param xml_declaration: Whether ``rss`` was serialized with a leading
        XML declaration. When it was, the instruction goes on its own line
        right after the declaration; otherwise it goes at the very start.
    :type xml_declaration: bool
    :returns: The feed with the processing instruction inserted.
    """
    # Get the processing instruction as a string
    pi = self._get_xslt_pi()

    if xml_declaration and rss.startswith("<?xml "):
        # Anchor on the end of the declaration itself rather than on the
        # first newline in the document: if the declaration is not followed
        # by a newline, the first newline may be somewhere inside the feed,
        # or there may be none at all.
        decl_end = rss.find("?>")
        if decl_end != -1:
            decl_end += len("?>")
            declaration, remainder = rss[:decl_end], rss[decl_end:]
            # Reuse the newline that normally follows the declaration, so
            # the output is unchanged for the usual serializer output.
            if remainder.startswith("\n"):
                remainder = remainder[1:]
            return declaration + "\n" + pi + "\n" + remainder

    # No declaration present, so just put it at the beginning (assuming the
    # caller wants it there, why else would you set self.xslt?)
    return pi + "\n" + rss
638+
639+
def _get_xslt_pi(self):
    """Build the ``xml-stylesheet`` processing instruction for ``self.xslt``.

    :returns: The serialized processing instruction as a text string, with
        ``type`` set to ``text/xsl`` and ``href`` set to the sanitized
        value of ``self.xslt``.
    """
    htmlescaped_url = htmlencode(self.xslt)
    # The URL ends up inside a double-quoted pseudo-attribute, so drop any
    # double quotes and backslashes that are still left in it.
    quote_sanitized = htmlescaped_url.replace('"', '').replace("\\", "")
    instruction = etree.ProcessingInstruction(
        "xml-stylesheet",
        'type="text/xsl" href="' + quote_sanitized + '"',
    )
    # The string "unicode" asks lxml for text output on both Python 2 and
    # Python 3; passing the ``str`` type only means that on Python 3.
    return etree.tostring(instruction, encoding="unicode")
646+
587647
def __str__(self):
588648
"""Print the podcast in RSS format, using the default options.
589649
@@ -607,14 +667,28 @@ def rss_str(self, minimize=False, encoding='UTF-8',
607667
:returns: The generated RSS feed as a :obj:`str`.
608668
"""
609669
feed = self._create_rss()
610-
return etree.tostring(feed, pretty_print=not minimize, encoding=encoding,
670+
rss = etree.tostring(feed, pretty_print=not minimize, encoding=encoding,
611671
xml_declaration=xml_declaration).decode(encoding)
612-
672+
if self.xslt:
673+
return self._add_xslt_pi(rss, xml_declaration=xml_declaration)
674+
else:
675+
return rss
613676

614677
def rss_file(self, filename, minimize=False,
615678
encoding='UTF-8', xml_declaration=True):
616679
"""Generate an RSS feed and write the resulting XML to a file.
617680
681+
.. note::
682+
683+
If atomicity is needed, then you are expected to provide that
684+
yourself. That means that you should write the feed to a temporary
685+
file which you rename to the final name afterwards; renaming is an
686+
atomic operation on Unix(like) systems.
687+
688+
.. note::
689+
690+
File-like objects given to this method will not be closed.
691+
618692
:param filename: Name of file to write, or a file-like object (anything with a ``write`` method).
619693
:type filename: str or fd
620694
:param minimize: Set to True to disable splitting the feed into multiple
@@ -628,10 +702,20 @@ def rss_file(self, filename, minimize=False,
628702
:type xml_declaration: bool
629703
:returns: Nothing.
630704
"""
631-
feed = self._create_rss()
632-
doc = etree.ElementTree(feed)
633-
doc.write(filename, pretty_print=not minimize, encoding=encoding,
634-
xml_declaration=xml_declaration)
705+
rss = self.rss_str(minimize=minimize, encoding=encoding,
706+
xml_declaration=xml_declaration)
707+
# Have we got a filename, or a file-like object?
708+
if isinstance(filename, string_types):
709+
# It is a string, assume it is filename
710+
with open(filename, "w") as fd:
711+
fd.write(rss)
712+
elif hasattr(filename, "write"):
713+
# It is file-like enough to fool us
714+
filename.write(rss)
715+
else:
716+
raise TypeError("filename must either be a filename (str/unicode) "
717+
"or a file-like object (with write method); "
718+
"%s satisfies none of those conditions." % filename)
635719

636720
def apply_episode_order(self):
637721
"""Make sure that the episodes appear on iTunes in the exact order

podgen/tests/test_podcast.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def setUp(self):
6565
self.owner = self.author
6666
self.complete = True
6767
self.new_feed_url = "https://example.com/feeds/myfeed2.rss"
68+
self.xslt = "http://example.com/feed/stylesheet.xsl"
6869

6970

7071
fg.name = self.name
@@ -86,6 +87,7 @@ def setUp(self):
8687
fg.owner = self.owner
8788
fg.complete = self.complete
8889
fg.new_feed_url = self.new_feed_url
90+
fg.xslt = self.xslt
8991

9092
self.fg = fg
9193

@@ -110,7 +112,8 @@ def test_constructor(self):
110112
image=self.image,
111113
owner=self.owner,
112114
complete=self.complete,
113-
new_feed_url=self.new_feed_url
115+
new_feed_url=self.new_feed_url,
116+
xslt=self.xslt,
114117
)
115118
# Test that the fields are actually set
116119
self.test_baseFeed()
@@ -144,10 +147,15 @@ def test_baseFeed(self):
144147
assert fg.new_feed_url == self.new_feed_url
145148
assert fg.skip_days == self.skip_days
146149
assert fg.skip_hours == self.skip_hours
150+
assert fg.xslt == self.xslt
147151

148152
def test_rssFeedFile(self):
149153
fg = self.fg
154+
rssString = self.getRssFeedFileContents(fg, xml_declaration=False)\
155+
.replace('\n', '')
156+
self.checkRssString(rssString)
150157

158+
def getRssFeedFileContents(self, fg, **kwargs):
151159
# Keep track of our temporary file and its filename
152160
filename = None
153161
file = None
@@ -158,10 +166,10 @@ def test_rssFeedFile(self):
158166
# Close the file; we will just use its name
159167
file.close()
160168
# Write the RSS to the file (overwriting it)
161-
fg.rss_file(filename=filename, xml_declaration=False)
169+
fg.rss_file(filename=filename, **kwargs)
162170
# Read the resulting RSS
163171
with open(filename, "r") as myfile:
164-
rssString=myfile.read().replace('\n', '')
172+
rssString = myfile.read()
165173
finally:
166174
# We don't need the file any longer, so delete it
167175
if filename:
@@ -175,14 +183,19 @@ def test_rssFeedFile(self):
175183
# We were interrupted between entering the try-block and
176184
# getting the temporary file. Not much we can do.
177185
pass
186+
return rssString
178187

179-
self.checkRssString(rssString)
180188

181189
def test_rssFeedString(self):
182190
fg = self.fg
183191
rssString = fg.rss_str(xml_declaration=False)
184192
self.checkRssString(rssString)
185193

194+
def test_rssStringAndFileAreEqual(self):
    """rss_str and rss_file must produce the same feed with default options."""
    from_string = self.fg.rss_str()
    from_file = self.getRssFeedFileContents(self.fg)
    self.assertEqual(from_string, from_file)
198+
186199
def checkRssString(self, rssString):
187200
feed = etree.fromstring(rssString)
188201
nsRss = self.nsContent
@@ -491,5 +504,39 @@ def test_modifyingSkipHoursAfterwards(self):
491504
self.fg.skip_hours.remove(26)
492505
self.fg.rss_str() # Now it works
493506

507+
# Tests for xslt
508+
def test_xslt_str(self):
    """Run the shared xslt checks against the feed produced by rss_str."""
    self.help_test_xslt_using(
        lambda **options: self.fg.rss_str(**options)
    )
512+
513+
def test_xslt_file(self):
    """Run the shared xslt checks against the feed written by rss_file."""
    self.help_test_xslt_using(
        lambda **options: self.getRssFeedFileContents(self.fg, **options)
    )
517+
518+
def help_test_xslt_using(self, generated_feed):
    """Check xslt handling on feeds produced by the given function.

    ``generated_feed`` is called with keyword arguments (none,
    ``minimize=True`` or ``xml_declaration=False``) and must return the
    feed as a string. ``self.fg.xslt`` is left set to the test stylesheet
    URL afterwards.
    """
    stylesheet_url = "http://example.com/mystylesheet.xsl"
    pi_start = "<?xml-stylesheet"
    option_sets = ({}, {"minimize": True}, {"xml_declaration": False})

    # No xslt when set to None
    self.fg.xslt = None
    for options in option_sets:
        assert pi_start not in generated_feed(**options)

    self.fg.xslt = stylesheet_url

    # Now we have the stylesheet in there
    for options in option_sets:
        assert pi_start in generated_feed(**options)

    # ... and it points at the URL we configured
    for options in option_sets:
        assert stylesheet_url in generated_feed(**options)
540+
494541
if __name__ == '__main__':
495542
unittest.main()

0 commit comments

Comments
 (0)