Mercurial > hg > chronicle
changeset 222:fdbbc1bb5d7c
Add the stub migration script + readme
author | Steve Kemp <steve@steve.org.uk> |
---|---|
date | Wed, 16 Apr 2008 19:31:23 +0100 |
parents | eded23ae8288 |
children | c9262aa2918a |
files | MIGRATING bin/chronicle-rss-importer |
diffstat | 2 files changed, 236 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MIGRATING Wed Apr 16 19:31:23 2008 +0100 @@ -0,0 +1,20 @@ + +Migrating to Chronicle +---------------------- + + Included with this release is the utility script `chronicle-rss-importer` + which will allow you to import any entries which are available as an + RSS feed from your current platform. + + Usage is as simple as: + + chronicle-rss-importer --feed=http://blog.steve.org.uk/index.rss \ + --output=/path/to/write/entries/to [--sequential] + + This will fetch the remote feed, parse it into individual entries and + write each one out to a new file beneath the specified output directory. + + + +Steve +--
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/chronicle-rss-importer Wed Apr 16 19:31:23 2008 +0100 @@ -0,0 +1,216 @@
#!/usr/bin/perl -w

=head1 NAME

chronicle-rss-importer - Import blog entries from an RSS feed.

=head1 SYNOPSIS

  chronicle-rss-importer --feed=http://example.com/index.rss \
      --output=/path/to/write/entries/to [--sequential]

=head1 DESCRIPTION

Fetch the given RSS feed, parse it into individual entries, and write
each entry out as a single text file beneath the output directory, in
the simple "Title:/Date:" format which Chronicle expects.

=cut

use strict;
use warnings;

use Getopt::Long;
use HTML::Entities;
use LWP;
use XML::RSSLite;


#
#  Configuration variables, populated from the command line.
#
my %CONFIG;


#
#  Parse arguments.
#
parseCommandLineArguments();


#
#  Validate any arguments.
#
validateCommandLineArguments();


#
#  Fetch the feed.
#
my $content = fetchRSSFeed( $CONFIG{ 'feed' } );


#
#  Parse the feed.
#
my %rssHash;
parseRSS( \%rssHash, \$content );


#
#  Now import each entry into its own file.
#
processEntries(%rssHash);


#
#  All done.
#
exit 0;



=begin doc

  Parse the command line arguments, if any.

  Populates the global %CONFIG hash; aborts if the options
  cannot be parsed.

=end doc

=cut

sub parseCommandLineArguments
{
    exit 1
      unless (
          GetOptions( "feed=s",     \$CONFIG{ 'feed' },
                      "output=s",   \$CONFIG{ 'output' },
                      "sequential", \$CONFIG{ 'sequential' } ) );
}



=begin doc

  Ensure we received the arguments we need, and that
  those arguments look OK.

  Exits with a diagnostic message upon any problem.

=end doc

=cut

sub validateCommandLineArguments
{

    #
    #  We need an output directory, and it must already exist.
    #
    if ( !$CONFIG{ 'output' } )
    {
        print "Output directory is mandatory.\n";
        print "Please specify via --output=...\n";
        exit;
    }
    if ( !-d $CONFIG{ 'output' } )
    {
        print
          "Specified output directory [$CONFIG{'output'}] is not a directory!\n";
        exit;
    }

    #
    #  We need a feed.
    #
    if ( !$CONFIG{ 'feed' } )
    {
        print "Please specify a feed to import, via --feed=http://....\n";
        exit;
    }

}



=begin doc

  Fetch the remote RSS feed.

  Returns the feed body as a string upon success; exits with the
  HTTP status message upon failure.

=end doc

=cut

sub fetchRSSFeed
{
    my ($uri) = (@_);

    my $ua = LWP::UserAgent->new();
    $ua->timeout(10);
    $ua->agent('chronicle-importer');

    my $response = $ua->get($uri);

    if ( $response->is_success )
    {
        return ( $response->content() );
    }

    #
    #  Abort, showing the HTTP error we received.
    #
    print "Failed to fetch feed: $uri\n";
    print "\n";
    print $response->message() . "\n";
    exit;
}



=begin doc

  Iterate over the items in our feed and write each one out to a
  single file beneath the configured output directory.

  Takes the parsed feed as a hash; the entries live beneath the
  'item' key, as produced by parseRSS.

=end doc

=cut

sub processEntries
{
    my (%entries) = (@_);

    my $count = 1;

    #
    #  NOTE: iterate over the hash we were *given*, not the global
    #  %rssHash - previously the parameter was silently ignored.
    #
    foreach my $item ( @{ $entries{ 'item' } } )
    {

        #
        #  Get details from the feed, defaulting to empty strings so
        #  that feeds with missing fields don't provoke uninitialized
        #  value warnings.
        #
        my $title = $item->{ 'title' } || "no title";
        my $date  = $item->{ 'pubDate' } || $item->{ 'dc:date' } || "";
        my $body =
          $item->{ 'description' } || $item->{ 'content:encoded' } || "";


        #
        #  Build up a suitable filename:  either a simple counter,
        #  or a sanitized version of the entry title.
        #
        my $filename;
        if ( $CONFIG{ 'sequential' } )
        {
            $filename = $count . ".txt";
        }
        else
        {
            $filename = $title;
            $filename =~ s/[^a-z0-9]/_/gi;
            $filename .= ".txt";
        }

        #
        #  Naive expansion:  if the body looks like it contains markup
        #  then decode any HTML entities within it.
        #
        if ( $body =~ m/</ )
        {
            $body = decode_entities($body);
        }

        $filename = $CONFIG{ 'output' } . "/" . $filename;

        #
        #  Write the entry, via a lexical filehandle, and check that
        #  the (buffered) writes really succeeded at close time.
        #
        open( my $out, ">", $filename )
          or die "Failed to write to $filename - $!";
        print {$out} <<EOF;
Title: $title
Date: $date

$body
EOF
        close($out)
          or die "Failed to close $filename - $!";

        $count += 1;
    }

}