changeset 222:fdbbc1bb5d7c

Add the stub migration script + readme
author Steve Kemp <steve@steve.org.uk>
date Wed, 16 Apr 2008 19:31:23 +0100
parents eded23ae8288
children c9262aa2918a
files MIGRATING bin/chronicle-rss-importer
diffstat 2 files changed, 236 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MIGRATING	Wed Apr 16 19:31:23 2008 +0100
@@ -0,0 +1,20 @@
+
+Migrating to Chronicle
+----------------------
+
+  Included with this release is the utility script `chronicle-rss-importer`
+ which will allow you to import any entries which are available as an
+ RSS feed from your current platform.
+
+  Usage is as simple as:
+
+    chronicle-rss-importer --feed=http://blog.steve.org.uk/index.rss \
+     --output=/path/to/write/entries/to   [--sequential]
+
+  This will fetch the remote feed, parse it into individual entries and
+ write each one out to a new file beneath the specified output directory.
+
+
+
+Steve
+--
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/chronicle-rss-importer	Wed Apr 16 19:31:23 2008 +0100
@@ -0,0 +1,216 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+use HTML::Entities;
+use LWP;
+use XML::RSSLite;
+
+
+#
+#  Configuration, filled in from the command line (feed, output, sequential).
+#
+my %CONFIG;
+
+
+#
+#  Parse the command line options into %CONFIG.
+#
+parseCommandLineArguments();
+
+
+#
+#  Validate the arguments; this exits if --output or --feed is unusable.
+#
+validateCommandLineArguments();
+
+
+#
+#  Fetch the feed named by --feed; this exits if the request fails.
+#
+my $content = fetchRSSFeed( $CONFIG{ 'feed' } );
+
+
+#
+#  Parse the fetched content into %rssHash (parseRSS is not defined here;
+# presumably it is exported by XML::RSSLite).
+my %rssHash;
+parseRSS( \%rssHash, \$content );
+
+
+#
+#  Now import: write each feed item to a file in the output directory.
+#
+processEntries(%rssHash);
+
+
+#
+#  All done.
+#
+
+
+
+=begin doc
+
+  Parse the command line arguments into the global %CONFIG hash.
+ Recognised: --feed=URL, --output=DIR and the --sequential flag.
+ Exits with a non-zero status if an option cannot be parsed.
+
+=end doc
+
+=cut
+
+sub parseCommandLineArguments
+{
+    GetOptions( "feed=s",     \$CONFIG{ 'feed' },
+                "output=s",   \$CONFIG{ 'output' },
+                "sequential", \$CONFIG{ 'sequential' },
+    ) or exit 1;    # Getopt::Long has already warned about the bad option
+    return;
+}
+
+
+
+=begin doc
+
+  Ensure we received the arguments we need, and that
+ those arguments look OK.
+
+  Problems are reported on STDERR and the script exits with status 1.
+
+=end doc
+
+=cut
+
+sub validateCommandLineArguments
+{
+    #
+    #  We need an output dir, and it must already exist.
+    #
+    if ( !$CONFIG{ 'output' } )
+    {
+        print STDERR "Output directory is mandatory.\n";
+        print STDERR "Please specify via --output=...\n";
+        exit 1;
+    }
+    if ( !-d $CONFIG{ 'output' } )
+    {
+        print STDERR
+"Specified output directory [$CONFIG{'output'}] is not a directory!\n";
+        exit 1;
+    }
+
+    #
+    #  We need a feed
+    #
+    if ( !$CONFIG{ 'feed' } )
+    {
+        print STDERR "Please specify a feed to import, via --feed=http://...\n";
+        exit 1;
+    }
+}
+
+
+
+=begin doc
+
+  Fetch the remote RSS feed and return the content of the response.
+
+  If the request fails the HTTP status line is reported on STDERR and
+ the script exits with status 1.
+
+=end doc
+
+=cut
+
+sub fetchRSSFeed
+{
+    my ($uri) = (@_);
+
+    my $ua = LWP::UserAgent->new();
+    $ua->timeout(10);
+    $ua->agent('chronicle-importer');
+
+    my $response = $ua->get($uri);
+
+    return ( $response->content() ) if ( $response->is_success );
+
+    #
+    #  Without the feed there is nothing to import, so failure is fatal.
+    #
+    print STDERR "Failed to fetch feed: $uri\n";
+    print STDERR "\n";
+    print STDERR $response->status_line() . "\n";
+    exit 1;
+}
+
+
+
+=begin doc
+
+  Iterate over the items in our feed and write each one out to a
+ single file beneath the output directory.
+
+  The parsed feed is passed in as a hash whose 'item' key holds a
+ reference to the list of entries.  Files are named after the entry
+ title, or numbered from 1 when --sequential was given.
+
+=end doc
+
+=cut
+
+sub processEntries
+{
+    my (%entries) = (@_);
+
+    my $count = 1;
+    my %seen;
+    #  Read the feed we were passed; tolerate one with no items.
+    foreach my $item ( @{ $entries{ 'item' } || [] } )
+    {
+        #
+        #  Get details from the feed.  Missing fields become empty
+        # strings so we never match against, or print, undef.
+        #
+        my $title = $item->{ 'title' } || "no title";
+        my $date = $item->{ 'pubDate' } || $item->{ 'dc:date' } || "";
+        my $body =
+             $item->{ 'description' }
+          || $item->{ 'content:encoded' }
+          || "";
+
+        #
+        #  Build up a suitable filename.
+        #
+        my $filename = $count;
+        if ( !$CONFIG{ 'sequential' } )
+        {
+            $filename = $title;
+            $filename =~ s/[^a-z0-9]/_/gi;
+            #  Titles can sanitise to the same name: add a suffix
+            # rather than silently overwriting an earlier entry.
+            my $base = $filename;
+            my $copy = 1;
+            $filename = $base . "_" . ++$copy while ( $seen{ $filename }++ );
+        }
+        $filename = $CONFIG{ 'output' } . "/" . $filename . ".txt";
+
+        #
+        #  Naive expansion.
+        #
+        $body = decode_entities($body) if ( $body =~ m/&lt;/ );
+
+        open( my $output, ">", $filename )
+          or die "Failed to write to $filename - $!";
+        print $output <<EOF;
+Title: $title
+Date: $date
+
+$body
+EOF
+        close($output) or die "Failed to close $filename - $!";
+        $count += 1;
+    }
+}