Mercurial > hg > chronicle
comparison bin/chronicle-rss-importer @ 222:fdbbc1bb5d7c
Add the stub migration script + readme
author | Steve Kemp <steve@steve.org.uk> |
---|---|
date | Wed, 16 Apr 2008 19:31:23 +0100 |
parents | |
children | 389f234d4111 |
comparison
equal
deleted
inserted
replaced
221:eded23ae8288 | 222:fdbbc1bb5d7c |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 use strict; | |
4 use warnings; | |
5 | |
6 use Getopt::Long; | |
7 use HTML::Entities; | |
8 use LWP; | |
9 use XML::RSSLite; | |
10 | |
11 | |
#
#  Settings gathered from the command line.  Filled in by
# parseCommandLineArguments() and consulted by the routines below.
#
my %CONFIG;


#
#  Read the command line, then check that what we were given is usable.
#
parseCommandLineArguments();
validateCommandLineArguments();


#
#  Download the feed and hand the raw text to parseRSS(), which fills
# in %rssHash for us.
#
my %rssHash;
my $feedText = fetchRSSFeed( $CONFIG{ 'feed' } );
parseRSS( \%rssHash, \$feedText );


#
#  Write the entries out.
#
processEntries(%rssHash);


#
#  All done.
#
52 | |
53 | |
54 | |
=begin doc

  Parse the command line arguments, if any, storing the results in
 the file-scoped %CONFIG hash.  The recognised options are:

    --feed=URI      The feed to import.
    --output=DIR    The directory the entries are written beneath.
    --sequential    Name the output files 1.txt, 2.txt, ... instead of
                    deriving each name from the entry's title.

  If the command line cannot be parsed (for example an unknown option,
 or an option missing its value) we exit with a non-zero status rather
 than carrying on with a configuration the user did not ask for.

=end doc

=cut

sub parseCommandLineArguments
{
    my $parsed = GetOptions(
        "feed=s",     \$CONFIG{ 'feed' },
        "output=s",   \$CONFIG{ 'output' },
        "sequential", \$CONFIG{ 'sequential' },
    );

    #
    #  GetOptions returns false when it had to complain about the
    # command line; it has already reported the details itself.
    #
    if ( !$parsed )
    {
        print STDERR "Usage: $0 --feed=URI --output=DIR [--sequential]\n";
        exit 1;
    }
}
73 | |
74 | |
75 | |
=begin doc

  Ensure we received the arguments we need, and that
 those arguments look OK.

  Reads the file-scoped %CONFIG hash.  Both --output (which must name
 an existing directory) and --feed are required.  On any failure a
 message is written to STDERR and the script exits with status 1, so
 that calling shells and scripts can tell the import did not run.

=end doc

=cut

sub validateCommandLineArguments
{

    #
    #  We need an output dir
    #
    if ( !$CONFIG{ 'output' } )
    {
        print STDERR "Output directory is mandatory.\n";
        print STDERR "Please specify via --output=...\n";
        exit 1;
    }
    if ( !-d $CONFIG{ 'output' } )
    {
        print STDERR
          "Specified output directory [$CONFIG{'output'}] is not a directory!\n";
        exit 1;
    }

    #
    #  We need a feed
    #
    if ( !$CONFIG{ 'feed' } )
    {
        print STDERR
          "Please specify a feed to import, via --feed=http:/....\n";
        exit 1;
    }

}
114 | |
115 | |
116 | |
=begin doc

  Fetch the remote RSS feed.

  Takes the URI of the feed and returns the body of the response
 exactly as the server sent it.

  The request is made with a ten second timeout.  If it does not
 succeed the reason is written to STDERR and the script exits with
 status 1, so a failed download is not mistaken for a successful run.

=end doc

=cut

sub fetchRSSFeed
{
    my ($uri) = (@_);

    my $ua = LWP::UserAgent->new();
    $ua->timeout(10);
    $ua->agent('chronicle-importer');

    my $response = $ua->get($uri);

    #
    #  Nothing useful can be done without the feed, so give up here.
    #
    if ( !$response->is_success )
    {
        print STDERR "Failed to fetch feed: $uri\n";
        print STDERR "\n";
        print STDERR $response->message() . "\n";
        exit 1;
    }

    return ( $response->content() );
}
148 | |
149 | |
150 | |
=begin doc

  Iterate over the items in our feed and write each one out to a
 single file.

  Takes the parsed feed as a (flattened) hash; the entries are read
 from the array reference stored under its 'item' key.  A feed with
 no items produces no files.

  Each file is created beneath $CONFIG{'output'} and holds a "Title:"
 line, a "Date:" line, a blank line and then the entry body.  With
 --sequential the files are named 1.txt, 2.txt, ...; otherwise the
 name is the entry title with every character outside a-z, A-Z and
 0-9 replaced by "_".  Should two entries in the same run end up with
 the same name the later ones gain a "_2", "_3", ... suffix instead of
 overwriting the earlier file.

  Dies if a file cannot be opened or completely written.

=end doc

=cut

sub processEntries
{
    my (%entries) = (@_);

    #
    #  Work from the hash we were handed rather than reaching for the
    # file-scoped copy.
    #
    my $items = $entries{ 'item' } || [];

    my $count = 1;

    #
    #  Title-derived names already used during this run.
    #
    my %taken;

    foreach my $item ( @{ $items } )
    {

        #
        #  Get details from the feed.
        #
        my $title = $item->{ 'title' } || "no title";
        my $date  = $item->{ 'pubDate' } || $item->{ 'dc:date' };
        my $body  = $item->{ 'description' } || $item->{ 'content:encoded' };

        #
        #  Missing fields become empty strings so that the match and
        # the output below do not trip "uninitialized value" warnings.
        #
        $date = "" unless defined $date;
        $body = "" unless defined $body;


        #
        #  Build up a suitable filename.
        #
        my $filename;
        if ( $CONFIG{ 'sequential' } )
        {
            $filename = $count . ".txt";
        }
        else
        {
            my $stem = $title;
            $stem =~ s/[^a-z0-9]/_/gi;

            #
            #  Different titles can collapse to the same name (and
            # titles can simply repeat); keep every entry by adding
            # a numeric suffix to the later ones.
            #
            my $candidate = $stem;
            my $suffix    = 1;
            while ( $taken{ $candidate } )
            {
                $suffix += 1;
                $candidate = $stem . "_" . $suffix;
            }
            $taken{ $candidate } = 1;

            $filename = $candidate . ".txt";
        }

        #
        #  Naive expansion: only bodies containing a "<" have their
        # HTML entities decoded.
        #
        if ( $body =~ m/</ )
        {
            $body = decode_entities($body);
        }

        $filename = $CONFIG{ 'output' } . "/" . $filename;

        open( my $handle, ">", $filename )
          or die "Failed to write to $filename - $!";
        print {$handle} "Title: $title\n" . "Date: $date\n" . "\n" . "$body\n";

        #
        #  Buffered write errors only surface at close.
        #
        close($handle)
          or die "Failed to write to $filename - $!";

        $count += 1;
    }

}