1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
package Crawl::Bot::Role::RSS;
use Moose::Role;
use autodie;
use File::Spec;
use XML::RAI;
requires 'data_dir', 'rss_item_to_id', 'rss_feed';
# Set of RSS item ids that have already been seen, stored as id => 1.
#
# No `is` option is given, so the attribute is reached only through the
# delegated hash methods listed under `handles`:
#   has_rss_item($id)      - true if $id is already recorded
#   rss_items              - list of every recorded id
#   _add_rss_item($id, 1)  - record $id (private; see add_rss_item)
has rss_items => (
    traits  => ['Hash'],
    isa     => 'HashRef',
    # The builder calls rss_cache_file (which reads data_dir) and may call
    # each_rss_item (which reads rss_feed) on the instance. Moose does not
    # initialise attributes in a predictable order, so the value must be
    # built lazily, once the object is fully constructed.
    lazy    => 1,
    builder => '_build_rss_items',
    handles => {
        has_rss_item  => 'exists',
        rss_items     => 'keys',
        _add_rss_item => 'set',
    },
);
# Builder for the rss_items attribute.
#
# Returns a hashref mapping each known item id to 1.
#   * If the cache file (rss_cache_file) is readable, ids are loaded from
#     it, one id per line.
#   * Otherwise the feed is walked with each_rss_item and the id of every
#     item (as computed by rss_item_to_id) is recorded.
#
# Dies (via autodie) if the cache file cannot be opened or closed.
sub _build_rss_items {
    my $self = shift;

    warn "Loading RSS cache for " . blessed($self);

    my $file = $self->rss_cache_file;
    my %items;
    if (-r $file) {
        open my $fh, '<', $file;
        # Read into a lexical rather than the global $_: a bare
        # `while (<$fh>)` does not localise $_, so it would overwrite
        # whatever the caller currently has in (or aliased to) $_.
        while (my $line = <$fh>) {
            chomp $line;
            $items{$line} = 1;
        }
        close $fh;
    }
    else {
        $self->each_rss_item(sub {
            my $item = shift;
            $items{ $self->rss_item_to_id($item) } = 1;
        });
    }

    warn "Done loading";
    return \%items;
}
# Record an item id as seen.
#
# Takes the id as its only argument and stores it in rss_items with the
# value 1, via the delegated _add_rss_item method.
sub add_rss_item {
    my ($self, $id) = @_;
    return $self->_add_rss_item($id => 1);
}
# Parse the feed located at rss_feed with XML::RAI and invoke the given
# code reference once for every element of the parsed feed's items list,
# passing that element as the sole argument.
sub each_rss_item {
    my ($self, $callback) = @_;

    my $feed = XML::RAI->parse_uri($self->rss_feed);
    foreach my $entry (@{ $feed->items }) {
        $callback->($entry);
    }
}
# Write every known item id to the cache file (rss_cache_file), one id
# per line, replacing any previous contents.
#
# Dies if the file cannot be opened, written to, or closed.
sub save_rss_cache {
    my $self = shift;

    my $file = $self->rss_cache_file;
    open my $fh, '>', $file;
    for my $id ($self->rss_items) {
        # print is not covered by autodie, so check it by hand.
        print {$fh} "$id\n" or die "Can't write to '$file': $!";
    }
    # Output is buffered, so errors such as a full disk may only show up
    # when the handle is flushed; an explicit close lets autodie report
    # them instead of silently leaving a truncated cache.
    close $fh;
    return;
}
# Path of this object's RSS cache file.
#
# The file name is the object's class name with a leading
# "Crawl::Bot::Plugin::" removed, lower-cased, and suffixed with
# "_rss_cache"; it is placed inside the directory given by data_dir.
sub rss_cache_file {
    my ($self) = @_;

    my $plugin = blessed($self);
    $plugin =~ s/^Crawl::Bot::Plugin:://;

    return File::Spec->catfile($self->data_dir, lc($plugin) . '_rss_cache');
}
# Once object construction has run, write the item ids to the cache file
# so that later runs can load them from disk.
# NOTE(review): this wraps BUILDALL rather than BUILD - confirm the hook
# still fires for consuming classes that are made immutable.
after 'BUILDALL' => sub {
    my ($self) = @_;
    $self->save_rss_cache;
};
no Moose::Role;
1;
|