1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
#!/usr/bin/env perl
#
# Feed an IRC::Ebooks bot IRC files for its brain.
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
#
package IRC::Ebooks::Feed;
# Force me to write this properly
use warnings;
use strict;
use utf8;
use autodie;
# Everything's UTF-8
use utf8::all;
# Require Perl v5.14
use 5.014;
# Import required modules
use Carp;
use Const::Fast;
use Config::Tiny;
use English qw(-no_match_vars);
use Getopt::Long::Descriptive;
use List::MoreUtils qw(any);
# Our custom module
use IRC::Ebooks;
# Declare package version
our $VERSION = 0.1;
# Expected line format, all else ignored
const my $LINE_FORMAT => qr{
^ # Start of line
[\d:]* # Timestamp
\s+ # At least one space
<\W*(?<nick>\w+)> # Nickname in angle brackets
\s+ # At least one space
(?<text>.+) # The message said
$ # End of line
}msx;
# Skip text lines that match these patterns
const my @SKIP_PATTERNS => (
qr{ <[@%+ ]?\w+> }msx, # Looks like nick (probably a quote)
qr{ \d:\d }msx, # Looks like timestamp (probably a quote)
qr{ :// }msx, # Looks like URL
);
# Delete these from text
const my @DELETE_PATTERNS => (
qr{ ^\w+:\s }msx, # Address in form of "nick: "
qr{ ["()\[\]] }msx, # Punctuations marks that may unbalance
);
# Import required modules
my ( $opt, $usage ) = describe_options(
'irc-ebooks-feed %o [IRCLOGFILE ...]',
[
'config|c=s',
'configuration file',
{ default => '/etc/irc-ebooks.conf' },
],
[ 'dump|d', 'dump the text that would be learned to stdout instead' ],
[ 'help', 'print usage message and exit' ],
);
# Give help if needed
if ( $opt->help ) {
print {*STDOUT} $usage->text
or carp q{Couldn't write to stdout};
exit;
}
# Read configuration
my $config = Config::Tiny->read( $opt->config )
or croak sprintf q{Couldn't read configuration file %s}, $opt->config;
# Create IRC::Ebooks object
my $irc_ebooks = IRC::Ebooks->new($config)
or croak q{Failed to create IRC::Ebooks object};
# Read from input files
while ( my $line = <> ) {
chomp $line;
# Ignore line if not in expected format
$line =~ $LINE_FORMAT or next;
# Get nick and text from match vars
my $nick = $LAST_PAREN_MATCH{nick};
my $text = $LAST_PAREN_MATCH{text};
# Ignore line if the person we're emulating didn't say it
next if $nick ne $config->{options}->{learn};
# Ignore line if any of the skip patterns matched
next if any { $text =~ $_ } @SKIP_PATTERNS;
# Filter out any unwanted parts of the text
for (@DELETE_PATTERNS) {
$text =~ s{$_}{}gmsx;
}
# If the user chose --dump, just print the text
if ( $opt->dump ) {
say {*STDOUT} $text
or carp q{Couldn't write to stdout};
}
# Otherwise, have the bot learn it
else {
$irc_ebooks->learn($text);
}
}
|