aboutsummaryrefslogtreecommitdiff
path: root/bin/irc-ebooks-feed
blob: c8d49a442f276d3302b5d27cc2bbc9222ec61cfc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env perl

#
# Feed an IRC::Ebooks bot IRC files for its brain.
#
# Author: Tom Ryder <tom@sanctum.geek.nz>
#
package IRC::Ebooks::Feed;

# Force me to write this properly
use warnings;
use strict;
use utf8;
use autodie;

# Everything's UTF-8
use utf8::all;

# Require Perl v5.14
use 5.014;

# Import required modules
use Carp;
use Const::Fast;
use Config::Tiny;
use English qw(-no_match_vars);
use Getopt::Long::Descriptive;
use List::MoreUtils qw(any);

# Our custom module
use IRC::Ebooks;

# Declare package version
our $VERSION = 0.1;

# Expected line format, all else ignored
const my $LINE_FORMAT => qr{
    ^                 # Start of line
    [\d:]*            # Timestamp
    \s+               # At least one space
    <\W*(?<nick>\w+)> # Nickname in angle brackets
    \s+               # At least one space
    (?<text>.+)       # The message said
    $                 # End of line
}msx;

# Skip text lines that match these patterns
const my @SKIP_PATTERNS => (
    qr{ <[@%+ ]?\w+> }msx,  # Looks like nick (probably a quote)
    qr{ \d:\d }msx,         # Looks like timestamp (probably a quote)
    qr{ :// }msx,           # Looks like URL
);

# Delete these from text
const my @DELETE_PATTERNS => (
    qr{ ^\w+:\s }msx,       # Address in form of "nick: "
    qr{ ["()\[\]] }msx,     # Punctuations marks that may unbalance
);

# Import required modules
my ( $opt, $usage ) = describe_options(
    'irc-ebooks-feed %o [IRCLOGFILE ...]',
    [
        'config|c=s',
        'configuration file',
        { default => '/etc/irc-ebooks.conf' },
    ],
    [ 'dump|d', 'dump the text that would be learned to stdout instead' ],
    [ 'help',   'print usage message and exit' ],
);

# Give help if needed
if ( $opt->help ) {
    print {*STDOUT} $usage->text
      or carp q{Couldn't write to stdout};
    exit;
}

# Read configuration
my $config = Config::Tiny->read( $opt->config )
  or croak sprintf q{Couldn't read configuration file %s}, $opt->config;

# Create IRC::Ebooks object
my $irc_ebooks = IRC::Ebooks->new($config)
  or croak q{Failed to create IRC::Ebooks object};

# Read from input files
while ( my $line = <> ) {
    chomp $line;

    # Ignore line if not in expected format
    $line =~ $LINE_FORMAT or next;

    # Get nick and text from match vars
    my $nick = $LAST_PAREN_MATCH{nick};
    my $text = $LAST_PAREN_MATCH{text};

    # Ignore line if the person we're emulating didn't say it
    next if $nick ne $config->{options}->{learn};

    # Ignore line if any of the skip patterns matched
    next if any { $text =~ $_ } @SKIP_PATTERNS;

    # Filter out any unwanted parts of the text
    for (@DELETE_PATTERNS) {
        $text =~ s{$_}{}gmsx;
    }

    # If the user chose --dump, just print the text
    if ( $opt->dump ) {
        say {*STDOUT} $text
          or carp q{Couldn't write to stdout};
    }

    # Otherwise, have the bot learn it
    else {
        $irc_ebooks->learn($text);
    }
}