]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
deleted 'use Carp' (debugging time is over ;)
[selfforum.git] / selfforum-cgi / shared / CheckRFC.pm
1 package CheckRFC;
2
3 ################################################################################
4 # #
5 # File: shared/CheckRFC.pm #
6 # #
7 # Authors: Andre Malo <nd@o3media.de>, 2001-03-30 #
8 # #
9 # Description: implement several string checks on RFC correctness #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(%url $email @EXPORT);
15
16 use autouse 'Carp' => qw(croak);
17
18 ################################################################################
19 #
20 # Export
21 #
22 use base qw(Exporter);
23 @EXPORT = qw(is_URL is_email);
24
25 ### is_URL ($@) ################################################################
26 #
27 # check URL
28 #
29 # Params: $string string to check
30 # @schemes possible URL schemes in $string
31 # qw(http ftp news nntp telnet gopher wais mailto file prospero)
32 # if there's no scheme given, 'http' is default
33 # use ':ALL' (without quotes) for all schemes
34 #
35 # Return: Status code (Bool)
36 #
37 sub is_URL ($@) {
38 my ($string, @schemes) = @_;
39 @schemes = qw(http) unless (@schemes);
40 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
41
42 for (@schemes) {
43 croak "unknown url scheme '$_'" unless exists $url{$_};
44 return 1 if $string =~ /$url{$_}/;
45 }
46
47 # no match => return false
48 return;
49 }
50
51 ### is_email ($) ###############################################################
52 #
53 # check email (comments can be nested)
54 #
55 # Params: $string string to check
56 #
57 # Return: Status code (Bool)
58 #
59 sub is_email ($) {
60 my $string = shift;
61
62 # false if any non-ascii chars
63 return if $string =~ /[\200-\377]/;
64
65 # remove nested comments
66 while ($string =~ s/\([^()]*\)//g) {};
67
68 return ($string =~ /^$email$/);
69 }
70
71 ### BEGIN # (1) ################################################################
72 #
73 # define regex for nearly RFC 822 email address
74 #
75 BEGIN {
76 # Thanx to J. Friedl:
77
78 my $esc = '\\\\';
79 my $Period = '\.';
80 my $space = '\040';
81 my $tab = '\t';
82 my $OpenBR = '\[';
83 my $CloseBR = '\]';
84 my $OpenParen = '\(';
85 my $CloseParen = '\)';
86 my $NonASCII = '\x80-\xff';
87 my $ctrl = '\000-\037';
88 my $CRlist = '\n\015';
89 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
90 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
91 my $quoted_pair = qq< $esc [^$NonASCII] >;
92 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
93 my $Cnested = qq< $OpenParen $ctext* (?: $quoted_pair $ctext* )* $CloseParen >;
94 my $comment = qq< $OpenParen $ctext* (?: (?: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
95 my $X = qq< [$space$tab]* (?: $comment [$space$tab]* )* >;
96 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
97 my $atom = qq< $atom_char+ (?!$atom_char) >;
98 my $quoted_str = qq< \" $qtext * (?: $quoted_pair $qtext * )* \" >;
99 my $word = qq< (?: $atom | $quoted_str ) >;
100 my $domain_ref = $atom;
101 my $domain_lit = qq< $OpenBR (?: $dtext | $quoted_pair )* $CloseBR >;
102 my $sub_domain = qq< (?: $domain_ref | $domain_lit ) $X >;
103 my $domain = qq< $sub_domain (?: $Period $X $sub_domain )* >;
104 my $route = qq< \@ $X $domain (?: , $X \@ $X $domain )* : $X >;
105 my $local_part = qq< $word $X (?: $Period $X $word $X )* >;
106 my $addr_spec = qq< $local_part \@ $X $domain >;
107 my $route_addr = qq[ < $X (?: $route )? $addr_spec > ];
108 my $phrase_ctrl = '\000-\010\012-\037';
109 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
110 my $phrase = qq< $word $phrase_char * (?: (?: $comment | $quoted_str ) $phrase_char * )* >;
111 $email = qq< $X (?: $addr_spec | $phrase $route_addr ) >;
112 $email = qr /$email/x;
113 }
114
115 ### BEGIN # (2) ################################################################
116 #
117 # define regexes for URLs
118 #
119 BEGIN {
120 # credits to an unknown(?) programmer ;)
121 # modified by n.d.p.
122
123 my $lowalpha = '(?:[a-z])';
124 my $hialpha = '(?:[A-Z])';
125 my $alpha = "(?:$lowalpha|$hialpha)";
126 my $digit = '(?:\d)';
127 my $safe = '(?:[$_.+-])';
128 my $extra = '(?:[!*\'(),])';
129 my $national = '(?:[{}|\\\\^~\[\]`])';
130 my $punctuation = '(?:[<>#%"])';
131 my $reserved = '(?:[;/?:@&=])';
132 my $hex = '(?:[\dA-Fa-f])';
133 my $escape = "(?:%$hex$hex)";
134 my $unreserved = "(?:$alpha|$digit|$safe|$extra)";
135 my $uchar = "(?:$unreserved|$escape)";
136 my $xchar = "(?:$unreserved|$escape|$reserved)";
137 my $digits = '(?:\d+)';
138 my $alphadigit = "(?:$alpha|\\d)";
139
140 # URL schemeparts for ip based protocols:
141 my $urlpath = "(?:$xchar*)";
142 my $user = "(?:(?:$uchar|[;?&=])*)";
143 my $password = "(?:(?:$uchar|[;?&=])*)";
144 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
145 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
146 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
147 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
148 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
149 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
150 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
151 my $hostport = "(?:(?:$host)(?::$port)?)";
152 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
153 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
154
155 my $schemepart = "(?:$xchar*|$ip_schemepart)";
156 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
157
158 # The predefined schemes:
159
160 # FTP (see also RFC959)
161 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
162 my $ftptype = "(?:[AIDaid])";
163 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
164 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
165
166 # FILE
167 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
168
169 # HTTP
170 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
171 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
172 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
173 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
174 my $httpurl = "(?:http://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
175
176 # GOPHER (see also RFC1436)
177 my $gopher_plus = "(?:$xchar*)";
178 my $selector = "(?:$xchar*)";
179 my $gtype = "(?:$xchar)";
180 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
181
182 # MAILTO (see also RFC822)
183 my $encoded822addr = "(?:$email)";
184 my $mailtourl = "(?:mailto:$encoded822addr)";
185
186 # NEWS (see also RFC1036)
187 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
188 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
189 my $grouppart = "(?:$article|$group|\\*)";
190 my $newsurl = "(?:news:$grouppart)";
191
192 # NNTP (see also RFC977)
193 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
194
195 # TELNET
196 my $telneturl = "(?:telnet://$login(?:/)?)";
197
198 # WAIS (see also RFC1625)
199 my $wpath = "(?:$uchar*)";
200 my $wtype = "(?:$uchar*)";
201 my $database = "(?:$uchar*)";
202 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
203 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
204 my $waisdatabase = "(?:wais://$hostport/$database)";
205 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
206
207 # PROSPERO
208 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
209 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
210 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
211 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
212 my $ppath = "(?:$psegment(?:/$psegment)*)";
213 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
214
215 %url = (
216 http => qr/^$httpurl$/,
217 ftp => qr/^$ftpurl$/,
218 news => qr/^$newsurl$/,
219 nntp => qr/^$nntpurl$/,
220 telnet => qr/^$telneturl$/,
221 gopher => qr/^$gopherurl$/,
222 wais => qr/^$waisurl$/,
223 mailto => qr/^$mailtourl$/,
224 file => qr/^$fileurl$/,
225 prospero => qr/^$prosperourl$/
226 );
227 }
228
229 # keeping require happy
230 1;
231
232 #
233 #
234 ### end of CheckRFC ############################################################

patrick-canterino.de