]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
b000dcff7f737918538be98882ee8daac16e0bef
[selfforum.git] / selfforum-cgi / shared / CheckRFC.pm
1 package CheckRFC;
2
3 ################################################################################
4 # #
5 # File: shared/CheckRFC.pm #
6 # #
7 # Authors: Andre Malo <nd@o3media.de>, 2001-04-14 #
8 # #
9 # Description: implement several string checks on RFC correctness #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 $v56
16 %url
17 @email
18 @EXPORT
19 @ISA
20 $VERSION
21 );
22
23 $v56 = eval q[
24 local $SIG{__DIE__};
25 require 5.6.0;
26 ];
27
28 use Carp qw(croak);
29
30 # Version check
31 #
32 $VERSION = do { my @r =(q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
33
34 ################################################################################
35 #
36 # Export
37 #
38 require Exporter;
39 @ISA = qw(Exporter);
40
41 @EXPORT = qw(
42 is_URL
43 is_email
44 );
45
46 ### is_URL ($@) ################################################################
47 #
48 # check URL
49 #
50 # Params: $string string to check
51 # @schemes possible URL schemes in $string
52 # qw( http strict_http ftp news nntp telnet
53 # gopher wais mailto strict_mailto file prospero)
54 # if there's no scheme given, 'http' is default
55 # use ':ALL' (without quotes) for all schemes
56 #
57 # Return: Status code (Bool)
58 #
59 sub is_URL (;$@) {
60 my ($string, @schemes) = @_;
61 $string = $_ unless defined $string;
62 my $scheme;
63
64 return unless (defined ($string) and length ($string));
65
66 @schemes = qw(http) unless (@schemes);
67 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
68
69 for $scheme (@schemes) {
70 croak "unknown url scheme '$scheme'" unless exists $url{$scheme};
71 unless ($scheme =~ /mailto/) {
72 return 1 if ($string =~ /$url{$scheme}/);
73 }
74 else {
75 if ($string =~ /^mailto:(.+)/) {
76
77 if ($scheme eq 'mailto') {
78 return 1 if (is_email ($1));
79 }
80 elsif ($scheme eq 'strict_mailto') {
81 return 1 if (is_email ($1,1));
82 }
83 }
84 }
85 }
86
87 # no match => return false
88 return;
89 }
90
91 ### is_email ($) ###############################################################
92 #
93 # check email (comments can be nested)
94 #
95 # Params: $string - string to check
96 # $strict - (optional) check strict RFC syntax (no TLD needed) if true
97 #
98 # Return: Status code (Bool)
99 #
100 sub is_email (;$$) {
101 my $string = shift;
102 $string = $_ unless defined $string;
103 return unless defined $string;
104
105 my $strict = shift;
106
107 # false if any non-ascii chars
108 return unless (defined ($string) and length ($string));
109 return if $string =~ /[\200-\377]/;
110
111 # remove nested comments
112 1 while ($string =~ s/\([^()]*\)//g);
113
114 return ($string =~ /^$email[0]$/) unless $strict;
115
116 return ($string =~ /^$email[1]$/);
117 }
118
119 ### BEGIN # (1) ################################################################
120 #
121 # define regex for nearly RFC 822 email address
122 #
123 BEGIN {
124 # Thanx to J. Friedl:
125
126 my $esc = '\\\\';
127 my $Period = '\.';
128 my $space = '\040';
129 my $tab = '\t';
130 my $OpenBR = '\[';
131 my $CloseBR = '\]';
132 my $OpenParen = '\(';
133 my $CloseParen = '\)';
134 my $NonASCII = '\x80-\xff';
135 my $ctrl = '\000-\037';
136 my $CRlist = '\n\015';
137 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
138 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
139 my $quoted_pair = qq< $esc [^$NonASCII] >;
140 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
141 my $Cnested = qq< $OpenParen $ctext* (?: $quoted_pair $ctext* )* $CloseParen >;
142 my $comment = qq< $OpenParen $ctext* (?: (?: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
143 my $X = qq< [$space$tab]* (?: $comment [$space$tab]* )* >;
144 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
145 my $atom = qq< $atom_char+ (?!$atom_char) >;
146 my $quoted_str = qq< \" $qtext * (?: $quoted_pair $qtext * )* \" >;
147 my $word = qq< (?: $atom | $quoted_str ) >;
148 my $domain_ref = $atom;
149 my $domain_lit = qq< $OpenBR (?: $dtext | $quoted_pair )* $CloseBR >;
150 my $sub_domain = qq< (?: $domain_ref | $domain_lit ) $X >;
151 my $domain;
152
153 @email = ();
154 for $domain (
155 qq< $sub_domain (?: $Period $X $sub_domain )* $Period [A-Za-z][A-Za-z][A-Za-z]?[A-Za-z]? >,
156 qq< $sub_domain (?: $Period $X $sub_domain )* >
157 ) {
158 my $route = qq< \@ $X $domain (?: , $X \@ $X $domain )* : $X >;
159 my $local_part = qq< $word $X (?: $Period $X $word $X )* >;
160 my $addr_spec = qq< $local_part \@ $X $domain >;
161 my $route_addr = qq[ < $X (?: $route )? $addr_spec > ];
162 my $phrase_ctrl = '\000-\010\012-\037';
163 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
164 my $phrase = qq< $word $phrase_char * (?: (?: $comment | $quoted_str ) $phrase_char * )* >;
165 my $email = qq< $X (?: $addr_spec | $phrase $route_addr ) >;
166
167 if ($v56) {
168 eval q<
169 local $SIG{__DIE__};
170 $email = qr/$email/x;
171 >;
172 }
173 else {
174 $email =~ s/\s+//g;
175 }
176
177 push @email => $email;
178 }
179 }
180
181 ### BEGIN # (2) ################################################################
182 #
183 # define regexes for URLs
184 #
185 BEGIN {
186 # credits to an unknown(?) programmer ;)
187 # modified by n.d.p.
188
189 my $lowalpha = '[a-z]';
190 my $hialpha = '[A-Z]';
191 my $alpha = '[a-zA-Z]';
192 my $digit = '\d';
193 my $safe = '[$_.+-]';
194 my $extra = '[!*\'(),]';
195 my $national = '[{}|\\\\^~\[\]`]';
196 my $punctuation = '[<>#%"]';
197 my $reserved = '[;/?:@&=]';
198 my $hex = '[\dA-Fa-f]';
199 my $escape = "(?:%$hex$hex)";
200 my $unreserved = '[{}|\\\\^~\[\]`a-zA-Z\d$_.+-]'; #"(?:$alpha|$digit|$safe|$extra)";
201 my $uchar = "(?:$unreserved|$escape)";
202 my $xchar = "(?:$unreserved|$escape|$reserved)";
203 my $digits = '(?:\d+)';
204 my $alphadigit = '[a-zA-Z\d]'; #"(?:$alpha|$digit)";
205
206 # URL schemeparts for ip based protocols:
207 my $urlpath = "(?:$xchar*)";
208 my $user = "(?:(?:$uchar|[;?&=])*)";
209 my $password = "(?:(?:$uchar|[;?&=])*)";
210 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
211 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
212 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)(?!255\.255\.255\.255)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
213 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
214 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
215 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
216 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
217 my $hostport = "(?:(?:$host)(?::$port)?)";
218 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
219 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
220
221 my $schemepart = "(?:$xchar*|$ip_schemepart)";
222 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
223
224 # The predefined schemes:
225
226 # FTP (see also RFC959)
227 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
228 my $ftptype = "(?:[AIDaid])";
229 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
230 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
231
232 # FILE
233 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
234
235 # HTTP
236 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
237 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
238 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
239 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
240 my $httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
241 my $strict_httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?)";
242
243 # GOPHER (see also RFC1436)
244 my $gopher_plus = "(?:$xchar*)";
245 my $selector = "(?:$xchar*)";
246 my $gtype = "(?:$xchar)";
247 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
248
249 # NEWS (see also RFC1036)
250 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
251 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
252 my $grouppart = "(?:$article|$group|\\*)";
253 my $newsurl = "(?:news:$grouppart)";
254
255 # NNTP (see also RFC977)
256 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
257
258 # TELNET
259 my $telneturl = "(?:telnet://$login(?:/)?)";
260
261 # WAIS (see also RFC1625)
262 my $wpath = "(?:$uchar*)";
263 my $wtype = "(?:$uchar*)";
264 my $database = "(?:$uchar*)";
265 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
266 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
267 my $waisdatabase = "(?:wais://$hostport/$database)";
268 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
269
270 # PROSPERO
271 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
272 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
273 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
274 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
275 my $ppath = "(?:$psegment(?:/$psegment)*)";
276 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
277
278 if ($v56) {
279 eval q[%url = (
280 http => qr/^$httpurl$/,
281 strict_http => qr/^$strict_httpurl$/,
282 ftp => qr/^$ftpurl$/,
283 news => qr/^$newsurl$/,
284 nntp => qr/^$nntpurl$/,
285 telnet => qr/^$telneturl$/,
286 gopher => qr/^$gopherurl$/,
287 wais => qr/^$waisurl$/,
288 mailto => 0,
289 strict_mailto => 0,
290 file => qr/^$fileurl$/,
291 prospero => qr/^$prosperourl$/
292 );];
293 }
294 else {
295 %url = (
296 http => "^$httpurl\$",
297 strict_http => "^$strict_httpurl\$",
298 ftp => "^$ftpurl\$",
299 news => "^$newsurl\$",
300 nntp => "^$nntpurl\$",
301 telnet => "^$telneturl\$",
302 gopher => "^$gopherurl\$",
303 wais => "^$waisurl\$",
304 mailto => 0,
305 strict_mailto => 0,
306 file => "^$fileurl\$",
307 prospero => "^$prosperourl\$"
308 );
309 }
310 }
311
312 # keeping require happy
313 1;
314
315 #
316 #
317 ### end of CheckRFC ############################################################

patrick-canterino.de