]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
new kernel implemented (not only programming style changes), several bugs fixed,...
[selfforum.git] / selfforum-cgi / shared / CheckRFC.pm
1 package CheckRFC;
2
3 ################################################################################
4 # #
5 # File: shared/CheckRFC.pm #
6 # #
7 # Authors: Andre Malo <nd@o3media.de>, 2001-04-14 #
8 # #
9 # Description: implement several string checks on RFC correctness #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 $v56
16 %url
17 @email
18 @EXPORT
19 @ISA
20 );
21
22 $v56 = eval q[
23 local $SIG{__DIE__};
24 require 5.6.0;
25 ];
26
27 use Carp qw(croak);
28
29 ################################################################################
30 #
31 # Export
32 #
33 require Exporter;
34 @ISA = qw(Exporter);
35
36 @EXPORT = qw(
37 is_URL
38 is_email
39 );
40
41 ### is_URL ($@) ################################################################
42 #
43 # check URL
44 #
45 # Params: $string string to check
46 # @schemes possible URL schemes in $string
47 # qw( http strict_http ftp news nntp telnet
48 # gopher wais mailto strict_mailto file prospero)
49 # if there's no scheme given, 'http' is default
50 # use ':ALL' (without quotes) for all schemes
51 #
52 # Return: Status code (Bool)
53 #
54 sub is_URL ($@) {
55 my ($string, @schemes) = @_;
56
57 return unless (defined ($string) and length ($string));
58
59 @schemes = qw(http) unless (@schemes);
60 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
61
62 for (@schemes) {
63 croak "unknown url scheme '$_'" unless exists $url{$_};
64 unless (/mailto/) {
65 return 1 if ($string =~ /$url{$_}/);
66 }
67 else {
68 return unless ($string =~ /^mailto:(.+)/);
69
70 if ($_ eq 'mailto') {
71 return 1 if (is_email ($1));
72 }
73 elsif ($_ eq 'strict_mailto') {
74 return 1 if (is_email ($1,1));
75 }
76 }
77 }
78
79 # no match => return false
80 return;
81 }
82
83 ### is_email ($) ###############################################################
84 #
85 # check email (comments can be nested)
86 #
87 # Params: $string - string to check
88 # $strict - (optional) check strict RFC syntax (no TLD needed) if true
89 #
90 # Return: Status code (Bool)
91 #
92 sub is_email ($;$) {
93 my $string = shift;
94 my $strict = shift;
95
96 # false if any non-ascii chars
97 return unless (defined ($string) and length ($string));
98 return if $string =~ /[\200-\377]/;
99
100 # remove nested comments
101 1 while ($string =~ s/\([^()]*\)//g);
102
103 return ($string =~ /^$email[0]$/) unless $strict;
104
105 return ($string =~ /^$email[1]$/);
106 }
107
108 ### BEGIN # (1) ################################################################
109 #
110 # define regex for nearly RFC 822 email address
111 #
112 BEGIN {
113 # Thanx to J. Friedl:
114
115 my $esc = '\\\\';
116 my $Period = '\.';
117 my $space = '\040';
118 my $tab = '\t';
119 my $OpenBR = '\[';
120 my $CloseBR = '\]';
121 my $OpenParen = '\(';
122 my $CloseParen = '\)';
123 my $NonASCII = '\x80-\xff';
124 my $ctrl = '\000-\037';
125 my $CRlist = '\n\015';
126 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
127 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
128 my $quoted_pair = qq< $esc [^$NonASCII] >;
129 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
130 my $Cnested = qq< $OpenParen $ctext* (?: $quoted_pair $ctext* )* $CloseParen >;
131 my $comment = qq< $OpenParen $ctext* (?: (?: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
132 my $X = qq< [$space$tab]* (?: $comment [$space$tab]* )* >;
133 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
134 my $atom = qq< $atom_char+ (?!$atom_char) >;
135 my $quoted_str = qq< \" $qtext * (?: $quoted_pair $qtext * )* \" >;
136 my $word = qq< (?: $atom | $quoted_str ) >;
137 my $domain_ref = $atom;
138 my $domain_lit = qq< $OpenBR (?: $dtext | $quoted_pair )* $CloseBR >;
139 my $sub_domain = qq< (?: $domain_ref | $domain_lit ) $X >;
140 my $domain;
141
142 @email = ();
143 for $domain (
144 qq< $sub_domain (?: $Period $X $sub_domain )+ >,
145 qq< $sub_domain (?: $Period $X $sub_domain )* >
146 ) {
147 my $route = qq< \@ $X $domain (?: , $X \@ $X $domain )* : $X >;
148 my $local_part = qq< $word $X (?: $Period $X $word $X )* >;
149 my $addr_spec = qq< $local_part \@ $X $domain >;
150 my $route_addr = qq[ < $X (?: $route )? $addr_spec > ];
151 my $phrase_ctrl = '\000-\010\012-\037';
152 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
153 my $phrase = qq< $word $phrase_char * (?: (?: $comment | $quoted_str ) $phrase_char * )* >;
154 my $email = qq< $X (?: $addr_spec | $phrase $route_addr ) >;
155
156 if ($v56) {
157 eval q<
158 local $SIG{__DIE__};
159 $email = qr/$email/x;
160 >;
161 }
162 else {
163 $email =~ s/\s+//g;
164 }
165
166 push @email => $email;
167 }
168 }
169
170 ### BEGIN # (2) ################################################################
171 #
172 # define regexes for URLs
173 #
174 BEGIN {
175 # credits to an unknown(?) programmer ;)
176 # modified by n.d.p.
177
178 my $lowalpha = '(?:[a-z])';
179 my $hialpha = '(?:[A-Z])';
180 my $alpha = "(?:$lowalpha|$hialpha)";
181 my $digit = '(?:\d)';
182 my $safe = '(?:[$_.+-])';
183 my $extra = '(?:[!*\'(),])';
184 my $national = '(?:[{}|\\\\^~\[\]`])';
185 my $punctuation = '(?:[<>#%"])';
186 my $reserved = '(?:[;/?:@&=])';
187 my $hex = '(?:[\dA-Fa-f])';
188 my $escape = "(?:%$hex$hex)";
189 my $unreserved = "(?:$alpha|$digit|$safe|$extra)";
190 my $uchar = "(?:$unreserved|$escape)";
191 my $xchar = "(?:$unreserved|$escape|$reserved)";
192 my $digits = '(?:\d+)';
193 my $alphadigit = "(?:$alpha|\\d)";
194
195 # URL schemeparts for ip based protocols:
196 my $urlpath = "(?:$xchar*)";
197 my $user = "(?:(?:$uchar|[;?&=])*)";
198 my $password = "(?:(?:$uchar|[;?&=])*)";
199 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
200 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
201 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)(?!255\.255\.255\.255)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
202 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
203 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
204 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
205 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
206 my $hostport = "(?:(?:$host)(?::$port)?)";
207 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
208 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
209
210 my $schemepart = "(?:$xchar*|$ip_schemepart)";
211 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
212
213 # The predefined schemes:
214
215 # FTP (see also RFC959)
216 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
217 my $ftptype = "(?:[AIDaid])";
218 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
219 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
220
221 # FILE
222 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
223
224 # HTTP
225 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
226 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
227 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
228 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
229 my $httpurl = "(?:http://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
230 my $strict_httpurl = "(?:http://$hostport(?:/$hpath(?:\\?$search)?)?)";
231
232 # GOPHER (see also RFC1436)
233 my $gopher_plus = "(?:$xchar*)";
234 my $selector = "(?:$xchar*)";
235 my $gtype = "(?:$xchar)";
236 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
237
238 # NEWS (see also RFC1036)
239 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
240 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
241 my $grouppart = "(?:$article|$group|\\*)";
242 my $newsurl = "(?:news:$grouppart)";
243
244 # NNTP (see also RFC977)
245 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
246
247 # TELNET
248 my $telneturl = "(?:telnet://$login(?:/)?)";
249
250 # WAIS (see also RFC1625)
251 my $wpath = "(?:$uchar*)";
252 my $wtype = "(?:$uchar*)";
253 my $database = "(?:$uchar*)";
254 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
255 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
256 my $waisdatabase = "(?:wais://$hostport/$database)";
257 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
258
259 # PROSPERO
260 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
261 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
262 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
263 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
264 my $ppath = "(?:$psegment(?:/$psegment)*)";
265 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
266
267 if ($v56) {
268 eval q[%url = (
269 http => qr/^$httpurl$/,
270 strict_http => qr/^$strict_httpurl$/,
271 ftp => qr/^$ftpurl$/,
272 news => qr/^$newsurl$/,
273 nntp => qr/^$nntpurl$/,
274 telnet => qr/^$telneturl$/,
275 gopher => qr/^$gopherurl$/,
276 wais => qr/^$waisurl$/,
277 mailto => 0,
278 strict_mailto => 0,
279 file => qr/^$fileurl$/,
280 prospero => qr/^$prosperourl$/
281 );];
282 }
283 else {
284 %url = (
285 http => "^$httpurl\$",
286 strict_http => "^$strict_httpurl\$",
287 ftp => "^$ftpurl\$",
288 news => "^$newsurl\$",
289 nntp => "^$nntpurl\$",
290 telnet => "^$telneturl\$",
291 gopher => "^$gopherurl\$",
292 wais => "^$waisurl\$",
293 mailto => 0,
294 strict_mailto => 0,
295 file => "^$fileurl\$",
296 prospero => "^$prosperourl\$"
297 );
298 }
299 }
300
301 # keeping require happy
302 1;
303
304 #
305 #
306 ### end of CheckRFC ############################################################

patrick-canterino.de