]>
git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
3 ################################################################################
5 # File: shared/CheckRFC.pm #
7 # Authors: Andre Malo <nd@o3media.de>, 2001-04-14 #
9 # Description: implement several string checks on RFC correctness #
11 ################################################################################
32 $VERSION = do { my @r =(q
$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x
$#r, @r };
34 ################################################################################
46 ### is_URL ($@) ################################################################
50 # Params: $string string to check
51 # @schemes possible URL schemes in $string
52 # qw( http strict_http ftp news nntp telnet
53 # gopher wais mailto strict_mailto file prospero)
54 # if there's no scheme given, 'http' is default
55 # use ':ALL' (without quotes) for all schemes
57 # Return: Status code (Bool)
60 my ($string, @schemes) = @_;
63 return unless (defined ($string) and length ($string));
65 @schemes = qw(http) unless (@schemes);
66 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
68 for $scheme (@schemes) {
69 croak
"unknown url scheme '$scheme'" unless exists $url{$scheme};
70 unless ($scheme =~ /mailto/) {
71 return 1 if ($string =~ /$url{$scheme}/);
74 if ($string =~ /^mailto:(.+)/) {
76 if ($scheme eq 'mailto') {
77 return 1 if (is_email
($1));
79 elsif ($scheme eq 'strict_mailto') {
80 return 1 if (is_email
($1,1));
86 # no match => return false
90 ### is_email ($) ###############################################################
92 # check email (comments can be nested)
94 # Params: $string - string to check
95 # $strict - (optional) check strict RFC syntax (no TLD needed) if true
97 # Return: Status code (Bool)
103 # false if any non-ascii chars
104 return unless (defined ($string) and length ($string));
105 return if $string =~ /[\200-\377]/;
107 # remove nested comments
108 1 while ($string =~ s/\([^()]*\)//g);
109 #$string =~ s/^\s+//;
110 #$string =~ s/\s+$//;
112 return ($string =~ /^$email[0]$/) unless $strict;
114 return ($string =~ /^$email[1]$/);
117 ### BEGIN # (1) ################################################################
119 # define regex for nearly RFC 822 email address
122 # Thanx to J. Friedl:
130 my $OpenParen = '\(';
131 my $CloseParen = '\)';
132 my $NonASCII = '\x80-\xff';
133 my $ctrl = '\000-\037';
134 my $CRlist = '\n\015';
135 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
136 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
137 my $quoted_pair = qq< $esc [^$NonASCII] >;
138 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
139 my $Cnested = qq< $OpenParen $ctext* (?
: $quoted_pair $ctext* )* $CloseParen >;
140 my $comment = qq< $OpenParen $ctext* (?
: (?
: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
141 my $X = qq< [$space$tab]* (?
: $comment [$space$tab]* )* >;
142 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
143 my $atom = qq< $atom_char+ (?
!$atom_char) >;
144 my $quoted_str = qq< \" $qtext * (?
: $quoted_pair $qtext * )* \" >;
145 my $word = qq< (?
: $atom | $quoted_str ) >;
146 my $domain_ref = $atom;
147 my $domain_lit = qq< $OpenBR (?
: $dtext | $quoted_pair )* $CloseBR >;
148 my $sub_domain = qq< (?
: $domain_ref | $domain_lit ) $X >;
153 qq< $sub_domain (?
: $Period $X $sub_domain )* $Period [A
-Za
-z
][A
-Za
-z
][A
-Za
-z
]?
[A
-Za
-z
]?
>,
154 qq< $sub_domain (?
: $Period $X $sub_domain )* >
156 my $route = qq< \@
$X $domain (?
: , $X \@
$X $domain )* : $X >;
157 my $local_part = qq< $word $X (?
: $Period $X $word $X )* >;
158 my $addr_spec = qq< $local_part \@
$X $domain >;
159 my $route_addr = qq[ < $X (?
: $route )?
$addr_spec > ];
160 my $phrase_ctrl = '\000-\010\012-\037';
161 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
162 my $phrase = qq< $word $phrase_char * (?
: (?
: $comment | $quoted_str ) $phrase_char * )* >;
163 my $email = qq< $X (?
: $addr_spec | $phrase $route_addr ) >;
168 $email = qr/$email/x;
175 push @email => $email;
179 ### BEGIN # (2) ################################################################
181 # define regexes for URLs
184 # credits to an unknown(?) programmer ;)
187 my $lowalpha = '[a-z]';
188 my $hialpha = '[A-Z]';
189 my $alpha = '[a-zA-Z]';
191 my $safe = '[$_.+-]';
192 my $extra = '[!*\'(),]';
193 my $national = '[{}|\\\\^~\[\]`]';
194 my $punctuation = '[<>#%"]';
195 my $reserved = '[;/?:@&=]';
196 my $hex = '[\dA-Fa-f]';
197 my $escape = "(?:%$hex$hex)";
198 my $unreserved = '[{}|\\\\^~\[\]`a-zA-Z\d$_.+-]'; #"(?:$alpha|$digit|$safe|$extra)";
199 my $uchar = "(?:$unreserved|$escape)";
200 my $xchar = "(?:$unreserved|$escape|$reserved)";
201 my $digits = '(?:\d+)';
202 my $alphadigit = '[a-zA-Z\d]'; #"(?:$alpha|$digit)";
204 # URL schemeparts for ip based protocols:
205 my $urlpath = "(?:$xchar*)";
206 my $user = "(?:(?:$uchar|[;?&=])*)";
207 my $password = "(?:(?:$uchar|[;?&=])*)";
208 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
209 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
210 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)(?!255\.255\.255\.255)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
211 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
212 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
213 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
214 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
215 my $hostport = "(?:(?:$host)(?::$port)?)";
216 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
217 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
219 my $schemepart = "(?:$xchar*|$ip_schemepart)";
220 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
222 # The predefined schemes:
224 # FTP (see also RFC959)
225 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
226 my $ftptype = "(?:[AIDaid])";
227 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
228 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
231 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
234 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
235 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
236 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
237 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
238 my $httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
239 my $strict_httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?)";
241 # GOPHER (see also RFC1436)
242 my $gopher_plus = "(?:$xchar*)";
243 my $selector = "(?:$xchar*)";
244 my $gtype = "(?:$xchar)";
245 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
247 # NEWS (see also RFC1036)
248 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
249 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
250 my $grouppart = "(?:$article|$group|\\*)";
251 my $newsurl = "(?:news:$grouppart)";
253 # NNTP (see also RFC977)
254 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
257 my $telneturl = "(?:telnet://$login(?:/)?)";
259 # WAIS (see also RFC1625)
260 my $wpath = "(?:$uchar*)";
261 my $wtype = "(?:$uchar*)";
262 my $database = "(?:$uchar*)";
263 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
264 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
265 my $waisdatabase = "(?:wais://$hostport/$database)";
266 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
269 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
270 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
271 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
272 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
273 my $ppath = "(?:$psegment(?:/$psegment)*)";
274 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
278 http
=> qr/^$httpurl$/,
279 strict_http
=> qr/^$strict_httpurl$/,
280 ftp
=> qr/^$ftpurl$/,
281 news
=> qr/^$newsurl$/,
282 nntp
=> qr/^$nntpurl$/,
283 telnet
=> qr/^$telneturl$/,
284 gopher
=> qr/^$gopherurl$/,
285 wais
=> qr/^$waisurl$/,
288 file
=> qr/^$fileurl$/,
289 prospero
=> qr/^$prosperourl$/
294 http
=> "^$httpurl\$",
295 strict_http
=> "^$strict_httpurl\$",
297 news
=> "^$newsurl\$",
298 nntp
=> "^$nntpurl\$",
299 telnet
=> "^$telneturl\$",
300 gopher
=> "^$gopherurl\$",
301 wais
=> "^$waisurl\$",
304 file
=> "^$fileurl\$",
305 prospero
=> "^$prosperourl\$"
310 # keeping require happy
315 ### end of CheckRFC ############################################################
patrick-canterino.de