]>
git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
3a1ed0c3cc2b05d3779f1c71ab95f3d811fc7099
3 ################################################################################
5 # File: shared/CheckRFC.pm #
7 # Authors: Andre Malo <nd@o3media.de>, 2001-03-30 #
9 # Description: implement several string checks on RFC correctness #
11 ################################################################################
14 use vars
qw(%url $email @EXPORT);
16 use autouse 'Carp' => qw(croak);
18 ################################################################################
22 use base
qw(Exporter);
23 @EXPORT = qw(is_URL is_email);
25 ### is_URL ($@) ################################################################
29 # Params: $string string to check
30 # @schemes possible URL schemes in $string
31 # qw(http ftp news nntp telnet gopher wais mailto file prospero)
32 # if there's no scheme given, 'http' is default
33 # use ':ALL' (without quotes) for all schemes
35 # Return: Status code (Bool)
38 my ($string, @schemes) = @_;
39 @schemes = qw(http) unless (@schemes);
40 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
43 croak
"unknown url scheme '$_'" unless exists $url{$_};
44 return 1 if $string =~ /$url{$_}/;
47 # no match => return false
51 ### is_email ($) ###############################################################
53 # check email (comments can be nested)
55 # Params: $string string to check
57 # Return: Status code (Bool)
62 # false if any non-ascii chars
63 return if $string =~ /[\200-\377]/;
65 # remove nested comments
66 while ($string =~ s/\([^()]*\)//g) {};
68 return ($string =~ /^$email$/);
71 ### BEGIN # (1) ################################################################
73 # define regex for nearly RFC 822 email address
85 my $CloseParen = '\)';
86 my $NonASCII = '\x80-\xff';
87 my $ctrl = '\000-\037';
88 my $CRlist = '\n\015';
89 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
90 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
91 my $quoted_pair = qq< $esc [^$NonASCII] >;
92 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
93 my $Cnested = qq< $OpenParen $ctext* (?
: $quoted_pair $ctext* )* $CloseParen >;
94 my $comment = qq< $OpenParen $ctext* (?
: (?
: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
95 my $X = qq< [$space$tab]* (?
: $comment [$space$tab]* )* >;
96 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
97 my $atom = qq< $atom_char+ (?
!$atom_char) >;
98 my $quoted_str = qq< \" $qtext * (?
: $quoted_pair $qtext * )* \" >;
99 my $word = qq< (?
: $atom | $quoted_str ) >;
100 my $domain_ref = $atom;
101 my $domain_lit = qq< $OpenBR (?
: $dtext | $quoted_pair )* $CloseBR >;
102 my $sub_domain = qq< (?
: $domain_ref | $domain_lit ) $X >;
103 my $domain = qq< $sub_domain (?
: $Period $X $sub_domain )* >;
104 my $route = qq< \@
$X $domain (?
: , $X \@
$X $domain )* : $X >;
105 my $local_part = qq< $word $X (?
: $Period $X $word $X )* >;
106 my $addr_spec = qq< $local_part \@
$X $domain >;
107 my $route_addr = qq[ < $X (?
: $route )?
$addr_spec > ];
108 my $phrase_ctrl = '\000-\010\012-\037';
109 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
110 my $phrase = qq< $word $phrase_char * (?
: (?
: $comment | $quoted_str ) $phrase_char * )* >;
111 $email = qq< $X (?
: $addr_spec | $phrase $route_addr ) >;
112 $email = qr
/$email/x;
115 ### BEGIN # (2) ################################################################
117 # define regexes for URLs
120 # credits to an unknown(?) programmer ;)
123 my $lowalpha = '(?:[a-z])';
124 my $hialpha = '(?:[A-Z])';
125 my $alpha = "(?:$lowalpha|$hialpha)";
126 my $digit = '(?:\d)';
127 my $safe = '(?:[$_.+-])';
128 my $extra = '(?:[!*\'(),])';
129 my $national = '(?:[{}|\\\\^~\[\]`])';
130 my $punctuation = '(?:[<>#%"])';
131 my $reserved = '(?:[;/?:@&=])';
132 my $hex = '(?:[\dA-Fa-f])';
133 my $escape = "(?:%$hex$hex)";
134 my $unreserved = "(?:$alpha|$digit|$safe|$extra)";
135 my $uchar = "(?:$unreserved|$escape)";
136 my $xchar = "(?:$unreserved|$escape|$reserved)";
137 my $digits = '(?:\d+)';
138 my $alphadigit = "(?:$alpha|\\d)";
140 # URL schemeparts for ip based protocols:
141 my $urlpath = "(?:$xchar*)";
142 my $user = "(?:(?:$uchar|[;?&=])*)";
143 my $password = "(?:(?:$uchar|[;?&=])*)";
144 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
145 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
146 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
147 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
148 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
149 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
150 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
151 my $hostport = "(?:(?:$host)(?::$port)?)";
152 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
153 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
155 my $schemepart = "(?:$xchar*|$ip_schemepart)";
156 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
158 # The predefined schemes:
160 # FTP (see also RFC959)
161 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
162 my $ftptype = "(?:[AIDaid])";
163 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
164 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
167 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
170 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
171 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
172 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
173 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
174 my $httpurl = "(?:http://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
176 # GOPHER (see also RFC1436)
177 my $gopher_plus = "(?:$xchar*)";
178 my $selector = "(?:$xchar*)";
179 my $gtype = "(?:$xchar)";
180 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
182 # MAILTO (see also RFC822)
183 my $encoded822addr = "(?:$email)";
184 my $mailtourl = "(?:mailto:$encoded822addr)";
186 # NEWS (see also RFC1036)
187 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
188 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
189 my $grouppart = "(?:$article|$group|\\*)";
190 my $newsurl = "(?:news:$grouppart)";
192 # NNTP (see also RFC977)
193 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
196 my $telneturl = "(?:telnet://$login(?:/)?)";
198 # WAIS (see also RFC1625)
199 my $wpath = "(?:$uchar*)";
200 my $wtype = "(?:$uchar*)";
201 my $database = "(?:$uchar*)";
202 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
203 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
204 my $waisdatabase = "(?:wais://$hostport/$database)";
205 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
208 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
209 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
210 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
211 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
212 my $ppath = "(?:$psegment(?:/$psegment)*)";
213 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
216 http
=> qr/^$httpurl$/,
217 ftp
=> qr/^$ftpurl$/,
218 news
=> qr/^$newsurl$/,
219 nntp
=> qr/^$nntpurl$/,
220 telnet
=> qr/^$telneturl$/,
221 gopher
=> qr/^$gopherurl$/,
222 wais
=> qr/^$waisurl$/,
223 mailto
=> qr/^$mailtourl$/,
224 file
=> qr/^$fileurl$/,
225 prospero
=> qr/^$prosperourl$/
229 # keeping require happy
234 ### end of CheckRFC ############################################################
patrick-canterino.de