]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/CheckRFC.pm
improved the behavior of get_all_threads and create_forum_xml_string
[selfforum.git] / selfforum-cgi / shared / CheckRFC.pm
1 package CheckRFC;
2
3 ################################################################################
4 # #
5 # File: shared/CheckRFC.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de> #
8 # #
9 # Description: implement several string checks on RFC correctness #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 $v56
16 %url
17 @email
18 @EXPORT
19 @ISA
20 );
21
22 $v56 = eval q[
23 local $SIG{__DIE__};
24 require 5.6.0;
25 ];
26
27 use Carp qw(croak);
28
29 ################################################################################
30 #
31 # Version check
32 #
33 # last modified:
34 # $Date$ (GMT)
35 # by $Author$
36 #
37 sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
38
39 ################################################################################
40 #
41 # Export
42 #
43 require Exporter;
44 @ISA = qw(Exporter);
45
46 @EXPORT = qw(
47 is_URL
48 is_email
49 );
50
51 ### is_URL ($@) ################################################################
52 #
53 # check URL
54 #
55 # Params: $string string to check
56 # @schemes possible URL schemes in $string
57 # qw( http strict_http ftp news nntp telnet
58 # gopher wais mailto strict_mailto file prospero)
59 # if there's no scheme given, 'http' is default
60 # use ':ALL' (without quotes) for all schemes
61 #
62 # Return: Status code (Bool)
63 #
64 sub is_URL (;$@) {
65 my ($string, @schemes) = @_;
66 $string = $_ unless defined $string;
67 my $scheme;
68
69 return unless (defined ($string) and length ($string));
70
71 @schemes = qw(http) unless (@schemes);
72 @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
73
74 for $scheme (@schemes) {
75 croak "unknown url scheme '$scheme'" unless exists $url{$scheme};
76 unless ($scheme =~ /mailto/) {
77 return 1 if ($string =~ /$url{$scheme}/);
78 }
79 else {
80 if ($string =~ /^mailto:(.+)/) {
81
82 if ($scheme eq 'mailto') {
83 return 1 if (is_email ($1));
84 }
85 elsif ($scheme eq 'strict_mailto') {
86 return 1 if (is_email ($1,1));
87 }
88 }
89 }
90 }
91
92 # no match => return false
93 return;
94 }
95
96 ### is_email ($) ###############################################################
97 #
98 # check email (comments can be nested)
99 #
100 # Params: $string - string to check
101 # $strict - (optional) check strict RFC syntax (no TLD needed) if true
102 #
103 # Return: Status code (Bool)
104 #
105 sub is_email (;$$) {
106 my $string = shift;
107 $string = $_ unless defined $string;
108 return unless defined $string;
109
110 my $strict = shift;
111
112 # false if any non-ascii chars
113 return unless (defined ($string) and length ($string));
114 return if $string =~ /[\200-\377]/;
115
116 # remove nested comments
117 1 while ($string =~ s/\([^()]*\)//g);
118
119 return ($string =~ /^$email[0]$/) unless $strict;
120
121 return ($string =~ /^$email[1]$/);
122 }
123
124 ### BEGIN # (1) ################################################################
125 #
126 # define regex for nearly RFC 822 email address
127 #
128 BEGIN {
129 # Thanx to J. Friedl:
130
131 my $esc = '\\\\';
132 my $Period = '\.';
133 my $space = '\040';
134 my $tab = '\t';
135 my $OpenBR = '\[';
136 my $CloseBR = '\]';
137 my $OpenParen = '\(';
138 my $CloseParen = '\)';
139 my $NonASCII = '\x80-\xff';
140 my $ctrl = '\000-\037';
141 my $CRlist = '\n\015';
142 my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
143 my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
144 my $quoted_pair = qq< $esc [^$NonASCII] >;
145 my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
146 my $Cnested = qq< $OpenParen $ctext* (?: $quoted_pair $ctext* )* $CloseParen >;
147 my $comment = qq< $OpenParen $ctext* (?: (?: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
148 my $X = qq< [$space$tab]* (?: $comment [$space$tab]* )* >;
149 my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
150 my $atom = qq< $atom_char+ (?!$atom_char) >;
151 my $quoted_str = qq< \" $qtext * (?: $quoted_pair $qtext * )* \" >;
152 my $word = qq< (?: $atom | $quoted_str ) >;
153 my $domain_ref = $atom;
154 my $domain_lit = qq< $OpenBR (?: $dtext | $quoted_pair )* $CloseBR >;
155 my $sub_domain = qq< (?: $domain_ref | $domain_lit ) $X >;
156 my $domain;
157
158 @email = ();
159 for $domain (
160 qq< $sub_domain (?: $Period $X $sub_domain )* $Period [A-Za-z][A-Za-z][A-Za-z]?[A-Za-z]? >,
161 qq< $sub_domain (?: $Period $X $sub_domain )* >
162 ) {
163 my $route = qq< \@ $X $domain (?: , $X \@ $X $domain )* : $X >;
164 my $local_part = qq< $word $X (?: $Period $X $word $X )* >;
165 my $addr_spec = qq< $local_part \@ $X $domain >;
166 my $route_addr = qq[ < $X (?: $route )? $addr_spec > ];
167 my $phrase_ctrl = '\000-\010\012-\037';
168 my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
169 my $phrase = qq< $word $phrase_char * (?: (?: $comment | $quoted_str ) $phrase_char * )* >;
170 my $email = qq< $X (?: $addr_spec | $phrase $route_addr ) >;
171
172 if ($v56) {
173 eval q<
174 local $SIG{__DIE__};
175 $email = qr/$email/x;
176 >;
177 }
178 else {
179 $email =~ s/\s+//g;
180 }
181
182 push @email => $email;
183 }
184 }
185
186 ### BEGIN # (2) ################################################################
187 #
188 # define regexes for URLs
189 #
190 BEGIN {
191 # credits to an unknown(?) programmer ;)
192 # modified by n.d.p.
193
194 my $lowalpha = '[a-z]';
195 my $hialpha = '[A-Z]';
196 my $alpha = '[a-zA-Z]';
197 my $digit = '\d';
198 my $safe = '[$_.+-]';
199 my $extra = '[!*\'(),]';
200 my $national = '[{}|\\\\^~\[\]`]';
201 my $punctuation = '[<>#%"]';
202 my $reserved = '[;/?:@&=]';
203 my $hex = '[\dA-Fa-f]';
204 my $escape = "(?:%$hex$hex)";
205 my $unreserved = '[{}|\\\\^~\[\]`a-zA-Z\d$_.+-]'; #"(?:$alpha|$digit|$safe|$extra)";
206 my $uchar = "(?:$unreserved|$escape)";
207 my $xchar = "(?:$unreserved|$escape|$reserved)";
208 my $digits = '(?:\d+)';
209 my $alphadigit = '[a-zA-Z\d]'; #"(?:$alpha|$digit)";
210
211 # URL schemeparts for ip based protocols:
212 my $urlpath = "(?:$xchar*)";
213 my $user = "(?:(?:$uchar|[;?&=])*)";
214 my $password = "(?:(?:$uchar|[;?&=])*)";
215 my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
216 my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
217 my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)(?!255\.255\.255\.255)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
218 my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
219 my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
220 my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
221 my $host = "(?:(?:$hostname)|(?:$hostnumber))";
222 my $hostport = "(?:(?:$host)(?::$port)?)";
223 my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
224 my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
225
226 my $schemepart = "(?:$xchar*|$ip_schemepart)";
227 my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
228
229 # The predefined schemes:
230
231 # FTP (see also RFC959)
232 my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
233 my $ftptype = "(?:[AIDaid])";
234 my $fpath = "(?:$fsegment(?:/$fsegment)*)";
235 my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
236
237 # FILE
238 my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
239
240 # HTTP
241 my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
242 my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
243 my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
244 my $hpath = "(?:$hsegment(?:/$hsegment)*)";
245 my $httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?(?:#$xchar*)?)";
246 my $strict_httpurl = "(?:https?://$hostport(?:/$hpath(?:\\?$search)?)?)";
247
248 # GOPHER (see also RFC1436)
249 my $gopher_plus = "(?:$xchar*)";
250 my $selector = "(?:$xchar*)";
251 my $gtype = "(?:$xchar)";
252 my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
253
254 # NEWS (see also RFC1036)
255 my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
256 my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
257 my $grouppart = "(?:$article|$group|\\*)";
258 my $newsurl = "(?:news:$grouppart)";
259
260 # NNTP (see also RFC977)
261 my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
262
263 # TELNET
264 my $telneturl = "(?:telnet://$login(?:/)?)";
265
266 # WAIS (see also RFC1625)
267 my $wpath = "(?:$uchar*)";
268 my $wtype = "(?:$uchar*)";
269 my $database = "(?:$uchar*)";
270 my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
271 my $waisindex = "(?:wais://$hostport/$database\\?$search)";
272 my $waisdatabase = "(?:wais://$hostport/$database)";
273 my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
274
275 # PROSPERO
276 my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
277 my $fieldname = "(?:(?:$uchar|[?:\@&]))";
278 my $fieldspec = "(?:;$fieldname=$fieldvalue)";
279 my $psegment = "(?:(?:$uchar|[?:\@&=]))";
280 my $ppath = "(?:$psegment(?:/$psegment)*)";
281 my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
282
283 if ($v56) {
284 eval q[%url = (
285 http => qr/^$httpurl$/,
286 strict_http => qr/^$strict_httpurl$/,
287 ftp => qr/^$ftpurl$/,
288 news => qr/^$newsurl$/,
289 nntp => qr/^$nntpurl$/,
290 telnet => qr/^$telneturl$/,
291 gopher => qr/^$gopherurl$/,
292 wais => qr/^$waisurl$/,
293 mailto => 0,
294 strict_mailto => 0,
295 file => qr/^$fileurl$/,
296 prospero => qr/^$prosperourl$/
297 );];
298 }
299 else {
300 %url = (
301 http => "^$httpurl\$",
302 strict_http => "^$strict_httpurl\$",
303 ftp => "^$ftpurl\$",
304 news => "^$newsurl\$",
305 nntp => "^$nntpurl\$",
306 telnet => "^$telneturl\$",
307 gopher => "^$gopherurl\$",
308 wais => "^$waisurl\$",
309 mailto => 0,
310 strict_mailto => 0,
311 file => "^$fileurl\$",
312 prospero => "^$prosperourl\$"
313 );
314 }
315 }
316
317 # keep 'require' happy
318 1;
319
320 #
321 #
322 ### end of CheckRFC ############################################################

patrick-canterino.de