+package CheckRFC;
+
+################################################################################
+# #
+# File: shared/CheckRFC.pm #
+# #
+# Authors: Andre Malo <nd@o3media.de>, 2001-03-30 #
+# #
+# Description: implement several string checks on RFC correctness #
+# #
+################################################################################
+
+use strict;
+
+use vars qw(%url $email @EXPORT);
+
+use autouse 'Carp' => qw(croak);
+
+################################################################################
+#
+# Export
+#
+use base qw(Exporter);
+@EXPORT = qw(is_URL is_email);
+
+### is_URL ($@) ################################################################
+#
+# check URL
+#
+# Params: $string string to check
+# @schemes possible URL schemes in $string
+# qw(http ftp news nntp telnet gopher wais mailto file prospero)
+# if there's no scheme given, 'http' is default
+# use ':ALL' (without quotes) for all schemes
+#
+# Return: Status code (Bool)
+#
+sub is_URL ($@) {
+ my ($string, @schemes) = @_;
+ @schemes = qw(http) unless (@schemes);
+ @schemes = keys %url if (@schemes == 1 and $schemes[0] eq ':ALL');
+
+ for (@schemes) {
+ croak "unknown url scheme '$_'" unless exists $url{$_};
+ return 1 if $string =~ /$url{$_}/;
+ }
+
+ # no match => return false
+ return;
+}
+
+### is_email ($) ###############################################################
+#
+# check email (comments can be nested)
+#
+# Params: $string string to check
+#
+# Return: Status code (Bool)
+#
+sub is_email ($) {
+ my $string = shift;
+
+ # false if any non-ascii chars
+ return if $string =~ /[\200-\377]/;
+
+ # remove nested comments
+ while ($string =~ s/\([^()]*\)//g) {};
+
+ return ($string =~ /^$email$/);
+}
+
+### BEGIN # (1) ################################################################
+#
+# define regex for nearly RFC 822 email address
+#
+BEGIN {
+ # Thanx to J. Friedl:
+
+ my $esc = '\\\\';
+ my $Period = '\.';
+ my $space = '\040';
+ my $tab = '\t';
+ my $OpenBR = '\[';
+ my $CloseBR = '\]';
+ my $OpenParen = '\(';
+ my $CloseParen = '\)';
+ my $NonASCII = '\x80-\xff';
+ my $ctrl = '\000-\037';
+ my $CRlist = '\n\015';
+ my $qtext = qq/[^$esc$NonASCII$CRlist\"]/;
+ my $dtext = qq/[^$esc$NonASCII$CRlist$OpenBR$CloseBR]/;
+ my $quoted_pair = qq< $esc [^$NonASCII] >;
+ my $ctext = qq< [^$esc$NonASCII$CRlist()] >;
+ my $Cnested = qq< $OpenParen $ctext* (?: $quoted_pair $ctext* )* $CloseParen >;
+ my $comment = qq< $OpenParen $ctext* (?: (?: $quoted_pair | $Cnested ) $ctext* )* $CloseParen >;
+ my $X = qq< [$space$tab]* (?: $comment [$space$tab]* )* >;
+ my $atom_char = qq/[^($space)<>\@,;:\".$esc$OpenBR$CloseBR$ctrl$NonASCII]/;
+ my $atom = qq< $atom_char+ (?!$atom_char) >;
+ my $quoted_str = qq< \" $qtext * (?: $quoted_pair $qtext * )* \" >;
+ my $word = qq< (?: $atom | $quoted_str ) >;
+ my $domain_ref = $atom;
+ my $domain_lit = qq< $OpenBR (?: $dtext | $quoted_pair )* $CloseBR >;
+ my $sub_domain = qq< (?: $domain_ref | $domain_lit ) $X >;
+ my $domain = qq< $sub_domain (?: $Period $X $sub_domain )* >;
+ my $route = qq< \@ $X $domain (?: , $X \@ $X $domain )* : $X >;
+ my $local_part = qq< $word $X (?: $Period $X $word $X )* >;
+ my $addr_spec = qq< $local_part \@ $X $domain >;
+ my $route_addr = qq[ < $X (?: $route )? $addr_spec > ];
+ my $phrase_ctrl = '\000-\010\012-\037';
+ my $phrase_char = qq/[^()<>\@,;:\".$esc$OpenBR$CloseBR$NonASCII$phrase_ctrl]/;
+ my $phrase = qq< $word $phrase_char * (?: (?: $comment | $quoted_str ) $phrase_char * )* >;
+ $email = qq< $X (?: $addr_spec | $phrase $route_addr ) >;
+ $email = qr /$email/x;
+}
+
+### BEGIN # (2) ################################################################
+#
+# define regexes for URLs
+#
+BEGIN {
+ # credits to an unknown(?) programmer ;)
+ # modified by n.d.p.
+
+ my $lowalpha = '(?:[a-z])';
+ my $hialpha = '(?:[A-Z])';
+ my $alpha = "(?:$lowalpha|$hialpha)";
+ my $digit = '(?:\d)';
+ my $safe = '(?:[$_.+-])';
+ my $extra = '(?:[!*\'(),])';
+ my $national = '(?:[{}|\\\\^~\[\]`])';
+ my $punctuation = '(?:[<>#%"])';
+ my $reserved = '(?:[;/?:@&=])';
+ my $hex = '(?:[\dA-Fa-f])';
+ my $escape = "(?:%$hex$hex)";
+ my $unreserved = "(?:$alpha|$digit|$safe|$extra)";
+ my $uchar = "(?:$unreserved|$escape)";
+ my $xchar = "(?:$unreserved|$escape|$reserved)";
+ my $digits = '(?:\d+)';
+ my $alphadigit = "(?:$alpha|\\d)";
+
+ # URL schemeparts for ip based protocols:
+ my $urlpath = "(?:$xchar*)";
+ my $user = "(?:(?:$uchar|[;?&=])*)";
+ my $password = "(?:(?:$uchar|[;?&=])*)";
+ my $port = '(?:[0-5]?\d\d?\d?\d?|6[0-4]\d\d\d|65[0-4]\d\d|655[0-2]\d|6553[0-5])';
+ my $ip4part = '(?:[01]?\d\d?|2[0-4]\d|25[0-5])';
+ my $hostnumber = '(?:(?!0+\.0+\.0+\.0+)'."$ip4part\\.$ip4part\\.$ip4part\\.$ip4part)";
+ my $toplabel = "(?:(?:$alpha(?:$alphadigit|-)*$alphadigit)|$alpha)";
+ my $domainlabel = "(?:(?:$alphadigit(?:$alphadigit|-)*$alphadigit)|$alphadigit)";
+ my $hostname = "(?:(?:$domainlabel\\.)*$toplabel)";
+ my $host = "(?:(?:$hostname)|(?:$hostnumber))";
+ my $hostport = "(?:(?:$host)(?::$port)?)";
+ my $login = "(?:(?:$user(?::$password)?\@)?$hostport)";
+ my $ip_schemepart = "(?://$login(?:/$urlpath)?)";
+
+ my $schemepart = "(?:$xchar*|$ip_schemepart)";
+ my $scheme = "(?:(?:$lowalpha|$digit|[+.-])+)";
+
+ # The predefined schemes:
+
+ # FTP (see also RFC959)
+ my $fsegment = "(?:(?:$uchar|[?:\@&=])*)";
+ my $ftptype = "(?:[AIDaid])";
+ my $fpath = "(?:$fsegment(?:/$fsegment)*)";
+ my $ftpurl = "(?:ftp://$login(?:/$fpath(?:;type=$ftptype)?)?)";
+
+ # FILE
+ my $fileurl = "(?:file://(?:(?:$host)|localhost)?/$fpath)";
+
+ # HTTP
+ my $httpuchar = "(?:(?:$alpha|$digit|$safe|(?:[!*\',]))|$escape)";
+ my $hsegment = "(?:(?:$httpuchar|[;:\@&=~])*)";
+ my $search = "(?:(?:$httpuchar|[;:\@&=~])*)";
+ my $hpath = "(?:$hsegment(?:/$hsegment)*)";
+ my $httpurl = "(?:http://$hostport(?:/$hpath(?:\\?$search)?)?)";
+
+ # GOPHER (see also RFC1436)
+ my $gopher_plus = "(?:$xchar*)";
+ my $selector = "(?:$xchar*)";
+ my $gtype = "(?:$xchar)";
+ my $gopherurl = "(?:gopher://$hostport(?:/$gtype(?:$selector(?:%09$search(?:%09$gopher_plus)?)?)?)?)";
+
+ # MAILTO (see also RFC822)
+ my $encoded822addr = "(?:$email)";
+ my $mailtourl = "(?:mailto:$encoded822addr)";
+
+ # NEWS (see also RFC1036)
+ my $article = "(?:(?:$uchar|[;/?:&=])+\@$host)";
+ my $group = "(?:$alpha(?:$alpha|$digit|[.+_-])*)";
+ my $grouppart = "(?:$article|$group|\\*)";
+ my $newsurl = "(?:news:$grouppart)";
+
+ # NNTP (see also RFC977)
+ my $nntpurl = "(?:nntp://$hostport/$group(?:/$digits)?)";
+
+ # TELNET
+ my $telneturl = "(?:telnet://$login(?:/)?)";
+
+ # WAIS (see also RFC1625)
+ my $wpath = "(?:$uchar*)";
+ my $wtype = "(?:$uchar*)";
+ my $database = "(?:$uchar*)";
+ my $waisdoc = "(?:wais://$hostport/$database/$wtype/$wpath)";
+ my $waisindex = "(?:wais://$hostport/$database\\?$search)";
+ my $waisdatabase = "(?:wais://$hostport/$database)";
+ my $waisurl = "(?:$waisdatabase|$waisindex|$waisdoc)";
+
+ # PROSPERO
+ my $fieldvalue = "(?:(?:$uchar|[?:\@&]))";
+ my $fieldname = "(?:(?:$uchar|[?:\@&]))";
+ my $fieldspec = "(?:;$fieldname=$fieldvalue)";
+ my $psegment = "(?:(?:$uchar|[?:\@&=]))";
+ my $ppath = "(?:$psegment(?:/$psegment)*)";
+ my $prosperourl = "(?:prospero://$hostport/$ppath(?:$fieldspec)*)";
+
+ %url = (
+ http => qr/^$httpurl$/,
+ ftp => qr/^$ftpurl$/,
+ news => qr/^$newsurl$/,
+ nntp => qr/^$nntpurl$/,
+ telnet => qr/^$telneturl$/,
+ gopher => qr/^$gopherurl$/,
+ wais => qr/^$waisurl$/,
+ mailto => qr/^$mailtourl$/,
+ file => qr/^$fileurl$/,
+ prospero => qr/^$prosperourl$/
+ );
+}
+
+# keeping require happy
+1;
+
+#
+#
+### end of CheckRFC ############################################################
\ No newline at end of file