]>
git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
3 ################################################################################
5 # File: shared/Encode/Plain.pm #
7 # Authors: André Malo <nd@o3media.de>, 2001-04-12 #
9 # Description: Encode text for HTML Output (entities, spaces) #
11 ################################################################################
22 $v56 = eval {local $SIG{__DIE__
}; require 5.6.0;};
24 ################################################################################
28 use base
qw(Exporter);
29 @EXPORT = qw(plain multiline toUTF8);
31 ### sub myunpack ###############################################################
33 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
35 # Params: $string - UTF8-encoded string to unpack
37 # Return: Number - unpacked UTF8
40 return unless defined $_[0];
42 my @c = map {ord} split // => shift;
44 return ($c[0] & 31) << 6 | $c[1] & 63
47 and ($c[0] & 224) == 192
48 and ($c[1] & 192) == 128
51 return ($c[0] & 15) << 12 | ($c[1] & 63) << 6 | $c[2] && 63
54 and ($c[0] & 240) == 224
55 and ($c[1] & 192) == 128
56 and ($c[2] & 192) == 128
62 ### sub plain ##################################################################
64 # encode characters of plain text into entities for HTML output
66 # (excludes space problem)
68 # Params: $old - String (or scalar reference) to encode
69 # $ref - (optional) (hash reference) Options
70 # (-amp -except -utf8)
72 # Return: encoded string (or scalar reference)
78 return unless (defined $old);
80 my $new = ref ($old) ?
$$old : $old;
82 $new ='' unless (defined $new);
84 my $unicode = defined ($ref -> {-utf8
})
90 my $except = exists($ref->{-except
});
93 if (ref ($ref -> {-except
})) {
94 # turn list into a regex
96 $exreg = join '|' => map {quotemeta $_} @
{$ref -> {-except
}};
99 # quote regex delimiters
101 $exreg = $ref -> {-except
};
106 # encode the &-character
108 if (lc($ref->{-amp
}) eq 'soft') {
111 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&'/eg;
114 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&/g;
117 elsif (lc($ref->{-amp
}) ne 'no') {
120 $new=~s/($exreg)|\&/defined($1)?$1:'&'/eg;
130 $new =~ s/($exreg)|</defined($1)?$1:'<'/eg;
131 $new =~ s/($exreg)|>/defined($1)?$1:'>'/eg;
132 $new =~ s/($exreg)|\|/defined($1)?$1:'|'/eg;
133 $new =~ s/($exreg)|"/defined($1)?$1:'"'/eg;
140 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
143 : ( exists($unimap{$x = unpack('U',$2)})
150 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
153 : ( exists($unimap{$x = myunpack
($2)})
160 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
167 $new =~ s/\|/|/g;
168 $new =~ s/"/"/g;
175 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
176 exists($unimap{$x = unpack('U',$1)})
182 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
183 exists($unimap{$x = myunpack
($1)})
189 $new =~ s/([\177-\377])/$sonder{$1}/g;
192 # characters < 32, but whitespaces
194 $new=~s
/([^\041-\377\000\s])/
206 ### sub multiline ##############################################################
208 # solve the space problem
210 # Params: $old - String (or scalar reference): text to encode
212 # Return: scalar reference: encoded string
216 my $string=(ref ($old))
220 $string='' unless (defined $string);
224 $string=~s/\015\012|\015|\012/\n/g;
228 $string=~s/\n/<br>/g;
230 # more than 1 space =>
232 $string=~s/(\s\s+)/(' ' x (length($1)-1)) . ' '/eg;
234 # Single Spaces after <br> =>
235 # (save ascii arts ;)
237 $string=~s/(?:^|(<br>))\s/($1?$1:'').' '/eg;
244 ### sub toUTF8 #################################################################
246 # map ISO-8859-1 to UTF8
248 # Params: String or scalar reference: string to map
250 # Return: String or scalar reference: mapped string
254 my $string = ref($ref)
260 $string =~ tr/\x80-\xff//CU
;
265 { chr((ord ($1) >> 6) | 192)
266 .chr((ord ($1) & 191))
275 ################################################################################
299 "\215" => 'ì',
350 "\300" => 'À',
351 "\301" => 'Á',
353 "\303" => 'Ã',
357 "\307" => 'Ç',
358 "\310" => 'È',
359 "\311" => 'É',
362 "\314" => 'Ì',
363 "\315" => 'Í',
367 "\321" => 'Ñ',
368 "\322" => 'Ò',
369 "\323" => 'Ó',
371 "\325" => 'Õ',
374 "\330" => 'Ø',
375 "\331" => 'Ù',
376 "\332" => 'Ú',
379 "\335" => 'Ý',
382 "\340" => 'à',
383 "\341" => 'á',
385 "\343" => 'ã',
389 "\347" => 'ç',
390 "\350" => 'è',
391 "\351" => 'é',
394 "\354" => 'ì',
395 "\355" => 'í',
399 "\361" => 'ñ',
400 "\362" => 'ò',
401 "\363" => 'ó',
403 "\365" => 'õ',
405 "\367" => '÷',
406 "\370" => 'ø',
407 "\371" => 'ù',
408 "\372" => 'ú',
411 "\375" => 'ý',
529 # keeping require happy
534 ### end of Encode::Plain #######################################################
patrick-canterino.de