]>
git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
3 ################################################################################
5 # File: shared/Encode/Plain.pm #
7 # Authors: André Malo <nd@o3media.de> #
9 # Description: Encode text for HTML Output (entities, spaces) #
11 ################################################################################
22 ################################################################################
30 sub VERSION
{(q
$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
32 $v56 = eval {local $SIG{__DIE__
}; require 5.6.0;};
34 ################################################################################
38 use base
qw(Exporter);
39 @EXPORT = qw(plain multiline toUTF8);
41 ### sub myunpack ###############################################################
43 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
45 # Params: $string - UTF8-encoded string to unpack
47 # Return: Number - unpacked UTF8
50 return unless defined $_[0];
52 my @c = map {ord} split // => shift;
54 return ($c[0] & 31) << 6 | $c[1] & 63
57 and ($c[0] & 224) == 192
58 and ($c[1] & 192) == 128
61 return ($c[0] & 15) << 12 | ($c[1] & 63) << 6 | $c[2] && 63
64 and ($c[0] & 240) == 224
65 and ($c[1] & 192) == 128
66 and ($c[2] & 192) == 128
72 ### sub plain ##################################################################
74 # encode characters of plain text into entities for HTML output
76 # (excludes space problem)
78 # Params: $old - String (or scalar reference) to encode
79 # $ref - (optional) (hash reference) Options
80 # (-amp -except -utf8)
82 # Return: encoded string (or scalar reference)
88 return unless (defined $old);
90 my $new = ref ($old) ?
$$old : $old;
92 $new ='' unless (defined $new);
94 my $unicode = defined ($ref -> {-utf8
})
100 my $except = exists($ref->{-except
});
103 if (ref ($ref -> {-except
})) {
104 # turn list into a regex
106 $exreg = join '|' => map {quotemeta $_} @
{$ref -> {-except
}};
109 # quote regex delimiters
111 $exreg = $ref -> {-except
};
116 # encode the &-character
118 if (lc($ref->{-amp
}) eq 'soft') {
121 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&'/eg;
124 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&/g;
127 elsif (lc($ref->{-amp
}) ne 'no') {
130 $new=~s/($exreg)|\&/defined($1)?$1:'&'/eg;
140 $new =~ s/($exreg)|</defined($1)?$1:'<'/eg;
141 $new =~ s/($exreg)|>/defined($1)?$1:'>'/eg;
142 $new =~ s/($exreg)|"/defined($1)?$1:'"'/eg;
149 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
152 : ( exists($unimap{$x = unpack('U',$2)})
159 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
162 : ( exists($unimap{$x = myunpack
($2)})
169 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
176 $new =~ s/"/"/g;
183 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
184 exists($unimap{$x = unpack('U',$1)})
190 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
191 exists($unimap{$x = myunpack
($1)})
197 $new =~ s/([\177-\377])/$sonder{$1}/g;
200 # characters < 32, but whitespaces
202 $new=~s
/([^\041-\377\000\s])/
214 ### sub multiline ##############################################################
216 # solve the space problem
218 # Params: $old - String (or scalar reference): text to encode
220 # Return: scalar reference: encoded string
224 my $string=(ref ($old))
228 $string='' unless (defined $string);
232 $string=~s/\015\012|\015|\012/\n/g;
236 $string=~s!\n!<br />!g;
238 # more than 1 space =>
240 $string=~s/(\s\s+)/(' ' x (length($1)-1)) . ' '/eg;
242 # Single Spaces after <br> =>
243 # (save ascii arts ;)
245 $string=~s!(?:^|(<br(?:\s*/)?>))\s!($1?$1:'').' '!eg;
252 ### sub toUTF8 #################################################################
254 # map ISO-8859-1 to UTF8
256 # Params: String or scalar reference: string to map
258 # Return: String or scalar reference: mapped string
262 my $string = ref($ref)
268 { chr((ord ($1) >> 6) | 192)
269 .chr((ord ($1) & 191))
277 ################################################################################
301 "\215" => 'ì',
352 "\300" => 'À',
353 "\301" => 'Á',
355 "\303" => 'Ã',
359 "\307" => 'Ç',
360 "\310" => 'È',
361 "\311" => 'É',
364 "\314" => 'Ì',
365 "\315" => 'Í',
369 "\321" => 'Ñ',
370 "\322" => 'Ò',
371 "\323" => 'Ó',
373 "\325" => 'Õ',
376 "\330" => 'Ø',
377 "\331" => 'Ù',
378 "\332" => 'Ú',
381 "\335" => 'Ý',
384 "\340" => 'à',
385 "\341" => 'á',
387 "\343" => 'ã',
391 "\347" => 'ç',
392 "\350" => 'è',
393 "\351" => 'é',
396 "\354" => 'ì',
397 "\355" => 'í',
401 "\361" => 'ñ',
402 "\362" => 'ò',
403 "\363" => 'ó',
405 "\365" => 'õ',
407 "\367" => '÷',
408 "\370" => 'ø',
409 "\371" => 'ù',
410 "\372" => 'ú',
413 "\375" => 'ý',
531 # keep 'require' happy
536 ### end of Encode::Plain #######################################################
patrick-canterino.de