]>
git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
3 ################################################################################
5 # File: shared/Encode/Plain.pm #
7 # Authors: André Malo <nd@o3media.de>, 2001-04-12 #
9 # Description: Encode text for HTML Output (entities, spaces) #
11 ################################################################################
23 ################################################################################
27 $VERSION = do { my @r =(q
$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x
$#r, @r };
29 $v56 = eval {local $SIG{__DIE__
}; require 5.6.0;};
31 ################################################################################
35 use base
qw(Exporter);
36 @EXPORT = qw(plain multiline toUTF8);
38 ### sub myunpack ###############################################################
40 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
42 # Params: $string - UTF8-encoded string to unpack
44 # Return: Number - unpacked UTF8
sub myunpack {
  # Decode a single two- or three-byte UTF-8 sequence into its numeric
  # code point. Used as a stand-in for unpack('U', ...) on perls < 5.6.
  #
  # Returns nothing (undef in scalar context) when the argument is
  # undefined or is not a well-formed two-/three-byte sequence.
  my $bytes = shift;
  return unless defined $bytes;

  my @c = map {ord} split // => $bytes;

  # two-byte form: 110xxxxx 10xxxxxx
  return (($c[0] & 31) << 6) | ($c[1] & 63)
    if (
      @c == 2
      and ($c[0] & 224) == 192
      and ($c[1] & 192) == 128
    );

  # three-byte form: 1110xxxx 10xxxxxx 10xxxxxx
  # The last term has to be a bitwise '&'. A logical '&&' binds looser
  # than '|' and would turn every three-byte result into the constant 63.
  return (($c[0] & 15) << 12) | (($c[1] & 63) << 6) | ($c[2] & 63)
    if (
      @c == 3
      and ($c[0] & 240) == 224
      and ($c[1] & 192) == 128
      and ($c[2] & 192) == 128
    );

  return;
}
69 ### sub plain ##################################################################
71 # encode characters of plain text into entities for HTML output
73 # (excludes space problem)
75 # Params: $old - String (or scalar reference) to encode
76 # $ref - (optional) (hash reference) Options
77 # (-amp -except -utf8)
79 # Return: encoded string (or scalar reference)
85 return unless (defined $old);
87 my $new = ref ($old) ?
$$old : $old;
89 $new ='' unless (defined $new);
91 my $unicode = defined ($ref -> {-utf8
})
97 my $except = exists($ref->{-except
});
100 if (ref ($ref -> {-except
})) {
101 # turn list into a regex
103 $exreg = join '|' => map {quotemeta $_} @
{$ref -> {-except
}};
106 # quote regex delimiters
108 $exreg = $ref -> {-except
};
113 # encode the &-character
115 if (lc($ref->{-amp
}) eq 'soft') {
118 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&'/eg;
121 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&/g;
124 elsif (lc($ref->{-amp
}) ne 'no') {
127 $new=~s/($exreg)|\&/defined($1)?$1:'&'/eg;
137 $new =~ s/($exreg)|</defined($1)?$1:'<'/eg;
138 $new =~ s/($exreg)|>/defined($1)?$1:'>'/eg;
139 $new =~ s/($exreg)|"/defined($1)?$1:'"'/eg;
146 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
149 : ( exists($unimap{$x = unpack('U',$2)})
156 $new =~ s
/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
159 : ( exists($unimap{$x = myunpack
($2)})
166 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
173 $new =~ s/"/"/g;
180 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
181 exists($unimap{$x = unpack('U',$1)})
187 $new =~ s
/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
188 exists($unimap{$x = myunpack
($1)})
194 $new =~ s/([\177-\377])/$sonder{$1}/g;
197 # characters < 32, but whitespaces
199 $new=~s
/([^\041-\377\000\s])/
211 ### sub multiline ##############################################################
213 # solve the space problem
215 # Params: $old - String (or scalar reference): text to encode
217 # Return: scalar reference: encoded string
221 my $string=(ref ($old))
225 $string='' unless (defined $string);
229 $string=~s/\015\012|\015|\012/\n/g;
233 $string=~s!\n!<br />!g;
235 # more than 1 space => non-breaking spaces (&nbsp;)
237 $string=~s/(\s\s+)/(' ' x (length($1)-1)) . ' '/eg;
239 # Single Spaces after <br> => non-breaking space (&nbsp;)
240 # (save ascii arts ;)
242 $string=~s!(?:^|(<br(?:\s*/)?>))\s!($1?$1:'').' '!eg;
249 ### sub toUTF8 #################################################################
251 # map ISO-8859-1 to UTF8
253 # Params: String or scalar reference: string to map
255 # Return: String or scalar reference: mapped string
259 my $string = ref($ref)
265 { chr((ord ($1) >> 6) | 192)
266 .chr((ord ($1) & 191))
274 ################################################################################
298 "\215" => 'ì',
349 "\300" => 'À',
350 "\301" => 'Á',
352 "\303" => 'Ã',
356 "\307" => 'Ç',
357 "\310" => 'È',
358 "\311" => 'É',
361 "\314" => 'Ì',
362 "\315" => 'Í',
366 "\321" => 'Ñ',
367 "\322" => 'Ò',
368 "\323" => 'Ó',
370 "\325" => 'Õ',
373 "\330" => 'Ø',
374 "\331" => 'Ù',
375 "\332" => 'Ú',
378 "\335" => 'Ý',
381 "\340" => 'à',
382 "\341" => 'á',
384 "\343" => 'ã',
388 "\347" => 'ç',
389 "\350" => 'è',
390 "\351" => 'é',
393 "\354" => 'ì',
394 "\355" => 'í',
398 "\361" => 'ñ',
399 "\362" => 'ò',
400 "\363" => 'ó',
402 "\365" => 'õ',
404 "\367" => '÷',
405 "\370" => 'ø',
406 "\371" => 'ù',
407 "\372" => 'ú',
410 "\375" => 'ý',
528 # keeping require happy
533 ### end of Encode::Plain #######################################################
patrick-canterino.de