]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
added version checks, fixed some issues ;)
[selfforum.git] / selfforum-cgi / shared / Encode / Plain.pm
1 package Encode::Plain;
2
3 ################################################################################
4 # #
5 # File: shared/Encode/Plain.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de>, 2001-04-12 #
8 # #
9 # Description: Encode text for HTML Output (entities, spaces) #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 @EXPORT
16 %sonder
17 %unimap
18 $utf8
19 $v56
20 $VERSION
21 );
22
23 ################################################################################
24 #
25 # Version check
26 #
27 $VERSION = do { my @r =(q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
28
29 $v56 = eval {local $SIG{__DIE__}; require 5.6.0;};
30
31 ################################################################################
32 #
33 # Export
34 #
35 use base qw(Exporter);
36 @EXPORT = qw(plain multiline toUTF8);
37
38 ### sub myunpack ###############################################################
39 #
40 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
41 #
42 # Params: $string - UTF8-encoded string to unpack
43 #
44 # Return: Number - unpacked UTF8
45 #
46 sub myunpack ($) {
47 return unless defined $_[0];
48
49 my @c = map {ord} split // => shift;
50
51 return ($c[0] & 31) << 6 | $c[1] & 63
52 if (
53 @c == 2
54 and ($c[0] & 224) == 192
55 and ($c[1] & 192) == 128
56 );
57
58 return ($c[0] & 15) << 12 | ($c[1] & 63) << 6 | $c[2] && 63
59 if (
60 @c == 3
61 and ($c[0] & 240) == 224
62 and ($c[1] & 192) == 128
63 and ($c[2] & 192) == 128
64 );
65
66 return;
67 }
68
69 ### sub plain ##################################################################
70 #
71 # encode characters of plain text into entities for HTML output
72 # (includes < > " &)
73 # (excludes space problem)
74 #
75 # Params: $old - String (or scalar reference) to encode
76 # $ref - (optional) (hash reference) Options
77 # (-amp -except -utf8)
78 #
79 # Return: encoded string (or scalar reference)
80 #
81 sub plain ($;$) {
82 my ($old, $ref) = @_;
83 my $exreg;
84
85 return unless (defined $old);
86
87 my $new = ref ($old) ? $$old : $old;
88 $ref = $ref || {};
89 $new ='' unless (defined $new);
90
91 my $unicode = defined ($ref -> {-utf8})
92 ? $ref -> {-utf8}
93 : $utf8;
94
95 # Exceptions
96 #
97 my $except = exists($ref->{-except});
98 if ($except) {
99
100 if (ref ($ref -> {-except})) {
101 # turn list into a regex
102 #
103 $exreg = join '|' => map {quotemeta $_} @{$ref -> {-except}};
104 }
105 else {
106 # quote regex delimiters
107 #
108 $exreg = $ref -> {-except};
109 $exreg =~ s|/|\\/|g;
110 }
111 }
112
113 # encode the &-character
114 #
115 if (lc($ref->{-amp}) eq 'soft') {
116
117 if ($except) {
118 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&amp;'/eg;
119 }
120 else {
121 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&amp;/g;
122 }
123 }
124 elsif (lc($ref->{-amp}) ne 'no') {
125
126 if ($except) {
127 $new=~s/($exreg)|\&/defined($1)?$1:'&amp;'/eg;
128 }
129 else {
130 $new=~s/\&/&amp;/g;
131 }
132 }
133
134 # further characters
135 #
136 if ($except) {
137 $new =~ s/($exreg)|</defined($1)?$1:'&lt;'/eg;
138 $new =~ s/($exreg)|>/defined($1)?$1:'&gt;'/eg;
139 $new =~ s/($exreg)|"/defined($1)?$1:'&quot;'/eg;
140
141 # the big hash
142 #
143 if ($unicode) {
144 my $x;
145 if ($v56) {
146 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
147 defined($1)
148 ? $1
149 : ( exists($unimap{$x = unpack('U',$2)})
150 ? $unimap{$x}
151 : "&#$x;"
152 )
153 /eg;
154 }
155 else {
156 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
157 defined($1)
158 ? $1
159 : ( exists($unimap{$x = myunpack($2)})
160 ? $unimap{$x}
161 : "&#$x;"
162 )
163 /eg;
164 }
165 }
166 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
167 }
168 else {
169 # no exceptions
170 #
171 $new =~ s/</&lt;/g;
172 $new =~ s/>/&gt;/g;
173 $new =~ s/"/&quot;/g;
174
175 # the big hash
176 #
177 if ($unicode) {
178 my $x;
179 if ($v56) {
180 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
181 exists($unimap{$x = unpack('U',$1)})
182 ? $unimap{$x}
183 : "&#$x;"
184 /eg;
185 }
186 else {
187 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
188 exists($unimap{$x = myunpack($1)})
189 ? $unimap{$x}
190 : "&#$x;"
191 /eg;
192 }
193 }
194 $new =~ s/([\177-\377])/$sonder{$1}/g;
195 }
196
197 # characters < 32, but whitespaces
198 #
199 $new=~s/([^\041-\377\000\s])/
200 '&#' . ord($1) . ';'
201 /eg;
202 $new=~s/\000/ /g;
203
204 # return
205 #
206 ref $old
207 ? \$new
208 : $new;
209 }
210
211 ### sub multiline ##############################################################
212 #
213 # solve the space problem
214 #
215 # Params: $old - String (or scalar reference): text to encode
216 #
217 # Return: scalar reference: encoded string
218 #
219 sub multiline ($) {
220 my $old = shift;
221 my $string=(ref ($old))
222 ? $$old
223 : $old;
224
225 $string='' unless (defined $string);
226
227 # normalize newlines
228 #
229 $string=~s/\015\012|\015|\012/\n/g;
230
231 # turn \n into <br>
232 #
233 $string=~s!\n!<br />!g;
234
235 # more than 1 space => &nbsp;
236 #
237 $string=~s/(\s\s+)/('&nbsp;' x (length($1)-1)) . ' '/eg;
238
239 # Single Spaces after <br> => &nbsp;
240 # (save ascii arts ;)
241 #
242 $string=~s!(?:^|(<br(?:\s*/)?>))\s!($1?$1:'').'&nbsp;'!eg;
243
244 # return
245 #
246 \$string;
247 }
248
249 ### sub toUTF8 #################################################################
250 #
251 # map ISO-8859-1 to UTF8
252 #
253 # Params: String or scalar reference: string to map
254 #
255 # Return: String or scalar reference: mapped string
256 #
257 sub toUTF8 ($) {
258 my $ref = shift;
259 my $string = ref($ref)
260 ? $$ref
261 : $ref;
262
263 $string =~ s
264 {([\x80-\xff])}
265 { chr((ord ($1) >> 6) | 192)
266 .chr((ord ($1) & 191))
267 }eg;
268
269 ref($ref)
270 ? \$string
271 : $string;
272 }
273
274 ################################################################################
275 #
276 # package init
277 #
278 BEGIN {
279 $utf8 = 0;
280
281 # Latin 1 + guessed
282 #
283 %sonder=(
284 "\177" => '&#127;',
285 "\200" => '&#8364;',
286 "\201" => '&uuml;',
287 "\202" => '&#8218;',
288 "\203" => '&#402;',
289 "\204" => '&#8222;',
290 "\205" => '&#8230;',
291 "\206" => '&#8224;',
292 "\207" => '&#8225;',
293 "\210" => '&#710;',
294 "\211" => '&#8240;',
295 "\212" => '&#352;',
296 "\213" => '&#8249;',
297 "\214" => '&#338;',
298 "\215" => '&igrave;',
299 "\216" => '&#381;',
300 "\217" => '&Aring;',
301 "\220" => '&uuml;',
302 "\221" => "'",
303 "\222" => "'",
304 "\223" => '&#8220;',
305 "\224" => '&#8220;',
306 "\225" => '&#8226;',
307 "\226" => '-',
308 "\227" => '-',
309 "\230" => '&#732;',
310 "\231" => '&#8482;',
311 "\232" => '&#353;',
312 "\233" => '&#8250;',
313 "\234" => '&#339;',
314 "\235" => '&#216;',
315 "\236" => '&#215;',
316 "\237" => '&#376;',
317 "\240" => '&nbsp;',
318 "\241" => '&#161;',
319 "\242" => '&#162;',
320 "\243" => '&pound;',
321 "\244" => '&#164;',
322 "\245" => '&yen;',
323 "\246" => '&#166;',
324 "\247" => '&sect;',
325 "\250" => '&#168;',
326 "\251" => '&copy;',
327 "\252" => '&#170;',
328 "\253" => '&laquo;',
329 "\254" => '&#172;',
330 "\255" => '-',
331 "\256" => '&reg;',
332 "\257" => '&szlig;',
333 "\260" => '&#176;',
334 "\261" => '&#177;',
335 "\262" => '&#178;',
336 "\263" => '&#179;',
337 "\264" => '&acute;',
338 "\265" => '&#181;',
339 "\266" => '&#182;',
340 "\267" => '&#183;',
341 "\270" => '&cedil;',
342 "\271" => '&sup1;',
343 "\272" => '&#186;',
344 "\273" => '&raquo;',
345 "\274" => '&#188;',
346 "\275" => '&#189;',
347 "\276" => '&#190;',
348 "\277" => '&#191;',
349 "\300" => '&Agrave;',
350 "\301" => '&Aacute;',
351 "\302" => '&Acirc;',
352 "\303" => '&Atilde;',
353 "\304" => '&Auml;',
354 "\305" => '&Aring;',
355 "\306" => '&AElig;',
356 "\307" => '&Ccedil;',
357 "\310" => '&Egrave;',
358 "\311" => '&Eacute;',
359 "\312" => '&Ecirc;',
360 "\313" => '&Euml;',
361 "\314" => '&Igrave;',
362 "\315" => '&Iacute;',
363 "\316" => '&Icirc;',
364 "\317" => '&Iuml;',
365 "\320" => '&ETH;',
366 "\321" => '&Ntilde;',
367 "\322" => '&Ograve;',
368 "\323" => '&Oacute;',
369 "\324" => '&Ocirc;',
370 "\325" => '&Otilde;',
371 "\326" => '&Ouml;',
372 "\327" => '&#215;',
373 "\330" => '&Oslash;',
374 "\331" => '&Ugrave;',
375 "\332" => '&Uacute;',
376 "\333" => '&Ucirc;',
377 "\334" => '&Uuml;',
378 "\335" => '&Yacute;',
379 "\336" => '&THORN;',
380 "\337" => '&szlig;',
381 "\340" => '&agrave;',
382 "\341" => '&aacute;',
383 "\342" => '&acirc;',
384 "\343" => '&atilde;',
385 "\344" => '&auml;',
386 "\345" => '&aring;',
387 "\346" => '&aelig;',
388 "\347" => '&ccedil;',
389 "\350" => '&egrave;',
390 "\351" => '&eacute;',
391 "\352" => '&ecirc;',
392 "\353" => '&euml;',
393 "\354" => '&igrave;',
394 "\355" => '&iacute;',
395 "\356" => '&icirc;',
396 "\357" => '&iuml;',
397 "\360" => '&eth;',
398 "\361" => '&ntilde;',
399 "\362" => '&ograve;',
400 "\363" => '&oacute;',
401 "\364" => '&ocirc;',
402 "\365" => '&otilde;',
403 "\366" => '&ouml;',
404 "\367" => '&divide;',
405 "\370" => '&oslash;',
406 "\371" => '&ugrave;',
407 "\372" => '&uacute;',
408 "\373" => '&ucirc;',
409 "\374" => '&uuml;',
410 "\375" => '&yacute;',
411 "\376" => '&thorn;',
412 "\377" => '&yuml;'
413 );
414
415 # Unicode-Mapping
416 %unimap=(
417 128 => '&#8364;',
418 129 => '&uuml;',
419 130 => '&#8218;',
420 131 => '&#402;',
421 132 => '&#8222;',
422 133 => '&#8230;',
423 134 => '&#8224;',
424 135 => '&#8225;',
425 136 => '&#710;',
426 137 => '&#8240;',
427 138 => '&#352;',
428 139 => '&#8249;',
429 140 => '&#338;',
430 141 => '&igrave;',
431 142 => '&#381;',
432 143 => '&Aring;',
433 144 => '&uuml;',
434 145 => "'",
435 146 => "'",
436 147 => '&#8220;',
437 148 => '&#8220;',
438 149 => '&#8226;',
439 150 => '-',
440 151 => '-',
441 152 => '&#732;',
442 153 => '&#8482;',
443 154 => '&#353;',
444 155 => '&#8250;',
445 156 => '&#339;',
446 157 => '&#216;',
447 158 => '&#215;',
448 159 => '&#376;',
449 160 => '&nbsp;',
450 163 => '&pound;',
451 165 => '&yen;',
452 167 => '&sect;',
453 169 => '&copy;',
454 171 => '&laquo;',
455 173 => '-',
456 174 => '&reg;',
457 175 => '&szlig;',
458 180 => '&acute;',
459 184 => '&cedil;',
460 185 => '&sup1;',
461 187 => '&raquo;',
462 192 => '&Agrave;',
463 193 => '&Aacute;',
464 194 => '&Acirc;',
465 195 => '&Atilde;',
466 196 => '&Auml;',
467 197 => '&Aring;',
468 198 => '&AElig;',
469 199 => '&Ccedil;',
470 200 => '&Egrave;',
471 201 => '&Eacute;',
472 202 => '&Ecirc;',
473 203 => '&Euml;',
474 204 => '&Igrave;',
475 205 => '&Iacute;',
476 206 => '&Icirc;',
477 207 => '&Iuml;',
478 208 => '&ETH;',
479 209 => '&Ntilde;',
480 210 => '&Ograve;',
481 211 => '&Oacute;',
482 212 => '&Ocirc;',
483 213 => '&Otilde;',
484 214 => '&Ouml;',
485 216 => '&Oslash;',
486 217 => '&Ugrave;',
487 218 => '&Uacute;',
488 219 => '&Ucirc;',
489 220 => '&Uuml;',
490 221 => '&Yacute;',
491 222 => '&THORN;',
492 223 => '&szlig;',
493 224 => '&agrave;',
494 225 => '&aacute;',
495 226 => '&acirc;',
496 227 => '&atilde;',
497 228 => '&auml;',
498 229 => '&aring;',
499 230 => '&aelig;',
500 231 => '&ccedil;',
501 232 => '&egrave;',
502 233 => '&eacute;',
503 234 => '&ecirc;',
504 235 => '&euml;',
505 236 => '&igrave;',
506 237 => '&iacute;',
507 238 => '&icirc;',
508 239 => '&iuml;',
509 240 => '&eth;',
510 241 => '&ntilde;',
511 242 => '&ograve;',
512 243 => '&oacute;',
513 244 => '&ocirc;',
514 245 => '&otilde;',
515 246 => '&ouml;',
516 247 => '&divide;',
517 248 => '&oslash;',
518 249 => '&ugrave;',
519 250 => '&uacute;',
520 251 => '&ucirc;',
521 252 => '&uuml;',
522 253 => '&yacute;',
523 254 => '&thorn;',
524 255 => '&yuml;'
525 );
526 }
527
528 # keeping require happy
529 1;
530
531 #
532 #
533 ### end of Encode::Plain #######################################################

patrick-canterino.de