]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
ordered alphabetically (in some places in germany, the alphabetical order is differen...
[selfforum.git] / selfforum-cgi / shared / Encode / Plain.pm
1 package Encode::Plain;
2
3 ################################################################################
4 # #
5 # File: shared/Encode/Plain.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de>, 2001-04-12 #
8 # #
9 # Description: Encode text for HTML Output (entities, spaces) #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 @EXPORT
16 %sonder
17 %unimap
18 $utf8
19 $v56
20 );
21
22 $v56 = eval {local $SIG{__DIE__}; require 5.6.0;};
23
24 ################################################################################
25 #
26 # Export
27 #
28 use base qw(Exporter);
29 @EXPORT = qw(plain multiline toUTF8);
30
31 ### sub myunpack ###############################################################
32 #
33 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
34 #
35 # Params: $string - UTF8-encoded string to unpack
36 #
37 # Return: Number - unpacked UTF8
38 #
39 sub myunpack ($) {
40 return unless defined $_[0];
41
42 my @c = map {ord} split // => shift;
43
44 return ($c[0] & 31) << 6 | $c[1] & 63
45 if (
46 @c == 2
47 and ($c[0] & 224) == 192
48 and ($c[1] & 192) == 128
49 );
50
51 return ($c[0] & 15) << 12 | ($c[1] & 63) << 6 | $c[2] && 63
52 if (
53 @c == 3
54 and ($c[0] & 240) == 224
55 and ($c[1] & 192) == 128
56 and ($c[2] & 192) == 128
57 );
58
59 return;
60 }
61
62 ### sub plain ##################################################################
63 #
64 # encode characters of plain text into entities for HTML output
65 # (includes < > " &)
66 # (excludes space problem)
67 #
68 # Params: $old - String (or scalar reference) to encode
69 # $ref - (optional) (hash reference) Options
70 # (-amp -except -utf8)
71 #
72 # Return: encoded string (or scalar reference)
73 #
74 sub plain ($;$) {
75 my ($old, $ref) = @_;
76 my $exreg;
77
78 return unless (defined $old);
79
80 my $new = ref ($old) ? $$old : $old;
81 $ref = $ref || {};
82 $new ='' unless (defined $new);
83
84 my $unicode = defined ($ref -> {-utf8})
85 ? $ref -> {-utf8}
86 : $utf8;
87
88 # Exceptions
89 #
90 my $except = exists($ref->{-except});
91 if ($except) {
92
93 if (ref ($ref -> {-except})) {
94 # turn list into a regex
95 #
96 $exreg = join '|' => map {quotemeta $_} @{$ref -> {-except}};
97 }
98 else {
99 # quote regex delimiters
100 #
101 $exreg = $ref -> {-except};
102 $exreg =~ s|/|\\/|g;
103 }
104 }
105
106 # encode the &-character
107 #
108 if (lc($ref->{-amp}) eq 'soft') {
109
110 if ($except) {
111 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&amp;'/eg;
112 }
113 else {
114 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&amp;/g;
115 }
116 }
117 elsif (lc($ref->{-amp}) ne 'no') {
118
119 if ($except) {
120 $new=~s/($exreg)|\&/defined($1)?$1:'&amp;'/eg;
121 }
122 else {
123 $new=~s/\&/&amp;/g;
124 }
125 }
126
127 # further characters
128 #
129 if ($except) {
130 $new =~ s/($exreg)|</defined($1)?$1:'&lt;'/eg;
131 $new =~ s/($exreg)|>/defined($1)?$1:'&gt;'/eg;
132 $new =~ s/($exreg)|\|/defined($1)?$1:'&#124;'/eg;
133 $new =~ s/($exreg)|"/defined($1)?$1:'&quot;'/eg;
134
135 # the big hash
136 #
137 if ($unicode) {
138 my $x;
139 if ($v56) {
140 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
141 defined($1)
142 ? $1
143 : ( exists($unimap{$x = unpack('U',$2)})
144 ? $unimap{$x}
145 : "&#$x;"
146 )
147 /eg;
148 }
149 else {
150 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
151 defined($1)
152 ? $1
153 : ( exists($unimap{$x = myunpack($2)})
154 ? $unimap{$x}
155 : "&#$x;"
156 )
157 /eg;
158 }
159 }
160 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
161 }
162 else {
163 # no exceptions
164 #
165 $new =~ s/</&lt;/g;
166 $new =~ s/>/&gt;/g;
167 $new =~ s/\|/&#124;/g;
168 $new =~ s/"/&quot;/g;
169
170 # the big hash
171 #
172 if ($unicode) {
173 my $x;
174 if ($v56) {
175 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
176 exists($unimap{$x = unpack('U',$1)})
177 ? $unimap{$x}
178 : "&#$x;"
179 /eg;
180 }
181 else {
182 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
183 exists($unimap{$x = myunpack($1)})
184 ? $unimap{$x}
185 : "&#$x;"
186 /eg;
187 }
188 }
189 $new =~ s/([\177-\377])/$sonder{$1}/g;
190 }
191
192 # characters < 32, but whitespaces
193 #
194 $new=~s/([^\041-\377\000\s])/
195 '&#' . ord($1) . ';'
196 /eg;
197 $new=~s/\000/ /g;
198
199 # return
200 #
201 ref $old
202 ? \$new
203 : $new;
204 }
205
206 ### sub multiline ##############################################################
207 #
208 # solve the space problem
209 #
210 # Params: $old - String (or scalar reference): text to encode
211 #
212 # Return: scalar reference: encoded string
213 #
214 sub multiline ($) {
215 my $old = shift;
216 my $string=(ref ($old))
217 ? $$old
218 : $old;
219
220 $string='' unless (defined $string);
221
222 # normalize newlines
223 #
224 $string=~s/\015\012|\015|\012/\n/g;
225
226 # turn \n into <br>
227 #
228 $string=~s/\n/<br>/g;
229
230 # more than 1 space => &nbsp;
231 #
232 $string=~s/(\s\s+)/('&nbsp;' x (length($1)-1)) . ' '/eg;
233
234 # Single Spaces after <br> => &nbsp;
235 # (save ascii arts ;)
236 #
237 $string=~s/(?:^|(<br>))\s/($1?$1:'').'&nbsp;'/eg;
238
239 # return
240 #
241 \$string;
242 }
243
244 ### sub toUTF8 #################################################################
245 #
246 # map ISO-8859-1 to UTF8
247 #
248 # Params: String or scalar reference: string to map
249 #
250 # Return: String or scalar reference: mapped string
251 #
252 sub toUTF8 ($) {
253 my $ref = shift;
254 my $string = ref($ref)
255 ? $$ref
256 : $ref;
257
258 # if ($v56) {
259 # no warnings 'utf8';
260 # $string =~ tr/\x80-\xff//CU;
261 # }
262 # else {
263 $string =~ s
264 {([\x80-\xff])}
265 { chr((ord ($1) >> 6) | 192)
266 .chr((ord ($1) & 191))
267 }eg;
268 # }
269
270 ref($ref)
271 ? \$string
272 : $string;
273 }
274
275 ################################################################################
276 #
277 # package init
278 #
279 BEGIN {
280 $utf8 = 0;
281
282 # Latin 1 + guessed
283 #
284 %sonder=(
285 "\177" => '&#127;',
286 "\200" => '&#8364;',
287 "\201" => '&uuml;',
288 "\202" => '&#8218;',
289 "\203" => '&#402;',
290 "\204" => '&#8222;',
291 "\205" => '&#8230;',
292 "\206" => '&#8224;',
293 "\207" => '&#8225;',
294 "\210" => '&#710;',
295 "\211" => '&#8240;',
296 "\212" => '&#352;',
297 "\213" => '&#8249;',
298 "\214" => '&#338;',
299 "\215" => '&igrave;',
300 "\216" => '&#381;',
301 "\217" => '&Aring;',
302 "\220" => '&uuml;',
303 "\221" => "'",
304 "\222" => "'",
305 "\223" => '&#8220;',
306 "\224" => '&#8220;',
307 "\225" => '&#8226;',
308 "\226" => '-',
309 "\227" => '-',
310 "\230" => '&#732;',
311 "\231" => '&#8482;',
312 "\232" => '&#353;',
313 "\233" => '&#8250;',
314 "\234" => '&#339;',
315 "\235" => '&#216;',
316 "\236" => '&#215;',
317 "\237" => '&#376;',
318 "\240" => '&nbsp;',
319 "\241" => '&#161;',
320 "\242" => '&#162;',
321 "\243" => '&pound;',
322 "\244" => '&#164;',
323 "\245" => '&yen;',
324 "\246" => '&#166;',
325 "\247" => '&sect;',
326 "\250" => '&#168;',
327 "\251" => '&copy;',
328 "\252" => '&#170;',
329 "\253" => '&laquo;',
330 "\254" => '&#172;',
331 "\255" => '-',
332 "\256" => '&reg;',
333 "\257" => '&szlig;',
334 "\260" => '&#176;',
335 "\261" => '&#177;',
336 "\262" => '&#178;',
337 "\263" => '&#179;',
338 "\264" => '&acute;',
339 "\265" => '&#181;',
340 "\266" => '&#182;',
341 "\267" => '&#183;',
342 "\270" => '&cedil;',
343 "\271" => '&sup1;',
344 "\272" => '&#186;',
345 "\273" => '&raquo;',
346 "\274" => '&#188;',
347 "\275" => '&#189;',
348 "\276" => '&#190;',
349 "\277" => '&#191;',
350 "\300" => '&Agrave;',
351 "\301" => '&Aacute;',
352 "\302" => '&Acirc;',
353 "\303" => '&Atilde;',
354 "\304" => '&Auml;',
355 "\305" => '&Aring;',
356 "\306" => '&AElig;',
357 "\307" => '&Ccedil;',
358 "\310" => '&Egrave;',
359 "\311" => '&Eacute;',
360 "\312" => '&Ecirc;',
361 "\313" => '&Euml;',
362 "\314" => '&Igrave;',
363 "\315" => '&Iacute;',
364 "\316" => '&Icirc;',
365 "\317" => '&Iuml;',
366 "\320" => '&ETH;',
367 "\321" => '&Ntilde;',
368 "\322" => '&Ograve;',
369 "\323" => '&Oacute;',
370 "\324" => '&Ocirc;',
371 "\325" => '&Otilde;',
372 "\326" => '&Ouml;',
373 "\327" => '&#215;',
374 "\330" => '&Oslash;',
375 "\331" => '&Ugrave;',
376 "\332" => '&Uacute;',
377 "\333" => '&Ucirc;',
378 "\334" => '&Uuml;',
379 "\335" => '&Yacute;',
380 "\336" => '&THORN;',
381 "\337" => '&szlig;',
382 "\340" => '&agrave;',
383 "\341" => '&aacute;',
384 "\342" => '&acirc;',
385 "\343" => '&atilde;',
386 "\344" => '&auml;',
387 "\345" => '&aring;',
388 "\346" => '&aelig;',
389 "\347" => '&ccedil;',
390 "\350" => '&egrave;',
391 "\351" => '&eacute;',
392 "\352" => '&ecirc;',
393 "\353" => '&euml;',
394 "\354" => '&igrave;',
395 "\355" => '&iacute;',
396 "\356" => '&icirc;',
397 "\357" => '&iuml;',
398 "\360" => '&eth;',
399 "\361" => '&ntilde;',
400 "\362" => '&ograve;',
401 "\363" => '&oacute;',
402 "\364" => '&ocirc;',
403 "\365" => '&otilde;',
404 "\366" => '&ouml;',
405 "\367" => '&divide;',
406 "\370" => '&oslash;',
407 "\371" => '&ugrave;',
408 "\372" => '&uacute;',
409 "\373" => '&ucirc;',
410 "\374" => '&uuml;',
411 "\375" => '&yacute;',
412 "\376" => '&thorn;',
413 "\377" => '&yuml;'
414 );
415
416 # Unicode-Mapping
417 %unimap=(
418 128 => '&#8364;',
419 129 => '&uuml;',
420 130 => '&#8218;',
421 131 => '&#402;',
422 132 => '&#8222;',
423 133 => '&#8230;',
424 134 => '&#8224;',
425 135 => '&#8225;',
426 136 => '&#710;',
427 137 => '&#8240;',
428 138 => '&#352;',
429 139 => '&#8249;',
430 140 => '&#338;',
431 141 => '&igrave;',
432 142 => '&#381;',
433 143 => '&Aring;',
434 144 => '&uuml;',
435 145 => "'",
436 146 => "'",
437 147 => '&#8220;',
438 148 => '&#8220;',
439 149 => '&#8226;',
440 150 => '-',
441 151 => '-',
442 152 => '&#732;',
443 153 => '&#8482;',
444 154 => '&#353;',
445 155 => '&#8250;',
446 156 => '&#339;',
447 157 => '&#216;',
448 158 => '&#215;',
449 159 => '&#376;',
450 160 => '&nbsp;',
451 163 => '&pound;',
452 165 => '&yen;',
453 167 => '&sect;',
454 169 => '&copy;',
455 171 => '&laquo;',
456 173 => '-',
457 174 => '&reg;',
458 175 => '&szlig;',
459 180 => '&acute;',
460 184 => '&cedil;',
461 185 => '&sup1;',
462 187 => '&raquo;',
463 192 => '&Agrave;',
464 193 => '&Aacute;',
465 194 => '&Acirc;',
466 195 => '&Atilde;',
467 196 => '&Auml;',
468 197 => '&Aring;',
469 198 => '&AElig;',
470 199 => '&Ccedil;',
471 200 => '&Egrave;',
472 201 => '&Eacute;',
473 202 => '&Ecirc;',
474 203 => '&Euml;',
475 204 => '&Igrave;',
476 205 => '&Iacute;',
477 206 => '&Icirc;',
478 207 => '&Iuml;',
479 208 => '&ETH;',
480 209 => '&Ntilde;',
481 210 => '&Ograve;',
482 211 => '&Oacute;',
483 212 => '&Ocirc;',
484 213 => '&Otilde;',
485 214 => '&Ouml;',
486 216 => '&Oslash;',
487 217 => '&Ugrave;',
488 218 => '&Uacute;',
489 219 => '&Ucirc;',
490 220 => '&Uuml;',
491 221 => '&Yacute;',
492 222 => '&THORN;',
493 223 => '&szlig;',
494 224 => '&agrave;',
495 225 => '&aacute;',
496 226 => '&acirc;',
497 227 => '&atilde;',
498 228 => '&auml;',
499 229 => '&aring;',
500 230 => '&aelig;',
501 231 => '&ccedil;',
502 232 => '&egrave;',
503 233 => '&eacute;',
504 234 => '&ecirc;',
505 235 => '&euml;',
506 236 => '&igrave;',
507 237 => '&iacute;',
508 238 => '&icirc;',
509 239 => '&iuml;',
510 240 => '&eth;',
511 241 => '&ntilde;',
512 242 => '&ograve;',
513 243 => '&oacute;',
514 244 => '&ocirc;',
515 245 => '&otilde;',
516 246 => '&ouml;',
517 247 => '&divide;',
518 248 => '&oslash;',
519 249 => '&ugrave;',
520 250 => '&uacute;',
521 251 => '&ucirc;',
522 252 => '&uuml;',
523 253 => '&yacute;',
524 254 => '&thorn;',
525 255 => '&yuml;'
526 );
527 }
528
529 # keeping require happy
530 1;
531
532 #
533 #
534 ### end of Encode::Plain #######################################################

patrick-canterino.de