]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Encode/Plain.pm
modified version check
[selfforum.git] / selfforum-cgi / shared / Encode / Plain.pm
1 package Encode::Plain;
2
3 ################################################################################
4 # #
5 # File: shared/Encode/Plain.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de> #
8 # #
9 # Description: Encode text for HTML Output (entities, spaces) #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 @EXPORT
16 %sonder
17 %unimap
18 $utf8
19 $v56
20 );
21
22 ################################################################################
23 #
24 # Version check
25 #
26 # last modified:
27 # $Date$ (GMT)
28 # by $Author$
29 #
30 sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
31
32 $v56 = eval {local $SIG{__DIE__}; require 5.6.0;};
33
34 ################################################################################
35 #
36 # Export
37 #
38 use base qw(Exporter);
39 @EXPORT = qw(plain multiline toUTF8);
40
41 ### sub myunpack ###############################################################
42 #
43 # if perl version < 5.6 use myunpack instead of unpack 'U' ;(
44 #
45 # Params: $string - UTF8-encoded string to unpack
46 #
47 # Return: Number - unpacked UTF8
48 #
49 sub myunpack ($) {
50 return unless defined $_[0];
51
52 my @c = map {ord} split // => shift;
53
54 return ($c[0] & 31) << 6 | $c[1] & 63
55 if (
56 @c == 2
57 and ($c[0] & 224) == 192
58 and ($c[1] & 192) == 128
59 );
60
61 return ($c[0] & 15) << 12 | ($c[1] & 63) << 6 | $c[2] && 63
62 if (
63 @c == 3
64 and ($c[0] & 240) == 224
65 and ($c[1] & 192) == 128
66 and ($c[2] & 192) == 128
67 );
68
69 return;
70 }
71
72 ### sub plain ##################################################################
73 #
74 # encode characters of plain text into entities for HTML output
75 # (includes < > " &)
76 # (excludes space problem)
77 #
78 # Params: $old - String (or scalar reference) to encode
79 # $ref - (optional) (hash reference) Options
80 # (-amp -except -utf8)
81 #
82 # Return: encoded string (or scalar reference)
83 #
84 sub plain ($;$) {
85 my ($old, $ref) = @_;
86 my $exreg;
87
88 return unless (defined $old);
89
90 my $new = ref ($old) ? $$old : $old;
91 $ref = $ref || {};
92 $new ='' unless (defined $new);
93
94 my $unicode = defined ($ref -> {-utf8})
95 ? $ref -> {-utf8}
96 : $utf8;
97
98 # Exceptions
99 #
100 my $except = exists($ref->{-except});
101 if ($except) {
102
103 if (ref ($ref -> {-except})) {
104 # turn list into a regex
105 #
106 $exreg = join '|' => map {quotemeta $_} @{$ref -> {-except}};
107 }
108 else {
109 # quote regex delimiters
110 #
111 $exreg = $ref -> {-except};
112 $exreg =~ s|/|\\/|g;
113 }
114 }
115
116 # encode the &-character
117 #
118 if (lc($ref->{-amp}) eq 'soft') {
119
120 if ($except) {
121 $new=~s/($exreg)|(?:\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);))/defined($1)?$1:'&amp;'/eg;
122 }
123 else {
124 $new=~s/\&(?!(?:#[Xx][\da-fA-F]+|#\d+|[a-zA-Z]+);)/&amp;/g;
125 }
126 }
127 elsif (lc($ref->{-amp}) ne 'no') {
128
129 if ($except) {
130 $new=~s/($exreg)|\&/defined($1)?$1:'&amp;'/eg;
131 }
132 else {
133 $new=~s/\&/&amp;/g;
134 }
135 }
136
137 # further characters
138 #
139 if ($except) {
140 $new =~ s/($exreg)|</defined($1)?$1:'&lt;'/eg;
141 $new =~ s/($exreg)|>/defined($1)?$1:'&gt;'/eg;
142 $new =~ s/($exreg)|"/defined($1)?$1:'&quot;'/eg;
143
144 # the big hash
145 #
146 if ($unicode) {
147 my $x;
148 if ($v56) {
149 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
150 defined($1)
151 ? $1
152 : ( exists($unimap{$x = unpack('U',$2)})
153 ? $unimap{$x}
154 : "&#$x;"
155 )
156 /eg;
157 }
158 else {
159 $new =~ s/($exreg)|([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
160 defined($1)
161 ? $1
162 : ( exists($unimap{$x = myunpack($2)})
163 ? $unimap{$x}
164 : "&#$x;"
165 )
166 /eg;
167 }
168 }
169 $new =~ s/($exreg)|([\177-\377])/defined($1)?$1:$sonder{$2}/eg;
170 }
171 else {
172 # no exceptions
173 #
174 $new =~ s/</&lt;/g;
175 $new =~ s/>/&gt;/g;
176 $new =~ s/"/&quot;/g;
177
178 # the big hash
179 #
180 if ($unicode) {
181 my $x;
182 if ($v56) {
183 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
184 exists($unimap{$x = unpack('U',$1)})
185 ? $unimap{$x}
186 : "&#$x;"
187 /eg;
188 }
189 else {
190 $new =~ s/([\300-\337][\200-\277]|[\340-\357][\200-\277][\200-\277])/
191 exists($unimap{$x = myunpack($1)})
192 ? $unimap{$x}
193 : "&#$x;"
194 /eg;
195 }
196 }
197 $new =~ s/([\177-\377])/$sonder{$1}/g;
198 }
199
200 # characters < 32, but whitespaces
201 #
202 $new=~s/([^\041-\377\000\s])/
203 '&#' . ord($1) . ';'
204 /eg;
205 $new=~s/\000/ /g;
206
207 # return
208 #
209 ref $old
210 ? \$new
211 : $new;
212 }
213
214 ### sub multiline ##############################################################
215 #
216 # solve the space problem
217 #
218 # Params: $old - String (or scalar reference): text to encode
219 #
220 # Return: scalar reference: encoded string
221 #
222 sub multiline ($) {
223 my $old = shift;
224 my $string=(ref ($old))
225 ? $$old
226 : $old;
227
228 $string='' unless (defined $string);
229
230 # normalize newlines
231 #
232 $string=~s/\015\012|\015|\012/\n/g;
233
234 # turn \n into <br>
235 #
236 $string=~s!\n!<br />!g;
237
238 # more than 1 space => &nbsp;
239 #
240 $string=~s/(\s\s+)/('&nbsp;' x (length($1)-1)) . ' '/eg;
241
242 # Single Spaces after <br> => &nbsp;
243 # (save ascii arts ;)
244 #
245 $string=~s!(?:^|(<br(?:\s*/)?>))\s!($1?$1:'').'&nbsp;'!eg;
246
247 # return
248 #
249 \$string;
250 }
251
252 ### sub toUTF8 #################################################################
253 #
254 # map ISO-8859-1 to UTF8
255 #
256 # Params: String or scalar reference: string to map
257 #
258 # Return: String or scalar reference: mapped string
259 #
260 sub toUTF8 ($) {
261 my $ref = shift;
262 my $string = ref($ref)
263 ? $$ref
264 : $ref;
265
266 $string =~ s
267 {([\x80-\xff])}
268 { chr((ord ($1) >> 6) | 192)
269 .chr((ord ($1) & 191))
270 }eg;
271
272 ref($ref)
273 ? \$string
274 : $string;
275 }
276
277 ################################################################################
278 #
279 # package init
280 #
281 BEGIN {
282 $utf8 = 0;
283
284 # Latin 1 + guessed
285 #
286 %sonder=(
287 "\177" => '&#127;',
288 "\200" => '&#8364;',
289 "\201" => '&uuml;',
290 "\202" => '&#8218;',
291 "\203" => '&#402;',
292 "\204" => '&#8222;',
293 "\205" => '&#8230;',
294 "\206" => '&#8224;',
295 "\207" => '&#8225;',
296 "\210" => '&#710;',
297 "\211" => '&#8240;',
298 "\212" => '&#352;',
299 "\213" => '&#8249;',
300 "\214" => '&#338;',
301 "\215" => '&igrave;',
302 "\216" => '&#381;',
303 "\217" => '&Aring;',
304 "\220" => '&uuml;',
305 "\221" => "'",
306 "\222" => "'",
307 "\223" => '&#8220;',
308 "\224" => '&#8220;',
309 "\225" => '&#8226;',
310 "\226" => '-',
311 "\227" => '-',
312 "\230" => '&#732;',
313 "\231" => '&#8482;',
314 "\232" => '&#353;',
315 "\233" => '&#8250;',
316 "\234" => '&#339;',
317 "\235" => '&#216;',
318 "\236" => '&#215;',
319 "\237" => '&#376;',
320 "\240" => '&nbsp;',
321 "\241" => '&#161;',
322 "\242" => '&#162;',
323 "\243" => '&pound;',
324 "\244" => '&#164;',
325 "\245" => '&yen;',
326 "\246" => '&#166;',
327 "\247" => '&sect;',
328 "\250" => '&#168;',
329 "\251" => '&copy;',
330 "\252" => '&#170;',
331 "\253" => '&laquo;',
332 "\254" => '&#172;',
333 "\255" => '-',
334 "\256" => '&reg;',
335 "\257" => '&szlig;',
336 "\260" => '&#176;',
337 "\261" => '&#177;',
338 "\262" => '&#178;',
339 "\263" => '&#179;',
340 "\264" => '&acute;',
341 "\265" => '&#181;',
342 "\266" => '&#182;',
343 "\267" => '&#183;',
344 "\270" => '&cedil;',
345 "\271" => '&sup1;',
346 "\272" => '&#186;',
347 "\273" => '&raquo;',
348 "\274" => '&#188;',
349 "\275" => '&#189;',
350 "\276" => '&#190;',
351 "\277" => '&#191;',
352 "\300" => '&Agrave;',
353 "\301" => '&Aacute;',
354 "\302" => '&Acirc;',
355 "\303" => '&Atilde;',
356 "\304" => '&Auml;',
357 "\305" => '&Aring;',
358 "\306" => '&AElig;',
359 "\307" => '&Ccedil;',
360 "\310" => '&Egrave;',
361 "\311" => '&Eacute;',
362 "\312" => '&Ecirc;',
363 "\313" => '&Euml;',
364 "\314" => '&Igrave;',
365 "\315" => '&Iacute;',
366 "\316" => '&Icirc;',
367 "\317" => '&Iuml;',
368 "\320" => '&ETH;',
369 "\321" => '&Ntilde;',
370 "\322" => '&Ograve;',
371 "\323" => '&Oacute;',
372 "\324" => '&Ocirc;',
373 "\325" => '&Otilde;',
374 "\326" => '&Ouml;',
375 "\327" => '&#215;',
376 "\330" => '&Oslash;',
377 "\331" => '&Ugrave;',
378 "\332" => '&Uacute;',
379 "\333" => '&Ucirc;',
380 "\334" => '&Uuml;',
381 "\335" => '&Yacute;',
382 "\336" => '&THORN;',
383 "\337" => '&szlig;',
384 "\340" => '&agrave;',
385 "\341" => '&aacute;',
386 "\342" => '&acirc;',
387 "\343" => '&atilde;',
388 "\344" => '&auml;',
389 "\345" => '&aring;',
390 "\346" => '&aelig;',
391 "\347" => '&ccedil;',
392 "\350" => '&egrave;',
393 "\351" => '&eacute;',
394 "\352" => '&ecirc;',
395 "\353" => '&euml;',
396 "\354" => '&igrave;',
397 "\355" => '&iacute;',
398 "\356" => '&icirc;',
399 "\357" => '&iuml;',
400 "\360" => '&eth;',
401 "\361" => '&ntilde;',
402 "\362" => '&ograve;',
403 "\363" => '&oacute;',
404 "\364" => '&ocirc;',
405 "\365" => '&otilde;',
406 "\366" => '&ouml;',
407 "\367" => '&divide;',
408 "\370" => '&oslash;',
409 "\371" => '&ugrave;',
410 "\372" => '&uacute;',
411 "\373" => '&ucirc;',
412 "\374" => '&uuml;',
413 "\375" => '&yacute;',
414 "\376" => '&thorn;',
415 "\377" => '&yuml;'
416 );
417
418 # Unicode-Mapping
419 %unimap=(
420 128 => '&#8364;',
421 129 => '&uuml;',
422 130 => '&#8218;',
423 131 => '&#402;',
424 132 => '&#8222;',
425 133 => '&#8230;',
426 134 => '&#8224;',
427 135 => '&#8225;',
428 136 => '&#710;',
429 137 => '&#8240;',
430 138 => '&#352;',
431 139 => '&#8249;',
432 140 => '&#338;',
433 141 => '&igrave;',
434 142 => '&#381;',
435 143 => '&Aring;',
436 144 => '&uuml;',
437 145 => "'",
438 146 => "'",
439 147 => '&#8220;',
440 148 => '&#8220;',
441 149 => '&#8226;',
442 150 => '-',
443 151 => '-',
444 152 => '&#732;',
445 153 => '&#8482;',
446 154 => '&#353;',
447 155 => '&#8250;',
448 156 => '&#339;',
449 157 => '&#216;',
450 158 => '&#215;',
451 159 => '&#376;',
452 160 => '&nbsp;',
453 163 => '&pound;',
454 165 => '&yen;',
455 167 => '&sect;',
456 169 => '&copy;',
457 171 => '&laquo;',
458 173 => '-',
459 174 => '&reg;',
460 175 => '&szlig;',
461 180 => '&acute;',
462 184 => '&cedil;',
463 185 => '&sup1;',
464 187 => '&raquo;',
465 192 => '&Agrave;',
466 193 => '&Aacute;',
467 194 => '&Acirc;',
468 195 => '&Atilde;',
469 196 => '&Auml;',
470 197 => '&Aring;',
471 198 => '&AElig;',
472 199 => '&Ccedil;',
473 200 => '&Egrave;',
474 201 => '&Eacute;',
475 202 => '&Ecirc;',
476 203 => '&Euml;',
477 204 => '&Igrave;',
478 205 => '&Iacute;',
479 206 => '&Icirc;',
480 207 => '&Iuml;',
481 208 => '&ETH;',
482 209 => '&Ntilde;',
483 210 => '&Ograve;',
484 211 => '&Oacute;',
485 212 => '&Ocirc;',
486 213 => '&Otilde;',
487 214 => '&Ouml;',
488 216 => '&Oslash;',
489 217 => '&Ugrave;',
490 218 => '&Uacute;',
491 219 => '&Ucirc;',
492 220 => '&Uuml;',
493 221 => '&Yacute;',
494 222 => '&THORN;',
495 223 => '&szlig;',
496 224 => '&agrave;',
497 225 => '&aacute;',
498 226 => '&acirc;',
499 227 => '&atilde;',
500 228 => '&auml;',
501 229 => '&aring;',
502 230 => '&aelig;',
503 231 => '&ccedil;',
504 232 => '&egrave;',
505 233 => '&eacute;',
506 234 => '&ecirc;',
507 235 => '&euml;',
508 236 => '&igrave;',
509 237 => '&iacute;',
510 238 => '&icirc;',
511 239 => '&iuml;',
512 240 => '&eth;',
513 241 => '&ntilde;',
514 242 => '&ograve;',
515 243 => '&oacute;',
516 244 => '&ocirc;',
517 245 => '&otilde;',
518 246 => '&ouml;',
519 247 => '&divide;',
520 248 => '&oslash;',
521 249 => '&ugrave;',
522 250 => '&uacute;',
523 251 => '&ucirc;',
524 252 => '&uuml;',
525 253 => '&yacute;',
526 254 => '&thorn;',
527 255 => '&yuml;'
528 );
529 }
530
531 # keep 'require' happy
532 1;
533
534 #
535 #
536 ### end of Encode::Plain #######################################################

patrick-canterino.de