]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Posting/_lib.pm
improved the behavior of get_all_threads and create_forum_xml_string
[selfforum.git] / selfforum-cgi / shared / Posting / _lib.pm
1 package Posting::_lib;
2
3 ################################################################################
4 # #
5 # File: shared/Posting/_lib.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de> #
8 # Frank Schönmann <fs@tower.de> #
9 # #
10 # Description: Message access interface, time format routines #
11 # #
12 ################################################################################
13
14 use strict;
15 use vars qw(
16 @EXPORT_OK
17 );
18
19 use Encode::Plain; $Encode::Plain::utf8 = 1;
20
21 use Time::German qw(localtime);
22 use XML::DOM;
23
24 ################################################################################
25 #
26 # Version check
27 #
28 # last modified:
29 # $Date$ (GMT)
30 # by $Author$
31 #
32 sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
33
34 ################################################################################
35 #
36 # Export
37 #
38 use constant SORT_ASCENT => 0; # (latest postings first)
39 use constant SORT_DESCENT => 1;
40 use constant KEEP_DELETED => 1;
41 use constant KILL_DELETED => 0;
42
43 use constant FORUM_DTD => 'http://selfforum.sourceforge.net/dtd/forum.dtd';
44
45 use base qw(Exporter);
46 @EXPORT_OK = qw(
47 get_message_header
48 get_message_body
49 get_message_node
50 get_body_node
51 parse_single_thread
52 parse_xml_file
53 create_new_thread
54 create_message
55
56 hr_time
57 short_hr_time
58 long_hr_time
59 very_short_hr_time
60 month
61
62 get_all_threads
63 create_forum_xml_string
64
65 save_file
66
67 SORT_ASCENT
68 SORT_DESCENT
69 KEEP_DELETED
70 KILL_DELETED
71 );
72
73 ################################################################################
74 #
75 # Access via XML::DOM
76 #
77
78 ### sub create_message ($$) ####################################################
79 #
80 # create the 'Message' subtree
81 #
82 # Params: $xml - XML::DOM::Document object
83 # $par - hash reference
84 # (msg, ip, name, email, home, image, category, subject, time)
85 #
86 # Return: XML::DOM::Element object
87 #
88 sub create_message ($$) {
89 my ($xml,$par) = @_;
90
91 my $message = $xml -> createElement ('Message');
92 $message -> setAttribute ('id' => $par -> {msg});
93 $message -> setAttribute ('ip' => $par -> {ip});
94
95 my $header = $xml -> createElement ('Header');
96 my $author = $xml -> createElement ('Author');
97 $header -> appendChild ($author);
98
99 my @may = (
100 ['name' => 'Name' => $author],
101 ['email' => 'Email' => $author],
102 ['home' => 'HomepageUrl' => $author],
103 ['image' => 'ImageUrl' => $author],
104 ['category' => 'Category' => $header],
105 ['subject' => 'Subject' => $header]
106 );# key => element name => superior
107
108 for (@may) {
109
110 # create element
111 my $obj = $xml -> createElement ($_->[1]);
112
113 # insert content
114 $obj -> addText (
115 defined $par -> {$_->[0]}
116 ? $par -> {$_->[0]}
117 : ''
118 );
119
120 # link to superior element
121 $_ -> [2] -> appendChild ($obj);
122 }
123
124 my $date = $xml -> createElement ('Date');
125 $date -> setAttribute ('longSec'=> $par -> {time});
126
127 $header -> appendChild ($date);
128 $message -> appendChild ($header);
129
130 # return
131 #
132 $message;
133 }
134
135 ### sub create_new_thread ($) ##################################################
136 #
137 # create a XML::DOM::Document object of a thread containing one posting
138 #
139 # Params: hash reference
140 # (thread, msg, body, ip, name, email, home,
141 # image, category, subject, time)
142 #
143 # Return: XML::DOM::Document object
144 #
145 sub create_new_thread ($) {
146 my $par = shift;
147
148 # new document
149 #
150 my $xml = new XML::DOM::Document;
151
152 # xml declaration
153 #
154 my $decl = new XML::DOM::XMLDecl;
155 $decl -> setVersion ('1.0');
156 $decl -> setEncoding ('UTF-8');
157 $xml -> setXMLDecl ($decl);
158
159 # set doctype
160 #
161 my $dtd = $xml -> createDocumentType ('Forum' => FORUM_DTD);
162 $xml -> setDoctype ($dtd);
163
164 # create root element 'Forum'
165 # create element 'Thread'
166 # create 'Message' subtree
167 # create element 'ContentList'
168 # create 'MessageContent' subtree
169 #
170 my $forum = $xml -> createElement ('Forum');
171 my $thread = $xml -> createElement ('Thread');
172 $thread -> setAttribute ('id' => $par -> {thread});
173 my $message = create_message ($xml,$par);
174 my $content = $xml -> createElement ('ContentList');
175 my $mcontent = $xml -> createElement ('MessageContent');
176 $mcontent -> setAttribute ('mid' => $par -> {msg});
177 $mcontent -> appendChild (
178 $xml -> createCDATASection (${$par -> {body}})
179 );
180
181 # link all the nodes to
182 # their superior elements
183 #
184 $thread -> appendChild ($message);
185 $forum -> appendChild ($thread);
186 $content -> appendChild ($mcontent);
187 $forum -> appendChild ($content);
188 $xml -> appendChild ($forum);
189
190 # return
191 #
192 $xml;
193 }
194
195 ### get_message_header () ######################################################
196 #
197 # Read message header, return as a hash
198 #
199 # Params: $node - XML message node
200 # Return: hash reference (name, category, subject, email, home, image, time)
201 #
202 sub get_message_header ($)
203 {
204 my $node = shift;
205 my %conf;
206
207 my $header = $node -> getElementsByTagName ('Header' , 0) -> item (0);
208 my $author = $header -> getElementsByTagName ('Author' , 0) -> item (0);
209 my $name = $author -> getElementsByTagName ('Name' , 0) -> item (0);
210 my $email = $author -> getElementsByTagName ('Email' , 0) -> item (0);
211 my $home = $author -> getElementsByTagName ('HomepageUrl', 0) -> item (0);
212 my $image = $author -> getElementsByTagName ('ImageUrl' , 0) -> item (0);
213 my $cat = $header -> getElementsByTagName ('Category' , 0) -> item (0);
214 my $subject = $header -> getElementsByTagName ('Subject' , 0) -> item (0);
215 my $date = $header -> getElementsByTagName ('Date' , 0) -> item (0);
216
217 %conf = (
218 name => ($name -> hasChildNodes)?$name -> getFirstChild -> getData:undef,
219 category => ($cat -> hasChildNodes)?$cat -> getFirstChild -> getData:undef,
220 subject => ($subject -> hasChildNodes)?$subject -> getFirstChild -> getData:undef,
221 email => (defined ($email) and $email -> hasChildNodes)?$email -> getFirstChild -> getData:undef,
222 home => (defined ($home) and $home -> hasChildNodes)?$home -> getFirstChild -> getData:undef,
223 image => (defined ($image) and $image -> hasChildNodes)?$image -> getFirstChild -> getData:undef,
224 time => $date -> getAttribute ('longSec')
225 );
226
227 \%conf;
228 }
229
230 ### get_body_node () ########################################################
231 #
232 # Search a specific message body in a XML tree
233 #
234 # Params: $xml XML::DOM::Document Object (Document Node)
235 # $mid Message ID
236 #
237 # Return: MessageContent XML node (or -none-)
238 #
239 sub get_body_node ($$)
240 {
241 my ($xml, $mid) = @_;
242
243 for ($xml -> getElementsByTagName ('ContentList', 1) -> item (0) -> getElementsByTagName ('MessageContent', 0)) {
244 return $_ if ($_ -> getAttribute ('mid') eq $mid);
245 }
246
247 return;
248 }
249
250 ### get_message_body () ########################################################
251 #
252 # Read message body
253 #
254 # Params: $xml XML::DOM::Document Object (Document Node)
255 # $mid Message ID
256 #
257 # Return: Scalar reference
258 #
259 sub get_message_body ($$)
260 {
261 my $cnode = get_body_node ($_[0], $_[1]);
262 my $body;
263
264 $body = ($cnode -> hasChildNodes)?$cnode -> getFirstChild -> getData:'' if $cnode;
265
266 \$body;
267 }
268
269 ### get_message_node () ########################################################
270 #
271 # Search a specific message in a XML tree
272 #
273 # Params: $xml XML::DOM::Document Object (Document Node)
274 # $tid Thread ID
275 # $mid Message ID
276 #
277 # Return: Message XML node, Thread XML node (or -none-)
278 #
279 sub get_message_node ($$$)
280 {
281 my ($xml, $tid, $mid) = @_;
282 my ($mnode, $tnode);
283
284 for ($xml->getElementsByTagName ('Thread')) {
285 if ($_->getAttribute ('id') eq $tid) {
286 $tnode = $_;
287
288 for ($tnode -> getElementsByTagName ('Message')) {
289 if ($_ -> getAttribute ('id') eq $mid) {
290 $mnode = $_;
291 last;
292 }
293 }
294 last;
295 }
296 }
297
298 wantarray
299 ? ($mnode, $tnode)
300 : $mnode;
301 }
302
303 ### sub parse_xml_file ($) #####################################################
304 #
305 # load the specified XML-File and create the DOM tree
306 # this sub is only to avoid errors and to centralize the parse process
307 #
308 # Params: $file filename
309 #
310 # Return: XML::DOM::Document Object (Document Node) or false
311 #
312 sub parse_xml_file ($) {
313 my $file = shift;
314
315 my $xml = eval {
316 local $SIG{__DIE__}; # CGI::Carp works unreliable ;-(
317 new XML::DOM::Parser(KeepCDATA => 1)->parsefile ($file);
318 };
319
320 return if ($@);
321
322 $xml;
323 }
324
325 ### parse_single_thread () #####################################################
326 #
327 # parse a thread file
328 #
329 # Params: $tnode - Thread element node
330 # $deleted - keep deleted (boolean)
331 # $sorted - sorting order
332 #
333 # Return: arrayref
334 #
335 sub parse_single_thread ($$;$) {
336 my ($tnode, $deleted, $sorted) = @_;
337 my ($header, @msg, %mno);
338
339 for ($tnode -> getElementsByTagName ('Message')) {
340 $header = get_message_header ($_);
341
342 push @msg => {
343 mid => ($_ -> getAttribute ('id') =~ /(\d+)/)[0],
344 ip => $_ -> getAttribute ('ip'),
345 kids => [$_ -> getElementsByTagName ('Message', 0)],
346 answers => $_ -> getElementsByTagName ('Message') -> getLength,
347 deleted => $_ -> getAttribute ('invisible'),
348 archive => $_ -> getAttribute ('archive'),
349 name => plain($header -> {name}),
350 cat => plain($header -> {category} or ''),
351 subject => plain($header -> {subject}),
352 time => plain($header -> {time})
353 };
354 $mno{$_} = $#msg;
355 }
356
357 my $level;
358 $msg[0] -> {level} = 0;
359 for (@msg) {
360 $level = $_ -> {level} + 1;
361 @{$_ -> {kids}} = map {$msg[$mno{$_}] -> {level} = $level; $mno{$_}} @{$_ -> {kids}};
362 }
363
364 # sort and process deleted files
365 #
366 my $smsg = sort_thread (\@msg, $sorted);
367 delete_messages ($smsg) unless ($deleted);
368
369 $smsg;
370 }
371
372 ################################################################################
373 #
374 # Access via regexps and native perl ;)
375 #
376
377 ### sort_thread () #############################################################
378 #
379 # sort the message array
380 #
381 # Params: $msg - arrayref
382 # $sorted - sorting order
383 #
384 # Return: sorted arrayref
385 #
386 sub sort_thread ($$) {
387 my ($msg, $sorted) = @_;
388
389 my ($z, %mhash) = (0);
390
391 if ($sorted) { # oldest first
392 for (@$msg) {
393 @$msg[@{$_ -> {kids}}] = sort {$a -> {mid} <=> $b -> {mid}} @$msg[@{$_ -> {kids}}] if (@{$_ -> {kids}} > 1);
394 $mhash{$_ -> {mid}} = [@$msg[@{$_ -> {kids}}]];}}
395
396 else { # latest first
397 for (@$msg) {
398 @$msg[@{$_ -> {kids}}] = sort {$b -> {mid} <=> $a -> {mid}} @$msg[@{$_ -> {kids}}] if (@{$_ -> {kids}} > 1);
399 $mhash{$_ -> {mid}} = [@$msg[@{$_ -> {kids}}]];}}
400
401 # sort the children
402 #
403 my @smsg = ($msg -> [0]);
404 for (@smsg) {
405 ++$z;
406 splice @smsg,$z,0,@{$mhash{$_ -> {mid}}} if ($_ -> {answers});
407 delete $_ -> {kids};
408 }
409
410 # return
411 \@smsg;
412 }
413
414 ### delete_messages () #########################################################
415 #
416 # Filter out deleted messages
417 #
418 # Params: $smsg Reference of array of references of hashs
419 # Return: -none-
420 #
421 sub delete_messages ($) {
422 my $smsg = shift;
423 my ($z, $oldlevel, @path) = (0,0,0);
424
425 while ($z <= $#{$smsg}) {
426
427 if ($smsg -> [$z] -> {level} > $oldlevel) {
428 push @path => $z;
429 $oldlevel = $smsg -> [$z] -> {level};
430 }
431 elsif ($smsg -> [$z] -> {level} < $oldlevel) {
432 splice @path, $smsg -> [$z] -> {level};
433 push @path => $z;
434 $oldlevel = $smsg -> [$z] -> {'level'};
435 }
436 else {
437 $path[-1] = $z;
438 }
439
440 if ($smsg -> [$z] -> {deleted}) {
441 my $n = $smsg -> [$z] -> {answers} + 1;
442 $smsg -> [$_] -> {answers} -= $n for (@path);
443 splice @$smsg, $z, $n;
444 }
445 else {
446 $z++;
447 }
448 }
449
450 return;
451 }
452
453 ### get_all_threads () #########################################################
454 #
455 # Read and Parse the main file (without any XML-module, they are too slow)
456 #
457 # Params: $file - /path/to/filename of the main file
458 # $deleted - hold deleted (invisible) messages in result (1) oder not (0)
459 # $sorted - direction of message sort: descending (0) (default) or ascending (1)
460 #
461 # Return: scalar context: hash reference (\%threads)
462 # list context: list (\%threads, $last_thread, $last_message, $dtd, \@unids)
463 #
464 sub get_all_threads ($$;$) {
465 my ($file, $deleted, $sorted) = @_;
466 my ($last_thread, $last_message, $dtd, @unids, %threads);
467 local (*FILE, $/);
468
469 open FILE,"< $file" or return;
470 my $xml = join '', <FILE>;
471 close(FILE) or return;
472
473 if (wantarray) {
474 $dtd = FORUM_DTD;
475 ($last_thread) = map {/(\d+)/} $xml =~ /<Forum.+?lastThread="([^"]+)"[^>]*>/;
476 ($last_message) = map {/(\d+)/} $xml =~ /<Forum.+?lastMessage="([^"]+)"[^>]*>/;
477 }
478
479 my $reg_msg = qr~(?:</Message>
480 |<Message\s+id="m(\d+)"(?:\s+unid="([^"]*)")?(?:\s+invisible="([^"]*)")?(?:\s+archive="([^"]*)")?[^>]*>\s*
481 <Header>[^<]*(?:<(?!Name>)[^<]*)*
482 <Name>([^<]+)</Name>[^<]*(?:<(?!Category>)[^<]*)*
483 <Category>([^<]*)</Category>\s*
484 <Subject>([^<]+)</Subject>\s*
485 <Date\s+longSec="(\d+)"[^>]*>\s*
486 </Header>\s*(?:(<)/Message>|(?=(<)Message\s*)))~sx;
487
488 while ($xml =~ /<Thread id="t(\d+)">([^<]*(?:<(?!\/Thread>)[^<]*)*)<\/Thread>/g)
489 {
490 my ($tid, $thread) = ($1, $2);
491 my ($level, $cmno, @msg, @stack) = (0);
492
493 while ($thread =~ m;$reg_msg;g)
494 {
495 if (defined($10))
496 {
497 push @stack,$cmno if (defined $cmno);
498 push @msg, {
499 mid => $1,
500 unid => (defined $2) ? $2 : '',
501 deleted => $3 || 0,
502 archive => $4 || 0,
503 name => $5,
504 cat => $6,
505 subject => $7,
506 time => $8,
507 level => $level++,
508 unids => [],
509 kids => [],
510 answers => 0
511 };
512
513 if (defined $cmno)
514 {
515 push @{$msg[$cmno] -> {kids}} => $#msg;
516 push @{$msg[$cmno] -> {unids}} => (defined $2) ? $2 : '#';
517 }
518 else
519 {
520 push @unids => (defined $2) ? $2 : '';
521 }
522
523 $msg[$_] -> {answers}++ for (@stack);
524
525 $cmno=$#msg;
526
527 $msg[-1] -> {name} =~ s/&amp;/&/g;
528 $msg[-1] -> {cat} =~ s/&amp;/&/g;
529 $msg[-1] -> {subject} =~ s/&amp;/&/g;
530
531 }
532 elsif (defined ($9))
533 {
534 push @msg, {
535 mid => $1,
536 unid => (defined $2) ? $2 : '',
537 deleted => $3 || 0,
538 archive => $4 || 0,
539 name => $5,
540 cat => $6,
541 subject => $7,
542 time => $8,
543 level => $level,
544 unids => [],
545 kids => [],
546 answers => 0
547 };
548
549 if (defined $cmno)
550 {
551 push @{$msg[$cmno] -> {kids}} => $#msg;
552 push @{$msg[$cmno] -> {unids}} => (defined $2) ? $2 : '';
553 $msg[$cmno] -> {answers}++;
554 }
555 else
556 {
557 push @unids => (defined $2) ? $2 : '';
558 }
559
560 $msg[$_] -> {answers}++ for (@stack);
561
562 $msg[-1] -> {name} =~ s/&amp;/&/g;
563 $msg[-1] -> {cat} =~ s/&amp;/&/g;
564 $msg[-1] -> {subject} =~ s/&amp;/&/g;
565 }
566 else
567 {
568 $cmno = pop @stack; $level--;
569 }
570 }
571
572 my $smsg = sort_thread (\@msg, $sorted); # sort messages
573 delete_messages ($smsg) unless ($deleted); # remove invisible messages
574
575 $threads{$tid} = $smsg if (@$smsg);
576 }
577
578 wantarray
579 ? (\%threads, $last_thread, $last_message, $dtd, \@unids)
580 : \%threads;
581 }
582
583 ### create_forum_xml_string () #################################################
584 #
585 # compose main file xml string
586 #
587 # Params: $threads - parsed threads (see also 'get_all_threads')
588 # $params - hashref (see doc for details)
589 #
590 # Return: scalarref of the xml string
591 #
592 sub create_forum_xml_string ($$) {
593 my ($threads, $param) = @_;
594 my ($level, $thread, $msg);
595
596 my $xml =
597 '<?xml version="1.0"?>'."\n"
598 . '<!DOCTYPE Forum SYSTEM "'.FORUM_DTD.'">'."\n"
599 . '<Forum lastMessage="m'.($param->{lastMessage} =~ /(\d+)/)[0].'" lastThread="t'.($param->{lastThread} =~ /(\d+)/)[0].'">';
600
601 for $thread (sort {$b <=> $a} keys %$threads) {
602 $xml .= '<Thread id="t'.$thread.'">';
603 $level = -1;
604
605 for $msg (@{$threads -> {$thread}}) {
606 $xml .= '</Message>' x ($level - $msg -> {level} + 1) if ($msg -> {level} <= $level);
607
608 $level = $msg -> {level};
609 $xml .=
610 '<Message id="m'.$msg -> {mid}.'"'
611 . (($msg -> {unid}) ?' unid="'.$msg -> {unid}.'"':'')
612 . (($msg -> {deleted})?' invisible="1"':'')
613 . (($msg -> {archive})?' archive="1"':'')
614 . '>'
615 . '<Header>'
616 . '<Author>'
617 . '<Name>'
618 . plain($msg -> {name})
619 . '</Name>'
620 . '<Email />'
621 . '</Author>'
622 . '<Category>'
623 . ((length $msg -> {cat})?plain($msg -> {cat}):'')
624 . '</Category>'
625 . '<Subject>'
626 . plain($msg -> {subject})
627 . '</Subject>'
628 . '<Date longSec="'
629 . $msg -> {time}
630 . '"/>'
631 . '</Header>';
632 }
633
634 $xml .= '</Message>' x ($level + 1);
635 $xml .= '</Thread>';}
636
637 $xml.='</Forum>';
638
639 \$xml;
640 }
641
642 ### save_file () ###############################################################
643 #
644 # Save a file
645 #
646 # Params: $filename Filename
647 # $content File content as scalar reference
648 # Return: Status (1 - ok, 0 - error)
649 #
650 sub save_file ($$)
651 {
652 my ($filename, $content) = @_;
653 local *FILE;
654
655 open FILE, ">$filename.temp" or return;
656
657 unless (print FILE $$content)
658 {
659 close FILE;
660 return;
661 }
662
663 close FILE or return;
664
665 rename "$filename.temp", $filename or return;
666
667 1;
668 }
669
670 ################################################################################
671 #
672 # several time formatting routines
673 #
674 # hr_time
675 # 02. Januar 2001, 12:02 Uhr
676 #
677 # short_hr_time
678 # 02. 01. 2001, 12:02 Uhr
679 #
680 # long_hr_time
681 # Dienstag, 02. Januar 2001, 12:02:01 Uhr
682 #
683 # very_short_hr_time
684 # 02. 01. 2001
685 #
686 sub month($) {
687 (qw(Januar Februar), "M\303\244rz", qw(April Mai Juni Juli August September Oktober November Dezember))[shift (@_) - 1];
688 # ^^^^^^^^ - UTF8 #
689 }
690
691 sub hr_time ($) {
692 my (undef, $min, $hour, $day, $mon, $year) = localtime (shift);
693
694 sprintf ('%02d. %s %04d, %02d:%02d Uhr', $day, month($mon+1), $year+1900, $hour, $min);
695 }
696
697 sub short_hr_time ($) {
698 my (undef, $min, $hour, $day, $mon, $year) = localtime (shift);
699
700 sprintf ('%02d. %02d. %04d, %02d:%02d Uhr', $day, $mon+1, $year+1900, $hour, $min);
701 }
702
703 sub long_hr_time ($) {
704 my @wday = qw(Sonntag Montag Dienstag Mittwoch Donnerstag Freitag Samstag);
705 my ($sek, $min, $hour, $day, $mon, $year, $wday) = localtime (shift);
706
707 sprintf ('%s, %02d. %s %04d, %02d:%02d:%02d Uhr', $wday[$wday], $day, month($mon+1), $year+1900, $hour, $min, $sek);
708 }
709
710 sub very_short_hr_time($) {
711 my (undef, $min, $hour, $day, $mon, $year) = localtime (shift);
712
713 sprintf ('%02d. %02d. %04d', $day, $mon+1, $year+1900);
714 }
715
716 # keep 'require' happy
717 1;
718
719 #
720 #
721 ### end of Posting::_lib #######################################################

patrick-canterino.de