]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Posting/_lib.pm
added sub parse_xml_file
[selfforum.git] / selfforum-cgi / shared / Posting / _lib.pm
1 package Posting::_lib;
2
3 ################################################################################
4 # #
5 # File: shared/Posting/_lib.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de>, 2001-03-03 #
8 # Frank Schoenmann <fs@tower.de>, 2001-03-13 #
9 # #
10 # Description: Message access interface, time format routines #
11 # #
12 ################################################################################
13
14 use strict;
15
16 use vars qw(@EXPORT_OK);
17 use base qw(Exporter);
18
19 use Encode::Plain; $Encode::Plain::utf8 = 1;
20
21 use XML::DOM;
22
23 # ====================================================
24 # Export
25 # ====================================================
26
27 @EXPORT_OK = qw(get_message_header get_message_body get_message_node get_body_node parse_single_thread parse_xml_file
28 hr_time short_hr_time long_hr_time
29 get_all_threads create_forum_xml_string
30 save_file);
31
32 # ====================================================
33 # Access via XML::DOM
34 # ====================================================
35
36 ### get_message_header () ######################################################
37 #
38 # Read message header, return as a hash
39 #
40 # Params: $node XML message node
41 # Return: Hash reference (name, category, subject, email, home, image, time)
42 #
43 sub get_message_header ($)
44 {
45 my $node = shift;
46 my %conf;
47
48 my $header = $node -> getElementsByTagName ('Header', 0) -> item (0);
49 my $author = $header -> getElementsByTagName ('Author', 0) -> item (0);
50 my $name = $author -> getElementsByTagName ('Name', 0) -> item (0);
51 my $email = $author -> getElementsByTagName ('Email', 0) -> item (0);
52 my $home = $author -> getElementsByTagName ('HomepageUrl', 0) -> item (0);
53 my $image = $author -> getElementsByTagName ('ImageUrl', 0) -> item (0);
54 my $cat = $header -> getElementsByTagName ('Category', 0) -> item (0);
55 my $subject = $header -> getElementsByTagName ('Subject', 0) -> item (0);
56 my $date = $header -> getElementsByTagName ('Date', 0) -> item (0);
57
58 %conf = (
59 name => ($name -> hasChildNodes)?$name -> getFirstChild -> getData:undef,
60 category => ($cat -> hasChildNodes)?$cat -> getFirstChild -> getData:undef,
61 subject => ($subject -> hasChildNodes)?$subject -> getFirstChild -> getData:undef,
62 email => (defined ($email) and $email -> hasChildNodes)?$email -> getFirstChild -> getData:undef,
63 home => (defined ($home) and $home -> hasChildNodes)?$home -> getFirstChild -> getData:undef,
64 image => (defined ($image) and $image -> hasChildNodes)?$image -> getFirstChild -> getData:undef,
65 time => $date -> getAttribute ('longSec')
66 );
67
68 \%conf;
69 }
70
71 ### get_body_node () ########################################################
72 #
73 # Search a specific message body in a XML tree
74 #
75 # Params: $xml XML::DOM::Document Object (Document Node)
76 # $mid Message ID
77 # Return: MessageContent XML node (or -none-)
78 #
79 sub get_body_node ($$)
80 {
81 my ($xml, $mid) = @_;
82
83 for ($xml->getElementsByTagName ('ContentList', 1)->item (0)->getElementsByTagName ('MessageContent', 0))
84 {
85 return $_ if ($_ -> getAttribute ('mid') eq $mid);
86 }
87
88 return;
89 }
90
91 ### get_message_body () ########################################################
92 #
93 # Read message body
94 #
95 # Params: $xml XML::DOM::Document Object (Document Node)
96 # $mid Message ID
97 # Return: Scalar reference
98 #
99 sub get_message_body ($$)
100 {
101 my $cnode = get_body_node ($_[0], $_[1]);
102 my $body;
103
104 $body = ($cnode -> hasChildNodes)?$cnode -> getFirstChild -> getData:'' if $cnode;
105
106 \$body;
107 }
108
109 ### get_message_node () ########################################################
110 #
111 # Search a specific message in a XML tree
112 #
113 # Params: $xml XML::DOM::Document Object (Document Node)
114 # $tid Thread ID
115 # $mid Message ID
116 # Return: Message XML node, Thread XML node (or -none-)
117 #
118 sub get_message_node ($$$)
119 {
120 my ($xml, $tid, $mid) = @_;
121 my ($mnode, $tnode);
122
123 for ($xml->getElementsByTagName ('Thread'))
124 {
125 if ($_->getAttribute ('id') eq $tid)
126 {
127 $tnode = $_;
128 for ($tnode -> getElementsByTagName ('Message'))
129 {
130 if ($_ -> getAttribute ('id') eq $mid)
131 {
132 $mnode = $_;
133 last;
134 }
135 }
136 last;
137 }
138 }
139
140 wantarray ? ($mnode, $tnode) : $mnode;
141 }
142
143 ### sub parse_xml_file ($) #####################################################
144 #
145 # load the specified XML-File and create the DOM tree
146 # this sub is only to avoid errors and to centralize the parse process
147 #
148 # Params: $file filename
149 # Return: XML::DOM::Document Object (Document Node) or false
150 #
151 sub parse_xml_file ($) {
152 my $file = shift;
153 my $xml = eval {
154 local $SIG{__DIE__};
155 new XML::DOM::Parser (KeepCDATA => 1) -> parsefile ($file);
156 };
157
158 return if ($@);
159
160 $xml;
161 }
162
163 ###########################
164 # sub parse_single_thread
165 #
166 # einzelne Threaddatei
167 # parsen
168 ###########################
169
170 sub parse_single_thread ($$;$) {
171 my ($tnode, $deleted, $sorted) = @_;
172 my ($header, @msg, %mno);
173
174 for ($tnode -> getElementsByTagName ('Message')) {
175 $header = get_message_header ($_);
176
177 push @msg,{mid => ($_ -> getAttribute ('id') =~ /(\d+)/)[0],
178 ip => $_ -> getAttribute ('ip'),
179 kids => [$_ -> getElementsByTagName ('Message', 0)],
180 answers => $_ -> getElementsByTagName ('Message') -> getLength,
181 deleted => $_ -> getAttribute ('invisible'),
182 archive => $_ -> getAttribute ('archive'),
183 name => plain($header -> {name}),
184 cat => plain($header -> {category} or ''),
185 subject => plain($header -> {subject}),
186 time => plain($header -> {time})};
187 $mno{$_} = $#msg;}
188
189 # Eintraege ergaenzen und korrigieren
190 my $level;
191 $msg[0] -> {level} = 0;
192 for (@msg) {
193 $level = $_ -> {level} + 1;
194 @{$_ -> {kids}} = map {$msg[$mno{$_}] -> {level} = $level; $mno{$_}} @{$_ -> {kids}};}
195
196 # ============
197 # Sortieren und bei Bedarf
198 # geloeschte Messages entfernen
199
200 my $smsg = sort_thread (\@msg, $sorted);
201 delete_messages ($smsg) unless ($deleted);
202
203 $smsg;
204 }
205
206 ###########################
207 # sub create_message_xml
208 #
209 # Message-XML-String
210 # erzeugen
211 ###########################
212
213 sub create_message_xml ($$$) {
214 my ($xml, $msges, $num) = @_;
215
216 my $msg = $msges -> [$num];
217
218 my $message = $xml -> createElement ('Message');
219 $message -> setAttribute ('id', 'm'.$msg -> {mid});
220 $message -> setAttribute ('invisible', '1') if ($msg -> {deleted});
221 $message -> setAttribute ('archive', '1') if ($msg -> {archive});
222
223 # Header erzeugen
224 my $header = $xml -> createElement ('Header');
225
226 # alles inside of 'Header'
227 my $author = $xml -> createElement ('Author');
228
229 my $name = $xml -> createElement ('Name');
230 $name -> addText (toUTF8($msg -> {name}));
231
232 my $email = $xml -> createElement ('Email');
233
234 my $category = $xml -> createElement ('Category');
235 $category -> addText (toUTF8($msg -> {cat}));
236
237 my $subject = $xml -> createElement ('Subject');
238 $subject -> addText (toUTF8($msg -> {subject}));
239
240 my $date = $xml -> createElement ('Date');
241 $date -> setAttribute ('longSec', $msg -> {time});
242
243 $author -> appendChild ($name);
244 $author -> appendChild ($email);
245 $header -> appendChild ($author);
246 $header -> appendChild ($category);
247 $header -> appendChild ($subject);
248 $header -> appendChild ($date);
249 $message -> appendChild ($header);
250
251 if ($msg -> {kids}) {
252 for (@{$msg -> {kids}}) {
253 $message -> appendChild (&create_message_xml ($xml, $msges, $_));
254 }
255 }
256
257 $message;
258 }
259
260 # ====================================================
261 # XML-Parsen von Hand
262 # ====================================================
263
264 ###########################
265 # sub sort_thread
266 #
267 # Messages eines
268 # Threads sortieren
269 ###########################
270
271 sub sort_thread ($$) {
272 my ($msg, $sorted) = @_;
273
274 my ($z, %mhash) = (0);
275
276 if ($sorted) { # aelteste zuerst
277 for (@$msg) {
278 @$msg[@{$_ -> {kids}}] = sort {$a -> {mid} <=> $b -> {mid}} @$msg[@{$_ -> {kids}}] if (@{$_ -> {kids}} > 1);
279 $mhash{$_ -> {mid}} = [@$msg[@{$_ -> {kids}}]];}}
280
281 else { # juengste zuerst
282 for (@$msg) {
283 @$msg[@{$_ -> {kids}}] = sort {$b -> {mid} <=> $a -> {mid}} @$msg[@{$_ -> {kids}}] if (@{$_ -> {kids}} > 1);
284 $mhash{$_ -> {mid}} = [@$msg[@{$_ -> {kids}}]];}}
285
286 # Kinder wieder richtig einsortieren
287 my @smsg = ($msg -> [0]);
288 for (@smsg) {
289 ++$z;
290 splice @smsg,$z,0,@{$mhash{$_ -> {mid}}} if ($_ -> {answers});
291 delete $_ -> {kids};}
292
293 \@smsg;
294 }
295
296 ### delete_messages () #########################################################
297 #
298 # Filter out deleted messages
299 #
300 # Params: $smsg Reference of array of references of hashs
301 # Return: -none-
302 #
303 sub delete_messages ($)
304 {
305 my $smsg = shift;
306
307 my ($z, $oldlevel, @path) = (0,0,0);
308
309 for (@$smsg)
310 {
311 if ($_ -> {'deleted'})
312 {
313 my $n = $_ -> {'answers'} + 1;
314 $smsg -> [$_] -> {'answers'} -= $n for (@path);
315 splice @$smsg,$z,$n;
316 }
317 else
318 {
319 if ($_ -> {'level'} > $oldlevel)
320 {
321 push @path,$z;
322 $oldlevel = $_ -> {'level'};
323 }
324 elsif ($_ -> {'level'} < $oldlevel)
325 {
326 splice @path,$_ -> {'level'} - $oldlevel;
327 $oldlevel = $_ -> {'level'};
328 }
329 else
330 {
331 $path[-1] = $z;
332 }
333
334 $z++;
335 }
336 }
337
338 return;
339 }
340
341 ### get_all_threads () #########################################################
342 #
343 # Read and Parse the main file (without any XML-module, they are too slow)
344 #
345 # Params: $file /path/to/filename of the main file
346 # $deleted hold deleted (invisible) messages in result (1) oder not (0)
347 # $sorted direction of message sort: descending (0) (default) or ascending (1)
348 # Return: scalar context: hash reference
349 # list context: list (\%threads, $last_thread, $last_message, $dtd, \@unids)
350 #
351 sub get_all_threads ($$;$)
352 {
353 my ($file, $deleted, $sorted) = @_;
354 my ($last_thread, $last_message, $dtd, @unids, %threads);
355 local (*FILE, $/);
356
357 open FILE, $file or return undef;
358 my $xml = join '', <FILE>;
359 close(FILE) or return undef;
360
361 if (wantarray)
362 {
363 ($dtd) = $xml =~ /<!DOCTYPE\s+\S+\s+SYSTEM\s+"([^"]+)">/;
364 ($last_thread) = map {/(\d+)/} $xml =~ /<Forum.+?lastThread="([^"]+)"[^>]*>/;
365 ($last_message) = map {/(\d+)/} $xml =~ /<Forum.+?lastMessage="([^"]+)"[^>]*>/;
366 }
367
368 my $reg_msg = qr~(?:</Message>
369 |<Message\s+id="m(\d+)"\s+unid="([^"]*)"(?:\s+invisible="([^"]*)")?(?:\s+archive="([^"]*)")?[^>]*>\s*
370 <Header>[^<]*(?:<(?!Name>)[^<]*)*
371 <Name>([^<]+)</Name>[^<]*(?:<(?!Category>)[^<]*)*
372 <Category>([^<]*)</Category>\s*
373 <Subject>([^<]+)</Subject>\s*
374 <Date\s+longSec="(\d+)"[^>]*>\s*
375 </Header>\s*(?:(<)/Message>|(?=(<)Message\s*)))~sx;
376
377 while ($xml =~ /<Thread id="t(\d+)">([^<]*(?:<(?!\/Thread>)[^<]*)*)<\/Thread>/g)
378 {
379 my ($tid, $thread) = ($1, $2);
380 my ($level, $cmno, @msg, @stack) = (0);
381
382 while ($thread =~ m;$reg_msg;g)
383 {
384 if (defined($10))
385 {
386 push @stack,$cmno if (defined $cmno);
387 push @msg, {mid => $1,
388 unid => $2,
389 deleted => $3,
390 archive => $4,
391 name => $5,
392 cat => $6,
393 subject => $7,
394 time => $8,
395 level => $level++,
396 unids => [],
397 kids => [],
398 answers => 0};
399
400 if (defined $cmno)
401 {
402 push @{$msg[$cmno] -> {kids}} => $#msg;
403 push @{$msg[$cmno] -> {unids}} => $2;
404 }
405 else
406 {
407 push @unids => $2;
408 }
409
410 $msg[$_] -> {answers}++ for (@stack);
411
412 $cmno=$#msg;
413
414 $msg[-1] -> {name} =~ s/&amp;/&/g;
415 $msg[-1] -> {cat} =~ s/&amp;/&/g;
416 $msg[-1] -> {subject} =~ s/&amp;/&/g;
417
418 }
419 elsif (defined ($9))
420 {
421 push @msg, {mid => $1,
422 unid => $2,
423 deleted => $3,
424 archive => $4,
425 name => $5,
426 cat => $6,
427 subject => $7,
428 time => $8,
429 level => $level,
430 unids => [],
431 kids => [],
432 answers => 0};
433
434 if (defined $cmno)
435 {
436 push @{$msg[$cmno] -> {kids}} => $#msg;
437 push @{$msg[$cmno] -> {unids}} => $2;
438 $msg[$cmno] -> {answers}++;
439 }
440 else
441 {
442 push @unids => $2;
443 }
444
445 $msg[$_] -> {answers}++ for (@stack);
446
447 $msg[-1] -> {name} =~ s/&amp;/&/g;
448 $msg[-1] -> {cat} =~ s/&amp;/&/g;
449 $msg[-1] -> {subject} =~ s/&amp;/&/g;
450 }
451 else
452 {
453 $cmno = pop @stack; $level--;
454 }
455 }
456
457 my $smsg = sort_thread (\@msg, $sorted); # sort messages
458 delete_messages ($smsg) unless ($deleted); # remove invisible messages
459
460 $threads{$tid} = $smsg if (@$smsg);
461 }
462
463 wantarray
464 ? (\%threads, $last_thread, $last_message, $dtd, \@unids)
465 : \%threads;
466 }
467
468 ###########################
469 # sub create_forum_xml_string
470 #
471 # Forumshauptdatei erzeugen
472 ###########################
473
474 sub create_forum_xml_string ($$) {
475 my ($threads, $param) = @_;
476 my ($level, $thread, $msg);
477
478 my $xml = '<?xml version="1.0" encoding="UTF-8"?>'."\n"
479 .'<!DOCTYPE Forum SYSTEM "'.$param -> {dtd}.'">'."\n"
480 .'<Forum lastMessage="'.$param -> {lastMessage}.'" lastThread="'.$param -> {lastThread}.'">';
481
482 foreach $thread (sort {$b <=> $a} keys %$threads) {
483 $xml .= '<Thread id="t'.$thread.'">';
484 $level = -1;
485
486 foreach $msg (@{$threads -> {$thread}}) {
487 $xml .= '</Message>' x ($level - $msg -> {level} + 1) if ($msg -> {level} <= $level);
488 $level = $msg -> {level};
489 $xml .= '<Message id="m'.$msg -> {mid}.'"'
490 .' unid="'.$msg -> {unid}.'"'
491 .(($msg -> {deleted})?' invisible="1"':'')
492 .(($msg -> {archive})?' archive="1"':'')
493 .'>'
494 .'<Header>'
495 .'<Author>'
496 .'<Name>'
497 .plain($msg -> {name})
498 .'</Name>'
499 .'<Email></Email>'
500 .'</Author>'
501 .'<Category>'
502 .((length $msg -> {cat})?plain($msg -> {cat}):'')
503 .'</Category>'
504 .'<Subject>'
505 .plain($msg -> {subject})
506 .'</Subject>'
507 .'<Date longSec="'
508 .$msg -> {time}
509 .'"/>'
510 .'</Header>';}
511
512 $xml .= '</Message>' x ($level + 1);
513 $xml .= '</Thread>';}
514
515 $xml.='</Forum>';
516
517 \$xml;
518 }
519
520 ### save_file () ###############################################################
521 #
522 # Save a file
523 #
524 # Params: $filename Filename
525 # $content File content as scalar reference
526 # Return: Status (1 - ok, 0 - error)
527 #
528 sub save_file ($$)
529 {
530 my ($filename, $content) = @_;
531 local *FILE;
532
533 open FILE, ">$filename.temp" or return;
534
535 unless (print FILE $$content)
536 {
537 close FILE;
538 return;
539 }
540
541 close FILE or return;
542
543 rename "$filename.temp", $filename or return;
544
545 1;
546 }
547
548 # ====================================================
549 # Zeitdarstellung
550 # ====================================================
551
552 ###########################
553 # sub hr_time
554 # 02. Januar 2001, 12:02 Uhr
555 #
556 # sub short_hr_time
557 # 02. 01. 2001, 12:02 Uhr
558 #
559 # sub long_hr_time
560 # Dienstag, 02. Januar 2001, 12:02:01 Uhr
561 #
562 # formatierte Zeitangabe
563 ###########################
564
565 sub hr_time ($) {
566 my @month = (qw(Januar Februar), "M\303\244rz", qw(April Mail Juni Juli August September Oktober November Dezember));
567 # ^^^^^^^^ - UTF8 #
568
569 my (undef, $min, $hour, $day, $mon, $year) = localtime ($_[0]);
570
571 sprintf ('%02d. %s %04d, %02d:%02d Uhr', $day, $month[$mon], $year+1900, $hour, $min);
572 }
573
574 sub short_hr_time ($) {
575 my (undef, $min, $hour, $day, $mon, $year) = localtime ($_[0]);
576
577 sprintf ('%02d. %02d. %04d, %02d:%02d Uhr', $day, $mon+1, $year+1900, $hour, $min);
578 }
579
580 sub long_hr_time ($) {
581 my @month = (qw(Januar Februar), "M\303\244rz", qw(April Mail Juni Juli August September Oktober November Dezember));
582 # ^^^^^^^^ - UTF8 #
583
584 my @wday = qw(Sonntag Montag Dienstag Mittwoch Donnerstag Freitag Samstag);
585 my ($sek, $min, $hour, $day, $mon, $year, $wday) = localtime ($_[0]);
586
587 sprintf ('%s, %02d. %s %04d, %02d:%02d:%02d Uhr', $wday[$wday], $day, $month[$mon], $year+1900, $hour, $min, $sek);
588 }
589
590 # ====================================================
591 # Modulinitialisierung
592 # ====================================================
593
594 # making require happy
595 1;

patrick-canterino.de