]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Arc/Archive.pm
improved the behavior of get_all_threads and create_forum_xml_string
[selfforum.git] / selfforum-cgi / shared / Arc / Archive.pm
1 package Arc::Archive;
2
3 ################################################################################
4 # #
5 # File: shared/Arc/Archive.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de> #
8 # #
9 # Description: Severance of Threads and archiving #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 @EXPORT
16 );
17
18 use Arc::Test;
19 use Lock;
20 use Posting::_lib qw(
21 get_all_threads
22 create_forum_xml_string
23 parse_xml_file
24 parse_single_thread
25 get_message_node
26 get_body_node
27 save_file
28 KEEP_DELETED
29 );
30 use Posting::Cache;
31 use Time::German 'localtime';
32
33 use XML::DOM;
34
35 ################################################################################
36 #
37 # Version check
38 #
39 # last modified:
40 # $Date$ (GMT)
41 # by $Author$
42 #
43 sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
44
45 ################################################################################
46 #
47 # Export
48 #
49 use base qw(Exporter);
50 @EXPORT = qw(cut_tail);
51
52 ### delete_no_archived () ######################################################
53 #
54 # remove no archived branches vom thread
55 #
56 # Params: $xml - XML::DOM::Document node
57 # $msg - arrayref - messages
58 # $percent - voting limit (percent)
59 #
60 # Return: ~none~
61 #
62 sub delete_no_archived ($) {
63 my $par = shift;
64
65 my ($xml, $sum, $tid, $msg, $percent) = map {$par->{$_}}
66 qw( xml sum tid msg percent);
67
68 # $oldlevel: contains the level of last checked msg
69 # @path : contains the current branch
70 # %archive : contains the mids, that will be archived
71 # %hidden : contains the invisible mids
72 #
73 my ($oldlevel, @path, %archive, %hidden) = (0, 0);
74
75 # check all messages of thread
76 #
77 for my $z (0..$#{$msg}) {
78
79 if ($msg -> [$z] -> {level} > $oldlevel) {
80 # this msg is a child of the last one
81 #
82 push @path => $z;
83 $oldlevel = $msg -> [$z] -> {level};
84 }
85
86 elsif ($msg -> [$z] -> {level} < $oldlevel) {
87 # this starts a new subbranch (-1+ level(s))
88 #
89
90 # remove last msg (incl. kids), that is on same level
91 #
92 splice @path, $msg -> [$z] -> {level};
93 push @path => $z;
94 $oldlevel = $msg -> [$z] -> {level};
95 }
96
97 else {
98 # the msg is a sister of the last one
99 #
100 $path[-1] = $z;
101 }
102
103 # 'archive' is an admin flag
104 # if set, the message (incl. branch) MUST be archived
105 #
106 if (defined $msg->[$z]->{archive} and $msg->[$z]->{archive}) {
107 $archive{$msg->[$_]->{mid}} = 1 for (@path);
108 }
109
110 # notice invisble messages
111 # while they are in @path and archive flag is not set,
112 # they and their kids WON'T be archived
113 #
114 $hidden{$z} = 1 if ($msg->[$z]->{deleted});
115
116 # if 'archive' is NOT set and message not deleted,
117 #
118 unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) {
119 my $key = $sum->{$tid}->{$msg->[$z]->{mid}};
120
121 # ...and they've voted enough, it will be archived
122 #
123 if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) {
124 my $hidden_in_path;
125
126 # check on hidden messages in @path
127 #
128 for (@path) {
129 if ($hidden{$_}) {
130 $hidden_in_path = 1;
131 last;
132 }
133 }
134
135 # set archive-flag for messages in @path,
136 # unless a parent message is hidden
137 #
138 unless ($hidden_in_path) {
139 $archive{$msg->[$_]->{mid}} = 1 for (@path);
140 }
141 }
142 }
143 }
144
145 # now remove messages without 'archive'-flag
146 # from thread xml
147 #
148 for (reverse grep {!$archive{$_->{mid}}} @$msg) {
149 my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid});
150
151 # remove message entry
152 #
153 $h -> getParentNode -> removeChild ($h);
154
155 # remove message text
156 #
157 $h = get_body_node($xml, 'm'.$_->{mid});
158 $h -> getParentNode -> removeChild ($h);
159
160 # 'remove' from $msg
161 #
162 $_->{deleted} = 1;
163 }
164 }
165
166 ### create_arcdir () ###########################################################
167 #
168 # check, if specific directories for year and month exist, create
169 # it, if necessary
170 #
171 # Params: $path - archive root
172 # $time - Thread time (GMT)
173 #
174 # Return: List: $path - /path/to/ to archived thread file
175 # $error - error or undef
176 #
177 sub create_arcdir ($$) {
178 my ($path, $time) = @_;
179
180 my ($month, $year) = (localtime ($time))[4,5];
181
182 # use the 'real' values for directory names
183 #
184 $month++; $year+=1900;
185
186 my $yeardir = $path . $year;
187 my $monthdir = $yeardir . '/' . $month;
188 my $monthpath = $monthdir . '/';
189
190 mkdir $yeardir, 0777 unless (-d $yeardir);
191 return ('', "could not create directory '$yeardir'") unless (-d $yeardir);
192
193 mkdir $monthdir, 0777 unless (-d $monthdir);
194 return ('', "could not create directory '$monthdir'") unless (-d $monthdir);
195
196 # return path, successfully created
197 #
198 $monthpath;
199 }
200
201 ### process_threads () #########################################################
202 #
203 # process obsolete threads
204 # (transmit views/votings from cache, do archive, if necessary)
205 #
206 # Params: $par - hash reference
207 # (opt, cache, failed, obsolete, messagePath,
208 # archivePath, adminDefault)
209 #
210 # Return: hashref (tid => $msg)
211 #
212 sub process_threads ($) {
213 my $par = shift;
214
215 my ($opt, $failed, $obsolete, $cache) = map {$par->{$_}} qw
216 ( opt failed obsolete cache);
217
218 my %archived;
219
220 if ($opt->{exArchiving}) {
221
222 # yes, we do archive
223 #
224 my $sum = $cache -> summary;
225 if ($sum) {
226
227 # iterate over all obsolete threads, that are not failed yet
228 #
229 for my $tid (grep {not exists ($failed->{$_})} @$obsolete) {
230 my $xml = parse_xml_file ($par->{messagePath}."t$tid.xml");
231
232 unless ($xml) {
233 # xml parse error
234 #
235 $failed->{$tid} = 'could not parse thread file.';
236 }
237 else {
238 # ok, parse thread
239 #
240 my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0);
241 my $msg = parse_single_thread ($tnode, KEEP_DELETED);
242
243 if ($opt->{archiving} eq 'UserVotings') {
244
245 # filter out the bad stuff
246 #
247 delete_no_archived ({
248 xml => $xml,
249 sum => $sum,
250 tid => $tid,
251 msg => $msg,
252 percent => $par->{adminDefault}->{Voting}->{Limit}
253 });
254 }
255
256 # save back xml file (into archive)
257 #
258 if ($tnode -> hasChildNodes) {
259
260 # insert views and votings counter
261 #
262 for ($tnode -> getElementsByTagName ('Message')) {
263 my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/;
264 $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views});
265 $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings});
266 }
267
268 # create archive dir, unless exists
269 #
270 my ($path, $error) = create_arcdir ($par -> {archivePath}, $msg->[0]->{time});
271
272 if ($error) {
273 $failed->{$tid} = $error;
274 }
275 else {
276 # save thread file
277 #
278 my $file = "${path}t$tid.xml";
279 unless (save_file ($file => \($xml -> toString))) {
280 $failed->{$tid} = "could not save '$file'";
281 }
282 else {
283 $archived{$tid} = $msg
284 }
285 }
286 }
287 }
288 }
289 }
290 else {
291 @$failed{@$obsolete} = 'error: could not load summary';
292 }
293 }
294
295 \%archived;
296 }
297
298 ### append_threads () ##########################################################
299 #
300 # open specified index file, append threads, save it back
301 #
302 # Params: $file - /path/to/indexfile
303 # $threads - hashref (threads)
304 #
305 # Return: success code (boolean)
306 #
307 sub append_threads ($$) {
308 my ($file, $threads) = @_;
309 my $thash={};
310
311 my $index = new Lock ($file);
312
313 return unless ($index -> lock (LH_EXCL));
314
315 if (-f $file) {
316 $thash = get_all_threads ($file => KEEP_DELETED);
317 $thash->{$_} = $threads->{$_} for (keys %$threads);
318 }
319 else {
320 $thash = $threads;
321 }
322
323 # save it back...
324 #
325 my $saved = save_file (
326 $file => create_forum_xml_string (
327 $thash,
328 {
329 dtd => 'forum.dtd',
330 lastMessage => 0,
331 lastThread => 0
332 }
333 )
334 );
335
336 $index -> unlock;
337
338 return unless $saved;
339
340 1;
341 }
342
343 ### indexpath () ###############################################################
344 #
345 # compose relative path of archive index file
346 #
347 # Params: $param - hash reference
348 # ($msg->[0])
349 #
350 # Return: $string (relative path)
351 #
352 sub indexpath ($) {
353 my $root = shift;
354
355 my ($month, $year) = (localtime ($root->{time}))[4,5];
356
357 # use the 'real' values for directory names
358 #
359 $month++; $year+=1900;
360
361 "$year/$month/";
362 }
363
364 ### index_threads () ###########################################################
365 #
366 # add threads to their specific archive index file
367 #
368 # Params: $param - hash reference
369 # (threads, archivePath, archiveIndex, failed)
370 #
371 # Return: ~none~
372 #
373 sub index_threads ($) {
374 my $par = shift;
375
376 my ($threads, $failed) = map {$par->{$_}} qw
377 ( threads failed);
378
379 # indexfile => hashref of threads
380 # for more efficiency (open each index file *once*)
381 #
382 my %index;
383
384 # iterate over all archived threads,
385 # prepare indexing and assign threads to indexfiles
386 #
387 for my $thread (keys %$threads) {
388
389 # index only, if the root is visible
390 #
391 unless ($threads->{$thread}->[0]->{deleted}) {
392 my $file = $par->{archivePath} . indexpath ($threads->{$thread}->[0]) . $par->{archiveIndex};
393 $index{$file} = {} unless exists($index{$file});
394
395 $index{$file} -> {$thread} = [$threads->{$thread}->[0]];
396 }
397 }
398
399 # now append threads to index files
400 #
401 for my $file (keys %index) {
402 unless (append_threads ($file => $index{$file})) {
403 $failed->{$_} = "error: could not list in '$file'" for (keys %{$index{$file}});
404 }
405 }
406 }
407
408 ### cut_tail () ################################################################
409 #
410 # shorten the main file and archive, if necessary
411 #
412 # Params: $param - hash reference
413 # (forumFile, messagePath, archivePath, lockFile, adminDefault,
414 # cachePath, archiveIndex)
415 #
416 # Return: hash reference - empty if all right done
417 #
418 sub cut_tail ($) {
419 my $param = shift;
420 my %failed;
421
422 if (
423 $param->{adminDefault}->{Severance}->{severance} ne 'instant' or
424 $param->{adminDefault}->{Instant}->{execute}
425 ) {
426 # run only one instance at the same time
427 # (exlusive lock on sev_lock)
428 #
429 my $sev = new Lock ($param->{lockFile});
430 if ($sev -> lock(LH_EXCL)) {
431
432 # lock and parse forum main file
433 #
434 my $forum = new Lock ($param->{forumFile});
435 if ($forum -> lock (LH_EXCL)) {
436 my (
437 $threads,
438 $last_thread,
439 $last_message,
440 $dtd
441 ) = get_all_threads ($forum->filename, KEEP_DELETED);
442
443 # get obsolete threads...
444 #
445 my $obsolete = get_obsolete_threads ({
446 parsedThreads => $threads,
447 adminDefault => $param->{adminDefault}
448 });
449
450 unless (@$obsolete) {
451 # nothing to cut - we're ready
452 #
453 $forum -> unlock;
454 }
455 else {
456 # ...and delete them from main
457 #
458 my %obsolete;
459 for (@$obsolete) {
460 $obsolete{$_} = $threads->{$_};
461 delete $threads->{$_};
462 }
463
464 # save it back...
465 #
466 my $saved = save_file (
467 $param -> {forumFile},
468 create_forum_xml_string (
469 $threads,
470 {
471 dtd => $dtd,
472 lastMessage => $last_message,
473 lastThread => $last_thread
474 }
475 )
476 );
477
478 # ...and masterlock the obsolete thread files
479 #
480 if ($saved) {
481 for (@$obsolete) {
482 new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'error: could not set master lock';
483 }
484 }
485
486 # release forum main file...
487 #
488 $forum -> unlock;
489
490 if ($saved) {
491 # ...and now process thread files
492 #
493 my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant')
494 ? $param -> {adminDefault} -> {Instant} -> {Severance}
495 : ($param -> {adminDefault} -> {Severance});
496
497 my $cache = new Posting::Cache ($param->{cachePath});
498
499 process_threads ({
500 opt => $sev_opt,
501 cache => $cache,
502 failed => \%failed,
503 obsolete => $obsolete,
504 messagePath => $param->{messagePath},
505 archivePath => $param->{archivePath},
506 adminDefault => $param->{adminDefault}
507 });
508
509 # delete processed files, that are not failed
510 #
511 my @removed;
512
513 for (grep {not exists($failed{$_})} @$obsolete) {
514 if (exists($failed{$_})) {
515 delete $obsolete{$_};
516 }
517 else {
518 my $tfile = new Lock ($param->{messagePath}."t$_.xml");
519 unless (unlink ($tfile->filename)) {
520 $failed{$_} = 'warning: could not delete thread file';
521 }
522 else {
523 push @removed => $_;
524 $tfile -> purge;
525 }
526 }
527 }
528
529 if (@removed) {
530 $cache -> delete_threads (@removed);
531 $cache -> garbage_collection;
532 }
533
534 # add archived threads to archive index
535 #
536 index_threads ({
537 threads => \%obsolete,
538 archivePath => $param->{archivePath},
539 archiveIndex => $param->{archiveIndex},
540 failed => \%failed
541 });
542 }
543 }
544
545 # we're ready, tell this other (waiting?) instances
546 #
547 $sev -> unlock;
548 }
549 }
550 }
551
552 # return
553 \%failed;
554 }
555
556 # keep 'require' happy
557 1;
558
559 #
560 #
561 ### end of Arc::Archive ########################################################

patrick-canterino.de