]> git.p6c8.net - selfforum.git/blob - selfforum-cgi/shared/Arc/Archive.pm
ef2cee5b0b33116bd938a09fba0f4be95010a2dd
[selfforum.git] / selfforum-cgi / shared / Arc / Archive.pm
1 package Arc::Archive;
2
3 ################################################################################
4 # #
5 # File: shared/Arc/Archive.pm #
6 # #
7 # Authors: André Malo <nd@o3media.de> #
8 # #
9 # Description: Severance of Threads and archiving #
10 # #
11 ################################################################################
12
13 use strict;
14 use vars qw(
15 @EXPORT
16 $VERSION
17 );
18
19 use Arc::Test;
20 use Lock;
21 use Posting::_lib qw(
22 get_all_threads
23 create_forum_xml_string
24 parse_xml_file
25 parse_single_thread
26 get_message_node
27 get_body_node
28 save_file
29 KEEP_DELETED
30 );
31 use Posting::Cache;
32 use Time::German 'localtime';
33
34 use XML::DOM;
35
36 ################################################################################
37 #
38 # Version check
39 #
40 # last modified:
41 # $Date$ (GMT)
42 # by $Author$
43 #
44 sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'}
45
46 ################################################################################
47 #
48 # Export
49 #
50 use base qw(Exporter);
51 @EXPORT = qw(cut_tail);
52
53 ### delete_no_archived () ######################################################
54 #
55 # remove no archived branches vom thread
56 #
57 # Params: $xml - XML::DOM::Document node
58 # $msg - arrayref - messages
59 # $percent - voting limit (percent)
60 #
61 # Return: ~none~
62 #
63 sub delete_no_archived ($) {
64 my $par = shift;
65
66 my ($xml, $sum, $tid, $msg, $percent) = map {$par->{$_}}
67 qw( xml sum tid msg percent);
68
69 # $oldlevel: contains the level of last checked msg
70 # @path : contains the current branch
71 # %archive : contains the mids, that will be archived
72 # %hidden : contains the invisible mids
73 #
74 my ($oldlevel, @path, %archive, %hidden) = (0, 0);
75
76 # check all messages of thread
77 #
78 for my $z (0..$#{$msg}) {
79
80 if ($msg -> [$z] -> {level} > $oldlevel) {
81 # this msg is a child of the last one
82 #
83 push @path => $z;
84 $oldlevel = $msg -> [$z] -> {level};
85 }
86
87 elsif ($msg -> [$z] -> {level} < $oldlevel) {
88 # this starts a new subbranch (-1+ level(s))
89 #
90
91 # remove last msg (incl. kids), that is on same level
92 #
93 splice @path, $msg -> [$z] -> {level};
94 push @path => $z;
95 $oldlevel = $msg -> [$z] -> {level};
96 }
97
98 else {
99 # the msg is a sister of the last one
100 #
101 $path[-1] = $z;
102 }
103
104 # 'archive' is an admin flag
105 # if set, the message (incl. branch) MUST be archived
106 #
107 if (defined $msg->[$z]->{archive} and $msg->[$z]->{archive}) {
108 $archive{$msg->[$_]->{mid}} = 1 for (@path);
109 }
110
111 # notice invisble messages
112 # while they are in @path and archive flag is not set,
113 # they and their kids WON'T be archived
114 #
115 $hidden{$z} = 1 if ($msg->[$z]->{deleted});
116
117 # if 'archive' is NOT set and message not deleted,
118 #
119 unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) {
120 my $key = $sum->{$tid}->{$msg->[$z]->{mid}};
121
122 # ...and they've voted enough, it will be archived
123 #
124 if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) {
125 my $hidden_in_path;
126
127 # check on hidden messages in @path
128 #
129 for (@path) {
130 if ($hidden{$_}) {
131 $hidden_in_path = 1;
132 last;
133 }
134 }
135
136 # set archive-flag for messages in @path,
137 # unless a parent message is hidden
138 #
139 unless ($hidden_in_path) {
140 $archive{$msg->[$_]->{mid}} = 1 for (@path);
141 }
142 }
143 }
144 }
145
146 # now remove messages without 'archive'-flag
147 # from thread xml
148 #
149 for (reverse grep {!$archive{$_->{mid}}} @$msg) {
150 my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid});
151
152 # remove message entry
153 #
154 $h -> getParentNode -> removeChild ($h);
155
156 # remove message text
157 #
158 $h = get_body_node($xml, 'm'.$_->{mid});
159 $h -> getParentNode -> removeChild ($h);
160
161 # 'remove' from $msg
162 #
163 $_->{deleted} = 1;
164 }
165 }
166
167 ### create_arcdir () ###########################################################
168 #
169 # check, if specific directories for year and month exist, create
170 # it, if necessary
171 #
172 # Params: $path - archive root
173 # $time - Thread time (GMT)
174 #
175 # Return: List: $path - /path/to/ to archived thread file
176 # $error - error or undef
177 #
178 sub create_arcdir ($$) {
179 my ($path, $time) = @_;
180
181 my ($month, $year) = (localtime ($time))[4,5];
182
183 # use the 'real' values for directory names
184 #
185 $month++; $year+=1900;
186
187 my $yeardir = $path . $year;
188 my $monthdir = $yeardir . '/' . $month;
189 my $monthpath = $monthdir . '/';
190
191 mkdir $yeardir, 0777 unless (-d $yeardir);
192 return ('', "could not create directory '$yeardir'") unless (-d $yeardir);
193
194 mkdir $monthdir, 0777 unless (-d $monthdir);
195 return ('', "could not create directory '$monthdir'") unless (-d $monthdir);
196
197 # return path, successfully created
198 #
199 $monthpath;
200 }
201
202 ### process_threads () #########################################################
203 #
204 # process obsolete threads
205 # (transmit views/votings from cache, do archive, if necessary)
206 #
207 # Params: $par - hash reference
208 # (opt, cache, failed, obsolete, messagePath,
209 # archivePath, adminDefault)
210 #
211 # Return: hashref (tid => $msg)
212 #
213 sub process_threads ($) {
214 my $par = shift;
215
216 my ($opt, $failed, $obsolete, $cache) = map {$par->{$_}} qw
217 ( opt failed obsolete cache);
218
219 my %archived;
220
221 if ($opt->{exArchiving}) {
222
223 # yes, we do archive
224 #
225 my $sum = $cache -> summary;
226 if ($sum) {
227
228 # iterate over all obsolete threads, that are not failed yet
229 #
230 for my $tid (grep {not exists ($failed->{$_})} @$obsolete) {
231 my $xml = parse_xml_file ($par->{messagePath}."t$tid.xml");
232
233 unless ($xml) {
234 # xml parse error
235 #
236 $failed->{$tid} = 'could not parse thread file.';
237 }
238 else {
239 # ok, parse thread
240 #
241 my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0);
242 my $msg = parse_single_thread ($tnode, KEEP_DELETED);
243
244 if ($opt->{archiving} eq 'UserVotings') {
245
246 # filter out the bad stuff
247 #
248 delete_no_archived ({
249 xml => $xml,
250 sum => $sum,
251 tid => $tid,
252 msg => $msg,
253 percent => $par->{adminDefault}->{Voting}->{Limit}
254 });
255 }
256
257 # save back xml file (into archive)
258 #
259 if ($tnode -> hasChildNodes) {
260
261 # insert views and votings counter
262 #
263 for ($tnode -> getElementsByTagName ('Message')) {
264 my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/;
265 $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views});
266 $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings});
267 }
268
269 # create archive dir, unless exists
270 #
271 my ($path, $error) = create_arcdir ($par -> {archivePath}, $msg->[0]->{time});
272
273 if ($error) {
274 $failed->{$tid} = $error;
275 }
276 else {
277 # save thread file
278 #
279 my $file = "${path}t$tid.xml";
280 unless (save_file ($file => \($xml -> toString))) {
281 $failed->{$tid} = "could not save '$file'";
282 }
283 else {
284 $archived{$tid} = $msg
285 }
286 }
287 }
288 }
289 }
290 }
291 else {
292 @$failed{@$obsolete} = 'error: could not load summary';
293 }
294 }
295
296 \%archived;
297 }
298
299 ### append_threads () ##########################################################
300 #
301 # open specified index file, append threads, save it back
302 #
303 # Params: $file - /path/to/indexfile
304 # $threads - hashref (threads)
305 #
306 # Return: success code (boolean)
307 #
308 sub append_threads ($$) {
309 my ($file, $threads) = @_;
310 my $thash={};
311
312 my $index = new Lock ($file);
313
314 return unless ($index -> lock (LH_EXCL));
315
316 if (-f $file) {
317 $thash = get_all_threads ($file => KEEP_DELETED);
318 $thash->{$_} = $threads->{$_} for (keys %$threads);
319 }
320 else {
321 $thash = $threads;
322 }
323
324 # save it back...
325 #
326 my $saved = save_file (
327 $file => create_forum_xml_string (
328 $threads,
329 {
330 dtd => 'forum.dtd',
331 lastMessage => 0,
332 lastThread => 0
333 }
334 )
335 );
336
337 $index -> unlock;
338
339 return unless $saved;
340
341 1;
342 }
343
344 ### indexpath () ###############################################################
345 #
346 # compose relative path of archive index file
347 #
348 # Params: $param - hash reference
349 # ($msg->[0])
350 #
351 # Return: $string (relative path)
352 #
353 sub indexpath ($) {
354 my $root = shift;
355
356 my ($month, $year) = (localtime ($root->{time}))[4,5];
357
358 # use the 'real' values for directory names
359 #
360 $month++; $year+=1900;
361
362 "$year/$month/";
363 }
364
365 ### index_threads () ###########################################################
366 #
367 # add threads to their specific archive index file
368 #
369 # Params: $param - hash reference
370 # (threads, archivePath, archiveIndex, failed)
371 #
372 # Return: ~none~
373 #
374 sub index_threads ($) {
375 my $par = shift;
376
377 my ($threads, $failed) = map {$par->{$_}} qw
378 ( threads failed);
379
380 # indexfile => hashref of threads
381 # for more efficiency (open each index file *once*)
382 #
383 my %index;
384
385 # iterate over all archived threads,
386 # prepare indexing and assign threads to indexfiles
387 #
388 for my $thread (keys %$threads) {
389
390 # index only, if the root is visible
391 #
392 unless ($threads->{$thread}->[0]->{deleted}) {
393 my $file = $par->{archivePath} . indexpath ($threads->{$thread}->[0]) . $par->{archiveIndex};
394 $index{$file} = {} unless exists($index{$file});
395
396 $index{$file} -> {$thread} = [$threads->{$thread}->[0]];
397 }
398 }
399
400 # now append threads to index files
401 #
402 for my $file (keys %index) {
403 unless (append_threads ($file => $index{$file})) {
404 $failed->{$_} = "error: could not list in '$file'" for (keys %{$index{$file}});
405 }
406 }
407 }
408
409 ### cut_tail () ################################################################
410 #
411 # shorten the main file and archive, if necessary
412 #
413 # Params: $param - hash reference
414 # (forumFile, messagePath, archivePath, lockFile, adminDefault,
415 # cachePath, archiveIndex)
416 #
417 # Return: hash reference - empty if all right done
418 #
419 sub cut_tail ($) {
420 my $param = shift;
421 my %failed;
422
423 if (
424 $param->{adminDefault}->{Severance}->{severance} ne 'instant' or
425 $param->{adminDefault}->{Instant}->{execute}
426 ) {
427 # run only one instance at the same time
428 # (exlusive lock on sev_lock)
429 #
430 my $sev = new Lock ($param->{lockFile});
431 if ($sev -> lock(LH_EXCL)) {
432
433 # lock and parse forum main file
434 #
435 my $forum = new Lock ($param->{forumFile});
436 if ($forum -> lock (LH_EXCL)) {
437 my (
438 $threads,
439 $last_thread,
440 $last_message,
441 $dtd
442 ) = get_all_threads ($forum->filename, KEEP_DELETED);
443
444 # get obsolete threads...
445 #
446 my $obsolete = get_obsolete_threads ({
447 parsedThreads => $threads,
448 adminDefault => $param->{adminDefault}
449 });
450
451 unless (@$obsolete) {
452 # nothing to cut - we're ready
453 #
454 $forum -> unlock;
455 }
456 else {
457 # ...and delete them from main
458 #
459 my %obsolete;
460 for (@$obsolete) {
461 $obsolete{$_} = $threads->{$_};
462 delete $threads->{$_};
463 }
464
465 # save it back...
466 #
467 my $saved = save_file (
468 $param -> {forumFile},
469 create_forum_xml_string (
470 $threads,
471 {
472 dtd => $dtd,
473 lastMessage => $last_message,
474 lastThread => $last_thread
475 }
476 )
477 );
478
479 # ...and masterlock the obsolete thread files
480 #
481 if ($saved) {
482 for (@$obsolete) {
483 new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'error: could not set master lock';
484 }
485 }
486
487 # release forum main file...
488 #
489 $forum -> unlock;
490
491 if ($saved) {
492 # ...and now process thread files
493 #
494 my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant')
495 ? $param -> {adminDefault} -> {Instant} -> {Severance}
496 : ($param -> {adminDefault} -> {Severance});
497
498 my $cache = new Posting::Cache ($param->{cachePath});
499
500 process_threads ({
501 opt => $sev_opt,
502 cache => $cache,
503 failed => \%failed,
504 obsolete => $obsolete,
505 messagePath => $param->{messagePath},
506 archivePath => $param->{archivePath},
507 adminDefault => $param->{adminDefault}
508 });
509
510 # delete processed files, that are not failed
511 #
512 my @removed;
513
514 for (grep {not exists($failed{$_})} @$obsolete) {
515 if (exists($failed{$_})) {
516 delete $obsolete{$_};
517 }
518 else {
519 unless (unlink ($param->{messagePath}."t$_.xml")) {
520 $failed{$_} = 'warning: could not delete thread file';
521 }
522 else {
523 push @removed => $_;
524 #file_removed ($param->{messagePath}."t$_.xml");
525 }
526 }
527 }
528
529 if (@removed) {
530 $cache -> delete_threads (@removed);
531 $cache -> garbage_collection;
532 }
533
534 # add archived threads to archive index
535 #
536 index_threads ({
537 threads => \%obsolete,
538 archivePath => $param->{archivePath},
539 archiveIndex => $param->{archiveIndex},
540 failed => \%failed
541 });
542 }
543 }
544
545 # we're ready, tell this other (waiting?) instances
546 #
547 $sev -> unlock;
548 }
549 }
550 }
551
552 # return
553 \%failed;
554 }
555
556 # keep 'require' happy
557 1;
558
559 #
560 #
561 ### end of Arc::Archive ########################################################

patrick-canterino.de