X-Git-Url: https://git.p6c8.net/selfforum.git/blobdiff_plain/df7bba65634dbc997ba06409e25a49b8be9dd139..f91793544f0d5c7f978e5cf6214d9882d7c4db7c:/selfforum-cgi/shared/Arc/Archive.pm diff --git a/selfforum-cgi/shared/Arc/Archive.pm b/selfforum-cgi/shared/Arc/Archive.pm index cb56877..7fe7424 100644 --- a/selfforum-cgi/shared/Arc/Archive.pm +++ b/selfforum-cgi/shared/Arc/Archive.pm @@ -4,7 +4,7 @@ package Arc::Archive; # # # File: shared/Arc/Archive.pm # # # -# Authors: Andre Malo , 2001-06-16 # +# Authors: André Malo # # # # Description: Severance of Threads and archiving # # # @@ -13,7 +13,6 @@ package Arc::Archive; use strict; use vars qw( @EXPORT - $VERSION ); use Arc::Test; @@ -29,6 +28,7 @@ use Posting::_lib qw( KEEP_DELETED ); use Posting::Cache; +use Time::German 'localtime'; use XML::DOM; @@ -36,7 +36,11 @@ use XML::DOM; # # Version check # -$VERSION = do { my @r =(q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +# last modified: +# $Date$ (GMT) +# by $Author$ +# +sub VERSION {(q$Revision$ =~ /([\d.]+)\s*$/)[0] or '0.0'} ################################################################################ # @@ -45,13 +49,369 @@ $VERSION = do { my @r =(q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r } use base qw(Exporter); @EXPORT = qw(cut_tail); -### sub cut_tail ($) ########################################################### +### delete_no_archived () ###################################################### +# +# remove no archived branches vom thread +# +# Params: $xml - XML::DOM::Document node +# $msg - arrayref - messages +# $percent - voting limit (percent) +# +# Return: ~none~ +# +sub delete_no_archived ($) { + my $par = shift; + + my ($xml, $sum, $tid, $msg, $percent) = map {$par->{$_}} + qw( xml sum tid msg percent); + + # $oldlevel: contains the level of last checked msg + # @path : contains the current branch + # %archive : contains the mids, that will be archived + # %hidden : contains the invisible mids + # + my ($oldlevel, @path, %archive, %hidden) = (0, 0); + + # check all messages of thread + # + for my $z (0..$#{$msg}) { + + if ($msg -> [$z] -> {level} > $oldlevel) { + # this msg is a child of the last one + # + push @path => $z; + $oldlevel = $msg -> [$z] -> {level}; + } + + elsif ($msg -> [$z] -> {level} < $oldlevel) { + # this starts a new subbranch (-1+ level(s)) + # + + # remove last msg (incl. kids), that is on same level + # + splice @path, $msg -> [$z] -> {level}; + push @path => $z; + $oldlevel = $msg -> [$z] -> {level}; + } + + else { + # the msg is a sister of the last one + # + $path[-1] = $z; + } + + # 'archive' is an admin flag + # if set, the message (incl. branch) MUST be archived + # + if (defined $msg->[$z]->{archive} and $msg->[$z]->{archive}) { + $archive{$msg->[$_]->{mid}} = 1 for (@path); + } + + # notice invisble messages + # while they are in @path and archive flag is not set, + # they and their kids WON'T be archived + # + $hidden{$z} = 1 if ($msg->[$z]->{deleted}); + + # if 'archive' is NOT set and message not deleted, + # + unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) { + my $key = $sum->{$tid}->{$msg->[$z]->{mid}}; + + # ...and they've voted enough, it will be archived + # + if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) { + my $hidden_in_path; + + # check on hidden messages in @path + # + for (@path) { + if ($hidden{$_}) { + $hidden_in_path = 1; + last; + } + } + + # set archive-flag for messages in @path, + # unless a parent message is hidden + # + unless ($hidden_in_path) { + $archive{$msg->[$_]->{mid}} = 1 for (@path); + } + } + } + } + + # now remove messages without 'archive'-flag + # from thread xml + # + for (reverse grep {!$archive{$_->{mid}}} @$msg) { + my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid}); + + # remove message entry + # + $h -> getParentNode -> removeChild ($h); + + # remove message text + # + $h = get_body_node($xml, 'm'.$_->{mid}); + $h -> getParentNode -> removeChild ($h); + + # 'remove' from $msg + # + $_->{deleted} = 1; + } +} + +### create_arcdir () ########################################################### +# +# check, if specific directories for year and month exist, create +# it, if necessary +# +# Params: $path - archive root +# $time - Thread time (GMT) +# +# Return: List: $path - /path/to/ to archived thread file +# $error - error or undef +# +sub create_arcdir ($$) { + my ($path, $time) = @_; + + my ($month, $year) = (localtime ($time))[4,5]; + + # use the 'real' values for directory names + # + $month++; $year+=1900; + + my $yeardir = $path . $year; + my $monthdir = $yeardir . '/' . $month; + my $monthpath = $monthdir . '/'; + + mkdir $yeardir, 0777 unless (-d $yeardir); + return ('', "could not create directory '$yeardir'") unless (-d $yeardir); + + mkdir $monthdir, 0777 unless (-d $monthdir); + return ('', "could not create directory '$monthdir'") unless (-d $monthdir); + + # return path, successfully created + # + $monthpath; +} + +### process_threads () ######################################################### +# +# process obsolete threads +# (transmit views/votings from cache, do archive, if necessary) +# +# Params: $par - hash reference +# (opt, cache, failed, obsolete, messagePath, +# archivePath, adminDefault) +# +# Return: hashref (tid => $msg) +# +sub process_threads ($) { + my $par = shift; + + my ($opt, $failed, $obsolete, $cache) = map {$par->{$_}} qw + ( opt failed obsolete cache); + + my %archived; + + if ($opt->{exArchiving}) { + + # yes, we do archive + # + my $sum = $cache -> summary; + if ($sum) { + + # iterate over all obsolete threads, that are not failed yet + # + for my $tid (grep {not exists ($failed->{$_})} @$obsolete) { + my $xml = parse_xml_file ($par->{messagePath}."t$tid.xml"); + + unless ($xml) { + # xml parse error + # + $failed->{$tid} = 'could not parse thread file.'; + } + else { + # ok, parse thread + # + my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0); + my $msg = parse_single_thread ($tnode, KEEP_DELETED); + + if ($opt->{archiving} eq 'UserVotings') { + + # filter out the bad stuff + # + delete_no_archived ({ + xml => $xml, + sum => $sum, + tid => $tid, + msg => $msg, + percent => $par->{adminDefault}->{Voting}->{Limit} + }); + } + + # save back xml file (into archive) + # + if ($tnode -> hasChildNodes) { + + # insert views and votings counter + # + for ($tnode -> getElementsByTagName ('Message')) { + my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/; + $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views}); + $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings}); + } + + # create archive dir, unless exists + # + my ($path, $error) = create_arcdir ($par -> {archivePath}, $msg->[0]->{time}); + + if ($error) { + $failed->{$tid} = $error; + } + else { + # save thread file + # + my $file = "${path}t$tid.xml"; + unless (save_file ($file => \($xml -> toString))) { + $failed->{$tid} = "could not save '$file'"; + } + else { + $archived{$tid} = $msg + } + } + } + } + } + } + else { + @$failed{@$obsolete} = 'error: could not load summary'; + } + } + + \%archived; +} + +### append_threads () ########################################################## +# +# open specified index file, append threads, save it back +# +# Params: $file - /path/to/indexfile +# $threads - hashref (threads) +# +# Return: success code (boolean) +# +sub append_threads ($$) { + my ($file, $threads) = @_; + my $thash={}; + + my $index = new Lock ($file); + + return unless ($index -> lock (LH_EXCL)); + + if (-f $file) { + $thash = get_all_threads ($file => KEEP_DELETED); + $thash->{$_} = $threads->{$_} for (keys %$threads); + } + else { + $thash = $threads; + } + + # save it back... + # + my $saved = save_file ( + $file => create_forum_xml_string ( + $thash, + { + dtd => 'forum.dtd', + lastMessage => 0, + lastThread => 0 + } + ) + ); + + $index -> unlock; + + return unless $saved; + + 1; +} + +### indexpath () ############################################################### +# +# compose relative path of archive index file +# +# Params: $param - hash reference +# ($msg->[0]) +# +# Return: $string (relative path) +# +sub indexpath ($) { + my $root = shift; + + my ($month, $year) = (localtime ($root->{time}))[4,5]; + + # use the 'real' values for directory names + # + $month++; $year+=1900; + + "$year/$month/"; +} + +### index_threads () ########################################################### +# +# add threads to their specific archive index file +# +# Params: $param - hash reference +# (threads, archivePath, archiveIndex, failed) +# +# Return: ~none~ +# +sub index_threads ($) { + my $par = shift; + + my ($threads, $failed) = map {$par->{$_}} qw + ( threads failed); + + # indexfile => hashref of threads + # for more efficiency (open each index file *once*) + # + my %index; + + # iterate over all archived threads, + # prepare indexing and assign threads to indexfiles + # + for my $thread (keys %$threads) { + + # index only, if the root is visible + # + unless ($threads->{$thread}->[0]->{deleted}) { + my $file = $par->{archivePath} . indexpath ($threads->{$thread}->[0]) . $par->{archiveIndex}; + $index{$file} = {} unless exists($index{$file}); + + $index{$file} -> {$thread} = [$threads->{$thread}->[0]]; + } + } + + # now append threads to index files + # + for my $file (keys %index) { + unless (append_threads ($file => $index{$file})) { + $failed->{$_} = "error: could not list in '$file'" for (keys %{$index{$file}}); + } + } +} + +### cut_tail () ################################################################ # # shorten the main file and archive, if necessary # # Params: $param - hash reference # (forumFile, messagePath, archivePath, lockFile, adminDefault, -# cachePath) +# cachePath, archiveIndex) # # Return: hash reference - empty if all right done # @@ -59,166 +419,133 @@ sub cut_tail ($) { my $param = shift; my %failed; - if ( $param->{adminDefault}->{Severance}->{severance} ne 'instant' - or $param->{adminDefault}->{Instant}->{execute} + if ( + $param->{adminDefault}->{Severance}->{severance} ne 'instant' or + $param->{adminDefault}->{Instant}->{execute} ) { + # run only one instance at the same time + # (exlusive lock on sev_lock) + # my $sev = new Lock ($param->{lockFile}); if ($sev -> lock(LH_EXCL)) { + + # lock and parse forum main file + # my $forum = new Lock ($param->{forumFile}); if ($forum -> lock (LH_EXCL)) { my ( $threads, $last_thread, $last_message, - $dtd, - undef + $dtd ) = get_all_threads ($forum->filename, KEEP_DELETED); + # get obsolete threads... + # my $obsolete = get_obsolete_threads ({ parsedThreads => $threads, adminDefault => $param->{adminDefault} }); - delete $threads->{$_} for (@$obsolete); + unless (@$obsolete) { + # nothing to cut - we're ready + # + $forum -> unlock; + } + else { + # ...and delete them from main + # + my %obsolete; + for (@$obsolete) { + $obsolete{$_} = $threads->{$_}; + delete $threads->{$_}; + } + + # save it back... + # + my $saved = save_file ( + $param -> {forumFile}, + create_forum_xml_string ( + $threads, + { + dtd => $dtd, + lastMessage => $last_message, + lastThread => $last_thread + } + ) + ); - my $saved = save_file ( - $param -> {forumFile}, - create_forum_xml_string ( - $threads, - { dtd => $dtd, - lastMessage => $last_message, - lastThread => $last_thread + # ...and masterlock the obsolete thread files + # + if ($saved) { + for (@$obsolete) { + new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'error: could not set master lock'; } - ) - ); - if ($saved) { - for (@$obsolete) { - new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'could not set master lock'; } - } - $forum -> unlock; - if ($saved) { - # now process thread files + # release forum main file... # - my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant') - ? $param -> {adminDefault} -> {Instant} -> {Severance} - : ($param -> {adminDefault} -> {Severance}); + $forum -> unlock; + + if ($saved) { + # ...and now process thread files + # + my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant') + ? $param -> {adminDefault} -> {Instant} -> {Severance} + : ($param -> {adminDefault} -> {Severance}); - my $cache = new Posting::Cache ($param->{cachePath}); + my $cache = new Posting::Cache ($param->{cachePath}); - if ($sev_opt->{exArchiving}) { - # yes, we cut & archive + process_threads ({ + opt => $sev_opt, + cache => $cache, + failed => \%failed, + obsolete => $obsolete, + messagePath => $param->{messagePath}, + archivePath => $param->{archivePath}, + adminDefault => $param->{adminDefault} + }); + + # delete processed files, that are not failed # - my $sum = $cache -> summary; - if ($sum) { - for my $tid (grep {not exists ($failed{$_})} @$obsolete) { - my $xml = parse_xml_file ($param->{messagePath}."t$tid.xml"); - unless ($xml) { - $failed{$tid} = 'could not parse thread file.'; + my @removed; + + for (grep {not exists($failed{$_})} @$obsolete) { + if (exists($failed{$_})) { + delete $obsolete{$_}; + } + else { + my $tfile = new Lock ($param->{messagePath}."t$_.xml"); + unless (unlink ($tfile->filename)) { + $failed{$_} = 'warning: could not delete thread file'; } else { - my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0); - my $msg = parse_single_thread ($tnode, KEEP_DELETED); - - if ($sev_opt->{archiving} eq 'UserVotings') { - # filter out the bad stuff - # - my $percent = $param->{adminDefault}->{Voting}->{Limit}; - my ($oldlevel, @path, $z, %archive) = (0, 0); - - for $z (0..$#{$msg}) { - if ($msg -> [$z] -> {level} > $oldlevel) { - push @path => $z; - $oldlevel = $msg -> [$z] -> {level}; - } - elsif ($msg -> [$z] -> {level} < $oldlevel) { - splice @path, $msg -> [$z] -> {level}; - push @path => $z; - $oldlevel = $msg -> [$z] -> {level}; - } - else { - $path[-1] = $z; - } - - if (defined $msg->[$z]->{archive}) { - if ($msg->[$z]->{archive}) { - $archive{$msg->[$_]->{mid}} = 1 for (@path); - } - } - unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) { - my $key = $sum->{$tid}->{$msg->[$z]->{mid}}; - if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) { - $archive{$msg->[$_]->{mid}} = 1 for (@path); - } - } - } - - # now filter out - # - for (reverse grep {!$archive{$_->{mid}}} @$msg) { - my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid}); - $h -> getParentNode -> removeChild ($h); - - $h = get_body_node($xml, 'm'.$_->{mid}); - $h -> getParentNode -> removeChild ($h); - } - } - # save back xml file (into archive) - # - if ($tnode -> hasChildNodes) { - # insert views and votings counter - # - for ($tnode -> getElementsByTagName ('Message')) { - my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/; - $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views}); - $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings}); - } - - my ($month, $year) = (localtime ($msg->[0]->{time}))[4,5]; - $month++; $year+=1900; - my $yeardir = $param -> {archivePath} . $year; - my $yearpath = $yeardir . '/'; - my $monthdir = $yearpath . $month; - my $monthpath = $monthdir . '/'; - my $file = $monthpath . "t$tid.xml"; - - mkdir $yeardir unless (-d $yeardir); - if (-d $yeardir) { - mkdir $monthdir unless (-d $monthdir); - if (-d $monthdir) { - save_file ( - $file, - \($xml -> toString) - ) or $failed{$tid} = "could not save '$file'"; - } - else { - $failed{$tid} = "could not create directory '$monthdir'"; - } - } - else { - $failed{$tid} = "could not create directory '$yeardir'"; - } - } - + push @removed => $_; + $tfile -> purge; } } } - else { - @failed{@$obsolete} = 'could not load summary'; + + if (@removed) { + $cache -> delete_threads (@removed); + $cache -> garbage_collection; } + + # add archived threads to archive index + # + index_threads ({ + threads => \%obsolete, + archivePath => $param->{archivePath}, + archiveIndex => $param->{archiveIndex}, + failed => \%failed + }); } - # delete processed files - # - for (grep {not exists($failed{$_})} @$obsolete) { - unlink ($param->{messagePath}."t$_.xml") or $failed{$_} = 'could not delete thread file'; - #file_removed ($param->{messagePath}."t$_.xml"); - } - $cache -> delete_threads (@$obsolete); - $cache -> garbage_collection; } + + # we're ready, tell this other (waiting?) instances + # + $sev -> unlock; } - $sev -> unlock; } }