From a267dbbdd7f5d6be2248af064671c0a14cc5e1b8 Mon Sep 17 00:00:00 2001 From: ndparker <> Date: Mon, 23 Jul 2001 02:22:47 +0000 Subject: [PATCH 1/1] made the code more readable (splittet sub cut_tail into several subs, added comments) improved the selection of the archive branches --- selfforum-cgi/shared/Arc/Archive.pm | 442 +++++++++++++++++++--------- 1 file changed, 309 insertions(+), 133 deletions(-) diff --git a/selfforum-cgi/shared/Arc/Archive.pm b/selfforum-cgi/shared/Arc/Archive.pm index cb56877..40e6bd5 100644 --- a/selfforum-cgi/shared/Arc/Archive.pm +++ b/selfforum-cgi/shared/Arc/Archive.pm @@ -29,6 +29,7 @@ use Posting::_lib qw( KEEP_DELETED ); use Posting::Cache; +use Time::German 'localtime'; use XML::DOM; @@ -45,7 +46,240 @@ $VERSION = do { my @r =(q$Revision$ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r } use base qw(Exporter); @EXPORT = qw(cut_tail); -### sub cut_tail ($) ########################################################### +### delete_no_archived () ###################################################### +# +# remove no archived branches vom thread +# +# Params: $xml - XML::DOM::Document node +# $msg - arrayref - messages +# $percent - voting limit (percent) +# +# Return: ~none~ +# +sub delete_no_archived ($) { + my $par = shift; + + my ($xml, $sum, $tid, $msg, $percent) = map {$par->{$_}} + qw( xml sum tid msg percent); + + # $oldlevel: contains the level of last checked msg + # @path : contains the current branch + # %archive : contains the mids, that will be archived + # %hidden : contains the invisible mids + # + my ($oldlevel, @path, %archive, %hidden) = (0, 0); + + # check all messages of thread + # + for my $z (0..$#{$msg}) { + + if ($msg -> [$z] -> {level} > $oldlevel) { + # this msg is a child of the last one + # + push @path => $z; + $oldlevel = $msg -> [$z] -> {level}; + } + + elsif ($msg -> [$z] -> {level} < $oldlevel) { + # this starts a new subbranch (-1+ level(s)) + # + + # remove last msg (incl. kids), that is on same level + # + splice @path, $msg -> [$z] -> {level}; + push @path => $z; + $oldlevel = $msg -> [$z] -> {level}; + } + + else { + # the msg is a sister of the last one + # + $path[-1] = $z; + } + + # 'archive' is an admin flag + # if set, the message (incl. branch) MUST be archived + # + if (defined $msg->[$z]->{archive} and $msg->[$z]->{archive}) { + $archive{$msg->[$_]->{mid}} = 1 for (@path); + } + + # notice invisble messages + # while they are in @path and archive flag is not set, + # they and their kids WON'T be archived + # + $hidden{$z} = 1 if ($msg->[$z]->{deleted}); + + # if 'archive' is NOT set and message not deleted, + # + unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) { + my $key = $sum->{$tid}->{$msg->[$z]->{mid}}; + + # ...and they've voted enough, it will be archived + # + if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) { + my $hidden_in_path; + + # check on hidden messages in @path + # + for (@path) { + if ($hidden{$_}) { + $hidden_in_path = 1; + last; + } + } + + # set archive-flag for messages in @path, + # unless a parent message is hidden + # + unless ($hidden_in_path) { + $archive{$msg->[$_]->{mid}} = 1 for (@path); + } + } + } + } + + # now remove messages without 'archive'-flag + # from thread xml + # + for (reverse grep {!$archive{$_->{mid}}} @$msg) { + my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid}); + + # remove message entry + # + $h -> getParentNode -> removeChild ($h); + + # remove message text + # + $h = get_body_node($xml, 'm'.$_->{mid}); + $h -> getParentNode -> removeChild ($h); + } +} + +### create_arcdir () ########################################################### +# +# check, if specific directories for year and month exist, create +# it, if necessary +# +# Params: $path - archive root +# $time - Thread time (GMT) +# +# Return: List: $path - /path/to/ to archived thread file +# $error - error or undef +# +sub create_arcdir ($$) { + my ($path, $time) = @_; + + my ($month, $year) = (localtime ($time))[4,5]; + + # use the 'real' values for directory names + # + $month++; $year+=1900; + + my $yeardir = $path . $year; + my $monthdir = $yeardir . '/' . $month; + my $monthpath = $monthdir . '/'; + + mkdir $yeardir, 0777 unless (-d $yeardir); + return ('', "could not create directory '$yeardir'") unless (-d $yeardir); + + mkdir $monthdir, 0777 unless (-d $monthdir); + return ('', "could not create directory '$monthdir'") unless (-d $monthdir); + + # return path, successfully created + # + $monthpath; +} + +### process_threads () ######################################################### +# +# process obsolete threads +# (transmit views/votings from cache, do archive, if necessary) +# +# Params: $par - hash reference +# (opt, cache, failed, obsolete, messagePath, +# archivePath, adminDefault) +# +# Return: ~none~ +# +sub process_threads ($) { + my $par = shift; + + my ($opt, $failed, $obsolete, $cache) = map {$par->{$_}} qw + ( opt failed obsolete cache); + + if ($opt->{exArchiving}) { + + # yes, we do archive + # + my $sum = $cache -> summary; + if ($sum) { + + # iterate over all obsolete threads, that are not failed yet + # + for my $tid (grep {not exists ($failed->{$_})} @$obsolete) { + my $xml = parse_xml_file ($par->{messagePath}."t$tid.xml"); + + unless ($xml) { + # xml parse error + # + $failed->{$tid} = 'could not parse thread file.'; + } + else { + # ok, parse thread + # + my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0); + my $msg = parse_single_thread ($tnode, KEEP_DELETED); + + if ($opt->{archiving} eq 'UserVotings') { + + # filter out the bad stuff + # + delete_no_archived ({ + xml => $xml, + sum => $sum, + tid => $tid, + msg => $msg, + percent => $par->{adminDefault}->{Voting}->{Limit} + }); + } + + # save back xml file (into archive) + # + if ($tnode -> hasChildNodes) { + + # insert views and votings counter + # + for ($tnode -> getElementsByTagName ('Message')) { + my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/; + $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views}); + $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings}); + } + + # create archive dir, unless exists + # + my ($path, $error) = create_arcdir ($par -> {archivePath}, $msg->[0]->{time}); + + if ($error) { + $failed->{$tid} = $error; + } + else { + # save thread file + # + my $file = "${path}t$tid.xml"; + save_file ($file => \($xml -> toString)) or $failed->{$tid} = "could not save '$file'"; + } + } + } + } + } + else { + @$failed{@$obsolete} = 'could not load summary'; + } + } +} + +### cut_tail () ################################################################ # # shorten the main file and archive, if necessary # @@ -59,166 +293,108 @@ sub cut_tail ($) { my $param = shift; my %failed; - if ( $param->{adminDefault}->{Severance}->{severance} ne 'instant' - or $param->{adminDefault}->{Instant}->{execute} + if ( + $param->{adminDefault}->{Severance}->{severance} ne 'instant' or + $param->{adminDefault}->{Instant}->{execute} ) { + # run only one instance at the same time + # (exlusive lock on sev_lock) + # my $sev = new Lock ($param->{lockFile}); if ($sev -> lock(LH_EXCL)) { + + # lock and parse forum main file + # my $forum = new Lock ($param->{forumFile}); if ($forum -> lock (LH_EXCL)) { my ( $threads, $last_thread, $last_message, - $dtd, - undef + $dtd ) = get_all_threads ($forum->filename, KEEP_DELETED); + # get obsolete threads... + # my $obsolete = get_obsolete_threads ({ parsedThreads => $threads, adminDefault => $param->{adminDefault} }); - delete $threads->{$_} for (@$obsolete); + unless (@$obsolete) { + # nothing to cut - we're ready + # + $forum -> unlock; + } + else { + # ...and delete them from main + # + my %obsolete; + for (@$obsolete) { + $obsolete{$_} = $threads->{$_}; + delete $threads->{$_}; + } + + # save it back... + # + my $saved = save_file ( + $param -> {forumFile}, + create_forum_xml_string ( + $threads, + { + dtd => $dtd, + lastMessage => $last_message, + lastThread => $last_thread + } + ) + ); - my $saved = save_file ( - $param -> {forumFile}, - create_forum_xml_string ( - $threads, - { dtd => $dtd, - lastMessage => $last_message, - lastThread => $last_thread + # ...and masterlock the obsolete thread files + # + if ($saved) { + for (@$obsolete) { + new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'could not set master lock'; } - ) - ); - if ($saved) { - for (@$obsolete) { - new Lock($param->{messagePath}."t$_.xml")->lock(LH_MASTER) or $failed{$_} = 'could not set master lock'; } - } - $forum -> unlock; - if ($saved) { - # now process thread files + # release forum main file... # - my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant') - ? $param -> {adminDefault} -> {Instant} -> {Severance} - : ($param -> {adminDefault} -> {Severance}); + $forum -> unlock; + + if ($saved) { + # ...and now process thread files + # + my $sev_opt = ($param -> {adminDefault} -> {Severance} -> {severance} eq 'instant') + ? $param -> {adminDefault} -> {Instant} -> {Severance} + : ($param -> {adminDefault} -> {Severance}); + + my $cache = new Posting::Cache ($param->{cachePath}); - my $cache = new Posting::Cache ($param->{cachePath}); + process_threads ({ + opt => $sev_opt, + cache => $cache, + failed => \%failed, + obsolete => $obsolete, + messagePath => $param->{messagePath}, + archivePath => $param->{archivePath}, + adminDefault => $param->{adminDefault} + }); - if ($sev_opt->{exArchiving}) { - # yes, we cut & archive + + # delete processed files, that are not failed # - my $sum = $cache -> summary; - if ($sum) { - for my $tid (grep {not exists ($failed{$_})} @$obsolete) { - my $xml = parse_xml_file ($param->{messagePath}."t$tid.xml"); - unless ($xml) { - $failed{$tid} = 'could not parse thread file.'; - } - else { - my $tnode = $xml -> getElementsByTagName ('Thread') -> item(0); - my $msg = parse_single_thread ($tnode, KEEP_DELETED); - - if ($sev_opt->{archiving} eq 'UserVotings') { - # filter out the bad stuff - # - my $percent = $param->{adminDefault}->{Voting}->{Limit}; - my ($oldlevel, @path, $z, %archive) = (0, 0); - - for $z (0..$#{$msg}) { - if ($msg -> [$z] -> {level} > $oldlevel) { - push @path => $z; - $oldlevel = $msg -> [$z] -> {level}; - } - elsif ($msg -> [$z] -> {level} < $oldlevel) { - splice @path, $msg -> [$z] -> {level}; - push @path => $z; - $oldlevel = $msg -> [$z] -> {level}; - } - else { - $path[-1] = $z; - } - - if (defined $msg->[$z]->{archive}) { - if ($msg->[$z]->{archive}) { - $archive{$msg->[$_]->{mid}} = 1 for (@path); - } - } - unless ($msg->[$z]->{archive} or $msg->[$z]->{deleted}) { - my $key = $sum->{$tid}->{$msg->[$z]->{mid}}; - if ($percent == 0 or ($key->{views} and ($key->{votings} * 100 / $key->{views}) >= $percent)) { - $archive{$msg->[$_]->{mid}} = 1 for (@path); - } - } - } - - # now filter out - # - for (reverse grep {!$archive{$_->{mid}}} @$msg) { - my $h = get_message_node($xml, "t$tid", 'm'.$_->{mid}); - $h -> getParentNode -> removeChild ($h); - - $h = get_body_node($xml, 'm'.$_->{mid}); - $h -> getParentNode -> removeChild ($h); - } - } - # save back xml file (into archive) - # - if ($tnode -> hasChildNodes) { - # insert views and votings counter - # - for ($tnode -> getElementsByTagName ('Message')) { - my ($id) = $_ -> getAttribute ('id') =~ /(\d+)/; - $_ -> setAttribute ('views' => $sum->{$tid}->{$id}->{views}); - $_ -> setAttribute ('votings' => $sum->{$tid}->{$id}->{votings}); - } - - my ($month, $year) = (localtime ($msg->[0]->{time}))[4,5]; - $month++; $year+=1900; - my $yeardir = $param -> {archivePath} . $year; - my $yearpath = $yeardir . '/'; - my $monthdir = $yearpath . $month; - my $monthpath = $monthdir . '/'; - my $file = $monthpath . "t$tid.xml"; - - mkdir $yeardir unless (-d $yeardir); - if (-d $yeardir) { - mkdir $monthdir unless (-d $monthdir); - if (-d $monthdir) { - save_file ( - $file, - \($xml -> toString) - ) or $failed{$tid} = "could not save '$file'"; - } - else { - $failed{$tid} = "could not create directory '$monthdir'"; - } - } - else { - $failed{$tid} = "could not create directory '$yeardir'"; - } - } - - } - } - } - else { - @failed{@$obsolete} = 'could not load summary'; + for (grep {not exists($failed{$_})} @$obsolete) { + unlink ($param->{messagePath}."t$_.xml") or $failed{$_} = 'could not delete thread file'; + #file_removed ($param->{messagePath}."t$_.xml"); } + $cache -> delete_threads (@$obsolete); + $cache -> garbage_collection; } - # delete processed files - # - for (grep {not exists($failed{$_})} @$obsolete) { - unlink ($param->{messagePath}."t$_.xml") or $failed{$_} = 'could not delete thread file'; - #file_removed ($param->{messagePath}."t$_.xml"); - } - $cache -> delete_threads (@$obsolete); - $cache -> garbage_collection; } + # we're ready, tell this other instances + # + $sev -> unlock; } - $sev -> unlock; } } -- 2.34.1