#!/usr/bin/perl # close-books.pl - Archive old GnuCash transactions # Copyright (C) 2005, 2008 Jonathan Kamens. # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # You may have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 59 Temple # Place, Suite 330, Boston, MA 02111-1307 USA, or visit # http://www.gnu.org/licenses/licenses.html#GPL. # Please send enhancements or bug fixes to jik@kamens.brookline.ma.us. # This is $Revision: 1.12 $. # XXX I assume that all the transactions are currency transactions in the same # currency. That is, I don't deal with multiple currencies in the same file # and I don't deal with non-currency transactions, e.g., stocks. I also don't # do any error-checking in this area, so the wrong thing will probably happen # if you try to archive a file with different currencies and/or non-currency # transactions in it. So don't do that! I also assume that the commodity-scu # and denominator are 100 everywhere (I do a little bit of error-checking about # this, but not much). I also assume that split:value and split:quantity are # the same everywhere, because I don't know what it means when they're # different. Patches to fix all this are welcome. # # 2009-01-15 - spork - added (or so I think) the ability to deal with # stock transactions. (It works for me anyway.) I haven't tested with # multiple currencies because I didn't have a need for it. I also # added the '--immutable' option. Very little error checking is done # on the account names listed in the immutable file. If you specified them # in a goofy manner, then the goofy account name is the one you get. use strict; use XML::DOM; use XML::LibXML; use Date::Parse; use Getopt::Long; use File::Basename; use Carp; use POSIX qw(strftime); my $libxml = 0; my $whoami = basename $0; my $indent = ' ' x (length($whoami) + length('Usage: ')); my $usage = "Usage: $whoami [--help] [--verbose [...]] [--dryrun] $indent [--start-time=datetime] [--end-time=datetime] $indent [--archive-file=filename] [--output-file=filename] $indent [--archive-all=regexp [...]] [--reconcile-all] $indent [--immutable=filename] [--[no]libxml] $indent [input-filename] Reads the specified GnuCash file or standard intput. Archives reconciled transactions to the specified archive file, or discards them if no archive file is specified. Rolls up archived transactions into one transaction against the \"Opening Balances\" account. Sends the pruned file to stdout or the specified output file. If --start-time and/or --end-time are specified, only transactions posted within the specified range are archived. If you don't specify a time, 00:00:00 is assumed; the range includes the start time but not the end time, e.g., 1/1/2004 - 1/1/2005 includes all of 2004. If --archive-all is specified, unreconciled transactions in the matching accounts will also be archived. Be careful about using this without --start-time and/or --end-time! With --reconcile-all, archived transactions will be rolled up in a single archive transaction rather than in separate transactions for reconciled and unreconciled transactions. If --immutable is specified then the immutable filename will be read. This file should contain a list of 'immutable' accounts, each on one line with colons separating parent/child accounts. For example: Assets:Investments:Brokerage Account:Stock:Foomatic. Immutable accounts will be left untouched -- even if they are within the specified date range and would have otherwise been archived. Note: the intention of this is that subaccounts of immutable accounts are immutable as well. So, 'Assets:Investments' would include 'Assets:Investments:Brokerage Account:Stock' and 'Assets:Investments:Retirement Accounts'.\n"; my $id_length = 32; # How long are guids? my($help, $verbose, $dryrun, $start_str, $end_str, $start_time, $end_time, $archive_file, $output_file, $immutable_file, $input_file, @archive_all, $reconcile_all); die $usage if (! GetOptions('help' => \$help, 'verbose+' => \$verbose, 'dryrun' => \$dryrun, 'start-time=s' => \$start_str, 'end-time=s' => \$end_str, 'archive-file=s' => \$archive_file, 'output-file=s' => \$output_file, 'immutable=s' => \$immutable_file, "archive-all=s\@" => \@archive_all, 'reconcile-all' => \$reconcile_all, 'libxml!' => \$libxml)); map($_ = qr/$_/, @archive_all); if ($help) { print $usage; exit; } $input_file = shift @ARGV || '-'; die "$whoami: Too many arguments\n$usage" if (@ARGV); if ($start_str && ! ($start_time = str2time($start_str))) { die "$whoami: Couldn't parse start time \"$start_str\"\n$usage"; } if ($end_str && ! ($end_time = str2time($end_str))) { die "$whoami: Couldn't parse end time \"$end_str\"\n$usage"; } $end_time -= 1; my (%immutable_account_name, %immutable_account_guid); if ($immutable_file) { &verbose("Reading immutable file...\n"); open(IMFILE,"$immutable_file") or die "$whoami: Couldn't open immutable file $immutable_file: $!\n"; while () { chomp; &verbose("loaded immutable account name: $_\n", 2); $immutable_account_name{$_} = 1; } close IMFILE; } my $parser; if ($libxml) { $parser = XML::LibXML->new(); } else { $parser = new XML::DOM::Parser; } # We have two copies of the file we're archiving. The first is the # one we'll write out as the pruned file, and the second is the one # we'll write out as the archive file. We iterate through all # transactions in the pruned file. For each one we want to archive, # we check if we've created archive transactions in the pruned file # for each of the accounts in the transaction, create the ones that # are missing, add the transaction to all the archive transactions, # then remove the transaction from the pruned file but not from the # archive file. For each transaction we do *not* want to archive, we # remove it from the archive file but not from the pruned file. When # we're done, we should be left with the correct pruned file and # archive file and we can simply save them. &verbose("Parsing..."); my $doc; if ($libxml) { $doc = $parser->parse_file($input_file); } else { $doc = $parser->parsefile($input_file); } &verbose(" done.\n"); &verbose("Cloning..."); my $archive_doc = $doc->cloneNode(1); &verbose(" done.\n"); # We need the transaction count nodes for both files so that we can # adjust them as appropriate when we add or remove transactions. my($gnc_v2) = $doc->getElementsByTagName("gnc-v2", 0) or die; my($gnc_book) = $gnc_v2->getElementsByTagName("gnc:book", 0) or die; sub get_transaction_count_node { my($book) = @_; my(@counts) = $book->getElementsByTagName("gnc:count-data", 0); foreach my $count (@counts) { my $type_str; if ($libxml) { $type_str = $count->getAttribute("cd:type"); } else { my $attributes = $count->getAttributes; my($type) = $attributes->getNamedItem("cd:type"); $type_str = &node_value($type->getFirstChild); } if ($type_str eq "transaction") { return $count->getFirstChild; } } die "Could not find transaction count node"; } my $pruned_count_node = &get_transaction_count_node($gnc_book); my($archive_gnc_v2) = $archive_doc->getElementsByTagName("gnc-v2", 0) or die; my($archive_gnc_book) = $archive_gnc_v2->getElementsByTagName("gnc:book", 0) or die; my $archive_count_node = &get_transaction_count_node($archive_gnc_book); my $total = &node_value($pruned_count_node); &verbose("Transaction count is $total\n"); # Find the opening balances account and immutable accounts (if any) my $ob_account; my %account_parent_map; my %account_fullname; foreach my $account ($gnc_book->getElementsByTagName("gnc:account", 0)) { my($name_node) = $account->getElementsByTagName("act:name"); my($name_text_node) = $name_node->getFirstChild; next if (! $name_text_node); my $acct_name = &node_value($name_text_node); my $acct_guid = &node_value(($account->getElementsByTagName("act:id"))[0]->getFirstChild); my $parent_node = $account->getElementsByTagName("act:parent"); if ($parent_node) { $account_parent_map{$acct_guid} = $parent_node; my $parent_name = $account_fullname{$parent_node}; if ($parent_name eq 'Root Account') { $account_fullname{$acct_guid} = $acct_name; } else { $account_fullname{$acct_guid} = $account_fullname{$parent_node} . ':' . $acct_name; } } else { $account_fullname{$acct_guid} = $acct_name; } if (check_immutable($account_fullname{$acct_guid} )) { &verbose("Found immutable guid for $account_fullname{$acct_guid}\n", 2); $immutable_account_guid{$acct_guid} = $account_fullname{$acct_guid}; } if (&node_value($name_text_node) eq 'Opening Balances') { $ob_account = $acct_guid; } } die "Could not find Opening Balances account ID" if (! $ob_account); &verbose("Archiving..."); my $pct = 0; my $done = 0; my(@transactions) = $gnc_book->getElementsByTagName("gnc:transaction", 0) or die; foreach my $transaction (@transactions) { &do_transaction($transaction); print STDERR "."; my $new_pct = int($done++ / $total * 100); if ($new_pct != $pct) { print STDERR "$new_pct%\n"; $pct = $new_pct; } } &verbose(" done.\n"); if (! $dryrun) { if ($output_file) { if ($libxml) { $doc->toFile($output_file); } else { $doc->printToFile($output_file); } } else { if ($libxml) { $doc->toFH(\*STDOUT); } else { $doc->printToFileHandle(\*STDOUT); } } if ($archive_file) { if ($libxml) { $archive_doc->toFile($archive_file); } else { $archive_doc->printToFile($archive_file); } } } # Process a transaction. Ignore the return value. sub do_transaction { my($transaction) = @_; my $id = &get_child_value($transaction, 'trn:id'); if (&archive_transaction($id, $transaction)) { &remove_transaction($gnc_book, $pruned_count_node, $id); } else { &remove_transaction($archive_gnc_book, $archive_count_node, $id); } } # Try to archive a transaction. Returns true if it was archived. sub archive_transaction { my($id, $transaction) = @_; my($posted); # Confirm that it's in the valid date range if ($start_time || $end_time) { my($tmp1) = $transaction->getElementsByTagName("trn:date-posted", 0); my $tmp = &get_child_value($tmp1, 'ts:date'); if (! defined($posted = str2time($tmp))) { warn "$whoami: Couldn't parse timestamp \"$tmp\" in transaction $id\n"; return undef; } if ($start_time && $posted < $start_time) { &verbose("Skipping too-early transaction $id\n", 2); return undef; } if ($end_time && $posted > $end_time) { &verbose("Skipping too-late transaction $id\n", 2); return undef; } } # Confirm that all splits are reconciled or don't need to be, and that # none of the splits are in the Opening Balances account. my($splits) = $transaction->getElementsByTagName("trn:splits", 0) or die; my(@splits) = $splits->getElementsByTagName("trn:split", 0); foreach my $split (@splits) { my $split_account = &get_child_value($split, 'split:account'); if ($immutable_account_guid{$split_account}) { &verbose("Skipping immutable transaction $id\n", 2); return undef; } my $reconciled_state = &get_child_value($split, 'split:reconciled-state'); if (($reconciled_state ne 'y') && ! &split_is_all_account($split)) { &verbose("Skipping unreconciled transaction $id\n", 2); return undef; } if (&get_child_value($split, 'split:account') eq $ob_account) { &verbose("Skippping openine balances transaction $id\n", 2); return undef; } } &verbose("Archiving transaction $id.\n"); foreach my $split (@splits) { &archive_split($split); } 1; } # Archive the specified split in an archive transaction for its account, # creating an appropriate archive transaction if one does not already exist. my %archive_transactions; sub archive_split { my($split) = @_; my($acct, $rs, $rs_key, $at, $ot); # We first check if an archive transactions exists for the account ID and # reconciled state in the split we're archiving. If not, we create one as # follows: (1) clone the parent transaction of the split, give it a new id, # set its parent, set its description to "Archived transactions [start] - # [end]", "Archived transactions starting [start]", "Archived transactions # through [end]", or "Archived transactions" as appropriate, and clear its # check number if it has one; (2) remove from the clone all the splits # except the one we're archiving; (3) remove the memo from the split, if # it's set; (4) clone the split we're archiving, set its parent, give it a # new id, set its account to the opening balances account, and set its # value to the opposite of the value of the split we're archiving; (5) # increment the transaction account for the pruned file; (6) save it in # %archive_transactions. $ot = $split->getParentNode->getParentNode; $acct = &get_child_value($split, 'split:account'); $rs = &get_child_value($split, 'split:reconciled-state'); $rs_key = $reconcile_all ? 'y' : $rs; if (! ($at = $archive_transactions{$acct}{$rs_key})) { my $split_id = &get_child_value($split, 'split:id'); &verbose("New archive transaction from split ID $split_id.\n", 2); $at = $ot->cloneNode(1); &set_id($at, 'trn:id'); # Remove notes, if any my($slots) = $at->getElementsByTagName("trn:slots", 0); if ($slots and (my(@slots) = $slots->getElementsByTagName("slot", 0))) { foreach my $slot (@slots) { my($key) = $slot->getElementsByTagName("slot:key", 0); my($text) = $key->getFirstChild; if ($text eq 'notes') { $slots->removeChild($slot); } } if (! $slots->getFirstChild) { $at->removeChild($slots); } } $ot->getParentNode->appendChild($at); &set_child_value($at, 'trn:description', ($start_str ? ($end_str ? "Archived transactions $start_str - $end_str" : "Archived transactions starting $start_str") : ($end_str ? "Archived transactions through $end_str" : "Archived transactions"))); if (my($number) = $at->getElementsByTagName("trn:num", 0)) { $at->removeChild($number); } my($splits) = $at->getElementsByTagName("trn:splits", 0) or die; my(@splits) = $splits->getElementsByTagName("trn:split", 0) or die; foreach my $split2 (@splits) { my($id) = &get_child_value($split2, "split:id"); if ($id eq $split_id) { &make_reconciled($split2); if (my($memo) = $split2->getElementsByTagName("split:memo", 0)) { $split2->removeChild($memo); } $split = $split2; } else { $splits->removeChild($split2); } } my $ob_split = $split->cloneNode(1); $split->getParentNode->appendChild($ob_split); &set_id($ob_split, 'split:id'); &set_child_value($ob_split, 'split:account', $ob_account); my $val = &get_child_value($ob_split, 'split:value'); my $quan = &get_child_value($ob_split, 'split:quantity'); if ($val !~ s/^-//) { $val = '-' . $val; } if ($quan !~ s/^-//) { $quan = '-' . $quan; } &set_child_value($ob_split, 'split:value', $val); &set_child_value($ob_split, 'split:quantity', $quan); if ($libxml) { $pruned_count_node->setData(&node_value($pruned_count_node) + 1); } else { $pruned_count_node->setNodeValue(&node_value($pruned_count_node) + 1); } $archive_transactions{$acct}{$rs_key} = $at; return; } # If the archive transaction for the account ID we're reconciling *does* # exist, then add the split we're archiving to it as follows: (1) if its # date-posted is earlier than the date-posted of the new transaction, reset # it; (2) if its date-entered is earlier than the date-entered of the new # transaction, reset it; (3) if our split has its reconcile-date set, and # the archive transaction doesn't or it's earlier, reset it; (4) adjust the # amounts of the dates in the archive transaction to include the # transaction we're archiving. &verbose("Existing archive transaction.\n", 2); my($old_node, $new_node, $old_val, $new_val, $new_quan, $old_quan); my($old_node) = $at->getElementsByTagName('trn:date-posted', 0) or die; my($new_node) = $ot->getElementsByTagName('trn:date-posted', 0) or die; my($old_val) = &get_child_value($old_node, 'ts:date'); my($new_val) = &get_child_value($new_node, 'ts:date'); if ($old_val lt $new_val) { &set_child_value($old_node, 'ts:date', $new_val); } my($old_node) = $at->getElementsByTagName('trn:date-entered', 0) or die; my($new_node) = $ot->getElementsByTagName('trn:date-entered', 0) or die; my($old_val) = &get_child_value($old_node, 'ts:date'); my($new_val) = &get_child_value($new_node, 'ts:date'); if ($old_val lt $new_val) { &set_child_value($old_node, 'ts:date', $new_val); } my($this_acct_node, $ob_acct_node); my($splits) = $at->getElementsByTagName("trn:splits", 0); foreach my $split ($splits->getElementsByTagName("trn:split", 0)) { my $acct_text = &get_child_value($split, "split:account"); if ($acct_text eq $acct) { $this_acct_node = $split; } elsif ($acct_text eq $ob_account) { $ob_acct_node = $split; } } die if (! ($this_acct_node && $ob_acct_node)); if (my($new_node) = $split->getElementsByTagName("split:reconcile-date", 0)) { my($at_splits) = $at->getElementsByTagName("trn:splits", 0) or die; my($at_split) = $at_splits->getElementsByTagName("trn:split") or die; my($old_node) = $at_split->getElementsByTagName("split:reconcile-date", 0); if (! $old_node) { $new_node = $new_node->cloneNode(1); $this_acct_node->appendChild($new_node); $new_node = $new_node->cloneNode(1); $ob_acct_node->appendChild($new_node); } else { $new_val = &get_child_value($new_node, 'ts:date'); $old_val = &get_child_value($old_node, 'ts:date'); if ($old_val lt $new_val) { my($tmp) = $this_acct_node->getElementsByTagName("split:reconcile-date", 0); &set_child_value($tmp, 'ts:date', $new_val); ($tmp) = $ob_acct_node->getElementsByTagName("split:reconcile-date", 0); &set_child_value($tmp, 'ts:date', $new_val); } } } $new_val = &get_child_value($split, 'split:value'); $old_val = &get_child_value($this_acct_node, 'split:value'); $new_val =~ s,(/.*)$,,; my $denom = $1; $old_val =~ s,(/.*)$,,; if ($1 ne $denom) { die("Incompatible value denominators $1 and $denom:\n" . $split->toString . "\n" . $this_acct_node->toString . "\n"); } $new_val += $old_val; $new_val .= $denom; $new_quan = &get_child_value($split, 'split:quantity'); $old_quan = &get_child_value($this_acct_node, 'split:quantity'); $new_quan =~ s,(/.*)$,,; my $denom_quan = $1; $old_quan =~ s,(/.*)$,,; if ($1 ne $denom_quan) { die("Incompatible quantity denominators $1 and $denom_quan:\n" . $split->toString . "\n" . $this_acct_node->toString . "\n"); } $new_quan += $old_quan; $new_quan .= $denom_quan; &set_child_value($this_acct_node, 'split:value', $new_val); &set_child_value($this_acct_node, 'split:quantity', $new_quan); if ($new_val !~ s/^-//) { $new_val = '-' . $new_val; } if ($new_quan !~ s/^-//) { $new_quan = '-' . $new_quan; } &set_child_value($ob_acct_node, 'split:value', $new_val); &set_child_value($ob_acct_node, 'split:quantity', $new_quan); } # Mark a split reconciled and give it a reconcile date if it isn't already # marked reconciled. sub make_reconciled { my($split) = @_; if (&get_child_value($split, 'split:reconciled-state') eq 'y') { return; } &set_child_value($split, 'split:reconciled-state', 'y'); if ($split->getElementsByTagName('split:reconcile-date', 0)) { return; } my $reconcile_date = $doc->createElement('split:reconcile-date'); $split->appendChild($reconcile_date); my $ts_date = $doc->createElement('ts:date'); $reconcile_date->appendChild($ts_date); if ($libxml) { $ts_date->appendText(&gnc_date(time())); } else { $ts_date->addText(&gnc_date(time())); } } # Format a timestamp in gnc timestamp format sub gnc_date { my($t) = @_; strftime("%Y-%m-%d %H:%M:%S %z", localtime($t)); } # Give a node a new ID. sub set_id { my($node, $id_tag) = @_; my($new_id); $new_id = sprintf("%x", time()); while (length($new_id) < $id_length) { $new_id .= sprintf("%x", int(rand(16))); } &set_child_value($node, $id_tag, $new_id); } # Set the text value of the specified (by tag) child node of the specified node. sub set_child_value { my($node, $tag, $value) = @_; if (! $node) { die; } my($child) = $node->getElementsByTagName($tag, 0) or die "No child named $tag"; my($value_node) = $child->getFirstChild; if (! $value_node) { $value_node = $doc->createTextNode(''); $node->appendChild($value_node); } if ($libxml) { $value_node->setData($value); } else { $value_node->setNodeValue($value); } } # Get the text value of the specified (by tag) child node of the specified node. sub get_child_value { my($node, $tag) = @_; confess "Undefined node" if (! $node); my($child) = $node->getElementsByTagName($tag, 0) or die "No child named $tag"; my($value_node) = $child->getFirstChild; die("$whoami: Couldn't find tag $tag for node:\n" . $node->toString . "\n") if (! $value_node); &node_value($value_node); } # Determine if a particular split is from an account all of whose transactions # are being archived. sub split_is_all_account { return undef if (! @archive_all); my($split) = @_; my $account_id = &get_child_value($split, 'split:account'); my($account_name) = &account_name_from_id($account_id); return scalar grep($account_name =~ /$_/, @archive_all); } # Convert an account ID to its name. my %account_name_from_id; sub account_name_from_id { my($id) = @_; my($name); if (! %account_name_from_id) { my(%names, %parents); foreach my $account ($gnc_book->getElementsByTagName("gnc:account")) { my $id = &node_value(($account->getElementsByTagName("act:id", 0))[0]->getFirstChild); my($name) = $account->getElementsByTagName("act:name", 0); next if (! ($name = $name->getFirstChild)); $names{$id} = &node_value($name); my($parent) = $account->getElementsByTagName("act:parent", 0); if ($parent) { $parents{$id} = &node_value($parent->getFirstChild); } } foreach my $id (keys %names) { $account_name_from_id{$id} = $names{$id}; my $parent = $id; while ($parent = $parents{$parent}) { last if ($names{$parent} eq 'Root Account'); $account_name_from_id{$id} = $names{$parent} . ":" . $account_name_from_id{$id}; } &verbose("Converted account ID $id to $account_name_from_id{$id}.\n", 2); } } return $account_name_from_id{$id}; } # Remove transaction from indicated document and decrease indicated count node my %transactions; sub remove_transaction { my($book, $count, $id) = @_; if (! $transactions{$book}) { foreach my $transaction ($book->getElementsByTagName("gnc:transaction", 0)) { my $id = &node_value(($transaction->getElementsByTagName("trn:id", 0))[0]->getFirstChild); $transactions{$book}{$id} = $transaction; } } my($transaction) = $transactions{$book}{$id}; die "$whoami: Couldn't find transaction $id\n" if (! $transaction); $transaction->getParentNode->removeChild($transaction); if ($libxml) { $count->setData(&node_value($count) - 1); } else { $count->setNodeValue(&node_value($count) - 1); } } # Display a message to stderr if the verbose level is at least the specified # level (or 1 if no level is specified). sub verbose { my($msg, $level) = @_; $level = 1 if (! $level); if ($verbose >= $level) { print(STDERR $msg); } } sub node_value { my($node) = @_; if ($libxml) { $node->data; } else { $node->getNodeValue; } } sub check_immutable { # return 1 if the account name is among the immutables. One should # check the current account and all of the parent accounts # this is done by checking against the (global variable) # %immutable_account_name my ($act_name) = @_; while ($act_name) { my $level = split(/:/,$act_name); if ($immutable_account_name{$act_name}) { return 1; } $act_name =~ s/:[^:]+$//; last if ($level <= 1); } return 0; }