Saturday, January 1, 2000

statementToCategories.pl


Well, this is my first financial software, such as it is. I've been using and refining this over the last few months. It *does* work with my credit union, it may or may not work with your institutions downloadable bank statements.



It has only been tested on my own computer, which is a Ubuntu Linux computer. It requires Perl and the File::Basename and Date::Calc modules. It may or may not work on Windows.




#!/usr/bin/perl

#====================================================
#
# statementToCategories.pl
#
# This script will take in a csv formatted bank statement and output two
# csv files. The first file is a list of categories and how much was spent
# in each category. The other is the original csv, with a list of which
# cateogry each line item was categorized as.
#
# If a particular line hasn't been seen before, the script will prompt
# the user to select a category for the line. It saves the categories to
# ~/.finance_categories
#
# The format for the bank statement csv file that the script reads is:
# "Date","Check No.","Description","Debit","Credit"
#
# License and Copyright
# Copyright 2008, Richard Better <richerandbetter@gmail.com>
# This program is Free software. Use it at your own risk. I take
# no responsibility for the results of the use of this software.
#
# Licensed under the GNU GPL Version 3
# View the full license here: http://www.gnu.org/licenses/gpl.html
#
# Enjoy!

#====================================================
# Run more safely. Import needed modules
use strict;
use warnings;
use File::Basename;
use Date::Calc qw(Delta_Days);


#====================================================
# Make sure we were passed at least one file
if(scalar @ARGV < 1)
{
print "\nUSAGE: statementToCategories.pl financialData.csv [financialData.csv ... financialData.csv]\n\n";
exit(-1);
}


#====================================================
# Categories we'll be sorting into
# To customize categories, edit this array.
my @cats = (
'car-gas',
'car-insurance',
'car-payments',
'car-maintenance',
'cash',
'clothing',
'electronics',
'food-dining',
'food-groceries',
'gifts',
'home-maintenance',
'home-rent',
'medical',
'megamart',
'recreation',
'school',
'util-electric',
'util-gas',
'misc',
);

#====================================================
# Setup variables

# Global variables
# Set initial date range
my @mindate = (2050,12,31);
my @maxdate = (1901,1,1);

my %predef = (); # Already defined categories
my %values = (); # A hash of categories and amount spent
my @longdesc = (); # An array which stores the original data, plus it's categorization, so that you can figure out why categories have the values they do
my $predefvalues;
my $outdir;
my $homepath = $ENV{'HOME'};
chomp($homepath);

#====================================================
# Run the script

createOutdir();
prepCategories();
parseFiles();
saveDefs();
writeResults();

#====================================================
# Just functions below

sub createOutdir
{
# Create the output directory if it doesn't exist
$outdir = $homepath . '/Finance/';
mkdir($outdir) unless (-e $outdir);
}

sub prepCategories
{
# Prepopulate category totals
foreach(@cats){
$values{$_} = 0;
}

# Find existing categories
$predefvalues = $homepath . "/.finance_categories";

if (-e "$predefvalues")
{
open(FH,"<$predefvalues");
while(<FH>){
my @def = split(',',$_);
chomp($def[1]);
$predef{$def[0]} = $def[1];
}
close(FH);
}
}

sub parseFiles
{
# The main loop. It takes one or more files and reads them line by line, processing each and adding their contents to the totals
foreach(@ARGV){
my $file = $_;
print "Processing $file\n";

die("File $file does not exist\n") unless (-e $file);

open(COSTS,"<$file");
while(<COSTS>){

my $preline = $_;
$preline =~ s/\"//g;

my @line = split(',',$preline);
next if $line[0] =~ m/Date/; # Description line
checkDates($line[0]); # Find our date range

next if $line[3] !~ m/-/; # Payments / credits
next if $line[2] =~ m/Transfer/; # inter-account transfer
next if $line[2] =~ m/Savings\ Withdrawal/;


my $amount = $line[3];
$amount =~ s/-//g;

my $origPurchase = $line[2];
my $purchase = cleanupPurchase($line[2]);

# Flip the value to be positive

# Autocategorize
if($predef{$purchase}){
$values{$predef{$purchase}} += $amount;
push(@longdesc,"\"" . $predef{$purchase} . "\"," . $preline);
}
else
{
system 'clear';
print "Please enter a category number for the following item, then press ENTER:\n\n";

for(my $i = 0;$i<@cats;$i++){
print "($i) " . $cats[$i] . "\n";
}

print "\n$amount at '$origPurchase', short name is '$purchase' \n";
print "-----------------------\n";

# Read in the new category
my $newcat = <STDIN>;
$newcat =~ s/^.*[^\d](\d+)[^\d].*$/$1/;
if($newcat eq '')
{
$values{'misc'} += $amount;
push(@longdesc,"\"misc\"," . $preline);
}
else
{
$predef{$purchase} = $cats[$newcat];
$values{$predef{$purchase}} += $amount;
push(@longdesc,"\"" . $predef{$purchase} . "\"," . $preline);
}
}
}
close(COSTS);
}
}

sub saveDefs
{
# Write the definitions to a file, so we don't have to re-categorize everything each time
open(CATS,">$predefvalues");
foreach(sort keys %predef){
print CATS $_ . "," . $predef{$_} . "\n";
}
close(CATS);
}

sub writeResults
{
# Write the two results files
system 'clear';
print "What do you want the file named?\n";
my $outfile = <STDIN>;
chomp($outfile);
$outfile =~ s/\s+/_/g;

my $outfiledesc = $outdir . '/' . $outfile . "_desc.csv";
$outfile = $outdir . '/' . $outfile . ".csv";

open(OUTPUT,">$outfile");

my $months = getMonths();

print OUTPUT "\"Category\"\t\"Total Spent from " . join('/',@mindate) . " to " . join('/',@maxdate) . "\"\t\"Average per 30 day month ($months months total)\"\n";

foreach(sort(keys %values)){
print OUTPUT "\"$_\"\t\"" . $values{$_} . "\"\t\"". ($values{$_} / $months) ."\"\n";
}
close(OUTPUT);

open(DESC,">$outfiledesc");
foreach(sort(@longdesc)){
print DESC $_;
}
close(DESC);

print "Output is a tab separated file at $outfile\n";
}

sub getMonths
{
# Determine how much of a month is being considered here
my $days = Delta_Days(@mindate,@maxdate);
return ($days/30 + 1);
}

sub cleanupPurchase
{
# Strip un-needed info from the purchase description. This will let more similar names end up the same
my $purchase = shift;

$purchase =~ s/\s*Cr\ Crd\ Purchase\ \@\ //g;
$purchase =~ s/\s*\ \ Trace\ #[0-9]*//g;
$purchase =~ s/\s*[^\d\s]+\ \S\SUS//g;
$purchase =~ s/\t/\ /g;
$purchase =~ s/"/\ /g;
$purchase =~ s/\s*\ \S\SUS//g;
$purchase =~ s/\d*//g;
$purchase =~ s/#//g;
$purchase =~ s/[\s\@*-]+$//g;
chomp($purchase);
return $purchase;
}


sub checkDates
{
# Check if the current date is a new start or end date, and set the approriate variable if needed
my $strcurdate = shift;
my @curdate = split('/',$strcurdate);
@curdate = ($curdate[2],$curdate[0],$curdate[1]);
if(Delta_Days(@mindate,@curdate) < 0){
@mindate = @curdate;
}
if(Delta_Days(@maxdate,@curdate) > 0){
@maxdate = @curdate;
}
}