#!/usr/bin/perl =pod =head1 NAME geneal2gedcom.pl - convert McBeath genealogy format to GEDCOM =head1 SYNOPSIS geneal2gedcom.pl [B<-->[B]B] [B<-->[B]B] [B<--output-file> I] [I] =head1 DESCRIPTION B B) if you use this script.> I would really like to know if anyone else finds it useful. This script converts a data file in Jim McBeath's genealogy data file format, used by his "jimmc.roots" and "geneal" packages (see L), into GEDCOM format (see L). This script requires the Gedcom and Geo::StreetAddress::US Perl modules, which you will probably need to install from CPAN (see L). If you don't know how to install a module from CPAN, see L; please don't contact me for help with this. I wrote this specifically to convert my own data file into GEDCOM, so there's no reason to believe it's completely generalized. I only know for certain that it works with the tags that I used in my data file and the format that I used for those tags. If you need to modify this script to make it support more tags or enhance its support for existing tags, please send me patches so I can incorporate them into my version for other people to use! =head1 OPTIONS Specify the input file on the command line or feed the input to stdin. The following options are available: =over 4 =item --strict, --nostrict Enable (default) or disable strict checking. When strict checking is enabled, an unrecognized tag in the input causes the script to exit with an error. When strict checking is disabled, a warning is printed and the script ignored the unrecognized tag =item --strict-addresses, --nostrict-addresses Enable (default) or disable strict address checking. When strict address checking is enabled, and unparseable address in the input causes the script to exit with an error. When strict address checking is disabled, a warning is printed, and the script ignores the unparseable address. =item --output-file file-name Send the gedcom output to the specified file. If not specified, output is sent to stdout. =back =head1 AUTHOR Jonathan Kamens L =head1 COPYRIGHT Copyright (C) 2009 Jonathan Kamens. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See L. =head1 VERSION $Id: geneal2gedcom.pl,v 1.8 2012/12/31 16:28:54 jik Exp $ =cut use strict; use warnings; use File::Basename; use File::Temp qw(tempfile); use Gedcom 1.17; use Geo::StreetAddress::US; use Getopt::Long; $/ = "\n\n"; my $whoami = basename $0; my $usage = "Usage: $whoami [--[no]strict] [--[no]strict-addresses [--output-file file-name] [input-file-name]\n"; my $strict = 1; my $strict_addresses = 1; my $output_file; die $usage if (! GetOptions("strict!" => \$strict, "strict-addresses!" => \$strict_addresses, "output-file=s" => \$output_file)); my $ged = Gedcom->new; my (%link_child_to_family, %link_spouse_to_family, %individual_records, %link_address_to_individual, %ignore_address_link); while (<>) { next if (! s/^(\d+).*\n//); my $record_identifier = $1; next if (! s/^T:(.)\n?//m); my $record_type = $1; my(@fields); for my $field_string (split(/\n\b/)) { my($tag, $value) = split(/:/, $field_string, 2); next if (! $tag); $value =~ s/^\+//gm; push(@fields, [$tag, $value]); } if ($record_type eq "I") { my $record = $ged->add_individual; my($first_name, $middle_name, $last_name, $nick_name, $name_prefix, $birth, $death, $burial, @extra_names); for my $ref (@fields) { my($tag, $value) = @{$ref}; if ($tag eq "LN") { $last_name = $value; } elsif ($tag eq "LN.A") { push(@extra_names, $value); } elsif ($tag eq "FN") { $first_name = $value; } elsif ($tag eq "MN") { $middle_name = $value; } elsif ($tag eq "NN") { $nick_name = $value; } elsif ($tag eq "PN") { $name_prefix = $value; } elsif ($tag eq "SX") { $record->add("SEX", $value); } elsif ($tag eq "B") { $birth = $record->add("BIRT") if (! $birth); $birth->add("DATE", $value); } elsif ($tag eq "BP") { $birth = $record->add("BIRT") if (! $birth); $birth->add("PLAC", $value); } elsif ($tag eq "D") { $death = $record->add("DEAT") if (! $death); $death->add("DATE", $value); } elsif ($tag eq "DP") { $death = $record->add("DEAT") if (! $death); $death->add("PLAC", $value); } elsif ($tag eq "BUR") { $burial = $record->add("BURI") if (! $burial); $burial->add("PLAC", $value); } elsif ($tag eq "P") { push(@{$link_child_to_family{$value}}, $record_identifier); } elsif ($tag eq "S") { $value =~ s/^\s+//; $value =~ s/\s+$//; for my $family (split(/[,\s]+/, $value)) { push(@{$link_spouse_to_family{$family}}, $record_identifier); } } elsif ($tag eq "ADDR") { # Skip current addresses push(@{$link_address_to_individual{$value}}, $record_identifier); } elsif ($tag eq "TNOTE" or $tag eq "GEN" or $tag eq "TGEN" or $tag eq "COM") { &add_multiline($record, "NOTE", $value); } elsif ($tag eq "EDITED") { $record->add("CHAN")->add("DATE", $value); } elsif ($strict) { die "Bad field: $tag:$value in $_"; } else { warn "Ignoring unrecognized field $tag:$value in $_"; } } if ($first_name || $middle_name || $last_name || $nick_name || $name_prefix || @extra_names) { my($given_name, $given_name2, $name); if (@extra_names && ! $last_name) { $last_name = shift @extra_names; } if ($first_name || $middle_name) { $given_name = join(" ", ($first_name || "???"), ($middle_name || ())); $given_name2 = join(", ", ($first_name || "???"), ($middle_name || ())); } $name = $record->add("NAME", join(" ", ($name_prefix || ()), ($given_name || ()), ($nick_name ? "($nick_name)" : ()), ($last_name ? "/$last_name/" : ()))); $name->add("NPFX", $name_prefix) if ($name_prefix); $name->add("GIVN", $given_name2) if ($given_name); $name->add("NICK", $nick_name) if ($nick_name); $name->add("SURN", $last_name) if ($last_name); for my $extra (@extra_names) { $record->add("NAME", join(" ", ($given_name || ()), "/$extra/")); } } $individual_records{$record_identifier} = $record; } elsif ($record_type eq "F") { my $record = $ged->add_family; my($married, $divorced); for my $ref (@fields) { my($tag, $value) = @{$ref}; if ($tag eq "N") { # Don't know what to do with this, so skip it next; } elsif ($tag eq "H") { die "Bad xref to individual $value" if (! $individual_records{$value}); $record->add("HUSB", $individual_records{$value}); if (! grep($_ == $value, @{$link_spouse_to_family{$record_identifier}})) { die "Family $record_identifier links husband to $value but not the reverse\n"; } $individual_records{$value}->add("FAMS", $record); } elsif ($tag eq "W") { die "Bad xref to individual $value" if (! $individual_records{$value}); $record->add("WIFE", $individual_records{$value}); if (! grep($_ == $value, @{$link_spouse_to_family{$record_identifier}})) { die "Family $record_identifier links wife to $value but not the reverse\n"; } $individual_records{$value}->add("FAMS", $record); } elsif ($tag eq "M") { $married = $record->add("MARR") if (! $married); $married->add("DATE", $value); } elsif ($tag eq "MP") { $married = $record->add("MARR") if (! $married); $married->add("PLAC", $value); } elsif ($tag eq "MD") { $divorced = $record->add("DIV") if (! $divorced); $divorced->add("DATE", $value); } elsif ($tag eq "C") { $value =~ s/^\s+//; $value =~ s/\s+$//; my(@children) = split(/[,\s]+/, $value); my(%children); map($children{$_} = 1, @children); foreach my $child (@{$link_child_to_family{$record_identifier}}) { if (! delete $children{$child}) { die "Individual $child points at parent $record_identifier but not the reverse\n"; } $record->add("CHIL", $individual_records{$child}); $individual_records{$child}->add("FAMC", $record); } if (%children) { map(warn("Family $record_identifier points at child $_ but not the reverse\n"), keys %children); exit(1); } } elsif ($tag eq "ADDR") { push(@{$ignore_address_link{$value}}, $record_identifier); next; } elsif ($tag eq "TNOTE" or $tag eq "GEN" or $tag eq "TGEN" or $tag eq "COM") { &add_multiline($record, "NOTE", $value); } elsif ($tag eq "EDITED") { $record->add("CHAN")->add("DATE", $value); } elsif ($strict) { die "Bad field: $tag:$value in $_"; } else { warn "Ignoring unrecognized field $tag:$value in $_"; } } } elsif ($record_type eq "A") { my(@who, $address, $phone); for my $ref (@fields) { my($tag, $value) = @{$ref}; if ($tag eq "WHO") { $value =~ s/^\s+//; $value =~ s/\s+$//; @who = split(/[,\s]+/, $value); } elsif ($tag eq "ADDR") { $address = $value; } elsif ($tag eq "PHONE") { $phone = $value; } elsif ($tag eq "EDITED") { next; } elsif ($strict) { die "Bad field: $tag:$value in $_"; } else { warn "Ignoring unrecognized field $tag:$value in $_"; } } next if (! ($address || $phone)); my(%to_link, %to_ignore); map($to_link{$_} = 1, @{$link_address_to_individual{$record_identifier}}); map($to_ignore{$_} = 1, @{$ignore_address_link{$record_identifier}}); map { if (! ($to_link{$_} or $to_ignore{$_})) { die "Address $record_identifier links to $_ but not the reverse\n"; } } @who; for my $id (@who) { next if (delete $to_ignore{$id}); delete $to_link{$id}; my $resi = $individual_records{$id}->add("RESI"); if ($address) { $address =~ s/^\s+//; $address =~ s/\s+$//; my(%fields); if ($address =~ s/\s+USA\s*$//) { $fields{"CTRY"} = "US"; } if ($address =~ s/[,\s]+(Apartment\s*[^\s,]+)//) { $fields{"ADR2"} = $1; } if ($address =~ /(.*),\s*([A-Z]{2})$/) { $fields{"CITY"} = $1; $fields{"STAE"} = $2; } else { my $ref = Geo::StreetAddress::US->parse_location($address); if (! $ref) { if ($strict_addresses) { die "Couldn't parse address: $address"; } else { warn "Ignoring unparseable address: $address"; } } else { $fields{"ADR1"} = &concat($ref->{number}, $ref->{prefix}, $ref->{street}, $ref->{type}, $ref->{suffix}); $fields{"CITY"} = $ref->{city}; $fields{"STAE"} = $ref->{state}; $fields{"POST"} = $ref->{zip}; } } if (%fields) { my $addr = $resi->add("ADDR"); map($addr->add($_, $fields{$_}), keys %fields); } } if ($phone) { $resi->add("PHON", $phone); } } if (%to_link || %to_ignore) { map { warn "Record $_ links to address $record_identifier but not the reverse\n"; } (keys %to_link, keys %to_ignore); exit 1; } } else { die "Unrecognized record type T:$record_type\n$_"; } } $ged->add_trailer; my($fh, $temp); if (! $output_file) { ($fh, $output_file) = tempfile() or die; $temp = 1; } else { open($fh, "+>", $output_file) or die; } $ged->order; $ged->write($output_file); if ($temp) { while (<$fh>) { print; } unlink($output_file); } close($fh); sub concat { my(@values) = @_; @values = grep($_, @values); join(" ", @values); } sub add_multiline { my($record, $tag, $value) = @_; $value =~ s/^\s+//; $value =~ s/(.*)\r?\n?//; my $new = $record->add($tag, $1); while ($value) { $value =~ s/(.*)\r?\n?//; $new->add("CONT", $1); } }