#!perl -w # Converts the csv file to the sql dump format, for easier comparison. # Note well that this messes with timestamps (in sub datefix), # destroying the higher resolution timestamp information in the csv. # Copyright 2015 Ken Takusagawa # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . &head(); while(<>){ if(/^"\d+",("([^"]|\\")*",){5}/){ &process($l); $l=$_; } else { $l.=$_; } } &process($l); &tail(); print STDERR "count $count\n"; sub process { my $s=shift; return unless defined $s; $count++; my @d; die unless (@d = $s =~ /^"(\d+)","((?:[^"]|\\")*?)","((?:[^"]|\\")*?)","((?:[^"]|\\")*?)","((?:[^"]|\\")*?)","((?:[^"]|\\")*?)","((?:[^"]|\\")*?)","(.*)"$/s); #for(0..$#d){ print "item $_ $d[$_]\n"; } print "\n"; die unless @d==8; for(@d){ s/\\(.)/$1/gs; s/'/''/gs; s/\\/\\\\/gs; } &do_work(@d); }; sub head { print << 'EOF'; -- -- PostgreSQL database dump -- SET statement_timeout = 0; SET client_encoding = 'SQL_ASCII'; SET standard_conforming_strings = off; SET check_function_bodies = false; SET client_min_messages = warning; SET escape_string_warning = off; SET search_path = public, pg_catalog; SET default_tablespace = ''; SET default_with_oids = false; -- -- Name: cable; Type: TABLE; Schema: public; Owner: -; Tablespace: -- CREATE TABLE cable ( id integer NOT NULL, date timestamp without time zone, refid character varying, classification character varying, origin character varying, destination text, header text, content text ); -- -- Name: cable_id_seq; Type: SEQUENCE; Schema: public; Owner: - -- CREATE SEQUENCE cable_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; -- -- Name: cable_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - -- ALTER SEQUENCE cable_id_seq OWNED BY cable.id; -- -- Name: cable_id_seq; Type: SEQUENCE SET; Schema: public; Owner: - -- SELECT pg_catalog.setval('cable_id_seq', 251287, true); -- -- Name: id; Type: DEFAULT; Schema: public; Owner: - -- ALTER TABLE cable ALTER COLUMN id SET DEFAULT nextval('cable_id_seq'::regclass); -- -- Data for Name: cable; Type: TABLE DATA; Schema: public; Owner: - -- EOF } sub tail { print << 'EOF'; -- -- Name: cable_pkey; Type: CONSTRAINT; Schema: public; Owner: -; Tablespace: -- ALTER TABLE ONLY cable ADD CONSTRAINT cable_pkey PRIMARY KEY (id); -- -- PostgreSQL database dump complete -- EOF } sub do_work { die unless @_==8; die if defined $id{$_[0]}; $id{$_[0]}++; die if $_[0]>=1000000; die if $_[0]<0; print "INSERT INTO cable (id, date, refid, classification, origin, destination, header, content) VALUES ($_[0], '",&datefix($_[1]),"', '$_[2]', '$_[4]', '$_[3]', '$_[5]', '$_[6]', '$_[7]\n');\n"; } sub datefix { for($_[0]){ if(m,(\d+)/(\d+)/(\d+),){ $_[0] = sprintf("%d-%02d-%02d 00:00:00",$3,$1,$2); } } $_[0]; }