#!/usr/bin/env python
"""Concatenate csv files.
"""

import csv
import sys

import CommandLineApp


class csvcat(CommandLineApp.CommandLineApp):
    """Concatenate comma separated value files.
    """

    # NOTE(review): the docstrings of this class, of the optionHandler_*
    # methods and of main() are presumably consumed by CommandLineApp to
    # build the help output -- confirm before rewording any of them.

    EXAMPLES_DESCRIPTION = '''
To concatenate 2 files, including all columns and headers:

  $ csvcat file1.csv file2.csv

To concatenate 2 files, skipping the headers in the second file:

  $ csvcat --skip-headers file1.csv file2.csv

To concatenate 2 files, including only the first and third columns:

  $ csvcat --col 0,2 file1.csv file2.csv
'''

    def showVerboseHelp(self):
        """Show the base class's verbose help, then the csv dialect names."""
        CommandLineApp.CommandLineApp.showVerboseHelp(self)
        # write() is used instead of the print statement so that this module
        # parses under both Python 2 and Python 3; the emitted text is the
        # same: blank line, heading, blank line, one tab-indented dialect
        # name per line, trailing blank line.
        out = sys.stdout.write
        out('\n')
        out('OUTPUT DIALECTS:\n')
        out('\n')
        for name in csv.list_dialects():
            out('\t%s\n' % name)
        out('\n')

    # When True, only one header row is written for the whole output.
    skip_headers = False

    def optionHandler_skip_headers(self):
        """Treat the first line of each file as a header,
        and only include one copy in the output.
        """
        self.skip_headers = True

    # Name of the csv dialect handed to csv.writer() by getWriter().
    dialect = "excel"

    def optionHandler_dialect(self, name):
        """Specify the output dialect name.
        Defaults to "excel".
        """
        self.dialect = name

    optionHandler_d = optionHandler_dialect

    # Class-level default meaning "print every column".  This list is shared
    # by all instances, so it must never be mutated in place.
    columns = []

    def optionHandler_columns(self, *col):
        """Limit the output to the specified columns.
        Columns are identified by number, starting with 0.
        """
        # Rebind a new list on the instance rather than extend()-ing the
        # shared class attribute, which would leak the selection into the
        # class default and therefore into every other csvcat instance.
        # int() raises ValueError for a value that is not an integer.
        self.columns = list(self.columns) + [int(c) for c in col]

    optionHandler_c = optionHandler_columns

    def getPrintableColumns(self, row):
        """Return only the part of the row which should be printed.

        With no column selection the row is returned unchanged; otherwise a
        tuple of the selected values, in the order the columns were given.
        Raises IndexError if the row has no value at a selected index.
        """
        if not self.columns:
            return row
        # Extract the column values, in the order specified.
        return tuple(row[c] for c in self.columns)

    def getWriter(self):
        """Return a csv writer on sys.stdout using the selected dialect."""
        return csv.writer(sys.stdout, dialect=self.dialect)

    def main(self, *filename):
        """
        The names of comma separated value files, such as might be
        exported from a spreadsheet or database program.
        """
        headers_written = False
        writer = self.getWriter()

        # Process the files in order.
        for name in filename:
            # The with-block closes the file even if reading or writing fails.
            with open(name, 'rt') as f:
                reader = csv.reader(f)

                if self.skip_headers:
                    # next() with a default returns None for an empty file
                    # instead of raising StopIteration and aborting the run.
                    headers = next(reader, None)
                    if headers is None:
                        # Empty file: no header to write or skip, no rows.
                        continue
                    if not headers_written:
                        # This row must include the headers for the output.
                        writer.writerow(self.getPrintableColumns(headers))
                        headers_written = True
                    # Otherwise headers were already written, so the first
                    # row of this file is dropped.

                # Process the rest of the file.
                for row in reader:
                    writer.writerow(self.getPrintableColumns(row))


if __name__ == '__main__':
    csvcat().run()