# cyon csv fixer
# 2007-01-16
# author: oedalpha@gmail.com
"""Normalise the phone-number columns of a cyon CSV file.

The first non-blank line of the input is treated as the header ("subject")
row.  For every following row the values at zero-based column indices 3, 4
and 5 are run through remove_bad_dash() and fix_phone_number(), and the row
is written back padded or truncated to the header's column count.

The script only uses constructs that behave the same on Python 2.6+ and
Python 3 (single-argument ``print(...)`` calls, ``//`` division).

Usage: <script> csvfile
"""

import sys

FILE_OUTPUT = 1      # non-zero: write the fixed rows to "<input>..._fix.csv"
REMOVE_BAD_DASH = 1  # drop one misplaced dash: 02-345-67890 -> 02-34567890
INSERT_DASH = 1      # 01234567890 -> 012-3456-7890
NUM_PER_PAGE = -1    # data rows per output file;
                     # set this to -1 (or 0) to save output in one file
                     # default: 15

# Zero-based indices of the columns that are treated as phone numbers.
_PHONE_COLUMNS = (3, 4, 5)


def remove_bad_dash(number):
    """Remove one misplaced dash from a number that contains several dashes.

    A number with no dash or exactly one dash is returned unchanged.
    Otherwise, if the first dash appears before index 2 it is removed; else,
    if the last dash is not followed by exactly four characters, the last
    dash is removed.  At most one dash is removed per call.
    """
    if not number:
        return number
    first = number.find('-')
    last = number.rfind('-')
    if first == last:
        # Zero dashes (both -1) or a single dash: nothing to repair.
        return number
    if first < 2:
        return number[:first] + number[first + 1:]
    if len(number) - last != 5:
        return number[:last] + number[last + 1:]
    return number


def fix_phone_number(number):
    """Return *number* with a leading zero and separating dashes added.

    * Empty / None input is returned unchanged.
    * A number of 7+ characters that does not start with '0' gets a '0'
      prepended.
    * When INSERT_DASH is set and the number has 7+ characters and fewer
      than two dashes:
        - a dash is inserted before the last four characters unless one is
          already there;
        - if the result has 10+ characters and starts with '0', a dash is
          inserted after the prefix: after "02" when the second character
          is '2', otherwise after the first three characters (skipped when
          a dash already sits at index 2 or 3).
    """
    if not number:
        return number

    fixed = number
    if number[0] != '0' and len(number) >= 7:
        fixed = '0' + number

    if INSERT_DASH and len(fixed) >= 7 and fixed.count('-') < 2:
        # The dash that separates the last four characters lives at index -5.
        # (The original tested only index -4, which doubled an existing
        # dash: "123-4567" -> "0123--4567".)  Index -4 is still checked so
        # inputs the original left alone stay untouched.
        if fixed[-5] != '-' and fixed[-4] != '-':
            fixed = fixed[:-4] + '-' + fixed[-4:]
        if len(fixed) >= 10:
            if fixed[0] == '0' and fixed[1] == '2' and fixed[2] != '-':
                fixed = fixed[:2] + '-' + fixed[2:]
            elif fixed[0] == '0' and fixed[2] != '-' and fixed[3] != '-':
                fixed = fixed[:3] + '-' + fixed[3:]
    return fixed


def get_csv(values, cols):
    """Join *values* into one comma-separated line of exactly *cols* fields.

    Missing trailing fields and falsy values (None, "") become empty
    fields; values beyond *cols* are dropped.  No quoting or escaping is
    applied.
    """
    fields = list(values[:cols])
    fields.extend([""] * (cols - len(fields)))
    return ",".join(field or "" for field in fields)


def _fix_row(items):
    """Return a copy of *items* with the phone columns that exist normalised."""
    fixed = list(items)
    for col in _PHONE_COLUMNS:
        if col >= len(fixed):
            # Short row: the remaining phone columns are absent.
            break
        value = fixed[col]
        if REMOVE_BAD_DASH:
            value = remove_bad_dash(value)
        fixed[col] = fix_phone_number(value)
    return fixed


def _page_name(filename, row_no):
    """Return the output file that starts at data row *row_no*, or ""."""
    if NUM_PER_PAGE <= 0:
        # Single-file mode (0 is treated like -1 to avoid dividing by zero).
        if row_no == 1:
            return "%s_fix.csv" % filename
        return ""
    if (row_no - 1) % NUM_PER_PAGE == 0:
        return "%s_%03d_fix.csv" % (filename, (row_no - 1) // NUM_PER_PAGE)
    return ""


def fix_cyon_csv(filename):
    """Fix the phone columns of the CSV file *filename*.

    The first non-blank line is the header; it is copied to the top of each
    output file.  Blank lines are skipped.  Rows are split on ',' with no
    quote handling.

    With FILE_OUTPUT set, each input row is echoed to stdout and the fixed
    rows are written to "<filename>_fix.csv", or to
    "<filename>_NNN_fix.csv" pages of NUM_PER_PAGE rows when NUM_PER_PAGE
    is positive.  Without FILE_OUTPUT the page name and header are printed
    before every row instead.  In both modes each fixed row is printed to
    stdout prefixed with "--> ".

    Raises IOError/OSError if the input cannot be read or an output file
    cannot be written.
    """
    with open(filename) as source:
        lines = source.readlines()

    header = ""
    num_of_cols = 0
    row_no = 0  # 0 until the header is consumed, then the 1-based data row
    out = None
    try:
        for raw in lines:
            line = raw.strip()
            if not line:
                # Blank (often trailing) lines carry no record.
                continue
            items = line.split(',')
            if FILE_OUTPUT:
                print(" ".join(items))

            if row_no == 0:
                header = line
                num_of_cols = line.count(',') + 1
                row_no = 1
                continue

            page = _page_name(filename, row_no)
            if FILE_OUTPUT:
                if page:
                    if out:
                        out.close()
                    out = open(page, 'w')
                    out.write(header + '\n')
            else:
                # NOTE(review): as in the original, this prints on every
                # row, not only at a page start -- confirm that is intended.
                print(page)
                print(header)

            output = get_csv(_fix_row(items), num_of_cols)
            if FILE_OUTPUT:
                out.write(output + "\n")
            print("--> " + output)
            row_no += 1
    finally:
        if out:
            out.close()


def main():
    """Command-line entry point: fix the CSV file named by the first argument."""
    if len(sys.argv) >= 2:
        fix_cyon_csv(sys.argv[1])
    else:
        print('Usage: %s csvfile' % sys.argv[0])


if __name__ == '__main__':
    main()