blob: 621c7a926ae7e6804de1ac148fb2391c2272a81f [file] [log] [blame]
#!/usr/bin/env python
# This is a simple script that reads in a csv file, selects a "before" and an
# "after" column, and then emits the after/before ratios as "s-curve" csv data.
import argparse
import csv
import sys
def get_data(input_file, before_column, after_column):
    """Return the s-curve points for the after/before ratio of two csv columns.

    Each row of the csv (parsed with ``csv.DictReader``, so the first line is
    the header) contributes one ratio ``after / before``.  The ratios are
    sorted ascending and each is paired with its relative position in the
    sorted sequence.

    Args:
        input_file: An iterable of csv text lines (e.g. an open text file).
        before_column: Header name of the column holding the "before" value.
        after_column: Header name of the column holding the "after" value.

    Returns:
        A list of ``(position, ratio)`` tuples ordered by ascending ratio,
        where ``position`` runs from 0.0 for the smallest ratio to 1.0 for the
        largest.  A csv with no data rows yields ``[]``; a csv with a single
        data row yields ``[(0.0, ratio)]``.

    Raises:
        KeyError: If a row lacks one of the requested columns.
        ValueError: If a selected cell cannot be converted to float.
        ZeroDivisionError: If a "before" value is zero.
    """
    ratios = []
    for row in csv.DictReader(input_file):
        before = float(row[before_column])
        after = float(row[after_column])
        ratios.append(after / before)
    ratios.sort()

    # Positions are index / (n - 1).  Clamp the divisor to 1 so that a single
    # data row maps to position 0.0 instead of dividing by zero.  float() keeps
    # the division a true division on Python 2 as well.
    last_index = float(max(len(ratios) - 1, 1))
    return [(index / last_index, ratio) for index, ratio in enumerate(ratios)]
def main():
    """Parse the command line and write the s-curve csv to stdout.

    Takes three positional arguments: the input csv file, the name of the
    "before" column and the name of the "after" column.  The output csv has
    the header ``N/total,New/Old`` followed by one row per point returned by
    ``get_data``.
    """
    p = argparse.ArgumentParser(description="""
A script that reads in a csv file, splices out selected before/after
column, and then outputs a new csv file with that data in s-curve form. An
s-curve is a graph where one sorts the output %-change and graphs the %-n
vs %-change.
NOTE: We assume that the csv has a csv header that maps to the before and
after column names passed in.
""")
    p.add_argument('input_file', type=argparse.FileType('r'))
    p.add_argument('before_column_name', type=str)
    p.add_argument('after_column_name', type=str)
    args = p.parse_args()

    # argparse.FileType opens the file while parsing and never closes it, so
    # release it here once get_data has read everything it needs.
    try:
        data = get_data(args.input_file, args.before_column_name,
                        args.after_column_name)
    finally:
        # FileType hands back sys.stdin for "-"; leave that stream alone.
        if args.input_file is not sys.stdin:
            args.input_file.close()

    w = csv.DictWriter(sys.stdout, fieldnames=['N/total', 'New/Old'])
    w.writeheader()
    for position, ratio in data:
        w.writerow({'N/total': position, 'New/Old': ratio})
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()