#!/usr/bin/python
import scipy, pylab

# This programlet is intended to illustrate some useful features of the
# Python language: conditionals, loops, lists, dictionaries, text file I/O,
# basic string manipulations, functions---as well as some capabilities of
# the SciPy and matplotlib/pylab libraries.  We read in a data file, take
# the parts of the data we need and plot them.  This job could be done in
# less space, but illustrating how these features work might be a useful
# warm-up for more complicated tasks.

def parse_line(line):
    """
    Convert one whitespace-separated row of HadCRUT text into a dictionary.

    The first field becomes the integer "year", the last field becomes the
    floating-point "average", and every field in between is converted to a
    float and stored, in order, in the "anomalies" list.

    Raises IndexError if the line holds no fields at all, and ValueError
    if a field cannot be converted to a number.
    """
    fields = line.split()
    # pick the three parts apart first, then convert them in the order
    # year -> average -> monthly values
    first_field, monthly_fields, last_field = (
        fields[0], fields[1:-1], fields[-1])
    return {
        "year": int(first_field),
        "average": float(last_field),
        "anomalies": list(map(float, monthly_fields)),
    }

def parse_file(file_name):
    """
    Reads HadCRUT data line-by-line and makes a dictionary of dictionaries,
    keyed by the year of each line.

    Blank lines and lines whose first non-blank character is "#" are
    skipped.  Of the remaining lines only every other one (the 1st, 3rd,
    5th, ...) is handed to parse_line(); the lines in between are ignored.
    A line that parse_line() cannot convert is skipped, but it still counts
    towards the alternation.

    Returns a dictionary mapping each year (int) to the dictionary that
    parse_line() produced for that year.  Errors raised while opening or
    reading the file are propagated to the caller.
    """
    results_by_year = {}         # declare dictionary to hold results
    wanted = True                # True on the lines we want to parse
    # "with" guarantees the file is closed even if reading fails part-way
    with open(file_name, "r") as f:
        for line in f:
            line = line.strip()  # remove excess whitespace
            if not line or line.startswith("#"):
                continue         # ignore blank lines, comments
            if wanted:           # read every other line
                try:
                    new_row = parse_line(line)
                except (ValueError, IndexError):
                    # best effort: a row that is not numeric data is
                    # skipped; other errors are real bugs and propagate
                    pass
                else:
                    results_by_year[new_row["year"]] = new_row
            wanted = not wanted
    return results_by_year

def list_all_anomalies(results_by_year):
    """
    Takes HadCRUT data in a dictionary-of-dictionaries and extracts the
    monthly temperature anomalies as a single list, suitable for plotting.
    Also return a second list with time values to aid plotting.

    results_by_year maps each year to a dictionary whose "anomalies" entry
    is the list of monthly values for that year.  Years are processed in
    ascending order.

    Returns the tuple (all_anomalies, all_time_points).  Both lists have
    the same length; the time point of the k-th month (counting from 0) of
    a year is the float  year + k/12.  An empty input gives two empty
    lists.
    """
    all_anomalies = []
    all_time_points = []
    # sorted() works on a dictionary directly in both Python 2 and 3,
    # whereas the object returned by keys() has no sort() in Python 3
    for year in sorted(results_by_year):
        new_anomalies = results_by_year[year]["anomalies"]
        all_anomalies.extend(new_anomalies)
        # for present purposes, we might as well treat each month as an
        # even 1/12-th of a year; one time point is made per anomaly so
        # the two lists stay aligned even for an incomplete year
        all_time_points.extend(
            year + month / 12.0 for month in range(len(new_anomalies)))
    return all_anomalies, all_time_points

# execution starts here

# read and parse the data
results_by_year = parse_file("HadCRUT4-gl.dat")
all_anomalies, all_time_points = list_all_anomalies(results_by_year)

# with no data the indexing below would fail with an obscure IndexError,
# so stop here with a readable message instead
if not all_time_points:
    raise SystemExit("no temperature data found in HadCRUT4-gl.dat")

# make labels for the time axis by decades, starting at the first time
# point (pylab re-exports NumPy's arange, so no extra import is needed)
decades = pylab.arange(all_time_points[0], all_time_points[-1], 10)
decade_labels = [str(int(decade)) for decade in decades]

# make a plot
pylab.figure()
pylab.plot(all_time_points, all_anomalies)
pylab.xticks(decades, decade_labels)
# raw string: the backslash belongs to the TeX command \circ and must
# not be read as a Python escape sequence
pylab.ylabel(r"Temperature Anomaly ($^\circ$C)")
pylab.xlabel("Year")
pylab.show()