import matplotlib.pyplot as plt
import numpy as np
import random


# Global matplotlib styling applied to every figure created by this script.
plt.rcParams.update({
    'lines.linewidth': 4,       # line width
    'axes.titlesize': 16,       # font size for titles
    'axes.labelsize': 16,       # font size for axis labels
    'xtick.labelsize': 12,      # size of the numbers on the x-axis
    'ytick.labelsize': 12,      # size of the numbers on the y-axis
    'xtick.major.size': 7,      # size of the major ticks on the x-axis
    'ytick.major.size': 7,      # size of the major ticks on the y-axis
    'lines.markersize': 10,     # marker size
    'legend.numpoints': 1,      # number of marker points shown per legend entry
})


def get_data(filename):
    """Read a two-column, whitespace-separated data file.

    The first line of the file is treated as a header and discarded. Each
    remaining non-blank line must contain exactly two numeric fields: the
    y value first, followed by the x value.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(x_vals, y_vals)`` of numpy arrays. Note the order: x is
        returned first even though it is the second column in the file.

    Raises:
        ValueError: If a non-blank line does not hold exactly two fields
            that can be parsed as floats.
    """
    x_vals = []
    y_vals = []
    with open(filename, 'r') as data_file:
        data_file.readline()  # discard header
        for line in data_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. an empty line at the end of
                # the file) instead of failing on the unpacking below.
                continue
            y, x = fields
            y_vals.append(float(y))
            x_vals.append(float(x))
    return np.array(x_vals), np.array(y_vals)


############################################################
# Plot spring data and fit a line to it
############################################################


# x_vals, y_vals = get_data('springData.txt')
# x_vals = x_vals * 9.81 # force due to gravity
# plt.figure()
# plt.plot(x_vals, y_vals, 'bo', label='Measured displacements')
# plt.xlabel('|Force| (Newtons)')
# plt.ylabel('Distance (meters)')
# plt.ylim(0, 0.5)
# plt.title('Measured Displacement of Spring')

# i1, i2 = random.sample(range(len(x_vals)), 2)
# x1, y1 = x_vals[i1], y_vals[i1]
# x2, y2 = x_vals[i2], y_vals[i2]
# k = (y2 - y1) / (x2 - x1)
# b = y1 - k * x1
# y_pred = k * x_vals + b
# plt.plot(x_vals, y_pred, 'r', label=f'Linear fit, k = {k:.5f}')
# plt.legend()

########################################

# a, b = np.polyfit(x_vals, y_vals, 1)
# y_pred = a * x_vals + b
# k = 1 / a
# print(f'a = {a:.5f}, b = {b:.5f}')
# plt.plot(x_vals, y_pred, 'r', label=f'Linear fit, k = {k:.5f}')
# plt.legend()

# model = np.polyfit(x_vals, y_vals, 1)
# y_pred = np.polyval(model, x_vals)
# k = 1 / model[0]
# print(f'a = {model[0]:.5f}, b = {model[1]:.5f}')
# plt.plot(x_vals, y_pred, 'r', label=f'Linear fit, k = {k:.5f}')
# plt.legend()


############################################################
# Evaluate different degree models for mystery data
############################################################


# x_vals, y_vals = get_data('mysteryData.txt')
# plt.figure()
# plt.plot(x_vals, y_vals, 'o', label='Data Points')
# plt.ylim(-100, 350)
# plt.title('Mystery Data')

# model1 = np.polyfit(x_vals, y_vals, 1)
# plt.plot(x_vals, np.polyval(model1, x_vals), label='Linear Model')
# model2 = np.polyfit(x_vals, y_vals, 2)
# plt.plot(x_vals, np.polyval(model2, x_vals), 'r--', label='Quadratic Model')
# plt.legend()


def mean_squared_error(data, predicted):
    """Return the mean of the squared differences between two sequences.

    Args:
        data: Indexable sequence of observed values.
        predicted: Indexable sequence of predicted values; it is indexed
            at every position of ``data``.

    Returns:
        The sum of ``(data[i] - predicted[i])**2`` over all positions of
        ``data``, divided by ``len(data)``.
    """
    total = 0.0
    # Accumulate left to right in a plain loop so the floating-point
    # result does not depend on any alternative summation algorithm.
    for idx, actual in enumerate(data):
        diff = actual - predicted[idx]
        total += diff**2
    return total / len(data)


# y_pred = np.polyval(model1, x_vals)
# print('Mean squared error for linear model = ',
#       mean_squared_error(y_vals, y_pred))

# y_pred = np.polyval(model2, x_vals)
# print('Mean squared error for quadratic model =',
#       mean_squared_error(y_vals, y_pred))


def r_squared(observed, predicted):
    """Return the coefficient of determination (R-squared) of a prediction.

    Computed as ``1 - MSE / variance(observed)``, where MSE is the mean of
    the squared differences between ``predicted`` and ``observed``.

    Args:
        observed: Sequence or array of measured values.
        predicted: Sequence or array of model predictions, matching
            ``observed`` in shape.

    Returns:
        The R-squared value. When ``observed`` has zero variance the ratio
        is undefined and the result is not a finite number.
    """
    # Coerce to arrays so plain Python lists (e.g. the lists returned by
    # split_data) are accepted for both arguments; list - list would
    # otherwise raise TypeError.
    observed = np.asarray(observed, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    error = ((predicted - observed)**2).sum()
    mean_error = error / len(observed)
    return 1 - mean_error / np.var(observed)


def gen_fits(x_vals, y_vals, degrees):
    """Fit one polynomial per requested degree and print its coefficients.

    Args:
        x_vals: x coordinates of the data points.
        y_vals: y coordinates of the data points.
        degrees: Iterable of polynomial degrees to fit.

    Returns:
        A list of coefficient arrays as returned by ``np.polyfit`` (highest
        power first), in the same order as ``degrees``.
    """
    models = []
    for d in degrees:
        model = np.polyfit(x_vals, y_vals, d)
        models.append(model)
        # round() on a NumPy scalar yields another NumPy scalar, whose repr
        # inside a list is 'np.float64(...)' on NumPy >= 2. Convert to plain
        # floats so the printed coefficients read the same on every version.
        print([float(round(coeff, 2)) for coeff in model])
    return models


def evaluate_fits(models, degrees, x_vals, y_vals, title):
    """Plot the data together with each model's predictions.

    Opens a new figure, draws the raw points, then draws one curve per
    model whose legend entry shows the model's degree and its R-squared
    value on the supplied data.

    Args:
        models: Sequence of polynomial coefficient arrays accepted by
            ``np.polyval``.
        degrees: Sequence giving the degree label for each entry of
            ``models`` (indexed by the model's position).
        x_vals: x coordinates at which the models are evaluated.
        y_vals: Observed y values used for plotting and scoring.
        title: Title of the figure.
    """
    plt.figure()
    plt.plot(x_vals, y_vals, 'o', label='Data')
    for idx, coeffs in enumerate(models):
        fitted = np.polyval(coeffs, x_vals)
        r2 = r_squared(y_vals, fitted)
        degree = degrees[idx]
        plt.plot(x_vals, fitted, label=f'Fit of degree {degree}, R2 = {r2:.5f}')
    plt.legend()
    plt.title(title)


# x_vals, y_vals = get_data('mysteryData.txt')
# degrees = (1, 2)
# # degrees = (2, 4, 8, 16)
# models = gen_fits(x_vals, y_vals, degrees)
# evaluate_fits(models, degrees, x_vals, y_vals, 'Mystery Data')


# x = [-1, 0, 1, 2]
# y = [2, -5, -8, 9]
# model = np.polyfit(x, y, 1)
# print(r_squared(y, np.polyval(model, x)))
# model = np.polyfit(x, y, 2)
# print(r_squared(y, np.polyval(model, x)))
# model = np.polyfit(x, y, 3)
# print(r_squared(y, np.polyval(model, x)))
# model = np.polyfit(x, y, 4)
# print(r_squared(y, np.polyval(model, x)))
# model = np.polyfit(x, y, 5)
# print(r_squared(y, np.polyval(model, x)))


############################################################
# Run trained model on validation data set
############################################################


def gen_noisy_parabolic_data(a, b, c, x_vals, filename, noise_sd=35):
    """Write noisy samples of the parabola ``a*x**2 + b*x + c`` to a file.

    One Gaussian noise value is drawn from the ``random`` module for every
    x, in the order of ``x_vals``; seed ``random`` beforehand for
    reproducible output. The file starts with a header line followed by
    one ``"<y> <x>"`` line per sample (y first, then x).

    Args:
        a: Coefficient of the quadratic term.
        b: Coefficient of the linear term.
        c: Constant term.
        x_vals: Sequence of x values to sample at.
        filename: Path of the file to create or overwrite.
        noise_sd: Standard deviation of the zero-mean Gaussian noise added
            to each theoretical value. Defaults to 35.
    """
    y_vals = []
    for x in x_vals:
        theoretical_val = a*x**2 + b*x + c
        y_vals.append(theoretical_val + random.gauss(0, noise_sd))
    with open(filename, 'w') as f:
        f.write('y        x\n')
        for y, x in zip(y_vals, x_vals):
            f.write(f'{y!s} {x!s}\n')


# random.seed(0)
# x_vals = range(-10, 11, 1)
# a, b, c = 3, 0, 0
# gen_noisy_parabolic_data(a, b, c, x_vals, 'parabola1.txt')
# gen_noisy_parabolic_data(a, b, c, x_vals, 'parabola2.txt')

# degrees = (1, 2, 16)

# x_vals1, y_vals1 = get_data('parabola1.txt')
# models1 = gen_fits(x_vals1, y_vals1, degrees)
# evaluate_fits(models1, degrees, x_vals1, y_vals1, 'Parabola 1')

# x_vals2, y_vals2 = get_data('parabola2.txt')
# models2 = gen_fits(x_vals2, y_vals2, degrees)
# evaluate_fits(models2, degrees, x_vals2, y_vals2, 'Parabola 2')

# evaluate_fits(models1, degrees, x_vals2, y_vals2, 'Apply Parabola 1 Model to Parabola 2')


def split_data(x_vals, y_vals, frac_training, plot=True):
    """Randomly split paired data into training and validation sets.

    ``int(len(x_vals) * frac_training)`` indices are drawn with
    ``random.sample`` for the training set; every other index goes to the
    validation set. Both sets keep the points in their original order.

    Args:
        x_vals: Indexable sequence of x values.
        y_vals: Indexable sequence of y values, paired with ``x_vals``.
        frac_training: Fraction of the points to use for training.
        plot: When true, draw both subsets on a new figure.

    Returns:
        ``((train_x, train_y), (validate_x, validate_y))`` where each
        element is a list.

    Raises:
        ValueError: Propagated from ``random.sample`` when the requested
            training size is negative or larger than the data set.
    """
    train_size = int(len(x_vals) * frac_training)
    # Use a set so the membership test in the loop below is O(1) per point
    # instead of a linear scan through a list.
    train_indices = set(random.sample(range(len(x_vals)), train_size))
    train_x, train_y, validate_x, validate_y = [], [], [], []
    for i, x in enumerate(x_vals):
        if i in train_indices:
            train_x.append(x)
            train_y.append(y_vals[i])
        else:
            validate_x.append(x)
            validate_y.append(y_vals[i])
    if plot:
        plt.figure()
        plt.plot(train_x, train_y, '.', label='Training')
        plt.plot(validate_x, validate_y, '.', label='Validation')
        plt.legend()
        plt.title('Training and Validation Data Splits')
    return (train_x, train_y), (validate_x, validate_y)


def fit_and_validate(x_vals, y_vals, degrees, frac_training=0.5):
    """Fit polynomials on a random training split and plot both splits.

    The data is split with ``split_data`` (which also plots the split),
    one polynomial per degree is fitted to the training portion and its
    rounded coefficients are printed, and the models are then plotted
    against the training data and against the held-out validation data.

    Args:
        x_vals: x coordinates of the full data set.
        y_vals: y coordinates of the full data set.
        degrees: Sequence of polynomial degrees to fit.
        frac_training: Fraction of the points used for training.
            Defaults to 0.5.
    """
    train, validate = split_data(x_vals, y_vals, frac_training)
    train_x, train_y = train
    validate_x, validate_y = validate
    # Reuse gen_fits rather than repeating its fit-and-print loop here.
    models = gen_fits(train_x, train_y, degrees)
    evaluate_fits(models, degrees, train_x, train_y, 'Fit to Training Data')
    evaluate_fits(models, degrees, validate_x, validate_y, 'Applied to Validation Data')


# random.seed(0)
# x_vals = range(-20, 20, 1)
# a, b, c = 3, 0, 0
# gen_noisy_parabolic_data(a, b, c, x_vals, 'parabola.txt')

# x_vals, y_vals = get_data('parabola.txt')
# degrees = (1, 2, 16)
# fit_and_validate(x_vals, y_vals, degrees)


# random.seed(0)
# x_vals, y_vals = get_data('parabola.txt')
# degrees = (1, 2, 4, 8, 16)
# num_trials = 200
# for d in degrees:
#     r2_vals = []
#     for _ in range(num_trials):
#         train, validate = split_data(x_vals, y_vals, 0.5, plot=False)
#         model = np.polyfit(train[0], train[1], d)
#         validate_pred = np.polyval(model, validate[0])
#         r2 = r_squared(validate[1], validate_pred)
#         r2_vals.append(r2)
#     r2_mean = np.mean(r2_vals)
#     print(f'Fit of degree {d}, mean R2 = {r2_mean}')


############################################################
# Show plots
############################################################


# Display any figures created by the sections above.
plt.show()
