linregression.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. #!/usr/bin/env python3
  2. import numpy as np
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. plt.style.use('seaborn-darkgrid')
  6. # create a dataframe
  7. df = pd.read_csv("Salary_Data.csv")
  8. # read x and y values
  9. x = df.iloc[:,0]
  10. y = df.iloc[:,1]
  11. # number of rows
  12. n = df.size//2
  13. # summation of x and y
  14. sum_x = df.iloc[:,0].sum()
  15. sum_y = df.iloc[:,1].sum()
  16. # summation of x.y
  17. sum_xy = (df.iloc[:,0] * df.iloc[:,1]).sum()
  18. # compute theta_1 and theta_2 values
  19. theta2 = (sum_xy - n*np.mean(x)*np.mean(y))/(sum(x**2) - n*np.mean(x)**2)
  20. theta1 = (n*np.mean(x)*np.mean(y) - theta2*n*np.mean(x)**2)/(n*np.mean(x))
  21. print("__________Linear Regression__________")
  22. print("\ntheta1 = {}".format(theta1))
  23. print("theta2 = {}".format(theta2))
  24. E = [(y - theta1 - theta2*x)**2 for x,y in zip(x,y)]
  25. print("\nSummation of epsilon-square: {}".format(sum(E)))
  26. cal_y = [theta2*x + theta1 for x in x]
  27. y = np.array(y)
  28. print("\nObeserved y-values: {}".format(y))
  29. print("Calculated y-values: {}".format(cal_y))
  30. epsilon = [(y - cal_y) for y,cal_y in zip(y,cal_y)]
  31. print("\nError term: {}".format(epsilon))
  32. ax=plt.figure().add_subplot(111)
  33. ax.plot(x, y, 'r--o', label='Observed Value')
  34. ax.plot(x, cal_y, 'b--o', label='Calculated Value')
  35. ax.set(xlabel='x', ylabel='y', title='Linear Regression')
  36. plt.legend(); plt.savefig('linreg.png'); plt.show()