# Multiple Linear Regression
#import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the dataset; the last column is the target and every column before
# it is a feature.
dataset = pd.read_csv("C:\\ClassStudies_Python\\50_Startups.csv")
x = dataset.iloc[:, :-1].values
# Select the target by position from the end so it always matches the
# feature slice above, instead of relying on a hard-coded column index.
y = dataset.iloc[:, -1].values
# Encoding categorical data
# One-hot encode the categorical feature held in column 3 of x.
# OneHotEncoder's `categorical_features` argument was removed in
# scikit-learn 0.22; ColumnTransformer replaces it and, like the old
# argument, emits the encoded columns first followed by the untouched ones.
# LabelEncoder is no longer used here (OneHotEncoder accepts string
# categories directly); its import is kept in case other code relies on it.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    transformers=[("onehot", onehotencoder, [3])],
    remainder="passthrough",  # keep the remaining columns unchanged
    sparse_threshold=0,  # always return a dense array
)
# The passthrough columns come from an object array, so cast to float.
x = column_transformer.fit_transform(x).astype(float)

# Avoiding the dummy variable trap: drop the first dummy column.
x = x[:, 1:]
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
# Feature scaling (disabled): the snippet below is wrapped in a string
# literal, so none of it is executed.
'''from sklearn.preprocessing import StandardScaler
sc_X=StandardScaler()
x_train=sc_X.fit_transform(x_train)
x_test=sc_X.transform(x_test)
sc_y=StandardScaler()
y_train=sc_y.fit_transform(y_train)
'''
# Fit a multiple linear regression model on the training set.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(x_train, y_train)

# Predict the test-set targets and display them.
y_pred = regressor.predict(x_test)
print(y_pred)
# Evaluate the predictions with the R^2 (coefficient of determination) score.
from sklearn.metrics import r2_score

# r2_score expects (y_true, y_pred); the score is not symmetric, so passing
# the arguments the other way round reports a different, incorrect value.
print(r2_score(y_test, y_pred))  # print the accuracy score

# Plot actual values against predicted values.
plt.scatter(y_pred, y_test, c='black')
plt.plot(y_pred, y_test, c='c')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()  # render the figure when run outside an interactive session
# Output:
