Essentials of Machine Learning Algorithms using R
https://www.analyticsvidhya.com/blog/2015/08/common-machine-learning-algorithms/
https://www.datascience.com/blog/introduction-to-forecasting-with-arima-in-r-learn-data-science-tutorials
https://techietweak.wordpress.com/2016/05/18/r-data-frame-the-one-data-type-for-predictive-analytics/
Linear Regression Model:
# ---- Load the training and test sets ----
TrainingDataset <- read.csv("D:\\Dinesh\\AI\\TrainingDataset.csv")
TestDataset <- read.csv("D:\\Dinesh\\AI\\TestDataset.csv")
print(TrainingDataset)
print(TestDataset)

# ---- Add a Date-typed copy of `dteday` as column `date` ----
# cbind() names the new column after the variable, so it must be called `date`
# to match the model formulas below.
date <- as.Date(TrainingDataset$dteday)
TrainingDataset <- cbind(TrainingDataset, date)
print(sapply(TrainingDataset, class))

date <- as.Date(TestDataset$dteday)
TestDataset <- cbind(TestDataset, date)
print(sapply(TestDataset, class))

# ---- Fit the same formula with lm() and with glm() (default family) ----
# NOTE(review): `casual` and `registered` presumably add up to `cnt` in this
# data set - confirm; if so they leak the target into the predictors.
lmModel <- lm(
  formula = cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)
glmModel <- glm(
  formula = cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)

# ---- Predict on the test set and attach the predictions as a new column ----
predict1 <- predict(lmModel, newdata = TestDataset)
predict2 <- predict(glmModel, newdata = TestDataset)
output1 <- cbind(TestDataset, predict1)
output2 <- cbind(TestDataset, predict2)
print(output1)
print(output2)

# ---- Persist both result sets ----
write.csv(output1, file = "D:\\Dinesh\\AI\\output1.csv")
write.csv(output2, file = "D:\\Dinesh\\AI\\output2.csv")
Linear Regression Model with Loop:
# Per-product linear regression.
#
# Expects two data frames defined elsewhere, `Train` and `Test`. The code reads
# columns Date, Product, City and NetSales from Train, and Date, Product, City
# and IsWeekend from Test.
#
# Produces:
#   datalist     - list with one data frame of predictions per product
#   Summary      - one row of fit diagnostics per product
#   Predicted    - all per-product predictions stacked
#   Final_Result - Predicted joined to Summary on Product (full outer join)

# Visit only the product ids that actually occur in Train. Stepping from min to
# max by 1 would fail on the first id that has no training rows. sort() also
# drops NA ids.
product_ids <- sort(unique(Train$Product))
n_products <- length(product_ids)

# Preallocate instead of growing with rbind() inside the loop.
datalist <- vector("list", n_products)
summary_rows <- vector("list", n_products)

for (i in seq_len(n_products)) {
  j <- product_ids[i]

  # NOTE(review): Date enters the formula twice and Product is constant within
  # each per-product subset; the terms are kept as they were - confirm intent.
  LR1 <- lm(
    as.numeric(NetSales) ~ as.Date(Date) + Date + Product + City,
    data = Train[(Train$Product == j), ]
  )

  # Compute the summary once and reuse it for every diagnostic.
  fit_summary <- summary(LR1)
  f_stat <- fit_summary$fstatistic
  fit_resid <- resid(LR1)

  # p-value of the overall F test: upper tail of F(numdf, dendf).
  p_value <- as.double(
    pf(f_stat[1], f_stat[2], f_stat[3], lower.tail = FALSE)
  )

  summary_rows[[i]] <- data.frame(
    Product = j,
    p_value = p_value,
    residuals_mean = mean(fit_resid),
    residuals_min = min(fit_resid),
    residuals_median = median(fit_resid),
    residuals_max = max(fit_resid),
    multiple_r_squared = fit_summary$r.squared,
    adjusted_r_squared = fit_summary$adj.r.squared,
    # unname() keeps the "value" label of fstatistic out of the row names.
    f_statistic = unname(f_stat[1])
  )

  New_Test <- Test[(Test$Product == j), ]
  SalesPredict <- predict(LR1, newdata = New_Test)

  # The original passed `Date` twice, which data.frame() silently renames to
  # Date / Date.1. The second name is spelled out here so the output columns
  # stay the same but are no longer accidental.
  datalist[[i]] <- data.frame(
    Date = New_Test$Date,
    Date.1 = as.Date(New_Test$Date),
    Product = New_Test$Product,
    City = New_Test$City,
    IsWeekend = New_Test$IsWeekend,
    NetSales = SalesPredict
  )
}

Summary <- do.call(rbind, summary_rows)
Predicted <- do.call(rbind, datalist)
Final_Result <- merge(x = Predicted, y = Summary, by = "Product", all = TRUE)
# Random forest regression on the same predictors as the linear models.
# Uses TrainingDataset / TestDataset, which this block does not create; the
# formula needs a `date` column in both.

# Install randomForest only when it is missing, so re-running the script does
# not re-download the package or fail when no CRAN mirror is reachable.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

rfModel <- randomForest(
  cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)

# Predict on the test set and attach the predictions as column `predict3`.
predict3 <- predict(rfModel, newdata = TestDataset)
output3 <- cbind(TestDataset, predict3)

# NOTE(review): the other outputs in this file are written under D:\Dinesh\AI
# (no "Work" folder) - confirm this path is intended.
write.csv(output3, "D:\\Dinesh\\Work\\AI\\output3.csv")
Prediction using a SQL Server stored procedure (sp_execute_external_script):
-- Runs an R script inside SQL Server to fit a linear model of Sales on Date
-- and Brand, then predicts Sales for the placeholder rows.
-- The input query returns actual sales summed per (Date, Brand) plus, for each
-- such pair, a row dated one year later with Sales = -1 ("to be predicted").
-- Result set: one row per placeholder (Date, Brand) with the predicted Sales.
Execute sp_execute_external_script @language =N'R',
@script=N'
sapply(TrainData, class);
# Rows with Sales == -1 are the placeholders to predict; drop the dummy target.
TestData <- subset(TrainData, Sales == -1, select = -Sales);
sapply(TestData, class);
# Fit only on rows with real sales: the -1 placeholders are not observations
# and would otherwise be treated as training data.
ActualData <- subset(TrainData, Sales != -1);
lmModel <- rxLinMod(Sales ~ as.Date(Date) + Brand, data = ActualData);
Sales <- rxPredict(lmModel, TestData);
output <- cbind(TestData, Sales);
OutputDataSet <- output;
',
-- NOTE(review): the first SELECT joins [DimDate] while the second joins
-- [ops].[DimDate] - confirm both refer to the same table.
@input_data_1 = N'select * from (select d.Date Date, Brand, Sum(Sales) Sales from [Sales] S
join [DimDate] d ON S.DateKey = D.Datekey
group by d.Date, Brand
UNION
select distinct dateadd(year,1,d.Date) Date, Brand, -1 Sales from [Sales] S
join [ops].[DimDate] d ON S.DateKey = D.Datekey) t
order by 1
',
@input_data_1_name = N'TrainData',
@parallel = 1
WITH RESULT SETS (
(
[Date] datetime not null
,[Brand] int not null
,[Sales] float not null
)
);
Comments
Post a Comment