Essentials of Machine Learning Algorithms using R


https://www.analyticsvidhya.com/blog/2015/08/common-machine-learning-algorithms/

https://www.datascience.com/blog/introduction-to-forecasting-with-arima-in-r-learn-data-science-tutorials


https://techietweak.wordpress.com/2016/05/18/r-data-frame-the-one-data-type-for-predictive-analytics/


Linear Regression Model:
# Load the training and test sets from disk.
TrainingDataset <- read.csv("D:\\Dinesh\\AI\\TrainingDataset.csv")
TestDataset <- read.csv("D:\\Dinesh\\AI\\TestDataset.csv")

# Show the raw data as loaded.
TrainingDataset
TestDataset

# Append a `date` column parsed from `dteday` to the training set,
# then list the class of every column.
date <- as.Date(TrainingDataset$dteday)
TrainingDataset <- cbind(TrainingDataset, date)
sapply(TrainingDataset, class)

# Same for the test set (this overwrites the global `date`).
date <- as.Date(TestDataset$dteday)
TestDataset <- cbind(TestDataset, date)
sapply(TestDataset, class)

# Fit `cnt` on the same set of predictors twice: once with lm() and once
# with glm() using its default family.
lmModel <- lm(
  cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)
glmModel <- glm(
  cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)

# Predict on the test set and attach each prediction vector as a new column.
predict1 <- predict(lmModel, TestDataset)
output1 <- cbind(TestDataset, predict1)
predict2 <- predict(glmModel, TestDataset)
output2 <- cbind(TestDataset, predict2)

# Show the results and save them as CSV files.
output1
output2
write.csv(output1, "D:\\Dinesh\\AI\\output1.csv")
write.csv(output2, "D:\\Dinesh\\AI\\output2.csv")

Linear Regression Model with Loop:


# Fit one linear model per product and predict that product's rows in Test.
#
# Inputs (must already exist in the workspace; they are not defined here):
#   Train - data frame with columns NetSales, Date, Product, City
#   Test  - data frame with columns Date, Product, City, IsWeekend
# Outputs:
#   Summary      - one row of fit diagnostics per product
#   Predicted    - the Test rows of every product with the predicted NetSales
#   Final_Result - Predicted joined to Summary on Product

# Iterate over the product ids that actually occur in Train. Stepping from
# min to max by 1 would visit ids that have no training rows, and lm() fails
# on an empty subset. sort() also drops NA ids.
product_ids <- sort(unique(Train$Product))

# Preallocate the per-product results instead of growing them in the loop.
datalist <- vector("list", length(product_ids))
summary_rows <- vector("list", length(product_ids))

for (i in seq_along(product_ids)) {
  j <- product_ids[i]

  # which() keeps rows whose Product is NA out of the subset.
  # NOTE(review): the formula uses both as.Date(Date) and the raw Date
  # column; confirm that both terms are intended.
  LR1 <- lm(as.numeric(NetSales) ~ as.Date(Date) + Date + Product + City,
            data = Train[which(Train$Product == j), ])
  fit <- summary(LR1)
  fstat <- fit$fstatistic
  res <- resid(LR1)

  # Upper-tail probability of the overall F statistic of the fit.
  p_value <- as.double(pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE))

  summary_rows[[i]] <- data.frame(
    Product = j,
    p_value = p_value,
    residuals_mean = mean(res),
    residuals_min = min(res),
    residuals_median = median(res),
    residuals_max = max(res),
    multiple_r_squared = fit$r.squared,
    adjusted_r_squared = fit$adj.r.squared,
    f_statistic = unname(fstat[1]),
    row.names = NULL
  )

  New_Test <- Test[which(Test$Product == j), ]
  SalesPredict <- predict(LR1, newdata = New_Test)

  # The original passed `Date` twice, which data.frame() silently renamed to
  # Date and Date.1; the second name is now spelled out so the output columns
  # stay the same.
  datalist[[i]] <- data.frame(
    Date = New_Test$Date,
    Date.1 = as.Date(New_Test$Date),
    Product = New_Test$Product,
    City = New_Test$City,
    IsWeekend = New_Test$IsWeekend,
    NetSales = SalesPredict  # was the undefined name `Sales_Predict`
  )
}

Summary <- do.call(rbind, summary_rows)
Predicted <- do.call(rbind, datalist)

Final_Result <- merge(x = Predicted, y = Summary, by = "Product", all = TRUE)


Random Forest Model:
# Random forest regression of `cnt`, predicted on the test set and saved
# as CSV. Expects TrainingDataset and TestDataset, including the `date`
# column used in the formula, to already exist in the workspace.

# Install the package only when it is missing instead of re-installing it
# on every run of the script.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

# NOTE(review): random forests are fit with random sampling; call set.seed()
# before this line if reproducible predictions are needed.
rfModel <- randomForest(
  cnt ~ date + season + yr + mnth + holiday + weekday + workingday +
    weathersit + temp + atemp + hum + windspeed + casual + registered,
  data = TrainingDataset
)

# Predict on the test set and attach the predictions as a new column.
predict3 <- predict(rfModel, TestDataset)
output3 <- cbind(TestDataset, predict3)
write.csv(output3, "D:\\Dinesh\\Work\\AI\\output3.csv")


Prediction using a SQL Server Stored Procedure:

-- Predict sales per brand with an R linear model run inside SQL Server
-- through sp_execute_external_script.
--
-- The input query returns the summed Sales per (Date, Brand) and, for each
-- distinct (Date, Brand), a placeholder row dated one year later with
-- Sales = -1. The R script predicts Sales for the placeholder rows and
-- returns them as the result set.
-- NOTE(review): the first SELECT joins [DimDate] while the second joins
-- [ops].[DimDate]; confirm that both refer to the intended table.
Execute sp_execute_external_script @language =N'R',
@script=N'
sapply(TrainData, class);
# Rows to predict: the placeholders, without their dummy Sales column.
TestData <- subset(TrainData, Sales == -1, select = -Sales);
sapply(TestData, class);
# Fit on real observations only. The placeholder rows carry Sales = -1,
# which is a marker and not a measurement, so they must not enter the fit.
ActualData <- subset(TrainData, Sales != -1);
lmModel <- rxLinMod(Sales ~ as.Date(Date) + Brand, data = ActualData);
Sales <- rxPredict(lmModel, TestData);
output <- cbind(TestData, Sales);
OutputDataSet <- output;
',
@input_data_1 = N'select * from (select d.Date Date, Brand,  Sum(Sales) Sales from [Sales] S
  join [DimDate] d ON S.DateKey = D.Datekey
  group by d.Date, Brand
  UNION
  select distinct dateadd(year,1,d.Date) Date, Brand, -1 Sales from [Sales] S
  join [ops].[DimDate] d ON S.DateKey = D.Datekey) t
  order by 1
',
@input_data_1_name = N'TrainData',
@parallel = 1
WITH RESULT SETS (
(
[Date] datetime not null
,[Brand] int not null
,[Sales] float not null
)
);



Comments

Popular posts from this blog

SSRS Report Design: Best Practices

SSAS OLAP Design - Best Practices

Enable Usage-Based Optimization in SSAS