Logistic Regression with Regularization without using an R library
costFunctionReg.R
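The cost implemented below is the standard regularized logistic regression cost, with the bias term theta[1] left out of the penalty:

J(theta) = -(1/m) * sum( y*log(h) + (1 - y)*log(1 - h) ) + (lambda/(2*m)) * sum(theta[j]^2 for j >= 2), where h = sigmoid(X %*% theta)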
costFunctionReg <- function(theta, X, y, lambda){
  m <- length(y)
  # Exclude the bias term theta[1] from the regularization penalty
  trunctheta <- theta[2:length(theta)]
  h <- sigmoid(X %*% theta)
  J <- -1/m * (sum(y * log(h)) + sum((1 - y) * log(1 - h))) + (lambda/(2 * m)) * sum(trunctheta^2)
  return(J)
}
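The optim call in the driver script below passes a gradient function named gradFunction, which is neither listed nor sourced in this post. A minimal sketch of the regularized gradient it would need to compute, assuming the same argument order as costFunctionReg (the actual gradFunction.R may differ):

gradFunction <- function(theta, X, y, lambda){
  m <- length(y)
  h <- sigmoid(X %*% theta)
  # Unregularized gradient of the logistic cost
  grad <- (t(X) %*% (h - y)) / m
  # Add the penalty term for every component except the bias theta[1]
  grad[-1] <- grad[-1] + (lambda / m) * theta[-1]
  return(grad)
}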
Feature Mapping - mapFeature.R
mapFeature <- function(x1, x2, degree){
  # Start with a column of ones for the intercept term
  out <- matrix(1, length(x1))
  # Append every polynomial term x1^(i-j) * x2^j up to the given degree
  for (i in 1:degree){
    for (j in 0:i){
      out <- cbind(out, (x1^(i-j)) * (x2^j))
    }
  }
  return(out)
}
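For degree 6 this expands the two raw inputs into 28 features: the intercept plus one column for every pair (i, j) with 0 <= j <= i <= 6. For example:

ncol(mapFeature(0.5, -0.25, 6))   # 28 columns from two raw inputs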
Sigmoid Function - sigmoid.R
sigmoid <- function(z) {
  1/(1 + exp(-z))
}
Predict Function - predict.R
# Predictor function
predict <- function(theta, X){
  # Classify as 1 when the predicted probability exceeds 0.5
  p <- sigmoid(X %*% theta) > 0.5
  return(p)
}
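Because this function shares its name with the stats::predict generic and lives in the global environment, it masks the generic; that is why the glm section further down calls predict.glm explicitly. Typical usage with the mapped features, as in the driver script below:

p <- predict(theta, x)    # logical vector of class predictions
mean(p == y) * 100        # training accuracy in percent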
The entire script -
require(Hmisc);
source('sigmoid.R')
source('mapFeature.R')
source('costFunctionReg.R')
source('predict.R')
setwd("D:/tmp/mlclass-ex1-005/mlclass-ex2-005/R-Studio/matlab");
mydata <- read.table("D:/tmp/mlclass-ex1-005/mlclass-ex2-005/R-Studio/ex2data2.txt",header=FALSE,sep=",")
names(mydata) <- c("Microchip Test 1","Microchip Test 2","Accepted");
x <- mydata[,1:2];
y <- mydata[,3];
# Plot data to understand the classification problem
plot(x,pch=c(24,3)[y+1],bg='yellow')
legend(.8,1,c('y = 1','y = 0'),pch=c(3,24),pt.bg='yellow')
x <- mapFeature(x[,1],x[,2],6)
# The following can be used instead
#x <- mapFeature(mydata$"Microchip Test 1",mydata$"Microchip Test 2",6)
initial_theta <- matrix(0,ncol(x));
# Contour plot of decision boundary
Xbase <- mydata[,1:2]
y <- mydata[,3]
plot(Xbase,pch=c(24,3)[y+1],bg='yellow')
legend(.8,1,c('y = 1','y = 0'),pch=c(3,24),pt.bg='yellow')
legend(.85,-0.20,c('BL = 0','B = 1','Y = 20','R = 50','G = 100','GR = 150'),lty=1,lwd=2,col=c('black','darkblue','yellow','red','green','gainsboro'))
lambda.array <- c(0,1,20,50,100,150);
color.array <- c('black','darkblue','yellow','red','green','gainsboro');
# Fit and plot a decision boundary for each regularization parameter in lambda.array
d <- 0;
for( templambda in lambda.array){
d <- d + 1;
result <- optim(initial_theta,costFunctionReg,gradFunction,method="BFGS",hessian = FALSE,X=x,y=y,lambda = templambda,control=list(maxit=400));
theta <- result$par;
p <- predict(theta,x);
message('Train Accuracy: ', mean((p == y)) * 100)
# Evaluate the model over a grid so the decision boundary (z = 0) can be contoured
u <- seq(from=-1, to=1.5, by=.05)
v <- u
z <- matrix(0,length(u),length(v))
for (i in 1:length(u)){
for (j in 1:length(v)){
z[i,j] = mapFeature(u[i],v[j],6)%*%theta
}
}
contour(u,v,z,levels=0,add=T,lwd=2,col=color.array[d],drawlabels=F)
}
The output with various lambda values -
Logistic Regression with Regularization WITH the R glm library
data <- read.table("D:/tmp/mlclass-ex1-005/mlclass-ex2-005/R-Studio/ex2data2.txt",header=FALSE,sep=",")
names(data) <- c("test1","test2","accept");
head(data)
model <- glm(accept ~ test1 + test2 , family = binomial("logit"),data=data )
summary(model)
plot(model,which=1)
plot(predict.glm(model),residuals(model))
abline(h=0,lty=2,col="green")
in_frame<-data.frame(test1=1.0709,test2=0.10015)
#?predict.glm
output <- predict.glm(model,newdata=in_frame,type = "response")
# Any predicted probability of 0.5 or higher is classified as 1, otherwise 0
if(output >= 0.5){
message(" Success ",output)
}else{
message(" FAILURE ",output)
}
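Note that glm itself fits an unpenalized model. If an L2-regularized fit from an R library is wanted, the glmnet package can provide one; the following is only a sketch, assuming glmnet is installed and using the same data frame as above (alpha = 0 selects a pure ridge/L2 penalty):

library(glmnet)
Xmat <- as.matrix(data[, c("test1", "test2")])
ridge <- glmnet(Xmat, data$accept, family = "binomial", alpha = 0, lambda = 1)
# stats::predict is called explicitly because the custom predict() above masks the generic
stats::predict(ridge, newx = as.matrix(in_frame), type = "response")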