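Logistic regression assumes that the probability of the dependent variable $y$ being the positive class can be modeled by the logistic (sigmoid) function of a linear combination of the features:

$$h_\theta(x) = P(y = 1 \mid x; \theta) = \frac{1}{1 + e^{-\theta^T x}}$$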

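We can now define the cost function over $m$ training examples as

$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$

where $h_\theta(x) = 1 / (1 + e^{-\theta^T x})$ is the predicted probability of the positive class.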

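To minimize the cost function, the gradient descent method can be used.
The process begins by computing the derivative of the cost function with respect to each parameter $\theta_j$:

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$$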

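The parameters are then improved by repeating the update

$$\theta := \theta - \alpha \, \nabla_\theta J(\theta)$$

for a fixed number of iterations, where $\alpha$ is the learning rate.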

``````import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from pylab import scatter, show, legend, xlabel, ylabel

# Extract the labels and hold out half of the data for testing.
# NOTE(review): `df` and `X` are not assigned anywhere in the visible code;
# presumably the data frame and feature matrix are loaded in an omitted
# step -- confirm before running.
# `.values` replaces `Series.as_matrix()`, which was removed in pandas 1.0.
y = df["label"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# train scikit learn model
clf = LogisticRegression()
clf.fit(X_train, y_train)
# A single pre-formatted argument prints the same text on Python 2 and 3.
print('scikit score: %s' % clf.score(X_test, y_test))
#scikit score: 0.9

# visualize data
def visualize(X, y):
    """Scatter-plot the first two columns of X, split by the label in y.

    Rows where ``y == 1`` are drawn as blue circles and rows where
    ``y == 0`` as red crosses. The axes are labelled with the two exam
    scores and the figure is displayed with ``show()``.
    """
    # (label value, marker, colour) -- positives are plotted first.
    class_styles = ((1, 'o', 'b'), (0, 'x', 'r'))
    for label, marker, colour in class_styles:
        rows = np.where(y == label)
        scatter(X[rows, 0], X[rows, 1], marker=marker, c=colour)

    xlabel('Exam 1 score')
    ylabel('Exam 2 score')
    show()

# Plot the labelled data set using visualize() defined above.
# NOTE(review): X is not assigned anywhere in the visible code; presumably it
# is the feature matrix built from df -- confirm.
visualize(X, y)``````

class myLogisticRegression(object):
    """Binary logistic regression trained with batch gradient descent.

    X is used as a 2-D array (``X.shape[0]`` samples, ``X.shape[1]``
    features); y is assumed to be a 1-D array of 0/1 labels with one entry
    per sample. No intercept column is added: theta has exactly
    ``X.shape[1]`` entries, so callers who want a bias term must append a
    constant column to X themselves.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated prediction cannot produce log(0).
    _EPS = 1e-15

    def sigmoid(self, x):
        """Return the element-wise logistic function 1 / (1 + exp(-x))."""
        return 1.0 / (1.0 + np.exp(-x))

    def logistic(self, X, theta):
        """Return the predicted positive-class probability for each row of X."""
        return self.sigmoid(np.dot(X, theta))

    def cost(self, X, y, theta):
        """Return the mean cross-entropy of the predictions against y.

        Computes ``-1/m * sum(y*log(h) + (1-y)*log(1-h))`` with
        ``h = logistic(X, theta)``. Both terms use the logarithm; the
        ``(1 - y)`` term previously used ``1 - h`` directly, which is not
        the cross-entropy.
        """
        m = X.shape[0]
        h = np.clip(self.logistic(X, theta), self._EPS, 1.0 - self._EPS)
        log_likelihood = np.dot(y, np.log(h)) + np.dot(1 - y, np.log(1 - h))
        return -1.0 / m * log_likelihood

    def derivative_of_cost(self, X, y, theta):
        """Return the gradient of the cost w.r.t. theta: ``1/m * (h - y) . X``."""
        m = X.shape[0]
        return 1.0 / m * np.dot(self.logistic(X, theta) - y, X)

    def gradient_descent(self, X, y, theta, alpha):
        """Return theta after one gradient-descent step with learning rate alpha."""
        return theta - alpha * self.derivative_of_cost(X, y, theta)

    def fit(self, X, y, alpha, num_iter):
        """Learn ``self.theta`` by running num_iter gradient-descent steps.

        theta starts at all zeros and is updated with learning rate alpha.
        """
        self.theta = np.zeros(X.shape[1])
        for _ in range(num_iter):
            self.theta = self.gradient_descent(X, y, self.theta, alpha)

    def predict(self, X):
        """Return positive-class probabilities (not hard labels) for X.

        Requires ``fit`` to have been called so that ``self.theta`` exists.
        """
        return self.logistic(X, self.theta)

    def score(self, X, y):
        """Return the fraction of rows whose rounded probability equals y."""
        mismatches = np.count_nonzero(np.round(self.predict(X)) - y)
        return 1.0 - 1.0 * mismatches / len(y)

# Train the hand-written model (learning rate 0.1, 1000 iterations) and
# report its accuracy on the held-out split.
lr = myLogisticRegression()
lr.fit(X_train, y_train, .1, 1000)
# A single pre-formatted argument prints the same text on Python 2 and 3.
print('score: %s' % lr.score(X_test, y_test))
#score: 0.86

