# Least Angle Regression

2019/05/23 11:30

[TOC]

Efron B, Hastie T, Johnstone I M, et al. Least angle regression[J]. Annals of Statistics, 2004, 32(2): 407-499.

## LARS算法

### 算法

Input: 标准化后的$X = [x_1, x_2, \ldots, x_m]$和$y=[y_1, \ldots, y_n]^T$, 特征数$r$; 令：$\mu_{\mathcal{A}}=0, \beta=[0, 0, \ldots, 0] \in \mathbb{R}^m$; 计算$c = X^Ty$, 找出其中绝对值最大的元素，令其指标集为$\mathcal{A}$,最大值为$C$，令 $X_{\mathcal{A}}=[\ldots, s_j x_j, \ldots]{j \in \mathcal{A}}$, $\mathcal{G_A}=X{\mathcal{A}}^TX_{\mathcal{A}}, A_{\mathcal{A}}=(1_\mathcal{A}^T\mathcal{G_A}^{-1}1_{\mathcal{A}})^{-1/2}$, $\mathrm{u}{\mathcal{A}}=X{\mathcal{A}}w_{\mathcal{A}},w_{\mathcal{A}}=A_{\mathcal{A}}\mathcal{G_A}^{-1}1_{\mathcal{A}}$ For $k = 1, 2, \ldots, r$: 1. 根据公式(2.13)计算$\widehat{\gamma}$,记录相应的$j$,如果$\widehat{\gamma}=0$，停止迭代。 2. $\mu_\mathcal{A}=\mu_{\mathcal{A}}+\widehat{\gamma}\mathrm{u}{\mathcal{A}}$ 3. $\beta = \beta+\widehat{\gamma}w{\mathcal{A}}\otimes s_{\mathcal{A}}$ 4. 更新$\mathcal{A}=\mathcal{A} \cup {j}$,$C=C-\widehat{\gamma}A_{\mathcal{A}}$,$c=X^T(y-\mu_\mathcal{A})$ 5. 更新$X_{\mathcal{A}},\mathcal{G_A}, A_{\mathcal{A}}, \mathrm{u}{\mathcal{A}}$ 输出：$\beta, \mu{\mathcal{A}}$

## 代码



import numpy as np
import matplotlib.pyplot as plt

class LARS_LASSO:

def __init__(self, data, response):
self.__data = data
self.__response = response
self.n, self.m = self.data.shape
self.mu = np.zeros(self.n, dtype=float)
self.beta = np.zeros(self.m, dtype=float)
self.compute_c()
self.compute_index()
self.compute_basic()
self.progress_beta = []
self.progress_mu = []

@property
def data(self):
return self.__data

@property
def response(self):
return self.__response

def compute_c(self):
"""计算关系度c"""
self.c = self.data.T @ (self.response-self.mu)

def compute_index(self):
"""找出最大值C和指标集A，以及sj"""
self.index = [np.argmax(np.abs(self.c))]
newc = self.c[self.index]
self.maxC = np.abs(newc[0])
sign = lambda x: 1. if x >= 0 else -1.
self.s = np.array(
[sign(item) for item in newc],
dtype=float
)

def compute_basic(self):
"""计算一些基本的东西
index_A: A_A
index_w: w_A
index_u: u_A
"""
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
self.index_A = 1 / np.sqrt(np.sum(index_G_inv))
self.index_w = np.sum(index_G_inv, 1) * self.index_A
self.index_u = index_X @ self.index_w

def update_c(self):
"""更新c"""
self.compute_c()

def update_index(self, j):
"""更新指示集合
index:  指示集合A
maxC: 最大的c
s: 符号
"""
if j in self.index:
self.index.remove(j)
else:
self.index.append(j)
self.index.sort()
newc = self.c[self.index]
self.maxC = np.abs(newc[0])
sign = lambda x: 1. if x >= 0 else -1.
self.s = np.array(
[sign(item) for item in newc],
dtype=float
)

def update_basic(self):
"""更新基本的东西"""
self.compute_basic()

def current_gamma(self):
"""找第一次改变符号的位置"""
const = 999999999.
d = self.s * self.index_w
index_beta = self.beta[self.index]
z = []
for i in range(len(d)):
if -index_beta[i] * d[i] <= 0:
z.append(const)
else:
z.append(-index_beta[i] / d[i])
z = np.array(z, dtype=float)
label = np.argmin(z)
themin = z[label]

return themin, self.index[label]

def step(self):
"""操作一步"""
const = 9999999999.
def divide(x, y):
z = []
for i in range(len(x)):
if x[i] * y[i] <= 0:
z.append(const)
else:
z.append(x[i] / y[i])
return z

complement_index = list(set(range(self.m))
- set(self.index))
a = self.data.T @ self.index_u
complement_a = a[complement_index]
complement_c = self.c[complement_index]
index_reduce_a = self.index_A - complement_a
index_plus_a = self.index_A + complement_a
maxC_reduce_c = self.maxC - complement_c
maxc_plus_c = self.maxC + complement_c
min1 = divide(maxC_reduce_c, index_reduce_a)
min2 = divide(maxc_plus_c, index_plus_a)
totalmin = np.array(
[min1, min2]
)
allmin = np.min(totalmin, 0)
min_beta, label2 = self.current_gamma()
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))
try:
label = np.argmin(allmin)
except ValueError:
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
deltau = index_G_inv @ index_X.T @ (self.response - self.mu)
self.mu = self.mu + index_X @ deltau
self.beta = self.beta + deltau * self.s
return 0
print(min_beta, allmin[label])
if min_beta < allmin[label]:
gamma = min_beta
j = label2
else:
gamma = 0. if allmin[label] == const else allmin[label]
j = complement_index[label]
self.mu = self.mu + gamma * self.index_u
self.beta[self.index] = self.beta[self.index] + (self.s * self.index_w) * gamma
if self.life == 0:
return 1
self.update_c()
self.update_index(j)
self.update_basic()
return 1

def process(self, r=1):
self.life = r
for i in range(r):
self.life -= 1
print("step:", i)
self.step()
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
deltau = index_G_inv @ index_X.T @ (self.response - self.mu)
self.mu = self.mu + index_X @ deltau
self.beta[self.index] = self.beta[self.index] + deltau * self.s
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))

def plot(self):
"""plot beta, error"""
fig, ax = plt.subplots(nrows=1, ncols=2,
figsize=(10, 5), constrained_layout=True)
beta = np.array(self.progress_beta)
mu = np.array(self.progress_mu)
r, m = beta.shape
error = np.sum((mu - self.response) ** 2, 1)
x = np.arange(1, r+1)
for i in range(m):
y =  beta[:, i]
ax[0].plot(x, y, label="feature{0}".format(i))
ax[0].text(x[-1]+0.05, y[-1], str(i))
ax[0].set_title(r"$\beta$ with iterations")
ax[0].set_xlabel(r"iterations")
ax[0].set_ylabel(r"$\beta$")
ax[0].legend(loc="best", ncol=2)
ax[1].plot(x, error)
ax[1].set_title("square error with iterations")
ax[1].set_xlabel("iterations")
ax[1].set_ylabel("square error")
plt.show()

mu = np.mean(data1, 0)
std = np.std(data1, 0)
data1 = (data1 - mu) / std
data = data1[:, :10]
response = data1[:, 10]
test = LARS_LASSO(data, response)
test.process(r=7)
test.plot()
print(test.progress_beta)



"""

"""
import numpy as np
import matplotlib.pyplot as plt

class LARS_LASSO:

def __init__(self, data, response):
self.__data = data
self.__response = response
self.n, self.m = self.data.shape
self.mu = np.zeros(self.n, dtype=float)
self.beta = np.zeros(self.m, dtype=float)
self.compute_c()
self.compute_index()
self.compute_basic()
self.progress_beta = []
self.progress_mu = []

@property
def data(self):
return self.__data

@property
def response(self):
return self.__response

def compute_c(self):
"""计算关系度c"""
self.c = self.data.T @ (self.response-self.mu)

def compute_index(self):
"""找出最大值C和指标集A，以及sj"""
self.index = [np.argmax(np.abs(self.c))]
newc = self.c[self.index]
self.maxC = np.abs(newc[0])
sign = lambda x: 1. if x >= 0 else -1.
self.s = np.array(
[sign(item) for item in newc],
dtype=float
)

def compute_basic(self):
"""计算一些基本的东西
index_A: A_A
index_w: w_A
index_u: u_A
"""
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
self.index_A = 1 / np.sqrt(np.sum(index_G_inv))
self.index_w = np.sum(index_G_inv, 1) * self.index_A
self.index_u = index_X @ self.index_w

def update_c(self):
"""更新c"""
self.compute_c()

def update_index(self, j):
"""更新指示集合
index:  指示集合A
maxC: 最大的c
s: 符号
"""
if j in self.index:
self.index.remove(j)
else:
self.index.append(j)
self.index.sort()
newc = self.c[self.index]
self.maxC = np.abs(newc[0])
sign = lambda x: 1. if x >= 0 else -1.
self.s = np.array(
[sign(item) for item in newc],
dtype=float
)

def update_basic(self):
"""更新基本的东西"""
self.compute_basic()

def current_gamma(self):
"""找第一次改变符号的位置"""
const = 999999999.
d = self.s * self.index_w
index_beta = self.beta[self.index]
z = []
for i in range(len(d)):
if -index_beta[i] * d[i] <= 0:
z.append(const)
else:
z.append(-index_beta[i] / d[i])
z = np.array(z, dtype=float)
label = np.argmin(z)
themin = z[label]

return themin, self.index[label]

def step(self):
"""操作一步"""
const = 9999999999.
def divide(x, y):
z = []
for i in range(len(x)):
if x[i] * y[i] <= 0:
z.append(const)
else:
z.append(x[i] / y[i])
return z

complement_index = list(set(range(self.m))
- set(self.index))
a = self.data.T @ self.index_u
complement_a = a[complement_index]
complement_c = self.c[complement_index]
index_reduce_a = self.index_A - complement_a
index_plus_a = self.index_A + complement_a
maxC_reduce_c = self.maxC - complement_c
maxc_plus_c = self.maxC + complement_c
min1 = divide(maxC_reduce_c, index_reduce_a)
min2 = divide(maxc_plus_c, index_plus_a)
totalmin = np.array(
[min1, min2]
)
allmin = np.min(totalmin, 0)
min_beta, label2 = self.current_gamma()
print(len(self.progress_beta))
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))
try:
label = np.argmin(allmin)
except ValueError:
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
deltau = index_G_inv @ index_X.T @ (self.response - self.mu)
self.mu = self.mu + index_X @ deltau
self.beta = self.beta + deltau * self.s
return 0
if min_beta < allmin[label]:
gamma = min_beta
label = label2
else:
gamma = 0. if allmin[label] == const else allmin[label]
self.mu = self.mu + gamma * self.index_u
self.beta[self.index] = self.beta[self.index] + (self.s * self.index_w) * gamma
if self.life == 0:
return 1
j = complement_index[label]
self.update_c()
self.update_index(j)
self.update_basic()
return 1

def process(self, r=1):
self.life = r
for i in range(r):
self.life -= 1
print("step:", i)
self.step()
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))
index_X = self.data[:, self.index] * self.s
index_G = index_X.T @ index_X
index_G_inv = np.linalg.inv(index_G)
deltau = index_G_inv @ index_X.T @ (self.response - self.mu)
self.mu = self.mu + index_X @ deltau
self.beta[self.index] = self.beta[self.index] + deltau * self.s
self.progress_beta.append(np.array(self.beta))
self.progress_mu.append(np.array(self.mu))

def plot(self):
"""plot beta, error"""
fig, ax = plt.subplots(nrows=1, ncols=2,
figsize=(10, 5), constrained_layout=True)
beta = np.array(self.progress_beta)
mu = np.array(self.progress_mu)
r, m = beta.shape
error = np.sum((mu - self.response) ** 2, 1)
x = np.arange(1, r+1)
for i in range(m):
y =  beta[:, i]
ax[0].plot(x, y, label="feature{0}".format(i))
ax[0].text(x[-1]+0.05, y[-1], str(i))
ax[0].set_title(r"$\beta$ with iterations")
ax[0].set_xlabel(r"iterations")
ax[0].set_ylabel(r"$\beta$")
ax[0].legend(loc="best", ncol=2)
ax[1].plot(x, error)
ax[1].set_title("square error with iterations")
ax[1].set_xlabel("iterations")
ax[1].set_ylabel("square error")
plt.show()



0
0 收藏

0 评论
0 收藏
0