比较两个迭代法

模拟数据

rm(list = ls())
set.seed(123)  # 保证结果可复现
n <- 100  # 样本数量
mu_true <- 5  # 真实均值
sigma_true <- 2  # 真实标准差

# 生成正态分布样本数据
data <- rnorm(n, mean = mu_true, sd = sigma_true)

精确解

mean(data)
## [1] 5.180812
var(data) * (n - 1) / n
## [1] 3.299602
# Newton
# 定义一阶、二阶导数的函数 gradient() 和 hessian()
# 输入的theta是一个二维向量

l <- function(theta, x) {
  return(
    -1/(2*theta[2]) * sum((x-theta[1])^2) - length(x) / 2 * log(theta[2]) - length(x)/2*log(2*pi)
  )
}

gradient <- function(theta, x) {
  return(c(1/theta[2] *sum(x - theta[1]),
        1/theta[2]^2*sum((x-theta[1])^2) -length(x)/theta[2]
    ))
}
hessian <- function(theta, x) {
  return(matrix(
    -c(
      length(x)/theta[2],
      1/theta[2]^2 * sum(x-theta[1]),
      1/theta[2]^2 * sum(x-theta[1]),
      1/theta[2]^3 * sum((x-theta[1])^2) - length(x)/(2*theta[2]^2)
    ),
    nrow = 2,
    byrow = TRUE
  ))
}
information <- function(theta, x) {
  return(matrix(
    -c(
      length(x)/theta[2],
      0,
      0,
      length(x)/(2*theta[2]^2)
    ),
    nrow = 2,
    byrow = TRUE
  ))
}

根据题目中的数据, 用Newton迭代法:

# 定义牛顿迭代法所需的初始值 theta0,tolerance 和最大迭代次数 max_iter
theta0 <- c(4, 2)
# theta <- c()
tolerance <- 1e-5
max_iter <- 1000
# 试验结果
# 迭代求解
iter <- 0
seq_newton <- c()
learning_rate <- 0.5
while (iter < max_iter) {
  # 计算当前迭代点的梯度和Hessian矩阵
  g <- gradient(theta = theta0, x = data)
  H <- hessian(theta = theta0, x = data)
  # 更新迭代点 
  theta <- theta0 - solve(H, g)*learning_rate
  iter <- iter + 1
  seq_newton <- c(seq_newton, abs(l(theta0, data) - l(theta, data)))
  # 判断是否满足停止准则
  if (abs(l(theta0, data) - l(theta, data)) < tolerance) {
    break
  }
  # if (norm(as.matrix(gradient(theta0, n_trial) - gradient(theta, n_trial))) < tolerance) {
  #   break
  # }
  # 更新迭代点 
  theta0 <- theta
}
# 输出结果
if (iter == max_iter) {
  paste0("Maximum iterations reached. Solution may not be accurate.")
} else {
  paste0("Solution found: theta_1 = ", theta[1], "; theta_2 = ", theta[2], ". With ", iter," iterations.")
}
## [1] "Solution found: theta_1 = 5.18044920799126; theta_2 = 3.29960200758313. With 13 iterations."

得分法

theta0 <- c(4, 2)
# theta <- c()
tolerance <- 1e-5
max_iter <- 1000
# 试验结果
# 迭代求解
iter <- 0
seq_score <- c()

learning_rate <- 0.5
while (iter < max_iter) {
  # 计算当前迭代点的梯度和Hessian矩阵
  g <- gradient(theta = theta0, x = data)
  I <- information(theta = theta0, x = data)
  # 更新迭代点 
  theta <- theta0 - solve(I, g) * learning_rate
  iter <- iter + 1
  seq_score <- c(seq_score, abs(l(theta0, data) - l(theta, data)))
  # 判断是否满足停止准则
  if (abs(l(theta0, data) - l(theta, data)) < tolerance) {
    break
  }
  # if (norm(as.matrix(gradient(theta0, n_trial) - gradient(theta, n_trial))) < tolerance) {
  #   break
  # }
  # 更新迭代点 
  theta0 <- theta
}
# 输出结果
if (iter == max_iter) {
  paste0("Maximum iterations reached. Solution may not be accurate.")
} else {
  paste0("Solution found: theta_1 = ", theta[1], "; theta_2 = ", theta[2], ". With ", iter," iterations.")
}
## [1] "Solution found: theta_1 = 5.18052353313734; theta_2 = 3.2996023400141. With 12 iterations."

比较

# 比较收敛速度
plot(seq_newton, type = "l", lwd = 2, col = "red", xlab = "Iteration", ylab = "|l(new) - l(old)|")
lines(seq_score, type = "l", lwd = 2, col = "blue")
abline(h = tolerance, lty = "dashed")
legend("topright", legend = c("Newton", "Score"), lwd = 2, col = c("red", "blue"))