ARPACK-Armadillo
DoubleShiftQR.h
1 // Copyright (C) 2015 Yixuan Qiu
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #ifndef DOUBLE_SHIFT_QR_H
8 #define DOUBLE_SHIFT_QR_H
9 
10 #include <armadillo>
11 #include <vector> // std::vector
12 #include <algorithm> // std::min, std::fill
13 #include <cmath> // std::abs, std::sqrt, std::pow
14 #include <limits> // std::numeric_limits
15 #include <stdexcept> // std::invalid_argument, std::logic_error
16 
17 template <typename Scalar = double>
19 {
20 private:
21  typedef arma::Mat<Scalar> Matrix;
22  typedef arma::Col<Scalar> Vector;
23 
24  int n; // Dimension of the matrix
25  Matrix mat_H; // A copy of the matrix to be factorized
26  Scalar shift_s; // Shift constant
27  Scalar shift_t; // Shift constant
28  Matrix ref_u; // Householder reflectors
29  const Scalar prec; // Approximately zero
30  const Scalar prec2;
31  bool computed; // Whether matrix has been factorized
32 
33  void compute_reflector(const Scalar &x1, const Scalar &x2, const Scalar &x3, int ind)
34  {
35  Scalar *u = ref_u.memptr() + 3 * ind;
36 
37  if(std::abs(x1) + std::abs(x2) + std::abs(x3) <= 3 * prec)
38  {
39  u[0] = u[1] = u[2] = 0;
40  return;
41  }
42  // x1' = x1 - rho * ||x||
43  // rho = -sign(x1)
44  Scalar tmp = x2 * x2 + x3 * x3;
45  Scalar x1_new = x1 - ((x1 < 0) - (x1 > 0)) * std::sqrt(x1 * x1 + tmp);
46  Scalar x_norm = std::sqrt(x1_new * x1_new + tmp);
47  u[0] = x1_new / x_norm;
48  u[1] = x2 / x_norm;
49  u[2] = x3 / x_norm;
50  }
51 
52  void compute_reflector(const Scalar *x, int ind)
53  {
54  compute_reflector(x[0], x[1], x[2], ind);
55  }
56 
57  void compute_reflectors_from_block(Matrix &X, int oi, int block_size, int start_ind)
58  {
59  // For the sub-block of X, we can assume all sub-diagonal elements are non-zero
60  const int mat_size = X.n_rows;
61  // For block size == 1, there is no need to apply reflectors
62  if(block_size == 1)
63  {
64  compute_reflector(0, 0, 0, start_ind);
65  return;
66  }
67 
68  // For block size == 2, do a Givens rotation on M = X * X - s * X + t * I
69  // x00 => X(oi, oi), x01 => X(oi, oi + 1)
70  Scalar *x00 = X.colptr(oi) + oi, *x01 = x00 + mat_size;
71  if(block_size == 2)
72  {
73  Scalar x = x00[0] * (x00[0] - shift_s) + x01[0] * x00[1] + shift_t;
74  Scalar y = x00[1] * (x00[0] + x01[1] - shift_s);
75  compute_reflector(x, y, 0, start_ind);
76  apply_PX(X, oi, oi, 2, mat_size - oi, start_ind);
77  apply_XP(X, 0, oi, oi + 2, 2, start_ind);
78  compute_reflector(0, 0, 0, start_ind + 1);
79  return;
80  }
81 
82  // For block size >= 3, use the regular strategy
83  // Scalar x = X(0, 0) * (X(0, 0) - shift_s) + X(0, 1) * X(1, 0) + shift_t;
84  // Scalar y = X(1, 0) * (X(0, 0) + X(1, 1) - shift_s);
85  // Scalar z = X(2, 1) * X(1, 0);
86  Scalar x = x00[0] * (x00[0] - shift_s) + x01[0] * x00[1] + shift_t;
87  Scalar y = x00[1] * (x00[0] + x01[1] - shift_s);
88  Scalar z = x01[2] * x00[1];
89  compute_reflector(x, y, z, start_ind);
90  // Apply the first reflector
91  apply_PX(X, oi, oi, 3, mat_size - oi, start_ind);
92  apply_XP(X, 0, oi, oi + std::min(block_size, 4), 3, start_ind);
93 
94  // Calculate the remaining reflectors
95  // If entering this loop, nrow is at least 4.
96  for(int i = 1; i < block_size - 2; i++)
97  {
98  compute_reflector(&X(oi + i, oi + i - 1), start_ind + i);
99  // Apply the reflector to X
100  apply_PX(X, oi + i, oi + i - 1, 3, mat_size - oi - i + 1, start_ind + i);
101  apply_XP(X, 0, oi + i, oi + std::min(block_size, i + 4), 3, start_ind + i);
102  }
103 
104  // The last reflector
105  compute_reflector(X(oi + block_size - 2, oi + block_size - 3), X(oi + block_size - 1, oi + block_size - 3), 0, start_ind + block_size - 2);
106  // Apply the reflector to X
107  apply_PX(X, oi + block_size - 2, oi + block_size - 3, 2, mat_size - oi - block_size + 3, start_ind + block_size - 2);
108  apply_XP(X, 0, oi + block_size - 2, oi + block_size, 2, start_ind + block_size - 2);
109 
110  compute_reflector(0, 0, 0, start_ind + block_size - 1);
111  }
112 
113  // P = I - 2 * u * u' = P'
114  // PX = X - 2 * u * (u'X)
115  void apply_PX(Matrix &X, int oi, int oj, int nrow, int ncol, int u_ind)
116  {
117  Scalar *u = ref_u.memptr() + 3 * u_ind;
118 
119  if(std::abs(u[0]) + std::abs(u[1]) + std::abs(u[2]) <= 3 * prec)
120  return;
121 
122  const int stride = X.n_rows;
123  const Scalar u0_2 = 2 * u[0];
124  const Scalar u1_2 = 2 * u[1];
125 
126  Scalar *xptr = X.colptr(oj) + oi;
127  if(nrow == 2)
128  {
129  for(int i = 0; i < ncol; i++, xptr += stride)
130  {
131  Scalar tmp = u0_2 * xptr[0] + u1_2 * xptr[1];
132  xptr[0] -= tmp * u[0];
133  xptr[1] -= tmp * u[1];
134  }
135  } else {
136  const Scalar u2_2 = 2 * u[2];
137  for(int i = 0; i < ncol; i++, xptr += stride)
138  {
139  Scalar tmp = u0_2 * xptr[0] + u1_2 * xptr[1] + u2_2 * xptr[2];
140  xptr[0] -= tmp * u[0];
141  xptr[1] -= tmp * u[1];
142  xptr[2] -= tmp * u[2];
143  }
144  }
145  }
146 
147  // x is a pointer to a vector
148  // Px = x - 2 * dot(x, u) * u
149  void apply_PX(Scalar *x, int u_ind)
150  {
151  Scalar u0 = ref_u(0, u_ind),
152  u1 = ref_u(1, u_ind),
153  u2 = ref_u(2, u_ind);
154 
155  if(std::abs(u0) + std::abs(u1) + std::abs(u2) <= 3 * prec)
156  return;
157 
158  // When the reflector only contains two elements, u2 has been set to zero
159  bool u2_is_zero = (std::abs(u2) <= prec);
160  Scalar dot2 = x[0] * u0 + x[1] * u1 + (u2_is_zero ? 0 : (x[2] * u2));
161  dot2 *= 2;
162  x[0] -= dot2 * u0;
163  x[1] -= dot2 * u1;
164  if(!u2_is_zero)
165  x[2] -= dot2 * u2;
166  }
167 
168  // XP = X - 2 * (X * u) * u'
169  void apply_XP(Matrix &X, int oi, int oj, int nrow, int ncol, int u_ind)
170  {
171  Scalar *u = ref_u.memptr() + 3 * u_ind;
172 
173  if(std::abs(u[0]) + std::abs(u[1]) + std::abs(u[2]) <= 3 * prec)
174  return;
175 
176  int stride = X.n_rows;
177  const Scalar u0_2 = 2 * u[0];
178  const Scalar u1_2 = 2 * u[1];
179  Scalar *X0 = X.colptr(oj) + oi, *X1 = X0 + stride; // X0 => X(oi, oj), X1 => X(oi, oj + 1)
180 
181  if(ncol == 2)
182  {
183  // tmp = 2 * u0 * X0 + 2 * u1 * X1
184  // X0 => X0 - u0 * tmp
185  // X1 => X1 - u1 * tmp
186  for(int i = 0; i < nrow; i++)
187  {
188  Scalar tmp = u0_2 * X0[i] + u1_2 * X1[i];
189  X0[i] -= tmp * u[0];
190  X1[i] -= tmp * u[1];
191  }
192  } else {
193  Scalar *X2 = X1 + stride; // X2 => X(oi, oj + 2)
194  const Scalar u2_2 = 2 * u[2];
195  for(int i = 0; i < nrow; i++)
196  {
197  Scalar tmp = u0_2 * X0[i] + u1_2 * X1[i] + u2_2 * X2[i];
198  X0[i] -= tmp * u[0];
199  X1[i] -= tmp * u[1];
200  X2[i] -= tmp * u[2];
201  }
202  }
203  }
204 
205 public:
206  DoubleShiftQR(int size) :
207  n(size),
208  prec(std::numeric_limits<Scalar>::epsilon()),
209  prec2(std::min(std::pow(prec, Scalar(2.0) / 3), n * prec)),
210  computed(false)
211  {}
212 
213  DoubleShiftQR(const Matrix &mat, Scalar s, Scalar t) :
214  n(mat.n_rows),
215  mat_H(n, n),
216  shift_s(s),
217  shift_t(t),
218  ref_u(3, n),
219  prec(std::numeric_limits<Scalar>::epsilon()),
220  prec2(std::min(std::pow(prec, Scalar(2.0) / 3), n * prec)),
221  computed(false)
222  {
223  compute(mat, s, t);
224  }
225 
226  void compute(const Matrix &mat, Scalar s, Scalar t)
227  {
228  if(!mat.is_square())
229  throw std::invalid_argument("DoubleShiftQR: matrix must be square");
230 
231  n = mat.n_rows;
232  mat_H.set_size(n, n);
233  shift_s = s;
234  shift_t = t;
235  ref_u.set_size(3, n);
236 
237  // Make a copy of mat
238  mat_H = mat;
239 
240  // Obtain the indices of zero elements in the subdiagonal,
241  // so that H can be divided into several blocks
242  std::vector<int> zero_ind;
243  zero_ind.reserve(n / 2);
244  zero_ind.push_back(0);
245  Scalar *Hii = mat_H.memptr();
246  for(int i = 0; i < n - 2; i++, Hii += (n + 1))
247  {
248  // Hii[1] => mat_H(i + 1, i)
249  if(std::abs(Hii[1]) <= prec2)
250  {
251  Hii[1] = 0;
252  zero_ind.push_back(i + 1);
253  }
254  // Make sure mat_H is upper Hessenberg
255  // Zero the elements below mat_H(i + 1, i)
256  std::fill(Hii + 2, Hii + n - i, Scalar(0));
257  }
258  zero_ind.push_back(n);
259 
260  for(std::vector<int>::size_type i = 0; i < zero_ind.size() - 1; i++)
261  {
262  int start = zero_ind[i];
263  int end = zero_ind[i + 1] - 1;
264  int block_size = end - start + 1;
265  // Compute refelctors from each block X
266  compute_reflectors_from_block(mat_H, start, block_size, start);
267  }
268 
269  computed = true;
270  }
271 
272  Matrix matrix_QtHQ()
273  {
274  if(!computed)
275  throw std::logic_error("DoubleShiftQR: need to call compute() first");
276 
277  return mat_H;
278  }
279 
280  // Q = P0 * P1 * ...
281  // Q'y = P_{n-2} * ... * P1 * P0 * y
282  void apply_QtY(Vector &y)
283  {
284  if(!computed)
285  throw std::logic_error("DoubleShiftQR: need to call compute() first");
286 
287  Scalar *y_ptr = y.memptr();
288  for(int i = 0; i < n - 1; i++, y_ptr++)
289  {
290  apply_PX(y_ptr, i);
291  }
292  }
293 
294  // Q = P0 * P1 * ...
295  // YQ = Y * P0 * P1 * ...
296  void apply_YQ(Matrix &Y)
297  {
298  if(!computed)
299  throw std::logic_error("DoubleShiftQR: need to call compute() first");
300 
301  int nrow = Y.n_rows;
302  for(int i = 0; i < n - 2; i++)
303  {
304  apply_XP(Y, 0, i, nrow, 3, i);
305  }
306  apply_XP(Y, 0, n - 2, nrow, 2, n - 2);
307  }
308 };
309 
310 
311 #endif // DOUBLE_SHIFT_QR_H