sym-ildl  1.2
Incomplete LDL' factorizations of indefinite symmetric and skew-symmetric matrices.
lilc_matrix_ildl_inplace.h
1 #ifndef _LILC_MATRIX_ILDL_INPLACE_H_
2 #define _LILC_MATRIX_ILDL_INPLACE_H_
3 
4 
5 using std::endl;
6 using std::cout;
7 using std::abs;
8 
9 template <class el_type>
10 void lilc_matrix<el_type> :: ildl_inplace(block_diag_matrix<el_type>& D, idx_vector_type& perm, const double& fill_factor, const double& tol, const double& pp_tol, int piv_type)
11 {
12 
13  //----------------- initialize temporary variables --------------------//
14  const int ncols = n_cols(); //number of cols in A.
15 
16  int lfil;
17  if (fill_factor > 1e4) lfil = ncols; //just incase users decide to enter a giant fill factor for fun...
18  else lfil = 2*fill_factor*nnz()/ncols; //roughly a factor of 2 since only lower tri. of A is stored
19 
20  const el_type alpha = (1.0+sqrt(17.0))/8.0; //for use in pivoting.
21  el_type w1(-1), wr(-1), d1(-1), dr(-1); //for use in bk-pivoting
22  el_type det_D, D_inv11, D_inv22, D_inv12; //for use in 2x2 pivots
23  el_type l_11, l_12; //for use in 2x2 pivots
24 
25  vector<bool> in_set(ncols, false); //bitset used for unsorted merges
26  swap_struct<el_type> s; //struct containing temp vars used in pivoting.
27 
28  elt_vector_type work(ncols, 0), temp(ncols, 0);
29  idx_vector_type curr_nnzs, temp_nnzs; //non-zeros on current col.
30  curr_nnzs.reserve(ncols); //reserves space for worse case (entire col is non-zero)
31 
32  int count = 0; //the total number of nonzeros stored in L.
33  int i, j, k, r, offset, col_size, col_size2(-1);
34  bool size_two_piv = false; //boolean indicating if the pivot is 2x2 or 1x1
35 
36  //--------------- allocate memory for L and D ------------------//
37  D.resize(ncols);
38 
39  //------------------- main loop: factoring begins -------------------------//
40  for (k = 0; k < ncols; k++) {
41  //curr nnz vector starts out empty and is cleared at the end of each loop iteration.
42  //assign nonzeros indices of A(k:n, k) to curr_nnzs
43  curr_nnzs.assign (m_idx[k].begin(), m_idx[k].end());
44 
45  //assign nonzero values of A(k:n, k) to work
46  for (j = 0; j < (int) curr_nnzs.size(); j++) {
47  work[curr_nnzs[j]] = m_x[k][j];
48  }
49  sort(curr_nnzs.begin(), curr_nnzs.end());
50 
51  //--------------begin pivoting--------------//
52 
53  //do delayed updates on current column. work = Sum_{i=0}^{k-1} L(k,i) * D(i,i) * L(k:n, i)
54  //(the formula above generalizes to block matrix form in the case of 2x2 pivots).
55  update(k, work, curr_nnzs, *this, D, in_set);
56 
57  //store diagonal element in d1. set diagonal element in work vector to 0
58  //since we want to find the maximum off-diagonal element.
59  d1 = work[k];
60  work[k] = 0;
61 
62  //find maximum element in work and store its index in r.
63  w1 = max(work, curr_nnzs, r);
64 
65  if (piv_type == pivot_type::BKP) {
66  //we do partial pivoting here, where we take the first element u in the column that satisfies
67  //|u| > pp_tol*|wi|. for more information, consult "A Partial Pivoting Strategy for Sparse
68  //Symmetric Matrix Decomposition" by J.H. Liu (1987).
69  int t = r; //stores location of u
70  el_type u = w1; //stores value of u
71  for (i = 0; i < (int) curr_nnzs.size(); i++) {
72  if (abs(work[curr_nnzs[i]])-pp_tol*w1 > eps ) {
73  t = curr_nnzs[i];
74  u = work[t];
75  break;
76  }
77  }
78 
79  //bunch-kaufman partial pivoting is used below. for a more detailed reference,
80  //refer to "Accuracy and Stability of Numerical Algorithms." by Higham (2002).
81  //------------------- begin bunch-kaufman pivoting ------------------//
82  if (w1 < eps) {
83  //case 0: do nothing. pivot is k.
84  } else if ( (alpha * w1 - abs(d1)) < eps ) {
85  //case 1: do nothing. pivot is k.
86  } else {
87  //since we are doing partial pivoting, we should treat u and t like wi and r, so
88  //we'll just reassign wi and r. note: this has to go in the else clause since
89  //we still use the old wi for case 0 and case 1.
90  w1 = u;
91  r = t;
92 
93  offset = row_first[r];
94  //assign all nonzero indices and values in A(r, k:r)
95  //( not including A(r,r) ) to temp and temp_nnzs
96  for (j = offset; j < (int) list[r].size(); j++) {
97  temp_nnzs.push_back(list[r][j]);
98  temp[list[r][j]] = coeff(r, list[r][j]);
99  }
100 
101  //assign nonzero indices of A(r:n, r) to temp_nnzs
102  temp_nnzs.insert(temp_nnzs.end(), m_idx[r].begin(), m_idx[r].end());
103 
104  //assign nonzero values of to temp
105  for (j = 0; j < (int) m_idx[r].size(); j++) {
106  temp[m_idx[r][j]] = m_x[r][j];
107  }
108 
109  //perform delayed updates on temp. temp = Sum_{i=0}^{k-1} L(r,i) * D(i,i) * L(k:n, i).
110  //(the formula above generalizes to block matrix form in the case of 2x2 pivots).
111  update(r, temp, temp_nnzs, *this, D, in_set);
112 
113  dr = temp[r];
114  temp[r] = 0;
115 
116  //find maximum element in temp.
117  wr = max(temp, temp_nnzs, j);
118 
119  if ((alpha*w1*w1 - abs(d1)*wr) < eps) {
120  //case 2: do nothing. pivot is k.
121 
122  } else if ( (alpha * wr - abs(dr)) < eps) {
123 
124  //case 3: pivot is k with r: 1x1 pivot case.
125  temp[r] = dr;
126  work[k] = d1;
127 
128  //--------pivot A ---------//
129  pivotA(s, in_set, k, r);
130 
131  //----------pivot rest ----------//
132  std::swap(d1, dr);
133 
134  //permute perm
135  std::swap(perm[k], perm[r]);
136 
137  work.swap(temp); //swap work with temp.
138  std::swap(work[k], work[r]); //swap kth and rth row of work
139 
140  curr_nnzs.swap(temp_nnzs); //swap curr_nnzs with temp_nnzs
141 
142  safe_swap(curr_nnzs, k, r); //swap k and r if they are present in curr_nnzs
143 
144  //--------end pivot rest---------//
145 
146  } else {
147  //case 4: pivot is k+1 with r: 2x2 pivot case.
148 
149  // remote col k for convenience while we are pivoting on col k+1
150  for (int i = 0; i < m_idx[k].size(); i++) {
151  int l = m_idx[k][i];
152  if (l == k) continue;
153  for (int j = row_first[l]; j < list[l].size(); j++) {
154  if (list[l][j] == k) {
155  std::swap(list[l][j], list[l][list[l].size()-1]);
156  list[l].pop_back();
157  break;
158  }
159  }
160  }
161  m_x[k].clear();
162  m_idx[k].clear();
163  col_first[k] = 0;
164 
165  //restore diagonal elements in work and temp
166  temp[r] = dr;
167  work[k] = d1;
168 
169  //indicate that pivot is 2x2
170  size_two_piv = true;
171 
172  if (k+1 < r) {
173  //symmetrically permute row/col k+1 and r.
174  pivotA(s, in_set, k+1, r);
175 
176  //----------pivot rest ----------//
177 
178  //permute perm
179  std::swap(perm[k+1], perm[r]);
180 
181  //swap rows k+1 and r of work and temp
182  std::swap(work[k+1], work[r]);
183  std::swap(temp[k+1], temp[r]);
184 
185  //swap k+1 and r in curr_nnzs and temp_nnzs
186  safe_swap(curr_nnzs, k+1, r);
187  safe_swap(temp_nnzs, k+1, r);
188  }
189  }
190  }
191  //--------------end bkp pivoting--------------//
192  } else if (piv_type == pivot_type::ROOK) {
193  //--------------begin rook pivoting--------------//
194  i = k;
195  work[k] = d1;
196 
197  if (alpha * w1 <= abs(d1) + eps) {
198  // do nothing
199  } else {
200  while (true) {
201  // assign nonzeros indices and values of A(k:n, r) to col_r_nnzs
202  for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) {
203  temp[*it] = 0;
204  }
205  temp_nnzs.clear();
206 
207  offset = row_first[r];
208  //assign all nonzero indices and values in A(r, k:r)
209  //( not including A(r,r) ) to temp and temp_nnzs
210  for (j = offset; j < (int) list[r].size(); j++) {
211  temp_nnzs.push_back(list[r][j]);
212  temp[list[r][j]] = coeff(r, list[r][j]);
213  }
214 
215  //assign nonzero indices of A(r:n, r) to temp_nnzs
216  temp_nnzs.insert(temp_nnzs.end(), m_idx[r].begin(), m_idx[r].end());
217 
218  //assign nonzero values of to temp
219  for (j = 0; j < (int) m_idx[r].size(); j++) {
220  temp[m_idx[r][j]] = m_x[r][j];
221  }
222 
223  //perform delayed updates on temp. temp = Sum_{i=0}^{k-1} L(r,i) * D(i,i) * L(k:n, i).
224  //(the formula above generalizes to block matrix form in the case of 2x2 pivots).
225  update(r, temp, temp_nnzs, *this, D, in_set);
226 
227  dr = temp[r];
228  temp[r] = 0;
229 
230  //find maximum element in temp.
231  wr = max(temp, temp_nnzs, j);
232  temp[r] = dr;
233 
234  if (alpha * wr <= abs(dr) + eps) {
235  // swap rows and columns k and r
236  this->pivotA(s, in_set, k, r);
237 
238  std::swap(perm[k], perm[r]);
239 
240  std::swap(temp[k], temp[r]);
241  work.swap(temp);
242 
243  safe_swap(temp_nnzs, k, r);
244  curr_nnzs.swap(temp_nnzs);
245 
246  d1 = work[k];
247  break;
248  } else if (abs(w1 - wr) < eps) {
249  size_two_piv = true;
250  // swap rows and columns k and i, k+1 and r
251  if (k != i) {
252  this->pivotA(s, in_set, k, i);
253 
254  std::swap(perm[k], perm[i]);
255 
256  std::swap(work[k], work[i]);
257  std::swap(temp[k], temp[i]);
258 
259  safe_swap(curr_nnzs, k, i);
260  safe_swap(temp_nnzs, k, i);
261 
262  d1 = work[k];
263  }
264 
265  if (k+1 < r) {
266  this->pivotA(s, in_set, k+1, r);
267 
268  std::swap(perm[k+1], perm[r]);
269 
270  std::swap(work[k+1], work[r]);
271  std::swap(temp[k+1], temp[r]);
272 
273  safe_swap(curr_nnzs, k+1, r);
274  safe_swap(temp_nnzs, k+1, r);
275 
276  dr = temp[k+1];
277  }
278  break;
279  } else {
280  i = r;
281  w1 = wr;
282  r = j;
283  work.swap(temp);
284  curr_nnzs.swap(temp_nnzs);
285  }
286  }
287  }
288  //--------------end rook pivoting--------------//
289  }
290 
291  //erase diagonal element from non-zero indices (to exclude it from being dropped)
292  curr_nnzs.erase(std::remove(curr_nnzs.begin(), curr_nnzs.end(), k), curr_nnzs.end());
293 
294  //performs the dual dropping procedure.
295  if (!size_two_piv) {
296  //perform dual dropping criteria on work
297  drop_tol(work, curr_nnzs, lfil, tol);
298 
299  } else {
300  //erase diagonal 2x2 block from non-zero indices (to exclude it from being dropped)
301  temp_nnzs.erase(std::remove(temp_nnzs.begin(), temp_nnzs.end(), k), temp_nnzs.end());
302  curr_nnzs.erase(std::remove(curr_nnzs.begin(), curr_nnzs.end(), k+1), curr_nnzs.end());
303  temp_nnzs.erase(std::remove(temp_nnzs.begin(), temp_nnzs.end(), k+1), temp_nnzs.end());
304 
305  //compute inverse of the 2x2 block diagonal pivot.
306  det_D = d1*dr - work[k+1]*work[k+1];
307  if ( abs(det_D) < eps) det_D = 1e-6; //statically pivot;
308  D_inv11 = dr/det_D;
309  D_inv22 = d1/det_D;
310  D_inv12 = -work[k+1]/det_D;
311 
312  //assign pivot to D (d1 is assigned to D(k,k) later)
313  D.off_diagonal(k) = work[k+1];
314  D[k+1] = dr;
315 
316  //merge nonzeros of curr and temp together so iterating through them will be easier
317  unordered_inplace_union(curr_nnzs, temp_nnzs.begin(), temp_nnzs.end(), in_set);
318 
319 
320  //multiply inverse of pivot to work and temp (gives us two columns of l)
321  for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) {
322  l_11 = work[*it]*D_inv11 + temp[*it]*D_inv12;
323  l_12 = work[*it]*D_inv12 + temp[*it]*D_inv22;
324 
325  //note that work and temp roughly share the same non-zero indices
326  work[*it] = l_11;
327  temp[*it] = l_12;
328  }
329 
330  //since the work and temp non-zero indices are roughly the same,
331  //we can copy it over to temp_nnzs
332  temp_nnzs.assign(curr_nnzs.begin(), curr_nnzs.end());
333 
334  //perform dual dropping procedure on work and temp
335  drop_tol(temp, temp_nnzs, lfil, tol);
336  drop_tol(work, curr_nnzs, lfil, tol);
337  }
338 
339  // remove current column for list, and add new column back in (inplace)
340  for (i = 0; i < m_idx[k].size(); i++) {
341  r = m_idx[k][i];
342  if (r == k) continue;
343  for (j = row_first[r]; j < list[r].size(); j++) {
344  if (list[r][j] == k) {
345  std::swap(list[r][j], list[r][list[r].size()-1]);
346  list[r].pop_back();
347  break;
348  }
349  }
350  }
351 
352  //resize kth column of L to proper size.
353  m_idx[k].resize(curr_nnzs.size()+1);
354  m_x[k].resize(curr_nnzs.size()+1);
355 
356  //assign diagonal element to D
357  D[k] = d1;
358 
359  //assign 1s to diagonal of L.
360  m_x[k][0] = 1;
361  m_idx[k][0] = k;
362  count++;
363 
364  if (!size_two_piv) {
365  if ( abs(D[k]) < eps) D[k] = 1e-6; //statically pivot
366  i = 1;
367 
368  for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) {
369  if ( abs(work[*it]) > eps) {
370  m_idx[k][i] = *it; //col k nonzero indices of L are stored
371  m_x[k][i] = work[*it]/D[k]; //col k nonzero values of L are stored
372 
373  list[*it].push_back(k); //update Llist
374  count++;
375  i++;
376  }
377  }
378 
379  col_size = i;
380  } else {
381  // remove occurences of current column from list, and add in new column
382  for (i = 0; i < m_idx[k+1].size(); i++) {
383  r = m_idx[k+1][i];
384  if (r == k+1) continue;
385  for (j = row_first[r]; j < list[r].size(); j++) {
386  if (list[r][j] == k+1) {
387  std::swap(list[r][j], list[r][list[r].size()-1]);
388  list[r].pop_back();
389  break;
390  }
391  }
392  }
393 
394  i = 1;
395  for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) {
396  if ( abs(work[*it]) > eps) {
397  m_x[k][i] = work[*it]; //col k nonzero indices of L are stored
398  m_idx[k][i] = *it; //col k nonzero values of L are stored
399 
400  list[*it].push_back(k); //update Llist
401  count++;
402  i++;
403  }
404 
405  }
406 
407  //resize k+1th column of L to proper size.
408  m_idx[k+1].resize(temp_nnzs.size()+1);
409  m_x[k+1].resize(temp_nnzs.size()+1);
410 
411  //assign 1s to diagonal of L.
412  m_x[k+1][0] = 1;
413  m_idx[k+1][0] = k+1;
414  count++;
415 
416  j = 1;
417  for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) {
418  if ( abs(temp[*it]) > eps) {
419  m_x[k+1][j] = temp[*it]; //col k+1 nonzero indices of L are stored
420  m_idx[k+1][j] = *it; //col k+1 nonzero values of L are stored
421 
422  list[*it].push_back(k+1); //update Llist
423  count++;
424  j++;
425  }
426 
427  }
428 
429  col_size = i;
430  col_size2 = j;
431  }
432 
433  // ------------- reset temp and work back to zero -----------------//
434  work[k] = 0;
435  temp[k] = 0;
436 
437  if (k + 1 < ncols) {
438  temp[k+1] = 0;
439  work[k+1] = 0;
440  }
441 
442  for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) {
443  work[*it] = 0;
444  }
445  curr_nnzs.clear(); //zero out work vector
446 
447  for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) {
448  temp[*it] = 0;
449  }
450  temp_nnzs.clear(); //zero out work vector
451 
452  //-------------------------------------------------------------------//
453 
454  //resize columns of L to correct size
455  m_x[k].resize(col_size);
456  m_idx[k].resize(col_size);
457 
458  //update list and L.first
459  col_first[k] = 1;
460  advance_list(k);
461  advance_first(k);
462 
463  if (size_two_piv) {
464  m_x[k+1].resize(col_size2);
465  m_idx[k+1].resize(col_size2);
466 
467  col_first[k+1] = 1;
468  advance_first(k+1);
469  advance_list(k+1);
470 
471  k++;
472 
473  size_two_piv = false;
474  }
475  }
476 
477  //assign number of non-zeros in L to L.nnz_count
478  this->nnz_count = count;
479 }
480 
481 #endif
el_type & off_diagonal(int i)
Definition: block_diag_matrix.h:114
void ildl_inplace(block_diag_matrix< el_type > &D, idx_vector_type &perm, const double &fill_factor, const double &tol, const double &pp_tol, int piv_type=pivot_type::BKP)
Performs an inplace LDL&#39; factorization of this matrix.
Definition: lilc_matrix_ildl_inplace.h:10
void resize(int n, el_type default_value)
Resizes this matrix to an n*n matrix with default_value on the main diagonal.
Definition: block_diag_matrix.h:70
A quick implementation of a diagonal matrix with 1x1 and 2x2 blocks.
Definition: block_diag_matrix.h:39
A structure containing variables used in pivoting a LIL-C matrix.
Definition: swap_struct.h:6