1 /*
2 Defines the basic matrix operations for the BAIJ (compressed row)
3 matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31
32 MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33
MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal * reductions)34 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35 {
36 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37 PetscInt m, n, ib, jb, bs = A->rmap->bs;
38 MatScalar *a_val = a_aij->a;
39
40 PetscFunctionBegin;
41 PetscCall(MatGetSize(A, &m, &n));
42 PetscCall(PetscArrayzero(reductions, n));
43 if (type == NORM_2) {
44 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
45 for (jb = 0; jb < bs; jb++) {
46 for (ib = 0; ib < bs; ib++) {
47 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
48 a_val++;
49 }
50 }
51 }
52 } else if (type == NORM_1) {
53 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
54 for (jb = 0; jb < bs; jb++) {
55 for (ib = 0; ib < bs; ib++) {
56 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
57 a_val++;
58 }
59 }
60 }
61 } else if (type == NORM_INFINITY) {
62 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
63 for (jb = 0; jb < bs; jb++) {
64 for (ib = 0; ib < bs; ib++) {
65 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
66 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
67 a_val++;
68 }
69 }
70 }
71 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73 for (jb = 0; jb < bs; jb++) {
74 for (ib = 0; ib < bs; ib++) {
75 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76 a_val++;
77 }
78 }
79 }
80 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82 for (jb = 0; jb < bs; jb++) {
83 for (ib = 0; ib < bs; ib++) {
84 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85 a_val++;
86 }
87 }
88 }
89 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
90 if (type == NORM_2) {
91 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93 for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
94 }
95 PetscFunctionReturn(PETSC_SUCCESS);
96 }
97
MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar ** values)98 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99 {
100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
101 PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
102 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work;
103 PetscReal shift = 0.0;
104 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
105 const PetscInt *adiag;
106
107 PetscFunctionBegin;
108 allowzeropivot = PetscNot(A->erroriffailure);
109
110 if (a->idiagvalid) {
111 if (values) *values = a->idiag;
112 PetscFunctionReturn(PETSC_SUCCESS);
113 }
114 PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
115 if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116 diag = a->idiag;
117 if (values) *values = a->idiag;
118 /* factor and invert each block */
119 switch (bs) {
120 case 1:
121 for (i = 0; i < mbs; i++) {
122 odiag = v + 1 * adiag[i];
123 diag[0] = odiag[0];
124
125 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
127 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
129 A->factorerror_zeropivot_row = i;
130 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131 }
132
133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134 diag += 1;
135 }
136 break;
137 case 2:
138 for (i = 0; i < mbs; i++) {
139 odiag = v + 4 * adiag[i];
140 diag[0] = odiag[0];
141 diag[1] = odiag[1];
142 diag[2] = odiag[2];
143 diag[3] = odiag[3];
144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146 diag += 4;
147 }
148 break;
149 case 3:
150 for (i = 0; i < mbs; i++) {
151 odiag = v + 9 * adiag[i];
152 diag[0] = odiag[0];
153 diag[1] = odiag[1];
154 diag[2] = odiag[2];
155 diag[3] = odiag[3];
156 diag[4] = odiag[4];
157 diag[5] = odiag[5];
158 diag[6] = odiag[6];
159 diag[7] = odiag[7];
160 diag[8] = odiag[8];
161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163 diag += 9;
164 }
165 break;
166 case 4:
167 for (i = 0; i < mbs; i++) {
168 odiag = v + 16 * adiag[i];
169 PetscCall(PetscArraycpy(diag, odiag, 16));
170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172 diag += 16;
173 }
174 break;
175 case 5:
176 for (i = 0; i < mbs; i++) {
177 odiag = v + 25 * adiag[i];
178 PetscCall(PetscArraycpy(diag, odiag, 25));
179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181 diag += 25;
182 }
183 break;
184 case 6:
185 for (i = 0; i < mbs; i++) {
186 odiag = v + 36 * adiag[i];
187 PetscCall(PetscArraycpy(diag, odiag, 36));
188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190 diag += 36;
191 }
192 break;
193 case 7:
194 for (i = 0; i < mbs; i++) {
195 odiag = v + 49 * adiag[i];
196 PetscCall(PetscArraycpy(diag, odiag, 49));
197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199 diag += 49;
200 }
201 break;
202 default:
203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204 for (i = 0; i < mbs; i++) {
205 odiag = v + bs2 * adiag[i];
206 PetscCall(PetscArraycpy(diag, odiag, bs2));
207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209 diag += bs2;
210 }
211 PetscCall(PetscFree2(v_work, v_pivots));
212 }
213 a->idiagvalid = PETSC_TRUE;
214 PetscFunctionReturn(PETSC_SUCCESS);
215 }
216
MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
220 PetscScalar *x, *work, *w, *workt, *t;
221 const MatScalar *v, *aa = a->a, *idiag;
222 const PetscScalar *b, *xb;
223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi;
226
227 PetscFunctionBegin;
228 its = its * lits;
229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234
235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236
237 if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238 diag = a->diag;
239 idiag = a->idiag;
240 k = PetscMax(A->rmap->n, A->cmap->n);
241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244 work = a->mult_work;
245 t = a->sor_workt;
246 w = a->sor_work;
247
248 PetscCall(VecGetArray(xx, &x));
249 PetscCall(VecGetArrayRead(bb, &b));
250
251 if (flag & SOR_ZERO_INITIAL_GUESS) {
252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253 switch (bs) {
254 case 1:
255 PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256 t[0] = b[0];
257 i2 = 1;
258 idiag += 1;
259 for (i = 1; i < m; i++) {
260 v = aa + ai[i];
261 vi = aj + ai[i];
262 nz = diag[i] - ai[i];
263 s[0] = b[i2];
264 for (j = 0; j < nz; j++) {
265 xw[0] = x[vi[j]];
266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267 }
268 t[i2] = s[0];
269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270 x[i2] = xw[0];
271 idiag += 1;
272 i2 += 1;
273 }
274 break;
275 case 2:
276 PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277 t[0] = b[0];
278 t[1] = b[1];
279 i2 = 2;
280 idiag += 4;
281 for (i = 1; i < m; i++) {
282 v = aa + 4 * ai[i];
283 vi = aj + ai[i];
284 nz = diag[i] - ai[i];
285 s[0] = b[i2];
286 s[1] = b[i2 + 1];
287 for (j = 0; j < nz; j++) {
288 idx = 2 * vi[j];
289 it = 4 * j;
290 xw[0] = x[idx];
291 xw[1] = x[1 + idx];
292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293 }
294 t[i2] = s[0];
295 t[i2 + 1] = s[1];
296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297 x[i2] = xw[0];
298 x[i2 + 1] = xw[1];
299 idiag += 4;
300 i2 += 2;
301 }
302 break;
303 case 3:
304 PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305 t[0] = b[0];
306 t[1] = b[1];
307 t[2] = b[2];
308 i2 = 3;
309 idiag += 9;
310 for (i = 1; i < m; i++) {
311 v = aa + 9 * ai[i];
312 vi = aj + ai[i];
313 nz = diag[i] - ai[i];
314 s[0] = b[i2];
315 s[1] = b[i2 + 1];
316 s[2] = b[i2 + 2];
317 while (nz--) {
318 idx = 3 * (*vi++);
319 xw[0] = x[idx];
320 xw[1] = x[1 + idx];
321 xw[2] = x[2 + idx];
322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323 v += 9;
324 }
325 t[i2] = s[0];
326 t[i2 + 1] = s[1];
327 t[i2 + 2] = s[2];
328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329 x[i2] = xw[0];
330 x[i2 + 1] = xw[1];
331 x[i2 + 2] = xw[2];
332 idiag += 9;
333 i2 += 3;
334 }
335 break;
336 case 4:
337 PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338 t[0] = b[0];
339 t[1] = b[1];
340 t[2] = b[2];
341 t[3] = b[3];
342 i2 = 4;
343 idiag += 16;
344 for (i = 1; i < m; i++) {
345 v = aa + 16 * ai[i];
346 vi = aj + ai[i];
347 nz = diag[i] - ai[i];
348 s[0] = b[i2];
349 s[1] = b[i2 + 1];
350 s[2] = b[i2 + 2];
351 s[3] = b[i2 + 3];
352 while (nz--) {
353 idx = 4 * (*vi++);
354 xw[0] = x[idx];
355 xw[1] = x[1 + idx];
356 xw[2] = x[2 + idx];
357 xw[3] = x[3 + idx];
358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359 v += 16;
360 }
361 t[i2] = s[0];
362 t[i2 + 1] = s[1];
363 t[i2 + 2] = s[2];
364 t[i2 + 3] = s[3];
365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366 x[i2] = xw[0];
367 x[i2 + 1] = xw[1];
368 x[i2 + 2] = xw[2];
369 x[i2 + 3] = xw[3];
370 idiag += 16;
371 i2 += 4;
372 }
373 break;
374 case 5:
375 PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376 t[0] = b[0];
377 t[1] = b[1];
378 t[2] = b[2];
379 t[3] = b[3];
380 t[4] = b[4];
381 i2 = 5;
382 idiag += 25;
383 for (i = 1; i < m; i++) {
384 v = aa + 25 * ai[i];
385 vi = aj + ai[i];
386 nz = diag[i] - ai[i];
387 s[0] = b[i2];
388 s[1] = b[i2 + 1];
389 s[2] = b[i2 + 2];
390 s[3] = b[i2 + 3];
391 s[4] = b[i2 + 4];
392 while (nz--) {
393 idx = 5 * (*vi++);
394 xw[0] = x[idx];
395 xw[1] = x[1 + idx];
396 xw[2] = x[2 + idx];
397 xw[3] = x[3 + idx];
398 xw[4] = x[4 + idx];
399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400 v += 25;
401 }
402 t[i2] = s[0];
403 t[i2 + 1] = s[1];
404 t[i2 + 2] = s[2];
405 t[i2 + 3] = s[3];
406 t[i2 + 4] = s[4];
407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408 x[i2] = xw[0];
409 x[i2 + 1] = xw[1];
410 x[i2 + 2] = xw[2];
411 x[i2 + 3] = xw[3];
412 x[i2 + 4] = xw[4];
413 idiag += 25;
414 i2 += 5;
415 }
416 break;
417 case 6:
418 PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419 t[0] = b[0];
420 t[1] = b[1];
421 t[2] = b[2];
422 t[3] = b[3];
423 t[4] = b[4];
424 t[5] = b[5];
425 i2 = 6;
426 idiag += 36;
427 for (i = 1; i < m; i++) {
428 v = aa + 36 * ai[i];
429 vi = aj + ai[i];
430 nz = diag[i] - ai[i];
431 s[0] = b[i2];
432 s[1] = b[i2 + 1];
433 s[2] = b[i2 + 2];
434 s[3] = b[i2 + 3];
435 s[4] = b[i2 + 4];
436 s[5] = b[i2 + 5];
437 while (nz--) {
438 idx = 6 * (*vi++);
439 xw[0] = x[idx];
440 xw[1] = x[1 + idx];
441 xw[2] = x[2 + idx];
442 xw[3] = x[3 + idx];
443 xw[4] = x[4 + idx];
444 xw[5] = x[5 + idx];
445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446 v += 36;
447 }
448 t[i2] = s[0];
449 t[i2 + 1] = s[1];
450 t[i2 + 2] = s[2];
451 t[i2 + 3] = s[3];
452 t[i2 + 4] = s[4];
453 t[i2 + 5] = s[5];
454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455 x[i2] = xw[0];
456 x[i2 + 1] = xw[1];
457 x[i2 + 2] = xw[2];
458 x[i2 + 3] = xw[3];
459 x[i2 + 4] = xw[4];
460 x[i2 + 5] = xw[5];
461 idiag += 36;
462 i2 += 6;
463 }
464 break;
465 case 7:
466 PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467 t[0] = b[0];
468 t[1] = b[1];
469 t[2] = b[2];
470 t[3] = b[3];
471 t[4] = b[4];
472 t[5] = b[5];
473 t[6] = b[6];
474 i2 = 7;
475 idiag += 49;
476 for (i = 1; i < m; i++) {
477 v = aa + 49 * ai[i];
478 vi = aj + ai[i];
479 nz = diag[i] - ai[i];
480 s[0] = b[i2];
481 s[1] = b[i2 + 1];
482 s[2] = b[i2 + 2];
483 s[3] = b[i2 + 3];
484 s[4] = b[i2 + 4];
485 s[5] = b[i2 + 5];
486 s[6] = b[i2 + 6];
487 while (nz--) {
488 idx = 7 * (*vi++);
489 xw[0] = x[idx];
490 xw[1] = x[1 + idx];
491 xw[2] = x[2 + idx];
492 xw[3] = x[3 + idx];
493 xw[4] = x[4 + idx];
494 xw[5] = x[5 + idx];
495 xw[6] = x[6 + idx];
496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497 v += 49;
498 }
499 t[i2] = s[0];
500 t[i2 + 1] = s[1];
501 t[i2 + 2] = s[2];
502 t[i2 + 3] = s[3];
503 t[i2 + 4] = s[4];
504 t[i2 + 5] = s[5];
505 t[i2 + 6] = s[6];
506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507 x[i2] = xw[0];
508 x[i2 + 1] = xw[1];
509 x[i2 + 2] = xw[2];
510 x[i2 + 3] = xw[3];
511 x[i2 + 4] = xw[4];
512 x[i2 + 5] = xw[5];
513 x[i2 + 6] = xw[6];
514 idiag += 49;
515 i2 += 7;
516 }
517 break;
518 default:
519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520 PetscCall(PetscArraycpy(t, b, bs));
521 i2 = bs;
522 idiag += bs2;
523 for (i = 1; i < m; i++) {
524 v = aa + bs2 * ai[i];
525 vi = aj + ai[i];
526 nz = diag[i] - ai[i];
527
528 PetscCall(PetscArraycpy(w, b + i2, bs));
529 /* copy all rows of x that are needed into contiguous space */
530 workt = work;
531 for (j = 0; j < nz; j++) {
532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533 workt += bs;
534 }
535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536 PetscCall(PetscArraycpy(t + i2, w, bs));
537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538
539 idiag += bs2;
540 i2 += bs;
541 }
542 break;
543 }
544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546 xb = t;
547 } else xb = b;
548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549 idiag = a->idiag + bs2 * (a->mbs - 1);
550 i2 = bs * (m - 1);
551 switch (bs) {
552 case 1:
553 s[0] = xb[i2];
554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555 x[i2] = xw[0];
556 i2 -= 1;
557 for (i = m - 2; i >= 0; i--) {
558 v = aa + (diag[i] + 1);
559 vi = aj + diag[i] + 1;
560 nz = ai[i + 1] - diag[i] - 1;
561 s[0] = xb[i2];
562 for (j = 0; j < nz; j++) {
563 xw[0] = x[vi[j]];
564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565 }
566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567 x[i2] = xw[0];
568 idiag -= 1;
569 i2 -= 1;
570 }
571 break;
572 case 2:
573 s[0] = xb[i2];
574 s[1] = xb[i2 + 1];
575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576 x[i2] = xw[0];
577 x[i2 + 1] = xw[1];
578 i2 -= 2;
579 idiag -= 4;
580 for (i = m - 2; i >= 0; i--) {
581 v = aa + 4 * (diag[i] + 1);
582 vi = aj + diag[i] + 1;
583 nz = ai[i + 1] - diag[i] - 1;
584 s[0] = xb[i2];
585 s[1] = xb[i2 + 1];
586 for (j = 0; j < nz; j++) {
587 idx = 2 * vi[j];
588 it = 4 * j;
589 xw[0] = x[idx];
590 xw[1] = x[1 + idx];
591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592 }
593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594 x[i2] = xw[0];
595 x[i2 + 1] = xw[1];
596 idiag -= 4;
597 i2 -= 2;
598 }
599 break;
600 case 3:
601 s[0] = xb[i2];
602 s[1] = xb[i2 + 1];
603 s[2] = xb[i2 + 2];
604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605 x[i2] = xw[0];
606 x[i2 + 1] = xw[1];
607 x[i2 + 2] = xw[2];
608 i2 -= 3;
609 idiag -= 9;
610 for (i = m - 2; i >= 0; i--) {
611 v = aa + 9 * (diag[i] + 1);
612 vi = aj + diag[i] + 1;
613 nz = ai[i + 1] - diag[i] - 1;
614 s[0] = xb[i2];
615 s[1] = xb[i2 + 1];
616 s[2] = xb[i2 + 2];
617 while (nz--) {
618 idx = 3 * (*vi++);
619 xw[0] = x[idx];
620 xw[1] = x[1 + idx];
621 xw[2] = x[2 + idx];
622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623 v += 9;
624 }
625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626 x[i2] = xw[0];
627 x[i2 + 1] = xw[1];
628 x[i2 + 2] = xw[2];
629 idiag -= 9;
630 i2 -= 3;
631 }
632 break;
633 case 4:
634 s[0] = xb[i2];
635 s[1] = xb[i2 + 1];
636 s[2] = xb[i2 + 2];
637 s[3] = xb[i2 + 3];
638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639 x[i2] = xw[0];
640 x[i2 + 1] = xw[1];
641 x[i2 + 2] = xw[2];
642 x[i2 + 3] = xw[3];
643 i2 -= 4;
644 idiag -= 16;
645 for (i = m - 2; i >= 0; i--) {
646 v = aa + 16 * (diag[i] + 1);
647 vi = aj + diag[i] + 1;
648 nz = ai[i + 1] - diag[i] - 1;
649 s[0] = xb[i2];
650 s[1] = xb[i2 + 1];
651 s[2] = xb[i2 + 2];
652 s[3] = xb[i2 + 3];
653 while (nz--) {
654 idx = 4 * (*vi++);
655 xw[0] = x[idx];
656 xw[1] = x[1 + idx];
657 xw[2] = x[2 + idx];
658 xw[3] = x[3 + idx];
659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660 v += 16;
661 }
662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663 x[i2] = xw[0];
664 x[i2 + 1] = xw[1];
665 x[i2 + 2] = xw[2];
666 x[i2 + 3] = xw[3];
667 idiag -= 16;
668 i2 -= 4;
669 }
670 break;
671 case 5:
672 s[0] = xb[i2];
673 s[1] = xb[i2 + 1];
674 s[2] = xb[i2 + 2];
675 s[3] = xb[i2 + 3];
676 s[4] = xb[i2 + 4];
677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678 x[i2] = xw[0];
679 x[i2 + 1] = xw[1];
680 x[i2 + 2] = xw[2];
681 x[i2 + 3] = xw[3];
682 x[i2 + 4] = xw[4];
683 i2 -= 5;
684 idiag -= 25;
685 for (i = m - 2; i >= 0; i--) {
686 v = aa + 25 * (diag[i] + 1);
687 vi = aj + diag[i] + 1;
688 nz = ai[i + 1] - diag[i] - 1;
689 s[0] = xb[i2];
690 s[1] = xb[i2 + 1];
691 s[2] = xb[i2 + 2];
692 s[3] = xb[i2 + 3];
693 s[4] = xb[i2 + 4];
694 while (nz--) {
695 idx = 5 * (*vi++);
696 xw[0] = x[idx];
697 xw[1] = x[1 + idx];
698 xw[2] = x[2 + idx];
699 xw[3] = x[3 + idx];
700 xw[4] = x[4 + idx];
701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702 v += 25;
703 }
704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705 x[i2] = xw[0];
706 x[i2 + 1] = xw[1];
707 x[i2 + 2] = xw[2];
708 x[i2 + 3] = xw[3];
709 x[i2 + 4] = xw[4];
710 idiag -= 25;
711 i2 -= 5;
712 }
713 break;
714 case 6:
715 s[0] = xb[i2];
716 s[1] = xb[i2 + 1];
717 s[2] = xb[i2 + 2];
718 s[3] = xb[i2 + 3];
719 s[4] = xb[i2 + 4];
720 s[5] = xb[i2 + 5];
721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722 x[i2] = xw[0];
723 x[i2 + 1] = xw[1];
724 x[i2 + 2] = xw[2];
725 x[i2 + 3] = xw[3];
726 x[i2 + 4] = xw[4];
727 x[i2 + 5] = xw[5];
728 i2 -= 6;
729 idiag -= 36;
730 for (i = m - 2; i >= 0; i--) {
731 v = aa + 36 * (diag[i] + 1);
732 vi = aj + diag[i] + 1;
733 nz = ai[i + 1] - diag[i] - 1;
734 s[0] = xb[i2];
735 s[1] = xb[i2 + 1];
736 s[2] = xb[i2 + 2];
737 s[3] = xb[i2 + 3];
738 s[4] = xb[i2 + 4];
739 s[5] = xb[i2 + 5];
740 while (nz--) {
741 idx = 6 * (*vi++);
742 xw[0] = x[idx];
743 xw[1] = x[1 + idx];
744 xw[2] = x[2 + idx];
745 xw[3] = x[3 + idx];
746 xw[4] = x[4 + idx];
747 xw[5] = x[5 + idx];
748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749 v += 36;
750 }
751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752 x[i2] = xw[0];
753 x[i2 + 1] = xw[1];
754 x[i2 + 2] = xw[2];
755 x[i2 + 3] = xw[3];
756 x[i2 + 4] = xw[4];
757 x[i2 + 5] = xw[5];
758 idiag -= 36;
759 i2 -= 6;
760 }
761 break;
762 case 7:
763 s[0] = xb[i2];
764 s[1] = xb[i2 + 1];
765 s[2] = xb[i2 + 2];
766 s[3] = xb[i2 + 3];
767 s[4] = xb[i2 + 4];
768 s[5] = xb[i2 + 5];
769 s[6] = xb[i2 + 6];
770 PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771 x[i2] = xw[0];
772 x[i2 + 1] = xw[1];
773 x[i2 + 2] = xw[2];
774 x[i2 + 3] = xw[3];
775 x[i2 + 4] = xw[4];
776 x[i2 + 5] = xw[5];
777 x[i2 + 6] = xw[6];
778 i2 -= 7;
779 idiag -= 49;
780 for (i = m - 2; i >= 0; i--) {
781 v = aa + 49 * (diag[i] + 1);
782 vi = aj + diag[i] + 1;
783 nz = ai[i + 1] - diag[i] - 1;
784 s[0] = xb[i2];
785 s[1] = xb[i2 + 1];
786 s[2] = xb[i2 + 2];
787 s[3] = xb[i2 + 3];
788 s[4] = xb[i2 + 4];
789 s[5] = xb[i2 + 5];
790 s[6] = xb[i2 + 6];
791 while (nz--) {
792 idx = 7 * (*vi++);
793 xw[0] = x[idx];
794 xw[1] = x[1 + idx];
795 xw[2] = x[2 + idx];
796 xw[3] = x[3 + idx];
797 xw[4] = x[4 + idx];
798 xw[5] = x[5 + idx];
799 xw[6] = x[6 + idx];
800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801 v += 49;
802 }
803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804 x[i2] = xw[0];
805 x[i2 + 1] = xw[1];
806 x[i2 + 2] = xw[2];
807 x[i2 + 3] = xw[3];
808 x[i2 + 4] = xw[4];
809 x[i2 + 5] = xw[5];
810 x[i2 + 6] = xw[6];
811 idiag -= 49;
812 i2 -= 7;
813 }
814 break;
815 default:
816 PetscCall(PetscArraycpy(w, xb + i2, bs));
817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818 i2 -= bs;
819 idiag -= bs2;
820 for (i = m - 2; i >= 0; i--) {
821 v = aa + bs2 * (diag[i] + 1);
822 vi = aj + diag[i] + 1;
823 nz = ai[i + 1] - diag[i] - 1;
824
825 PetscCall(PetscArraycpy(w, xb + i2, bs));
826 /* copy all rows of x that are needed into contiguous space */
827 workt = work;
828 for (j = 0; j < nz; j++) {
829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830 workt += bs;
831 }
832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834
835 idiag -= bs2;
836 i2 -= bs;
837 }
838 break;
839 }
840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841 }
842 its--;
843 }
844 while (its--) {
845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846 idiag = a->idiag;
847 i2 = 0;
848 switch (bs) {
849 case 1:
850 for (i = 0; i < m; i++) {
851 v = aa + ai[i];
852 vi = aj + ai[i];
853 nz = ai[i + 1] - ai[i];
854 s[0] = b[i2];
855 for (j = 0; j < nz; j++) {
856 xw[0] = x[vi[j]];
857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858 }
859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860 x[i2] += xw[0];
861 idiag += 1;
862 i2 += 1;
863 }
864 break;
865 case 2:
866 for (i = 0; i < m; i++) {
867 v = aa + 4 * ai[i];
868 vi = aj + ai[i];
869 nz = ai[i + 1] - ai[i];
870 s[0] = b[i2];
871 s[1] = b[i2 + 1];
872 for (j = 0; j < nz; j++) {
873 idx = 2 * vi[j];
874 it = 4 * j;
875 xw[0] = x[idx];
876 xw[1] = x[1 + idx];
877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878 }
879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880 x[i2] += xw[0];
881 x[i2 + 1] += xw[1];
882 idiag += 4;
883 i2 += 2;
884 }
885 break;
886 case 3:
887 for (i = 0; i < m; i++) {
888 v = aa + 9 * ai[i];
889 vi = aj + ai[i];
890 nz = ai[i + 1] - ai[i];
891 s[0] = b[i2];
892 s[1] = b[i2 + 1];
893 s[2] = b[i2 + 2];
894 while (nz--) {
895 idx = 3 * (*vi++);
896 xw[0] = x[idx];
897 xw[1] = x[1 + idx];
898 xw[2] = x[2 + idx];
899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900 v += 9;
901 }
902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903 x[i2] += xw[0];
904 x[i2 + 1] += xw[1];
905 x[i2 + 2] += xw[2];
906 idiag += 9;
907 i2 += 3;
908 }
909 break;
910 case 4:
911 for (i = 0; i < m; i++) {
912 v = aa + 16 * ai[i];
913 vi = aj + ai[i];
914 nz = ai[i + 1] - ai[i];
915 s[0] = b[i2];
916 s[1] = b[i2 + 1];
917 s[2] = b[i2 + 2];
918 s[3] = b[i2 + 3];
919 while (nz--) {
920 idx = 4 * (*vi++);
921 xw[0] = x[idx];
922 xw[1] = x[1 + idx];
923 xw[2] = x[2 + idx];
924 xw[3] = x[3 + idx];
925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926 v += 16;
927 }
928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929 x[i2] += xw[0];
930 x[i2 + 1] += xw[1];
931 x[i2 + 2] += xw[2];
932 x[i2 + 3] += xw[3];
933 idiag += 16;
934 i2 += 4;
935 }
936 break;
937 case 5:
938 for (i = 0; i < m; i++) {
939 v = aa + 25 * ai[i];
940 vi = aj + ai[i];
941 nz = ai[i + 1] - ai[i];
942 s[0] = b[i2];
943 s[1] = b[i2 + 1];
944 s[2] = b[i2 + 2];
945 s[3] = b[i2 + 3];
946 s[4] = b[i2 + 4];
947 while (nz--) {
948 idx = 5 * (*vi++);
949 xw[0] = x[idx];
950 xw[1] = x[1 + idx];
951 xw[2] = x[2 + idx];
952 xw[3] = x[3 + idx];
953 xw[4] = x[4 + idx];
954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955 v += 25;
956 }
957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958 x[i2] += xw[0];
959 x[i2 + 1] += xw[1];
960 x[i2 + 2] += xw[2];
961 x[i2 + 3] += xw[3];
962 x[i2 + 4] += xw[4];
963 idiag += 25;
964 i2 += 5;
965 }
966 break;
967 case 6:
968 for (i = 0; i < m; i++) {
969 v = aa + 36 * ai[i];
970 vi = aj + ai[i];
971 nz = ai[i + 1] - ai[i];
972 s[0] = b[i2];
973 s[1] = b[i2 + 1];
974 s[2] = b[i2 + 2];
975 s[3] = b[i2 + 3];
976 s[4] = b[i2 + 4];
977 s[5] = b[i2 + 5];
978 while (nz--) {
979 idx = 6 * (*vi++);
980 xw[0] = x[idx];
981 xw[1] = x[1 + idx];
982 xw[2] = x[2 + idx];
983 xw[3] = x[3 + idx];
984 xw[4] = x[4 + idx];
985 xw[5] = x[5 + idx];
986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987 v += 36;
988 }
989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990 x[i2] += xw[0];
991 x[i2 + 1] += xw[1];
992 x[i2 + 2] += xw[2];
993 x[i2 + 3] += xw[3];
994 x[i2 + 4] += xw[4];
995 x[i2 + 5] += xw[5];
996 idiag += 36;
997 i2 += 6;
998 }
999 break;
1000 case 7:
1001 for (i = 0; i < m; i++) {
1002 v = aa + 49 * ai[i];
1003 vi = aj + ai[i];
1004 nz = ai[i + 1] - ai[i];
1005 s[0] = b[i2];
1006 s[1] = b[i2 + 1];
1007 s[2] = b[i2 + 2];
1008 s[3] = b[i2 + 3];
1009 s[4] = b[i2 + 4];
1010 s[5] = b[i2 + 5];
1011 s[6] = b[i2 + 6];
1012 while (nz--) {
1013 idx = 7 * (*vi++);
1014 xw[0] = x[idx];
1015 xw[1] = x[1 + idx];
1016 xw[2] = x[2 + idx];
1017 xw[3] = x[3 + idx];
1018 xw[4] = x[4 + idx];
1019 xw[5] = x[5 + idx];
1020 xw[6] = x[6 + idx];
1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022 v += 49;
1023 }
1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025 x[i2] += xw[0];
1026 x[i2 + 1] += xw[1];
1027 x[i2 + 2] += xw[2];
1028 x[i2 + 3] += xw[3];
1029 x[i2 + 4] += xw[4];
1030 x[i2 + 5] += xw[5];
1031 x[i2 + 6] += xw[6];
1032 idiag += 49;
1033 i2 += 7;
1034 }
1035 break;
1036 default:
1037 for (i = 0; i < m; i++) {
1038 v = aa + bs2 * ai[i];
1039 vi = aj + ai[i];
1040 nz = ai[i + 1] - ai[i];
1041
1042 PetscCall(PetscArraycpy(w, b + i2, bs));
1043 /* copy all rows of x that are needed into contiguous space */
1044 workt = work;
1045 for (j = 0; j < nz; j++) {
1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047 workt += bs;
1048 }
1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051
1052 idiag += bs2;
1053 i2 += bs;
1054 }
1055 break;
1056 }
1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058 }
1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060 idiag = a->idiag + bs2 * (a->mbs - 1);
1061 i2 = bs * (m - 1);
1062 switch (bs) {
1063 case 1:
1064 for (i = m - 1; i >= 0; i--) {
1065 v = aa + ai[i];
1066 vi = aj + ai[i];
1067 nz = ai[i + 1] - ai[i];
1068 s[0] = b[i2];
1069 for (j = 0; j < nz; j++) {
1070 xw[0] = x[vi[j]];
1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072 }
1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074 x[i2] += xw[0];
1075 idiag -= 1;
1076 i2 -= 1;
1077 }
1078 break;
1079 case 2:
1080 for (i = m - 1; i >= 0; i--) {
1081 v = aa + 4 * ai[i];
1082 vi = aj + ai[i];
1083 nz = ai[i + 1] - ai[i];
1084 s[0] = b[i2];
1085 s[1] = b[i2 + 1];
1086 for (j = 0; j < nz; j++) {
1087 idx = 2 * vi[j];
1088 it = 4 * j;
1089 xw[0] = x[idx];
1090 xw[1] = x[1 + idx];
1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092 }
1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094 x[i2] += xw[0];
1095 x[i2 + 1] += xw[1];
1096 idiag -= 4;
1097 i2 -= 2;
1098 }
1099 break;
1100 case 3:
1101 for (i = m - 1; i >= 0; i--) {
1102 v = aa + 9 * ai[i];
1103 vi = aj + ai[i];
1104 nz = ai[i + 1] - ai[i];
1105 s[0] = b[i2];
1106 s[1] = b[i2 + 1];
1107 s[2] = b[i2 + 2];
1108 while (nz--) {
1109 idx = 3 * (*vi++);
1110 xw[0] = x[idx];
1111 xw[1] = x[1 + idx];
1112 xw[2] = x[2 + idx];
1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114 v += 9;
1115 }
1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117 x[i2] += xw[0];
1118 x[i2 + 1] += xw[1];
1119 x[i2 + 2] += xw[2];
1120 idiag -= 9;
1121 i2 -= 3;
1122 }
1123 break;
1124 case 4:
1125 for (i = m - 1; i >= 0; i--) {
1126 v = aa + 16 * ai[i];
1127 vi = aj + ai[i];
1128 nz = ai[i + 1] - ai[i];
1129 s[0] = b[i2];
1130 s[1] = b[i2 + 1];
1131 s[2] = b[i2 + 2];
1132 s[3] = b[i2 + 3];
1133 while (nz--) {
1134 idx = 4 * (*vi++);
1135 xw[0] = x[idx];
1136 xw[1] = x[1 + idx];
1137 xw[2] = x[2 + idx];
1138 xw[3] = x[3 + idx];
1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140 v += 16;
1141 }
1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143 x[i2] += xw[0];
1144 x[i2 + 1] += xw[1];
1145 x[i2 + 2] += xw[2];
1146 x[i2 + 3] += xw[3];
1147 idiag -= 16;
1148 i2 -= 4;
1149 }
1150 break;
1151 case 5:
1152 for (i = m - 1; i >= 0; i--) {
1153 v = aa + 25 * ai[i];
1154 vi = aj + ai[i];
1155 nz = ai[i + 1] - ai[i];
1156 s[0] = b[i2];
1157 s[1] = b[i2 + 1];
1158 s[2] = b[i2 + 2];
1159 s[3] = b[i2 + 3];
1160 s[4] = b[i2 + 4];
1161 while (nz--) {
1162 idx = 5 * (*vi++);
1163 xw[0] = x[idx];
1164 xw[1] = x[1 + idx];
1165 xw[2] = x[2 + idx];
1166 xw[3] = x[3 + idx];
1167 xw[4] = x[4 + idx];
1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169 v += 25;
1170 }
1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172 x[i2] += xw[0];
1173 x[i2 + 1] += xw[1];
1174 x[i2 + 2] += xw[2];
1175 x[i2 + 3] += xw[3];
1176 x[i2 + 4] += xw[4];
1177 idiag -= 25;
1178 i2 -= 5;
1179 }
1180 break;
1181 case 6:
1182 for (i = m - 1; i >= 0; i--) {
1183 v = aa + 36 * ai[i];
1184 vi = aj + ai[i];
1185 nz = ai[i + 1] - ai[i];
1186 s[0] = b[i2];
1187 s[1] = b[i2 + 1];
1188 s[2] = b[i2 + 2];
1189 s[3] = b[i2 + 3];
1190 s[4] = b[i2 + 4];
1191 s[5] = b[i2 + 5];
1192 while (nz--) {
1193 idx = 6 * (*vi++);
1194 xw[0] = x[idx];
1195 xw[1] = x[1 + idx];
1196 xw[2] = x[2 + idx];
1197 xw[3] = x[3 + idx];
1198 xw[4] = x[4 + idx];
1199 xw[5] = x[5 + idx];
1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201 v += 36;
1202 }
1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204 x[i2] += xw[0];
1205 x[i2 + 1] += xw[1];
1206 x[i2 + 2] += xw[2];
1207 x[i2 + 3] += xw[3];
1208 x[i2 + 4] += xw[4];
1209 x[i2 + 5] += xw[5];
1210 idiag -= 36;
1211 i2 -= 6;
1212 }
1213 break;
1214 case 7:
1215 for (i = m - 1; i >= 0; i--) {
1216 v = aa + 49 * ai[i];
1217 vi = aj + ai[i];
1218 nz = ai[i + 1] - ai[i];
1219 s[0] = b[i2];
1220 s[1] = b[i2 + 1];
1221 s[2] = b[i2 + 2];
1222 s[3] = b[i2 + 3];
1223 s[4] = b[i2 + 4];
1224 s[5] = b[i2 + 5];
1225 s[6] = b[i2 + 6];
1226 while (nz--) {
1227 idx = 7 * (*vi++);
1228 xw[0] = x[idx];
1229 xw[1] = x[1 + idx];
1230 xw[2] = x[2 + idx];
1231 xw[3] = x[3 + idx];
1232 xw[4] = x[4 + idx];
1233 xw[5] = x[5 + idx];
1234 xw[6] = x[6 + idx];
1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236 v += 49;
1237 }
1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239 x[i2] += xw[0];
1240 x[i2 + 1] += xw[1];
1241 x[i2 + 2] += xw[2];
1242 x[i2 + 3] += xw[3];
1243 x[i2 + 4] += xw[4];
1244 x[i2 + 5] += xw[5];
1245 x[i2 + 6] += xw[6];
1246 idiag -= 49;
1247 i2 -= 7;
1248 }
1249 break;
1250 default:
1251 for (i = m - 1; i >= 0; i--) {
1252 v = aa + bs2 * ai[i];
1253 vi = aj + ai[i];
1254 nz = ai[i + 1] - ai[i];
1255
1256 PetscCall(PetscArraycpy(w, b + i2, bs));
1257 /* copy all rows of x that are needed into contiguous space */
1258 workt = work;
1259 for (j = 0; j < nz; j++) {
1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261 workt += bs;
1262 }
1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265
1266 idiag -= bs2;
1267 i2 -= bs;
1268 }
1269 break;
1270 }
1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272 }
1273 }
1274 PetscCall(VecRestoreArray(xx, &x));
1275 PetscCall(VecRestoreArrayRead(bb, &b));
1276 PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278
1279 /*
1280 Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285 #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287
matsetvaluesblocked4_(Mat * AA,PetscInt * mm,const PetscInt im[],PetscInt * nn,const PetscInt in[],const PetscScalar v[])1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290 Mat A = *AA;
1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293 PetscInt *ai = a->i, *ailen = a->ilen;
1294 PetscInt *aj = a->j, stepval, lastcol = -1;
1295 const PetscScalar *value = v;
1296 MatScalar *ap, *aa = a->a, *bap;
1297
1298 PetscFunctionBegin;
1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300 stepval = (n - 1) * 4;
1301 for (k = 0; k < m; k++) { /* loop over added rows */
1302 row = im[k];
1303 rp = aj + ai[row];
1304 ap = aa + 16 * ai[row];
1305 nrow = ailen[row];
1306 low = 0;
1307 high = nrow;
1308 for (l = 0; l < n; l++) { /* loop over added columns */
1309 col = in[l];
1310 if (col <= lastcol) low = 0;
1311 else high = nrow;
1312 lastcol = col;
1313 value = v + k * (stepval + 4 + l) * 4;
1314 while (high - low > 7) {
1315 t = (low + high) / 2;
1316 if (rp[t] > col) high = t;
1317 else low = t;
1318 }
1319 for (i = low; i < high; i++) {
1320 if (rp[i] > col) break;
1321 if (rp[i] == col) {
1322 bap = ap + 16 * i;
1323 for (ii = 0; ii < 4; ii++, value += stepval) {
1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325 }
1326 goto noinsert2;
1327 }
1328 }
1329 N = nrow++ - 1;
1330 high++; /* added new column index thus must search to one higher than before */
1331 /* shift up all the later entries in this row */
1332 for (ii = N; ii >= i; ii--) {
1333 rp[ii + 1] = rp[ii];
1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335 }
1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337 rp[i] = col;
1338 bap = ap + 16 * i;
1339 for (ii = 0; ii < 4; ii++, value += stepval) {
1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341 }
1342 noinsert2:;
1343 low = i;
1344 }
1345 ailen[row] = nrow;
1346 }
1347 PetscFunctionReturnVoid();
1348 }
1349
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351 #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353 #define matsetvalues4_ matsetvalues4
1354 #endif
1355
matsetvalues4_(Mat * AA,PetscInt * mm,PetscInt * im,PetscInt * nn,PetscInt * in,PetscScalar * v)1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358 Mat A = *AA;
1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361 PetscInt *ai = a->i, *ailen = a->ilen;
1362 PetscInt *aj = a->j, brow, bcol;
1363 PetscInt ridx, cidx, lastcol = -1;
1364 MatScalar *ap, value, *aa = a->a, *bap;
1365
1366 PetscFunctionBegin;
1367 for (k = 0; k < m; k++) { /* loop over added rows */
1368 row = im[k];
1369 brow = row / 4;
1370 rp = aj + ai[brow];
1371 ap = aa + 16 * ai[brow];
1372 nrow = ailen[brow];
1373 low = 0;
1374 high = nrow;
1375 for (l = 0; l < n; l++) { /* loop over added columns */
1376 col = in[l];
1377 bcol = col / 4;
1378 ridx = row % 4;
1379 cidx = col % 4;
1380 value = v[l + k * n];
1381 if (col <= lastcol) low = 0;
1382 else high = nrow;
1383 lastcol = col;
1384 while (high - low > 7) {
1385 t = (low + high) / 2;
1386 if (rp[t] > bcol) high = t;
1387 else low = t;
1388 }
1389 for (i = low; i < high; i++) {
1390 if (rp[i] > bcol) break;
1391 if (rp[i] == bcol) {
1392 bap = ap + 16 * i + 4 * cidx + ridx;
1393 *bap += value;
1394 goto noinsert1;
1395 }
1396 }
1397 N = nrow++ - 1;
1398 high++; /* added new column thus must search to one higher than before */
1399 /* shift up all the later entries in this row */
1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403 rp[i] = bcol;
1404 ap[16 * i + 4 * cidx + ridx] = value;
1405 noinsert1:;
1406 low = i;
1407 }
1408 ailen[brow] = nrow;
1409 }
1410 PetscFunctionReturnVoid();
1411 }
1412
MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * inia[],const PetscInt * inja[],PetscBool * done)1413 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414 {
1415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1416 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1417 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1418
1419 PetscFunctionBegin;
1420 *nn = n;
1421 if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1422 if (symmetric) {
1423 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424 nz = tia[n];
1425 } else {
1426 tia = a->i;
1427 tja = a->j;
1428 }
1429
1430 if (!blockcompressed && bs > 1) {
1431 (*nn) *= bs;
1432 /* malloc & create the natural set of indices */
1433 PetscCall(PetscMalloc1((n + 1) * bs, ia));
1434 if (n) {
1435 (*ia)[0] = oshift;
1436 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1437 }
1438
1439 for (i = 1; i < n; i++) {
1440 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1442 }
1443 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444
1445 if (inja) {
1446 PetscCall(PetscMalloc1(nz * bs * bs, ja));
1447 cnt = 0;
1448 for (i = 0; i < n; i++) {
1449 for (j = 0; j < bs; j++) {
1450 for (k = tia[i]; k < tia[i + 1]; k++) {
1451 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1452 }
1453 }
1454 }
1455 }
1456
1457 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1458 PetscCall(PetscFree(tia));
1459 PetscCall(PetscFree(tja));
1460 }
1461 } else if (oshift == 1) {
1462 if (symmetric) {
1463 nz = tia[A->rmap->n / bs];
1464 /* add 1 to i and j indices */
1465 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466 *ia = tia;
1467 if (ja) {
1468 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469 *ja = tja;
1470 }
1471 } else {
1472 nz = a->i[A->rmap->n / bs];
1473 /* malloc space and add 1 to i and j indices */
1474 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476 if (ja) {
1477 PetscCall(PetscMalloc1(nz, ja));
1478 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479 }
1480 }
1481 } else {
1482 *ia = tia;
1483 if (ja) *ja = tja;
1484 }
1485 PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487
MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)1488 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489 {
1490 PetscFunctionBegin;
1491 if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1493 PetscCall(PetscFree(*ia));
1494 if (ja) PetscCall(PetscFree(*ja));
1495 }
1496 PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498
MatDestroy_SeqBAIJ(Mat A)1499 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500 {
1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1502
1503 PetscFunctionBegin;
1504 if (A->hash_active) {
1505 PetscInt bs;
1506 A->ops[0] = a->cops;
1507 PetscCall(PetscHMapIJVDestroy(&a->ht));
1508 PetscCall(MatGetBlockSize(A, &bs));
1509 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510 PetscCall(PetscFree(a->dnz));
1511 PetscCall(PetscFree(a->bdnz));
1512 A->hash_active = PETSC_FALSE;
1513 }
1514 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1515 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1516 PetscCall(ISDestroy(&a->row));
1517 PetscCall(ISDestroy(&a->col));
1518 PetscCall(PetscFree(a->diag));
1519 PetscCall(PetscFree(a->idiag));
1520 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1521 PetscCall(PetscFree(a->solve_work));
1522 PetscCall(PetscFree(a->mult_work));
1523 PetscCall(PetscFree(a->sor_workt));
1524 PetscCall(PetscFree(a->sor_work));
1525 PetscCall(ISDestroy(&a->icol));
1526 PetscCall(PetscFree(a->saved_values));
1527 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528
1529 PetscCall(MatDestroy(&a->sbaijMat));
1530 PetscCall(MatDestroy(&a->parent));
1531 PetscCall(PetscFree(A->data));
1532
1533 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1534 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1535 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1536 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1537 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1538 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1539 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1540 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1541 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1542 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1543 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1544 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1545 #if defined(PETSC_HAVE_HYPRE)
1546 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1547 #endif
1548 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1549 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1550 PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552
MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)1553 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554 {
1555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1556
1557 PetscFunctionBegin;
1558 switch (op) {
1559 case MAT_ROW_ORIENTED:
1560 a->roworiented = flg;
1561 break;
1562 case MAT_KEEP_NONZERO_PATTERN:
1563 a->keepnonzeropattern = flg;
1564 break;
1565 case MAT_NEW_NONZERO_LOCATIONS:
1566 a->nonew = (flg ? 0 : 1);
1567 break;
1568 case MAT_NEW_NONZERO_LOCATION_ERR:
1569 a->nonew = (flg ? -1 : 0);
1570 break;
1571 case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572 a->nonew = (flg ? -2 : 0);
1573 break;
1574 case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575 a->nounused = (flg ? -1 : 0);
1576 break;
1577 default:
1578 break;
1579 }
1580 PetscFunctionReturn(PETSC_SUCCESS);
1581 }
1582
1583 /* used for both SeqBAIJ and SeqSBAIJ matrices */
MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v,PetscInt * ai,PetscInt * aj,PetscScalar * aa)1584 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585 {
1586 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1587 MatScalar *aa_i;
1588 PetscScalar *v_i;
1589
1590 PetscFunctionBegin;
1591 bs = A->rmap->bs;
1592 bs2 = bs * bs;
1593 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1594
1595 bn = row / bs; /* Block number */
1596 bp = row % bs; /* Block Position */
1597 M = ai[bn + 1] - ai[bn];
1598 *nz = bs * M;
1599
1600 if (v) {
1601 *v = NULL;
1602 if (*nz) {
1603 PetscCall(PetscMalloc1(*nz, v));
1604 for (i = 0; i < M; i++) { /* for each block in the block row */
1605 v_i = *v + i * bs;
1606 aa_i = aa + bs2 * (ai[bn] + i);
1607 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1608 }
1609 }
1610 }
1611
1612 if (idx) {
1613 *idx = NULL;
1614 if (*nz) {
1615 PetscCall(PetscMalloc1(*nz, idx));
1616 for (i = 0; i < M; i++) { /* for each block in the block row */
1617 idx_i = *idx + i * bs;
1618 itmp = bs * aj[ai[bn] + i];
1619 for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1620 }
1621 }
1622 }
1623 PetscFunctionReturn(PETSC_SUCCESS);
1624 }
1625
MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1626 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627 {
1628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1629
1630 PetscFunctionBegin;
1631 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1632 PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634
MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1635 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636 {
1637 PetscFunctionBegin;
1638 if (idx) PetscCall(PetscFree(*idx));
1639 if (v) PetscCall(PetscFree(*v));
1640 PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642
MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat * B)1643 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644 {
1645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1646 Mat C;
1647 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1648 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr;
1649 MatScalar *ata, *aa = a->a;
1650
1651 PetscFunctionBegin;
1652 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1653 PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1655 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1656
1657 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1658 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1659 PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1660 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1661
1662 at = (Mat_SeqBAIJ *)C->data;
1663 ati = at->i;
1664 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665 } else {
1666 C = *B;
1667 at = (Mat_SeqBAIJ *)C->data;
1668 ati = at->i;
1669 }
1670
1671 atj = at->j;
1672 ata = at->a;
1673
1674 /* Copy ati into atfill so we have locations of the next free space in atj */
1675 PetscCall(PetscArraycpy(atfill, ati, nbs));
1676
1677 /* Walk through A row-wise and mark nonzero entries of A^T. */
1678 for (i = 0; i < mbs; i++) {
1679 anzj = ai[i + 1] - ai[i];
1680 for (j = 0; j < anzj; j++) {
1681 atj[atfill[*aj]] = i;
1682 for (kr = 0; kr < bs; kr++) {
1683 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1684 }
1685 atfill[*aj++] += 1;
1686 }
1687 }
1688 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1689 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1690
1691 /* Clean up temporary space and complete requests. */
1692 PetscCall(PetscFree(atfill));
1693
1694 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1695 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1696 *B = C;
1697 } else {
1698 PetscCall(MatHeaderMerge(A, &C));
1699 }
1700 PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702
MatCompare_SeqBAIJ_Private(Mat A,Mat B,PetscReal tol,PetscBool * flg)1703 static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1704 {
1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1706
1707 PetscFunctionBegin;
1708 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1709 if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1710 *flg = PETSC_FALSE;
1711 PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713
1714 /* if the a->i are the same */
1715 PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1716 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1717
1718 /* if a->j are the same */
1719 PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1720 if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1721
1722 if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1723 else {
1724 *flg = PETSC_TRUE;
1725 for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1726 if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1727 }
1728 PetscFunctionReturn(PETSC_SUCCESS);
1729 }
1730
MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool * f)1731 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732 {
1733 Mat Btrans;
1734
1735 PetscFunctionBegin;
1736 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1737 PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
1738 PetscCall(MatDestroy(&Btrans));
1739 PetscFunctionReturn(PETSC_SUCCESS);
1740 }
1741
MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg)1742 static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1743 {
1744 PetscFunctionBegin;
1745 PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1746 PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748
1749 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)1750 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751 {
1752 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754 PetscInt *rowlens, *colidxs;
1755 PetscScalar *matvals;
1756
1757 PetscFunctionBegin;
1758 PetscCall(PetscViewerSetUp(viewer));
1759
1760 M = mat->rmap->N;
1761 N = mat->cmap->N;
1762 m = mat->rmap->n;
1763 bs = mat->rmap->bs;
1764 nz = bs * bs * A->nz;
1765
1766 /* write matrix header */
1767 header[0] = MAT_FILE_CLASSID;
1768 header[1] = M;
1769 header[2] = N;
1770 header[3] = nz;
1771 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1772
1773 /* store row lengths */
1774 PetscCall(PetscMalloc1(m, &rowlens));
1775 for (cnt = 0, i = 0; i < A->mbs; i++)
1776 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1777 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1778 PetscCall(PetscFree(rowlens));
1779
1780 /* store column indices */
1781 PetscCall(PetscMalloc1(nz, &colidxs));
1782 for (cnt = 0, i = 0; i < A->mbs; i++)
1783 for (k = 0; k < bs; k++)
1784 for (j = A->i[i]; j < A->i[i + 1]; j++)
1785 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1786 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1787 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1788 PetscCall(PetscFree(colidxs));
1789
1790 /* store nonzero values */
1791 PetscCall(PetscMalloc1(nz, &matvals));
1792 for (cnt = 0, i = 0; i < A->mbs; i++)
1793 for (k = 0; k < bs; k++)
1794 for (j = A->i[i]; j < A->i[i + 1]; j++)
1795 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1796 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1797 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1798 PetscCall(PetscFree(matvals));
1799
1800 /* write block size option to the viewer's .info file */
1801 PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1802 PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804
MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)1805 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806 {
1807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1808 PetscInt i, bs = A->rmap->bs, k;
1809
1810 PetscFunctionBegin;
1811 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1812 for (i = 0; i < a->mbs; i++) {
1813 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1814 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1815 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1816 }
1817 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1818 PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820
MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)1821 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822 {
1823 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1824 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825 PetscViewerFormat format;
1826
1827 PetscFunctionBegin;
1828 if (A->structure_only) {
1829 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1830 PetscFunctionReturn(PETSC_SUCCESS);
1831 }
1832
1833 PetscCall(PetscViewerGetFormat(viewer, &format));
1834 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835 } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1836 const char *matname;
1837 Mat aij;
1838 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1839 PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1840 PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1841 PetscCall(MatView(aij, viewer));
1842 PetscCall(MatDestroy(&aij));
1843 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1844 PetscFunctionReturn(PETSC_SUCCESS);
1845 } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1846 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1847 for (i = 0; i < a->mbs; i++) {
1848 for (j = 0; j < bs; j++) {
1849 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1850 for (k = a->i[i]; k < a->i[i + 1]; k++) {
1851 for (l = 0; l < bs; l++) {
1852 #if defined(PETSC_USE_COMPLEX)
1853 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1854 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1855 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1856 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1857 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1858 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1859 }
1860 #else
1861 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1862 #endif
1863 }
1864 }
1865 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1866 }
1867 }
1868 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1869 } else {
1870 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1871 for (i = 0; i < a->mbs; i++) {
1872 for (j = 0; j < bs; j++) {
1873 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1874 for (k = a->i[i]; k < a->i[i + 1]; k++) {
1875 for (l = 0; l < bs; l++) {
1876 #if defined(PETSC_USE_COMPLEX)
1877 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1878 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1879 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1880 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1881 } else {
1882 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1883 }
1884 #else
1885 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1886 #endif
1887 }
1888 }
1889 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1890 }
1891 }
1892 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1893 }
1894 PetscCall(PetscViewerFlush(viewer));
1895 PetscFunctionReturn(PETSC_SUCCESS);
1896 }
1897
1898 #include <petscdraw.h>
MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void * Aa)1899 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1900 {
1901 Mat A = (Mat)Aa;
1902 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1903 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1904 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1905 MatScalar *aa;
1906 PetscViewer viewer;
1907 PetscViewerFormat format;
1908 int color;
1909
1910 PetscFunctionBegin;
1911 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1912 PetscCall(PetscViewerGetFormat(viewer, &format));
1913 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1914
1915 /* loop over matrix elements drawing boxes */
1916
1917 if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1918 PetscDrawCollectiveBegin(draw);
1919 /* Blue for negative, Cyan for zero and Red for positive */
1920 color = PETSC_DRAW_BLUE;
1921 for (i = 0, row = 0; i < mbs; i++, row += bs) {
1922 for (j = a->i[i]; j < a->i[i + 1]; j++) {
1923 y_l = A->rmap->N - row - 1.0;
1924 y_r = y_l + 1.0;
1925 x_l = a->j[j] * bs;
1926 x_r = x_l + 1.0;
1927 aa = a->a + j * bs2;
1928 for (k = 0; k < bs; k++) {
1929 for (l = 0; l < bs; l++) {
1930 if (PetscRealPart(*aa++) >= 0.) continue;
1931 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1932 }
1933 }
1934 }
1935 }
1936 color = PETSC_DRAW_CYAN;
1937 for (i = 0, row = 0; i < mbs; i++, row += bs) {
1938 for (j = a->i[i]; j < a->i[i + 1]; j++) {
1939 y_l = A->rmap->N - row - 1.0;
1940 y_r = y_l + 1.0;
1941 x_l = a->j[j] * bs;
1942 x_r = x_l + 1.0;
1943 aa = a->a + j * bs2;
1944 for (k = 0; k < bs; k++) {
1945 for (l = 0; l < bs; l++) {
1946 if (PetscRealPart(*aa++) != 0.) continue;
1947 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1948 }
1949 }
1950 }
1951 }
1952 color = PETSC_DRAW_RED;
1953 for (i = 0, row = 0; i < mbs; i++, row += bs) {
1954 for (j = a->i[i]; j < a->i[i + 1]; j++) {
1955 y_l = A->rmap->N - row - 1.0;
1956 y_r = y_l + 1.0;
1957 x_l = a->j[j] * bs;
1958 x_r = x_l + 1.0;
1959 aa = a->a + j * bs2;
1960 for (k = 0; k < bs; k++) {
1961 for (l = 0; l < bs; l++) {
1962 if (PetscRealPart(*aa++) <= 0.) continue;
1963 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1964 }
1965 }
1966 }
1967 }
1968 PetscDrawCollectiveEnd(draw);
1969 } else {
1970 /* use contour shading to indicate magnitude of values */
1971 /* first determine max of all nonzero values */
1972 PetscReal minv = 0.0, maxv = 0.0;
1973 PetscDraw popup;
1974
1975 for (i = 0; i < a->nz * a->bs2; i++) {
1976 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1977 }
1978 if (minv >= maxv) maxv = minv + PETSC_SMALL;
1979 PetscCall(PetscDrawGetPopup(draw, &popup));
1980 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1981
1982 PetscDrawCollectiveBegin(draw);
1983 for (i = 0, row = 0; i < mbs; i++, row += bs) {
1984 for (j = a->i[i]; j < a->i[i + 1]; j++) {
1985 y_l = A->rmap->N - row - 1.0;
1986 y_r = y_l + 1.0;
1987 x_l = a->j[j] * bs;
1988 x_r = x_l + 1.0;
1989 aa = a->a + j * bs2;
1990 for (k = 0; k < bs; k++) {
1991 for (l = 0; l < bs; l++) {
1992 MatScalar v = *aa++;
1993 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1994 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1995 }
1996 }
1997 }
1998 }
1999 PetscDrawCollectiveEnd(draw);
2000 }
2001 PetscFunctionReturn(PETSC_SUCCESS);
2002 }
2003
MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)2004 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2005 {
2006 PetscReal xl, yl, xr, yr, w, h;
2007 PetscDraw draw;
2008 PetscBool isnull;
2009
2010 PetscFunctionBegin;
2011 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2012 PetscCall(PetscDrawIsNull(draw, &isnull));
2013 if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2014
2015 xr = A->cmap->n;
2016 yr = A->rmap->N;
2017 h = yr / 10.0;
2018 w = xr / 10.0;
2019 xr += w;
2020 yr += h;
2021 xl = -w;
2022 yl = -h;
2023 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2024 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2025 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2026 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2027 PetscCall(PetscDrawSave(draw));
2028 PetscFunctionReturn(PETSC_SUCCESS);
2029 }
2030
MatView_SeqBAIJ(Mat A,PetscViewer viewer)2031 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2032 {
2033 PetscBool isascii, isbinary, isdraw;
2034
2035 PetscFunctionBegin;
2036 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2037 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2038 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2039 if (isascii) {
2040 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2041 } else if (isbinary) {
2042 PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2043 } else if (isdraw) {
2044 PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2045 } else {
2046 Mat B;
2047 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2048 PetscCall(MatView(B, viewer));
2049 PetscCall(MatDestroy(&B));
2050 }
2051 PetscFunctionReturn(PETSC_SUCCESS);
2052 }
2053
MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])2054 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2055 {
2056 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2057 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2058 PetscInt *ai = a->i, *ailen = a->ilen;
2059 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2060 MatScalar *ap, *aa = a->a;
2061
2062 PetscFunctionBegin;
2063 for (k = 0; k < m; k++) { /* loop over rows */
2064 row = im[k];
2065 brow = row / bs;
2066 if (row < 0) {
2067 v += n;
2068 continue;
2069 } /* negative row */
2070 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2071 rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2072 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2073 nrow = ailen[brow];
2074 for (l = 0; l < n; l++) { /* loop over columns */
2075 if (in[l] < 0) {
2076 v++;
2077 continue;
2078 } /* negative column */
2079 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2080 col = in[l];
2081 bcol = col / bs;
2082 cidx = col % bs;
2083 ridx = row % bs;
2084 high = nrow;
2085 low = 0; /* assume unsorted */
2086 while (high - low > 5) {
2087 t = (low + high) / 2;
2088 if (rp[t] > bcol) high = t;
2089 else low = t;
2090 }
2091 for (i = low; i < high; i++) {
2092 if (rp[i] > bcol) break;
2093 if (rp[i] == bcol) {
2094 *v++ = ap[bs2 * i + bs * cidx + ridx];
2095 goto finished;
2096 }
2097 }
2098 *v++ = 0.0;
2099 finished:;
2100 }
2101 }
2102 PetscFunctionReturn(PETSC_SUCCESS);
2103 }
2104
MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2105 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2106 {
2107 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2108 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2109 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2110 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2111 PetscBool roworiented = a->roworiented;
2112 const PetscScalar *value = v;
2113 MatScalar *ap = NULL, *aa = a->a, *bap;
2114
2115 PetscFunctionBegin;
2116 if (roworiented) {
2117 stepval = (n - 1) * bs;
2118 } else {
2119 stepval = (m - 1) * bs;
2120 }
2121 for (k = 0; k < m; k++) { /* loop over added rows */
2122 row = im[k];
2123 if (row < 0) continue;
2124 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2125 rp = aj + ai[row];
2126 if (!A->structure_only) ap = aa + bs2 * ai[row];
2127 rmax = imax[row];
2128 nrow = ailen[row];
2129 low = 0;
2130 high = nrow;
2131 for (l = 0; l < n; l++) { /* loop over added columns */
2132 if (in[l] < 0) continue;
2133 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2134 col = in[l];
2135 if (!A->structure_only) {
2136 if (roworiented) {
2137 value = v + (k * (stepval + bs) + l) * bs;
2138 } else {
2139 value = v + (l * (stepval + bs) + k) * bs;
2140 }
2141 }
2142 if (col <= lastcol) low = 0;
2143 else high = nrow;
2144 lastcol = col;
2145 while (high - low > 7) {
2146 t = (low + high) / 2;
2147 if (rp[t] > col) high = t;
2148 else low = t;
2149 }
2150 for (i = low; i < high; i++) {
2151 if (rp[i] > col) break;
2152 if (rp[i] == col) {
2153 if (A->structure_only) goto noinsert2;
2154 bap = ap + bs2 * i;
2155 if (roworiented) {
2156 if (is == ADD_VALUES) {
2157 for (ii = 0; ii < bs; ii++, value += stepval) {
2158 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2159 }
2160 } else {
2161 for (ii = 0; ii < bs; ii++, value += stepval) {
2162 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2163 }
2164 }
2165 } else {
2166 if (is == ADD_VALUES) {
2167 for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2168 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2169 bap += bs;
2170 }
2171 } else {
2172 for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2173 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2174 bap += bs;
2175 }
2176 }
2177 }
2178 goto noinsert2;
2179 }
2180 }
2181 if (nonew == 1) goto noinsert2;
2182 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2183 if (A->structure_only) {
2184 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2185 } else {
2186 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2187 }
2188 N = nrow++ - 1;
2189 high++;
2190 /* shift up all the later entries in this row */
2191 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2192 rp[i] = col;
2193 if (!A->structure_only) {
2194 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2195 bap = ap + bs2 * i;
2196 if (roworiented) {
2197 for (ii = 0; ii < bs; ii++, value += stepval) {
2198 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199 }
2200 } else {
2201 for (ii = 0; ii < bs; ii++, value += stepval) {
2202 for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2203 }
2204 }
2205 }
2206 noinsert2:;
2207 low = i;
2208 }
2209 ailen[row] = nrow;
2210 }
2211 PetscFunctionReturn(PETSC_SUCCESS);
2212 }
2213
MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)2214 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2215 {
2216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2217 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2218 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen;
2219 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2220 MatScalar *aa = a->a, *ap;
2221 PetscReal ratio = 0.6;
2222
2223 PetscFunctionBegin;
2224 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2225
2226 if (m) rmax = ailen[0];
2227 for (i = 1; i < mbs; i++) {
2228 /* move each row back by the amount of empty slots (fshift) before it*/
2229 fshift += imax[i - 1] - ailen[i - 1];
2230 rmax = PetscMax(rmax, ailen[i]);
2231 if (fshift) {
2232 ip = aj + ai[i];
2233 ap = aa + bs2 * ai[i];
2234 N = ailen[i];
2235 PetscCall(PetscArraymove(ip - fshift, ip, N));
2236 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2237 }
2238 ai[i] = ai[i - 1] + ailen[i - 1];
2239 }
2240 if (mbs) {
2241 fshift += imax[mbs - 1] - ailen[mbs - 1];
2242 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2243 }
2244
2245 /* reset ilen and imax for each row */
2246 a->nonzerorowcnt = 0;
2247 if (A->structure_only) {
2248 PetscCall(PetscFree2(a->imax, a->ilen));
2249 } else { /* !A->structure_only */
2250 for (i = 0; i < mbs; i++) {
2251 ailen[i] = imax[i] = ai[i + 1] - ai[i];
2252 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2253 }
2254 }
2255 a->nz = ai[mbs];
2256
2257 /* diagonals may have moved, so kill the diagonal pointers */
2258 a->idiagvalid = PETSC_FALSE;
2259 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2260 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2261 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2262 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2263
2264 A->info.mallocs += a->reallocs;
2265 a->reallocs = 0;
2266 A->info.nz_unneeded = (PetscReal)fshift * bs2;
2267 a->rmax = rmax;
2268
2269 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2270 PetscFunctionReturn(PETSC_SUCCESS);
2271 }
2272
2273 /*
2274 This function returns an array of flags which indicate the locations of contiguous
2275 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2276 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2277 Assume: sizes should be long enough to hold all the values.
2278 */
MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[],PetscInt * bs_max)2279 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2280 {
2281 PetscInt j = 0;
2282
2283 PetscFunctionBegin;
2284 for (PetscInt i = 0; i < n; j++) {
2285 PetscInt row = idx[i];
2286 if (row % bs != 0) { /* Not the beginning of a block */
2287 sizes[j] = 1;
2288 i++;
2289 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2290 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
2291 i++;
2292 } else { /* Beginning of the block, so check if the complete block exists */
2293 PetscBool flg = PETSC_TRUE;
2294 for (PetscInt k = 1; k < bs; k++) {
2295 if (row + k != idx[i + k]) { /* break in the block */
2296 flg = PETSC_FALSE;
2297 break;
2298 }
2299 }
2300 if (flg) { /* No break in the bs */
2301 sizes[j] = bs;
2302 i += bs;
2303 } else {
2304 sizes[j] = 1;
2305 i++;
2306 }
2307 }
2308 }
2309 *bs_max = j;
2310 PetscFunctionReturn(PETSC_SUCCESS);
2311 }
2312
MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2313 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2314 {
2315 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2316 PetscInt i, j, k, count, *rows;
2317 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2318 PetscScalar zero = 0.0;
2319 MatScalar *aa;
2320 const PetscScalar *xx;
2321 PetscScalar *bb;
2322
2323 PetscFunctionBegin;
2324 /* fix right-hand side if needed */
2325 if (x && b) {
2326 PetscCall(VecGetArrayRead(x, &xx));
2327 PetscCall(VecGetArray(b, &bb));
2328 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2329 PetscCall(VecRestoreArrayRead(x, &xx));
2330 PetscCall(VecRestoreArray(b, &bb));
2331 }
2332
2333 /* Make a copy of the IS and sort it */
2334 /* allocate memory for rows,sizes */
2335 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2336
2337 /* copy IS values to rows, and sort them */
2338 for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2339 PetscCall(PetscSortInt(is_n, rows));
2340
2341 if (baij->keepnonzeropattern) {
2342 for (i = 0; i < is_n; i++) sizes[i] = 1;
2343 bs_max = is_n;
2344 } else {
2345 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2346 A->nonzerostate++;
2347 }
2348
2349 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2350 row = rows[j];
2351 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2352 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2353 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2354 if (sizes[i] == bs && !baij->keepnonzeropattern) {
2355 if (diag != (PetscScalar)0.0) {
2356 if (baij->ilen[row / bs] > 0) {
2357 baij->ilen[row / bs] = 1;
2358 baij->j[baij->i[row / bs]] = row / bs;
2359
2360 PetscCall(PetscArrayzero(aa, count * bs));
2361 }
2362 /* Now insert all the diagonal values for this bs */
2363 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2364 } else { /* (diag == 0.0) */
2365 baij->ilen[row / bs] = 0;
2366 } /* end (diag == 0.0) */
2367 } else { /* (sizes[i] != bs) */
2368 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2369 for (k = 0; k < count; k++) {
2370 aa[0] = zero;
2371 aa += bs;
2372 }
2373 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2374 }
2375 }
2376
2377 PetscCall(PetscFree2(rows, sizes));
2378 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2379 PetscFunctionReturn(PETSC_SUCCESS);
2380 }
2381
MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x,Vec b)2382 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2383 {
2384 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2385 PetscInt i, j, k, count;
2386 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2387 PetscScalar zero = 0.0;
2388 MatScalar *aa;
2389 const PetscScalar *xx;
2390 PetscScalar *bb;
2391 PetscBool *zeroed, vecs = PETSC_FALSE;
2392
2393 PetscFunctionBegin;
2394 /* fix right-hand side if needed */
2395 if (x && b) {
2396 PetscCall(VecGetArrayRead(x, &xx));
2397 PetscCall(VecGetArray(b, &bb));
2398 vecs = PETSC_TRUE;
2399 }
2400
2401 /* zero the columns */
2402 PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2403 for (i = 0; i < is_n; i++) {
2404 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2405 zeroed[is_idx[i]] = PETSC_TRUE;
2406 }
2407 for (i = 0; i < A->rmap->N; i++) {
2408 if (!zeroed[i]) {
2409 row = i / bs;
2410 for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2411 for (k = 0; k < bs; k++) {
2412 col = bs * baij->j[j] + k;
2413 if (zeroed[col]) {
2414 aa = baij->a + j * bs2 + (i % bs) + bs * k;
2415 if (vecs) bb[i] -= aa[0] * xx[col];
2416 aa[0] = 0.0;
2417 }
2418 }
2419 }
2420 } else if (vecs) bb[i] = diag * xx[i];
2421 }
2422 PetscCall(PetscFree(zeroed));
2423 if (vecs) {
2424 PetscCall(VecRestoreArrayRead(x, &xx));
2425 PetscCall(VecRestoreArray(b, &bb));
2426 }
2427
2428 /* zero the rows */
2429 for (i = 0; i < is_n; i++) {
2430 row = is_idx[i];
2431 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2432 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2433 for (k = 0; k < count; k++) {
2434 aa[0] = zero;
2435 aa += bs;
2436 }
2437 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2438 }
2439 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2440 PetscFunctionReturn(PETSC_SUCCESS);
2441 }
2442
MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)2443 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2444 {
2445 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2446 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2447 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2448 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2449 PetscInt ridx, cidx, bs2 = a->bs2;
2450 PetscBool roworiented = a->roworiented;
2451 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap;
2452
2453 PetscFunctionBegin;
2454 for (k = 0; k < m; k++) { /* loop over added rows */
2455 row = im[k];
2456 brow = row / bs;
2457 if (row < 0) continue;
2458 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2459 rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2460 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2461 rmax = imax[brow];
2462 nrow = ailen[brow];
2463 low = 0;
2464 high = nrow;
2465 for (l = 0; l < n; l++) { /* loop over added columns */
2466 if (in[l] < 0) continue;
2467 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2468 col = in[l];
2469 bcol = col / bs;
2470 ridx = row % bs;
2471 cidx = col % bs;
2472 if (!A->structure_only) {
2473 if (roworiented) {
2474 value = v[l + k * n];
2475 } else {
2476 value = v[k + l * m];
2477 }
2478 }
2479 if (col <= lastcol) low = 0;
2480 else high = nrow;
2481 lastcol = col;
2482 while (high - low > 7) {
2483 t = (low + high) / 2;
2484 if (rp[t] > bcol) high = t;
2485 else low = t;
2486 }
2487 for (i = low; i < high; i++) {
2488 if (rp[i] > bcol) break;
2489 if (rp[i] == bcol) {
2490 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2491 if (!A->structure_only) {
2492 if (is == ADD_VALUES) *bap += value;
2493 else *bap = value;
2494 }
2495 goto noinsert1;
2496 }
2497 }
2498 if (nonew == 1) goto noinsert1;
2499 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2500 if (A->structure_only) {
2501 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2502 } else {
2503 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2504 }
2505 N = nrow++ - 1;
2506 high++;
2507 /* shift up all the later entries in this row */
2508 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2509 rp[i] = bcol;
2510 if (!A->structure_only) {
2511 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2512 PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2513 ap[bs2 * i + bs * cidx + ridx] = value;
2514 }
2515 a->nz++;
2516 noinsert1:;
2517 low = i;
2518 }
2519 ailen[brow] = nrow;
2520 }
2521 PetscFunctionReturn(PETSC_SUCCESS);
2522 }
2523
MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo * info)2524 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2525 {
2526 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2527 Mat outA;
2528 PetscBool row_identity, col_identity;
2529
2530 PetscFunctionBegin;
2531 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2532 PetscCall(ISIdentity(row, &row_identity));
2533 PetscCall(ISIdentity(col, &col_identity));
2534 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2535
2536 outA = inA;
2537 inA->factortype = MAT_FACTOR_LU;
2538 PetscCall(PetscFree(inA->solvertype));
2539 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2540
2541 PetscCall(PetscObjectReference((PetscObject)row));
2542 PetscCall(ISDestroy(&a->row));
2543 a->row = row;
2544 PetscCall(PetscObjectReference((PetscObject)col));
2545 PetscCall(ISDestroy(&a->col));
2546 a->col = col;
2547
2548 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2549 PetscCall(ISDestroy(&a->icol));
2550 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2551
2552 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2553 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2554 PetscCall(MatLUFactorNumeric(outA, inA, info));
2555 PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557
MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,const PetscInt * indices)2558 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2559 {
2560 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2561
2562 PetscFunctionBegin;
2563 baij->nz = baij->maxnz;
2564 PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2565 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2566 PetscFunctionReturn(PETSC_SUCCESS);
2567 }
2568
2569 /*@
2570 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2571
2572 Input Parameters:
2573 + mat - the `MATSEQBAIJ` matrix
2574 - indices - the block column indices
2575
2576 Level: advanced
2577
2578 Notes:
2579 This can be called if you have precomputed the nonzero structure of the
2580 matrix and want to provide it to the matrix object to improve the performance
2581 of the `MatSetValues()` operation.
2582
2583 You MUST have set the correct numbers of nonzeros per row in the call to
2584 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2585
2586 MUST be called before any calls to `MatSetValues()`
2587
2588 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2589 @*/
MatSeqBAIJSetColumnIndices(Mat mat,PetscInt * indices)2590 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2591 {
2592 PetscFunctionBegin;
2593 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2594 PetscAssertPointer(indices, 2);
2595 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2596 PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598
MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])2599 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2600 {
2601 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2602 PetscInt i, j, n, row, bs, *ai, *aj, mbs;
2603 PetscReal atmp;
2604 PetscScalar *x, zero = 0.0;
2605 MatScalar *aa;
2606 PetscInt ncols, brow, krow, kcol;
2607
2608 PetscFunctionBegin;
2609 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2610 bs = A->rmap->bs;
2611 aa = a->a;
2612 ai = a->i;
2613 aj = a->j;
2614 mbs = a->mbs;
2615
2616 PetscCall(VecSet(v, zero));
2617 PetscCall(VecGetArray(v, &x));
2618 PetscCall(VecGetLocalSize(v, &n));
2619 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2620 for (i = 0; i < mbs; i++) {
2621 ncols = ai[1] - ai[0];
2622 ai++;
2623 brow = bs * i;
2624 for (j = 0; j < ncols; j++) {
2625 for (kcol = 0; kcol < bs; kcol++) {
2626 for (krow = 0; krow < bs; krow++) {
2627 atmp = PetscAbsScalar(*aa);
2628 aa++;
2629 row = brow + krow; /* row index */
2630 if (PetscAbsScalar(x[row]) < atmp) {
2631 x[row] = atmp;
2632 if (idx) idx[row] = bs * (*aj) + kcol;
2633 }
2634 }
2635 }
2636 aj++;
2637 }
2638 }
2639 PetscCall(VecRestoreArray(v, &x));
2640 PetscFunctionReturn(PETSC_SUCCESS);
2641 }
2642
MatGetRowSumAbs_SeqBAIJ(Mat A,Vec v)2643 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2644 {
2645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2646 PetscInt i, j, n, row, bs, *ai, mbs;
2647 PetscReal atmp;
2648 PetscScalar *x, zero = 0.0;
2649 MatScalar *aa;
2650 PetscInt ncols, brow, krow, kcol;
2651
2652 PetscFunctionBegin;
2653 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2654 bs = A->rmap->bs;
2655 aa = a->a;
2656 ai = a->i;
2657 mbs = a->mbs;
2658
2659 PetscCall(VecSet(v, zero));
2660 PetscCall(VecGetArrayWrite(v, &x));
2661 PetscCall(VecGetLocalSize(v, &n));
2662 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2663 for (i = 0; i < mbs; i++) {
2664 ncols = ai[1] - ai[0];
2665 ai++;
2666 brow = bs * i;
2667 for (j = 0; j < ncols; j++) {
2668 for (kcol = 0; kcol < bs; kcol++) {
2669 for (krow = 0; krow < bs; krow++) {
2670 atmp = PetscAbsScalar(*aa);
2671 aa++;
2672 row = brow + krow; /* row index */
2673 x[row] += atmp;
2674 }
2675 }
2676 }
2677 }
2678 PetscCall(VecRestoreArrayWrite(v, &x));
2679 PetscFunctionReturn(PETSC_SUCCESS);
2680 }
2681
MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)2682 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2683 {
2684 PetscFunctionBegin;
2685 /* If the two matrices have the same copy implementation, use fast copy. */
2686 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2687 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2688 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
2689 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2690
2691 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2692 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2693 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2694 PetscCall(PetscObjectStateIncrease((PetscObject)B));
2695 } else {
2696 PetscCall(MatCopy_Basic(A, B, str));
2697 }
2698 PetscFunctionReturn(PETSC_SUCCESS);
2699 }
2700
MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar * array[])2701 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2702 {
2703 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2704
2705 PetscFunctionBegin;
2706 *array = a->a;
2707 PetscFunctionReturn(PETSC_SUCCESS);
2708 }
2709
MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar * array[])2710 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2711 {
2712 PetscFunctionBegin;
2713 *array = NULL;
2714 PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716
MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt * nnz)2717 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2718 {
2719 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2720 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2721 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2722
2723 PetscFunctionBegin;
2724 /* Set the number of nonzeros in the new matrix */
2725 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2726 PetscFunctionReturn(PETSC_SUCCESS);
2727 }
2728
MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2729 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2730 {
2731 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2732 PetscInt bs = Y->rmap->bs, bs2 = bs * bs;
2733 PetscBLASInt one = 1;
2734
2735 PetscFunctionBegin;
2736 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2737 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2738 if (e) {
2739 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2740 if (e) {
2741 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2742 if (e) str = SAME_NONZERO_PATTERN;
2743 }
2744 }
2745 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2746 }
2747 if (str == SAME_NONZERO_PATTERN) {
2748 PetscScalar alpha = a;
2749 PetscBLASInt bnz;
2750 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2751 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2752 PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2753 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2754 PetscCall(MatAXPY_Basic(Y, a, X, str));
2755 } else {
2756 Mat B;
2757 PetscInt *nnz;
2758 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2759 PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2760 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2761 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2762 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2763 PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2764 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2765 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2766 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2767 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2768 PetscCall(MatHeaderMerge(Y, &B));
2769 PetscCall(PetscFree(nnz));
2770 }
2771 PetscFunctionReturn(PETSC_SUCCESS);
2772 }
2773
MatConjugate_SeqBAIJ(Mat A)2774 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2775 {
2776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2777 PetscInt i, nz = a->bs2 * a->i[a->mbs];
2778 MatScalar *aa = a->a;
2779
2780 PetscFunctionBegin;
2781 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2782 PetscFunctionReturn(PETSC_SUCCESS);
2783 }
2784
MatRealPart_SeqBAIJ(Mat A)2785 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2786 {
2787 #if PetscDefined(USE_COMPLEX)
2788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2789 PetscInt i, nz = a->bs2 * a->i[a->mbs];
2790 MatScalar *aa = a->a;
2791
2792 PetscFunctionBegin;
2793 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2794 PetscFunctionReturn(PETSC_SUCCESS);
2795 #else
2796 (void)A;
2797 return PETSC_SUCCESS;
2798 #endif
2799 }
2800
MatImaginaryPart_SeqBAIJ(Mat A)2801 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2802 {
2803 #if PetscDefined(USE_COMPLEX)
2804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2805 PetscInt i, nz = a->bs2 * a->i[a->mbs];
2806 MatScalar *aa = a->a;
2807
2808 PetscFunctionBegin;
2809 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2810 PetscFunctionReturn(PETSC_SUCCESS);
2811 #else
2812 (void)A;
2813 return PETSC_SUCCESS;
2814 #endif
2815 }
2816
2817 /*
2818 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2819 */
MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2820 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2821 {
2822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2823 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2824 PetscInt nz = a->i[m], row, *jj, mr, col;
2825
2826 PetscFunctionBegin;
2827 *nn = n;
2828 if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2829 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2830 PetscCall(PetscCalloc1(n, &collengths));
2831 PetscCall(PetscMalloc1(n + 1, &cia));
2832 PetscCall(PetscMalloc1(nz, &cja));
2833 jj = a->j;
2834 for (i = 0; i < nz; i++) collengths[jj[i]]++;
2835 cia[0] = oshift;
2836 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2837 PetscCall(PetscArrayzero(collengths, n));
2838 jj = a->j;
2839 for (row = 0; row < m; row++) {
2840 mr = a->i[row + 1] - a->i[row];
2841 for (i = 0; i < mr; i++) {
2842 col = *jj++;
2843
2844 cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2845 }
2846 }
2847 PetscCall(PetscFree(collengths));
2848 *ia = cia;
2849 *ja = cja;
2850 PetscFunctionReturn(PETSC_SUCCESS);
2851 }
2852
MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscBool * done)2853 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2854 {
2855 PetscFunctionBegin;
2856 if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2857 PetscCall(PetscFree(*ia));
2858 PetscCall(PetscFree(*ja));
2859 PetscFunctionReturn(PETSC_SUCCESS);
2860 }
2861
2862 /*
2863 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2864 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2865 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2866 */
MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * nn,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2867 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2868 {
2869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2870 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2871 PetscInt nz = a->i[m], row, *jj, mr, col;
2872 PetscInt *cspidx;
2873
2874 PetscFunctionBegin;
2875 *nn = n;
2876 if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2877
2878 PetscCall(PetscCalloc1(n, &collengths));
2879 PetscCall(PetscMalloc1(n + 1, &cia));
2880 PetscCall(PetscMalloc1(nz, &cja));
2881 PetscCall(PetscMalloc1(nz, &cspidx));
2882 jj = a->j;
2883 for (i = 0; i < nz; i++) collengths[jj[i]]++;
2884 cia[0] = oshift;
2885 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2886 PetscCall(PetscArrayzero(collengths, n));
2887 jj = a->j;
2888 for (row = 0; row < m; row++) {
2889 mr = a->i[row + 1] - a->i[row];
2890 for (i = 0; i < mr; i++) {
2891 col = *jj++;
2892 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2893 cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2894 }
2895 }
2896 PetscCall(PetscFree(collengths));
2897 *ia = cia;
2898 *ja = cja;
2899 *spidx = cspidx;
2900 PetscFunctionReturn(PETSC_SUCCESS);
2901 }
2902
MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt * n,const PetscInt * ia[],const PetscInt * ja[],PetscInt * spidx[],PetscBool * done)2903 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2904 {
2905 PetscFunctionBegin;
2906 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2907 PetscCall(PetscFree(*spidx));
2908 PetscFunctionReturn(PETSC_SUCCESS);
2909 }
2910
MatShift_SeqBAIJ(Mat Y,PetscScalar a)2911 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2912 {
2913 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2914
2915 PetscFunctionBegin;
2916 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2917 PetscCall(MatShift_Basic(Y, a));
2918 PetscFunctionReturn(PETSC_SUCCESS);
2919 }
2920
MatEliminateZeros_SeqBAIJ(Mat A,PetscBool keep)2921 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2922 {
2923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2924 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2925 PetscInt m = A->rmap->N, *ailen = a->ilen;
2926 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2927 MatScalar *aa = a->a, *ap;
2928 PetscBool zero;
2929
2930 PetscFunctionBegin;
2931 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2932 if (m) rmax = ailen[0];
2933 for (i = 1; i <= mbs; i++) {
2934 for (k = ai[i - 1]; k < ai[i]; k++) {
2935 zero = PETSC_TRUE;
2936 ap = aa + bs2 * k;
2937 for (j = 0; j < bs2 && zero; j++) {
2938 if (ap[j] != 0.0) zero = PETSC_FALSE;
2939 }
2940 if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2941 else {
2942 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2943 aj[k - fshift] = aj[k];
2944 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2945 }
2946 }
2947 ai[i - 1] -= fshift_prev;
2948 fshift_prev = fshift;
2949 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2950 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2951 rmax = PetscMax(rmax, ailen[i - 1]);
2952 }
2953 if (fshift) {
2954 if (mbs) {
2955 ai[mbs] -= fshift;
2956 a->nz = ai[mbs];
2957 }
2958 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2959 A->nonzerostate++;
2960 A->info.nz_unneeded += (PetscReal)fshift;
2961 a->rmax = rmax;
2962 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2963 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2964 }
2965 PetscFunctionReturn(PETSC_SUCCESS);
2966 }
2967
2968 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2969 MatGetRow_SeqBAIJ,
2970 MatRestoreRow_SeqBAIJ,
2971 MatMult_SeqBAIJ_N,
2972 /* 4*/ MatMultAdd_SeqBAIJ_N,
2973 MatMultTranspose_SeqBAIJ,
2974 MatMultTransposeAdd_SeqBAIJ,
2975 NULL,
2976 NULL,
2977 NULL,
2978 /* 10*/ NULL,
2979 MatLUFactor_SeqBAIJ,
2980 NULL,
2981 NULL,
2982 MatTranspose_SeqBAIJ,
2983 /* 15*/ MatGetInfo_SeqBAIJ,
2984 MatEqual_SeqBAIJ,
2985 MatGetDiagonal_SeqBAIJ,
2986 MatDiagonalScale_SeqBAIJ,
2987 MatNorm_SeqBAIJ,
2988 /* 20*/ NULL,
2989 MatAssemblyEnd_SeqBAIJ,
2990 MatSetOption_SeqBAIJ,
2991 MatZeroEntries_SeqBAIJ,
2992 /* 24*/ MatZeroRows_SeqBAIJ,
2993 NULL,
2994 NULL,
2995 NULL,
2996 NULL,
2997 /* 29*/ MatSetUp_Seq_Hash,
2998 NULL,
2999 NULL,
3000 NULL,
3001 NULL,
3002 /* 34*/ MatDuplicate_SeqBAIJ,
3003 NULL,
3004 NULL,
3005 MatILUFactor_SeqBAIJ,
3006 NULL,
3007 /* 39*/ MatAXPY_SeqBAIJ,
3008 MatCreateSubMatrices_SeqBAIJ,
3009 MatIncreaseOverlap_SeqBAIJ,
3010 MatGetValues_SeqBAIJ,
3011 MatCopy_SeqBAIJ,
3012 /* 44*/ NULL,
3013 MatScale_SeqBAIJ,
3014 MatShift_SeqBAIJ,
3015 NULL,
3016 MatZeroRowsColumns_SeqBAIJ,
3017 /* 49*/ NULL,
3018 MatGetRowIJ_SeqBAIJ,
3019 MatRestoreRowIJ_SeqBAIJ,
3020 MatGetColumnIJ_SeqBAIJ,
3021 MatRestoreColumnIJ_SeqBAIJ,
3022 /* 54*/ MatFDColoringCreate_SeqXAIJ,
3023 NULL,
3024 NULL,
3025 NULL,
3026 MatSetValuesBlocked_SeqBAIJ,
3027 /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3028 MatDestroy_SeqBAIJ,
3029 MatView_SeqBAIJ,
3030 NULL,
3031 NULL,
3032 /* 64*/ NULL,
3033 NULL,
3034 NULL,
3035 NULL,
3036 MatGetRowMaxAbs_SeqBAIJ,
3037 /* 69*/ NULL,
3038 MatConvert_Basic,
3039 NULL,
3040 MatFDColoringApply_BAIJ,
3041 NULL,
3042 /* 74*/ NULL,
3043 NULL,
3044 NULL,
3045 NULL,
3046 MatLoad_SeqBAIJ,
3047 /* 79*/ NULL,
3048 NULL,
3049 NULL,
3050 NULL,
3051 NULL,
3052 /* 84*/ NULL,
3053 NULL,
3054 NULL,
3055 NULL,
3056 NULL,
3057 /* 89*/ NULL,
3058 NULL,
3059 NULL,
3060 NULL,
3061 MatConjugate_SeqBAIJ,
3062 /* 94*/ NULL,
3063 NULL,
3064 MatRealPart_SeqBAIJ,
3065 MatImaginaryPart_SeqBAIJ,
3066 NULL,
3067 /* 99*/ NULL,
3068 NULL,
3069 NULL,
3070 NULL,
3071 NULL,
3072 /*104*/ NULL,
3073 NULL,
3074 NULL,
3075 NULL,
3076 NULL,
3077 /*109*/ NULL,
3078 NULL,
3079 MatMultHermitianTranspose_SeqBAIJ,
3080 MatMultHermitianTransposeAdd_SeqBAIJ,
3081 NULL,
3082 /*114*/ NULL,
3083 MatGetColumnReductions_SeqBAIJ,
3084 MatInvertBlockDiagonal_SeqBAIJ,
3085 NULL,
3086 NULL,
3087 /*119*/ NULL,
3088 NULL,
3089 NULL,
3090 NULL,
3091 NULL,
3092 /*124*/ NULL,
3093 NULL,
3094 MatSetBlockSizes_Default,
3095 NULL,
3096 MatFDColoringSetUp_SeqXAIJ,
3097 /*129*/ NULL,
3098 MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3099 MatDestroySubMatrices_SeqBAIJ,
3100 NULL,
3101 NULL,
3102 /*134*/ NULL,
3103 NULL,
3104 MatEliminateZeros_SeqBAIJ,
3105 MatGetRowSumAbs_SeqBAIJ,
3106 NULL,
3107 /*139*/ NULL,
3108 NULL,
3109 MatCopyHashToXAIJ_Seq_Hash,
3110 NULL,
3111 NULL};
3112
MatStoreValues_SeqBAIJ(Mat mat)3113 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3114 {
3115 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3116 PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3117
3118 PetscFunctionBegin;
3119 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3120
3121 /* allocate space for values if not already there */
3122 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3123
3124 /* copy values over */
3125 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3126 PetscFunctionReturn(PETSC_SUCCESS);
3127 }
3128
MatRetrieveValues_SeqBAIJ(Mat mat)3129 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3130 {
3131 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3132 PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3133
3134 PetscFunctionBegin;
3135 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3136 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3137
3138 /* copy values over */
3139 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3140 PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142
3143 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3144 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3145
MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3146 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3147 {
3148 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3149 PetscInt i, mbs, nbs, bs2;
3150 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3151
3152 PetscFunctionBegin;
3153 if (B->hash_active) {
3154 PetscInt bs;
3155 B->ops[0] = b->cops;
3156 PetscCall(PetscHMapIJVDestroy(&b->ht));
3157 PetscCall(MatGetBlockSize(B, &bs));
3158 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3159 PetscCall(PetscFree(b->dnz));
3160 PetscCall(PetscFree(b->bdnz));
3161 B->hash_active = PETSC_FALSE;
3162 }
3163 if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3164 if (nz == MAT_SKIP_ALLOCATION) {
3165 skipallocation = PETSC_TRUE;
3166 nz = 0;
3167 }
3168
3169 PetscCall(MatSetBlockSize(B, bs));
3170 PetscCall(PetscLayoutSetUp(B->rmap));
3171 PetscCall(PetscLayoutSetUp(B->cmap));
3172 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3173
3174 B->preallocated = PETSC_TRUE;
3175
3176 mbs = B->rmap->n / bs;
3177 nbs = B->cmap->n / bs;
3178 bs2 = bs * bs;
3179
3180 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3181
3182 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3183 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3184 if (nnz) {
3185 for (i = 0; i < mbs; i++) {
3186 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3187 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3188 }
3189 }
3190
3191 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3192 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3193 PetscOptionsEnd();
3194
3195 if (!flg) {
3196 switch (bs) {
3197 case 1:
3198 B->ops->mult = MatMult_SeqBAIJ_1;
3199 B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3200 break;
3201 case 2:
3202 B->ops->mult = MatMult_SeqBAIJ_2;
3203 B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3204 break;
3205 case 3:
3206 B->ops->mult = MatMult_SeqBAIJ_3;
3207 B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3208 break;
3209 case 4:
3210 B->ops->mult = MatMult_SeqBAIJ_4;
3211 B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3212 break;
3213 case 5:
3214 B->ops->mult = MatMult_SeqBAIJ_5;
3215 B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3216 break;
3217 case 6:
3218 B->ops->mult = MatMult_SeqBAIJ_6;
3219 B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3220 break;
3221 case 7:
3222 B->ops->mult = MatMult_SeqBAIJ_7;
3223 B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3224 break;
3225 case 9: {
3226 PetscInt version = 1;
3227 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3228 switch (version) {
3229 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3230 case 1:
3231 B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
3232 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3233 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3234 break;
3235 #endif
3236 default:
3237 B->ops->mult = MatMult_SeqBAIJ_N;
3238 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3239 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3240 break;
3241 }
3242 break;
3243 }
3244 case 11:
3245 B->ops->mult = MatMult_SeqBAIJ_11;
3246 B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3247 break;
3248 case 12: {
3249 PetscInt version = 1;
3250 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3251 switch (version) {
3252 case 1:
3253 B->ops->mult = MatMult_SeqBAIJ_12_ver1;
3254 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3255 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3256 break;
3257 case 2:
3258 B->ops->mult = MatMult_SeqBAIJ_12_ver2;
3259 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3260 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3261 break;
3262 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3263 case 3:
3264 B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
3265 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3266 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3267 break;
3268 #endif
3269 default:
3270 B->ops->mult = MatMult_SeqBAIJ_N;
3271 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3272 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3273 break;
3274 }
3275 break;
3276 }
3277 case 15: {
3278 PetscInt version = 1;
3279 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3280 switch (version) {
3281 case 1:
3282 B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3283 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3284 break;
3285 case 2:
3286 B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3287 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3288 break;
3289 case 3:
3290 B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3291 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3292 break;
3293 case 4:
3294 B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3295 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3296 break;
3297 default:
3298 B->ops->mult = MatMult_SeqBAIJ_N;
3299 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3300 break;
3301 }
3302 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3303 break;
3304 }
3305 default:
3306 B->ops->mult = MatMult_SeqBAIJ_N;
3307 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3308 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3309 break;
3310 }
3311 }
3312 B->ops->sor = MatSOR_SeqBAIJ;
3313 b->mbs = mbs;
3314 b->nbs = nbs;
3315 if (!skipallocation) {
3316 if (!b->imax) {
3317 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3318
3319 b->free_imax_ilen = PETSC_TRUE;
3320 }
3321 /* b->ilen will count nonzeros in each block row so far. */
3322 for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3323 if (!nnz) {
3324 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3325 else if (nz < 0) nz = 1;
3326 nz = PetscMin(nz, nbs);
3327 for (i = 0; i < mbs; i++) b->imax[i] = nz;
3328 PetscCall(PetscIntMultError(nz, mbs, &nz));
3329 } else {
3330 PetscInt64 nz64 = 0;
3331 for (i = 0; i < mbs; i++) {
3332 b->imax[i] = nnz[i];
3333 nz64 += nnz[i];
3334 }
3335 PetscCall(PetscIntCast(nz64, &nz));
3336 }
3337
3338 /* allocate the matrix space */
3339 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3340 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3341 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3342 if (B->structure_only) {
3343 b->free_a = PETSC_FALSE;
3344 } else {
3345 PetscInt nzbs2 = 0;
3346 PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3347 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3348 b->free_a = PETSC_TRUE;
3349 PetscCall(PetscArrayzero(b->a, nzbs2));
3350 }
3351 b->free_ij = PETSC_TRUE;
3352 PetscCall(PetscArrayzero(b->j, nz));
3353
3354 b->i[0] = 0;
3355 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3356 } else {
3357 b->free_a = PETSC_FALSE;
3358 b->free_ij = PETSC_FALSE;
3359 }
3360
3361 b->bs2 = bs2;
3362 b->mbs = mbs;
3363 b->nz = 0;
3364 b->maxnz = nz;
3365 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3366 B->was_assembled = PETSC_FALSE;
3367 B->assembled = PETSC_FALSE;
3368 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3369 PetscFunctionReturn(PETSC_SUCCESS);
3370 }
3371
MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])3372 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3373 {
3374 PetscInt i, m, nz, nz_max = 0, *nnz;
3375 PetscScalar *values = NULL;
3376 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3377
3378 PetscFunctionBegin;
3379 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3380 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3381 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3382 PetscCall(PetscLayoutSetUp(B->rmap));
3383 PetscCall(PetscLayoutSetUp(B->cmap));
3384 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3385 m = B->rmap->n / bs;
3386
3387 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3388 PetscCall(PetscMalloc1(m + 1, &nnz));
3389 for (i = 0; i < m; i++) {
3390 nz = ii[i + 1] - ii[i];
3391 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3392 nz_max = PetscMax(nz_max, nz);
3393 nnz[i] = nz;
3394 }
3395 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3396 PetscCall(PetscFree(nnz));
3397
3398 values = (PetscScalar *)V;
3399 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3400 for (i = 0; i < m; i++) {
3401 PetscInt ncols = ii[i + 1] - ii[i];
3402 const PetscInt *icols = jj + ii[i];
3403 if (bs == 1 || !roworiented) {
3404 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3405 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3406 } else {
3407 PetscInt j;
3408 for (j = 0; j < ncols; j++) {
3409 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3410 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3411 }
3412 }
3413 }
3414 if (!V) PetscCall(PetscFree(values));
3415 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3416 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3417 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3418 PetscFunctionReturn(PETSC_SUCCESS);
3419 }
3420
3421 /*@C
3422 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3423
3424 Not Collective
3425
3426 Input Parameter:
3427 . A - a `MATSEQBAIJ` matrix
3428
3429 Output Parameter:
3430 . array - pointer to the data
3431
3432 Level: intermediate
3433
3434 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3435 @*/
MatSeqBAIJGetArray(Mat A,PetscScalar * array[])3436 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3437 {
3438 PetscFunctionBegin;
3439 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3440 PetscFunctionReturn(PETSC_SUCCESS);
3441 }
3442
3443 /*@C
3444 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3445
3446 Not Collective
3447
3448 Input Parameters:
3449 + A - a `MATSEQBAIJ` matrix
3450 - array - pointer to the data
3451
3452 Level: intermediate
3453
3454 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3455 @*/
MatSeqBAIJRestoreArray(Mat A,PetscScalar * array[])3456 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3457 {
3458 PetscFunctionBegin;
3459 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3460 PetscFunctionReturn(PETSC_SUCCESS);
3461 }
3462
3463 /*MC
3464 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3465 block sparse compressed row format.
3466
3467 Options Database Keys:
3468 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3469 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3470
3471 Level: beginner
3472
3473 Notes:
3474 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3475 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3476
3477 Run with `-info` to see what version of the matrix-vector product is being used
3478
3479 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3480 M*/
3481
3482 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3483
MatCreate_SeqBAIJ(Mat B)3484 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3485 {
3486 PetscMPIInt size;
3487 Mat_SeqBAIJ *b;
3488
3489 PetscFunctionBegin;
3490 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3491 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3492
3493 PetscCall(PetscNew(&b));
3494 B->data = (void *)b;
3495 B->ops[0] = MatOps_Values;
3496
3497 b->row = NULL;
3498 b->col = NULL;
3499 b->icol = NULL;
3500 b->reallocs = 0;
3501 b->saved_values = NULL;
3502
3503 b->roworiented = PETSC_TRUE;
3504 b->nonew = 0;
3505 b->diag = NULL;
3506 B->spptr = NULL;
3507 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2;
3508 b->keepnonzeropattern = PETSC_FALSE;
3509
3510 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3511 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3512 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3513 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3514 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3515 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3516 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3517 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3518 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3519 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3520 #if defined(PETSC_HAVE_HYPRE)
3521 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3522 #endif
3523 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3524 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3525 PetscFunctionReturn(PETSC_SUCCESS);
3526 }
3527
MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)3528 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3529 {
3530 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3531 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3532
3533 PetscFunctionBegin;
3534 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3535 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3536
3537 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3538 c->imax = a->imax;
3539 c->ilen = a->ilen;
3540 c->free_imax_ilen = PETSC_FALSE;
3541 } else {
3542 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3543 for (i = 0; i < mbs; i++) {
3544 c->imax[i] = a->imax[i];
3545 c->ilen[i] = a->ilen[i];
3546 }
3547 c->free_imax_ilen = PETSC_TRUE;
3548 }
3549
3550 /* allocate the matrix space */
3551 if (mallocmatspace) {
3552 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3553 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3554 PetscCall(PetscArrayzero(c->a, bs2 * nz));
3555 c->free_a = PETSC_TRUE;
3556 c->i = a->i;
3557 c->j = a->j;
3558 c->free_ij = PETSC_FALSE;
3559 c->parent = A;
3560 C->preallocated = PETSC_TRUE;
3561 C->assembled = PETSC_TRUE;
3562
3563 PetscCall(PetscObjectReference((PetscObject)A));
3564 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3565 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3566 } else {
3567 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3568 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3569 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3570 c->free_a = PETSC_TRUE;
3571 c->free_ij = PETSC_TRUE;
3572
3573 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3574 if (mbs > 0) {
3575 PetscCall(PetscArraycpy(c->j, a->j, nz));
3576 if (cpvalues == MAT_COPY_VALUES) {
3577 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3578 } else {
3579 PetscCall(PetscArrayzero(c->a, bs2 * nz));
3580 }
3581 }
3582 C->preallocated = PETSC_TRUE;
3583 C->assembled = PETSC_TRUE;
3584 }
3585 }
3586
3587 c->roworiented = a->roworiented;
3588 c->nonew = a->nonew;
3589
3590 PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3591 PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3592
3593 c->bs2 = a->bs2;
3594 c->mbs = a->mbs;
3595 c->nbs = a->nbs;
3596 c->nz = a->nz;
3597 c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3598 c->solve_work = NULL;
3599 c->mult_work = NULL;
3600 c->sor_workt = NULL;
3601 c->sor_work = NULL;
3602
3603 c->compressedrow.use = a->compressedrow.use;
3604 c->compressedrow.nrows = a->compressedrow.nrows;
3605 if (a->compressedrow.use) {
3606 i = a->compressedrow.nrows;
3607 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3608 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3609 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3610 } else {
3611 c->compressedrow.use = PETSC_FALSE;
3612 c->compressedrow.i = NULL;
3613 c->compressedrow.rindex = NULL;
3614 }
3615 c->nonzerorowcnt = a->nonzerorowcnt;
3616 C->nonzerostate = A->nonzerostate;
3617
3618 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3619 PetscFunctionReturn(PETSC_SUCCESS);
3620 }
3621
MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat * B)3622 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3623 {
3624 PetscFunctionBegin;
3625 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3626 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3627 PetscCall(MatSetType(*B, MATSEQBAIJ));
3628 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3629 PetscFunctionReturn(PETSC_SUCCESS);
3630 }
3631
3632 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)3633 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3634 {
3635 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3636 PetscInt *rowidxs, *colidxs;
3637 PetscScalar *matvals;
3638
3639 PetscFunctionBegin;
3640 PetscCall(PetscViewerSetUp(viewer));
3641
3642 /* read matrix header */
3643 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3644 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3645 M = header[1];
3646 N = header[2];
3647 nz = header[3];
3648 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3649 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3650 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3651
3652 /* set block sizes from the viewer's .info file */
3653 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3654 /* set local and global sizes if not set already */
3655 if (mat->rmap->n < 0) mat->rmap->n = M;
3656 if (mat->cmap->n < 0) mat->cmap->n = N;
3657 if (mat->rmap->N < 0) mat->rmap->N = M;
3658 if (mat->cmap->N < 0) mat->cmap->N = N;
3659 PetscCall(PetscLayoutSetUp(mat->rmap));
3660 PetscCall(PetscLayoutSetUp(mat->cmap));
3661
3662 /* check if the matrix sizes are correct */
3663 PetscCall(MatGetSize(mat, &rows, &cols));
3664 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3665 PetscCall(MatGetBlockSize(mat, &bs));
3666 PetscCall(MatGetLocalSize(mat, &m, &n));
3667 mbs = m / bs;
3668 nbs = n / bs;
3669
3670 /* read in row lengths, column indices and nonzero values */
3671 PetscCall(PetscMalloc1(m + 1, &rowidxs));
3672 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3673 rowidxs[0] = 0;
3674 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3675 sum = rowidxs[m];
3676 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3677
3678 /* read in column indices and nonzero values */
3679 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3680 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3681 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3682
3683 { /* preallocate matrix storage */
3684 PetscBT bt; /* helper bit set to count nonzeros */
3685 PetscInt *nnz;
3686 PetscBool sbaij;
3687
3688 PetscCall(PetscBTCreate(nbs, &bt));
3689 PetscCall(PetscCalloc1(mbs, &nnz));
3690 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3691 for (i = 0; i < mbs; i++) {
3692 PetscCall(PetscBTMemzero(nbs, bt));
3693 for (k = 0; k < bs; k++) {
3694 PetscInt row = bs * i + k;
3695 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3696 PetscInt col = colidxs[j];
3697 if (!sbaij || col >= row)
3698 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3699 }
3700 }
3701 }
3702 PetscCall(PetscBTDestroy(&bt));
3703 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3704 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3705 PetscCall(PetscFree(nnz));
3706 }
3707
3708 /* store matrix values */
3709 for (i = 0; i < m; i++) {
3710 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3711 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3712 }
3713
3714 PetscCall(PetscFree(rowidxs));
3715 PetscCall(PetscFree2(colidxs, matvals));
3716 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3717 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3718 PetscFunctionReturn(PETSC_SUCCESS);
3719 }
3720
MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)3721 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3722 {
3723 PetscBool isbinary;
3724
3725 PetscFunctionBegin;
3726 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3727 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3728 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3729 PetscFunctionReturn(PETSC_SUCCESS);
3730 }
3731
3732 /*@
3733 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3734 compressed row) format. For good matrix assembly performance the
3735 user should preallocate the matrix storage by setting the parameter `nz`
3736 (or the array `nnz`).
3737
3738 Collective
3739
3740 Input Parameters:
3741 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3742 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3743 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3744 . m - number of rows
3745 . n - number of columns
3746 . nz - number of nonzero blocks per block row (same for all rows)
3747 - nnz - array containing the number of nonzero blocks in the various block rows
3748 (possibly different for each block row) or `NULL`
3749
3750 Output Parameter:
3751 . A - the matrix
3752
3753 Options Database Keys:
3754 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3755 - -mat_block_size - size of the blocks to use
3756
3757 Level: intermediate
3758
3759 Notes:
3760 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3761 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3762 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3763
3764 The number of rows and columns must be divisible by blocksize.
3765
3766 If the `nnz` parameter is given then the `nz` parameter is ignored
3767
3768 A nonzero block is any block that as 1 or more nonzeros in it
3769
3770 The `MATSEQBAIJ` format is fully compatible with standard Fortran
3771 storage. That is, the stored row and column indices can begin at
3772 either one (as in Fortran) or zero.
3773
3774 Specify the preallocated storage with either `nz` or `nnz` (not both).
3775 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3776 allocation. See [Sparse Matrices](sec_matsparse) for details.
3777 matrices.
3778
3779 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3780 @*/
MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)3781 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3782 {
3783 PetscFunctionBegin;
3784 PetscCall(MatCreate(comm, A));
3785 PetscCall(MatSetSizes(*A, m, n, m, n));
3786 PetscCall(MatSetType(*A, MATSEQBAIJ));
3787 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3788 PetscFunctionReturn(PETSC_SUCCESS);
3789 }
3790
3791 /*@
3792 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3793 per row in the matrix. For good matrix assembly performance the
3794 user should preallocate the matrix storage by setting the parameter `nz`
3795 (or the array `nnz`).
3796
3797 Collective
3798
3799 Input Parameters:
3800 + B - the matrix
3801 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3802 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3803 . nz - number of block nonzeros per block row (same for all rows)
3804 - nnz - array containing the number of block nonzeros in the various block rows
3805 (possibly different for each block row) or `NULL`
3806
3807 Options Database Keys:
3808 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3809 - -mat_block_size - size of the blocks to use
3810
3811 Level: intermediate
3812
3813 Notes:
3814 If the `nnz` parameter is given then the `nz` parameter is ignored
3815
3816 You can call `MatGetInfo()` to get information on how effective the preallocation was;
3817 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3818 You can also run with the option `-info` and look for messages with the string
3819 malloc in them to see if additional memory allocation was needed.
3820
3821 The `MATSEQBAIJ` format is fully compatible with standard Fortran
3822 storage. That is, the stored row and column indices can begin at
3823 either one (as in Fortran) or zero.
3824
3825 Specify the preallocated storage with either `nz` or `nnz` (not both).
3826 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3827 allocation. See [Sparse Matrices](sec_matsparse) for details.
3828
3829 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3830 @*/
MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])3831 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3832 {
3833 PetscFunctionBegin;
3834 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3835 PetscValidType(B, 1);
3836 PetscValidLogicalCollectiveInt(B, bs, 2);
3837 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3838 PetscFunctionReturn(PETSC_SUCCESS);
3839 }
3840
3841 /*@C
3842 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3843
3844 Collective
3845
3846 Input Parameters:
3847 + B - the matrix
3848 . bs - the blocksize
3849 . i - the indices into `j` for the start of each local row (indices start with zero)
3850 . j - the column indices for each local row (indices start with zero) these must be sorted for each row
3851 - v - optional values in the matrix, use `NULL` if not provided
3852
3853 Level: advanced
3854
3855 Notes:
3856 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3857
3858 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
3859 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3860 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3861 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3862 block column and the second index is over columns within a block.
3863
3864 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3865
3866 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3867 @*/
MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])3868 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3869 {
3870 PetscFunctionBegin;
3871 PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3872 PetscValidType(B, 1);
3873 PetscValidLogicalCollectiveInt(B, bs, 2);
3874 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3875 PetscFunctionReturn(PETSC_SUCCESS);
3876 }
3877
3878 /*@
3879 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3880
3881 Collective
3882
3883 Input Parameters:
3884 + comm - must be an MPI communicator of size 1
3885 . bs - size of block
3886 . m - number of rows
3887 . n - number of columns
3888 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3889 . j - column indices
3890 - a - matrix values
3891
3892 Output Parameter:
3893 . mat - the matrix
3894
3895 Level: advanced
3896
3897 Notes:
3898 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3899 once the matrix is destroyed
3900
3901 You cannot set new nonzero locations into this matrix, that will generate an error.
3902
3903 The `i` and `j` indices are 0 based
3904
3905 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3906
3907 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3908 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3909 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3910 with column-major ordering within blocks.
3911
3912 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3913 @*/
MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat * mat)3914 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3915 {
3916 Mat_SeqBAIJ *baij;
3917
3918 PetscFunctionBegin;
3919 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3920 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3921
3922 PetscCall(MatCreate(comm, mat));
3923 PetscCall(MatSetSizes(*mat, m, n, m, n));
3924 PetscCall(MatSetType(*mat, MATSEQBAIJ));
3925 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3926 baij = (Mat_SeqBAIJ *)(*mat)->data;
3927 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3928
3929 baij->i = i;
3930 baij->j = j;
3931 baij->a = a;
3932
3933 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3934 baij->free_a = PETSC_FALSE;
3935 baij->free_ij = PETSC_FALSE;
3936 baij->free_imax_ilen = PETSC_TRUE;
3937
3938 for (PetscInt ii = 0; ii < m; ii++) {
3939 const PetscInt row_len = i[ii + 1] - i[ii];
3940
3941 baij->ilen[ii] = baij->imax[ii] = row_len;
3942 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3943 }
3944 if (PetscDefined(USE_DEBUG)) {
3945 for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3946 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3947 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3948 }
3949 }
3950
3951 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3952 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3953 PetscFunctionReturn(PETSC_SUCCESS);
3954 }
3955
MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3956 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3957 {
3958 PetscFunctionBegin;
3959 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3960 PetscFunctionReturn(PETSC_SUCCESS);
3961 }
3962