1 /*
2 Defines projective product routines where A is a MPIAIJ matrix
3 C = P^T * A * P
4 */
5
6 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
7 #include <../src/mat/utils/freespace.h>
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscbt.h>
10 #include <petsctime.h>
11 #include <petsc/private/hashmapiv.h>
12 #include <petsc/private/hashseti.h>
13 #include <petscsf.h>
14
MatView_MPIAIJ_PtAP(Mat A,PetscViewer viewer)15 static PetscErrorCode MatView_MPIAIJ_PtAP(Mat A, PetscViewer viewer)
16 {
17 PetscBool isascii;
18 PetscViewerFormat format;
19 MatProductCtx_APMPI *ptap;
20
21 PetscFunctionBegin;
22 MatCheckProduct(A, 1);
23 ptap = (MatProductCtx_APMPI *)A->product->data;
24 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
25 if (isascii) {
26 PetscCall(PetscViewerGetFormat(viewer, &format));
27 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
28 if (ptap->algType == 0) {
29 PetscCall(PetscViewerASCIIPrintf(viewer, "using scalable MatPtAP() implementation\n"));
30 } else if (ptap->algType == 1) {
31 PetscCall(PetscViewerASCIIPrintf(viewer, "using nonscalable MatPtAP() implementation\n"));
32 } else if (ptap->algType == 2) {
33 PetscCall(PetscViewerASCIIPrintf(viewer, "using allatonce MatPtAP() implementation\n"));
34 } else if (ptap->algType == 3) {
35 PetscCall(PetscViewerASCIIPrintf(viewer, "using merged allatonce MatPtAP() implementation\n"));
36 }
37 }
38 }
39 PetscFunctionReturn(PETSC_SUCCESS);
40 }
41
MatProductCtxDestroy_MPIAIJ_PtAP(PetscCtxRt data)42 PetscErrorCode MatProductCtxDestroy_MPIAIJ_PtAP(PetscCtxRt data)
43 {
44 MatProductCtx_APMPI *ptap = *(MatProductCtx_APMPI **)data;
45 MatMergeSeqsToMPI *merge;
46
47 PetscFunctionBegin;
48 PetscCall(PetscFree2(ptap->startsj_s, ptap->startsj_r));
49 PetscCall(PetscFree(ptap->bufa));
50 PetscCall(MatDestroy(&ptap->P_loc));
51 PetscCall(MatDestroy(&ptap->P_oth));
52 PetscCall(MatDestroy(&ptap->A_loc)); /* used by MatTransposeMatMult() */
53 PetscCall(MatDestroy(&ptap->Rd));
54 PetscCall(MatDestroy(&ptap->Ro));
55 if (ptap->AP_loc) { /* used by alg_rap */
56 Mat_SeqAIJ *ap = (Mat_SeqAIJ *)ptap->AP_loc->data;
57 PetscCall(PetscFree(ap->i));
58 PetscCall(PetscFree2(ap->j, ap->a));
59 PetscCall(MatDestroy(&ptap->AP_loc));
60 } else { /* used by alg_ptap */
61 PetscCall(PetscFree(ptap->api));
62 PetscCall(PetscFree(ptap->apj));
63 }
64 PetscCall(MatDestroy(&ptap->C_loc));
65 PetscCall(MatDestroy(&ptap->C_oth));
66 if (ptap->apa) PetscCall(PetscFree(ptap->apa));
67
68 PetscCall(MatDestroy(&ptap->Pt));
69
70 merge = ptap->merge;
71 if (merge) { /* used by alg_ptap */
72 PetscCall(PetscFree(merge->id_r));
73 PetscCall(PetscFree(merge->len_s));
74 PetscCall(PetscFree(merge->len_r));
75 PetscCall(PetscFree(merge->bi));
76 PetscCall(PetscFree(merge->bj));
77 PetscCall(PetscFree(merge->buf_ri[0]));
78 PetscCall(PetscFree(merge->buf_ri));
79 PetscCall(PetscFree(merge->buf_rj[0]));
80 PetscCall(PetscFree(merge->buf_rj));
81 PetscCall(PetscFree(merge->coi));
82 PetscCall(PetscFree(merge->coj));
83 PetscCall(PetscFree(merge->owners_co));
84 PetscCall(PetscLayoutDestroy(&merge->rowmap));
85 PetscCall(PetscFree(ptap->merge));
86 }
87 PetscCall(ISLocalToGlobalMappingDestroy(&ptap->ltog));
88
89 PetscCall(PetscSFDestroy(&ptap->sf));
90 PetscCall(PetscFree(ptap->c_othi));
91 PetscCall(PetscFree(ptap->c_rmti));
92 PetscCall(PetscFree(ptap));
93 PetscFunctionReturn(PETSC_SUCCESS);
94 }
95
MatPtAPNumeric_MPIAIJ_MPIAIJ_scalable(Mat A,Mat P,Mat C)96 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_scalable(Mat A, Mat P, Mat C)
97 {
98 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
99 Mat_SeqAIJ *ad = (Mat_SeqAIJ *)a->A->data, *ao = (Mat_SeqAIJ *)a->B->data;
100 Mat_SeqAIJ *ap, *p_loc, *p_oth = NULL, *c_seq;
101 MatProductCtx_APMPI *ptap;
102 Mat AP_loc, C_loc, C_oth;
103 PetscInt i, rstart, rend, cm, ncols, row, *api, *apj, am = A->rmap->n, apnz, nout;
104 PetscScalar *apa;
105 const PetscInt *cols;
106 const PetscScalar *vals;
107
108 PetscFunctionBegin;
109 MatCheckProduct(C, 3);
110 ptap = (MatProductCtx_APMPI *)C->product->data;
111 PetscCheck(ptap, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be computed. Missing data");
112 PetscCheck(ptap->AP_loc, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be reused. Do not call MatProductClear()");
113
114 PetscCall(MatZeroEntries(C));
115
116 /* 1) get R = Pd^T,Ro = Po^T */
117 if (ptap->reuse == MAT_REUSE_MATRIX) {
118 PetscCall(MatTranspose(p->A, MAT_REUSE_MATRIX, &ptap->Rd));
119 PetscCall(MatTranspose(p->B, MAT_REUSE_MATRIX, &ptap->Ro));
120 }
121
122 /* 2) get AP_loc */
123 AP_loc = ptap->AP_loc;
124 ap = (Mat_SeqAIJ *)AP_loc->data;
125
126 /* 2-1) get P_oth = ptap->P_oth and P_loc = ptap->P_loc */
127 if (ptap->reuse == MAT_REUSE_MATRIX) {
128 /* P_oth and P_loc are obtained in MatPtASymbolic() when reuse == MAT_INITIAL_MATRIX */
129 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_REUSE_MATRIX, &ptap->startsj_s, &ptap->startsj_r, &ptap->bufa, &ptap->P_oth));
130 PetscCall(MatMPIAIJGetLocalMat(P, MAT_REUSE_MATRIX, &ptap->P_loc));
131 }
132
133 /* 2-2) compute numeric A_loc*P - dominating part */
134 /* get data from symbolic products */
135 p_loc = (Mat_SeqAIJ *)ptap->P_loc->data;
136 if (ptap->P_oth) p_oth = (Mat_SeqAIJ *)ptap->P_oth->data;
137
138 api = ap->i;
139 apj = ap->j;
140 PetscCall(ISLocalToGlobalMappingApply(ptap->ltog, api[AP_loc->rmap->n], apj, apj));
141 for (i = 0; i < am; i++) {
142 /* AP[i,:] = A[i,:]*P = Ad*P_loc Ao*P_oth */
143 apnz = api[i + 1] - api[i];
144 apa = ap->a + api[i];
145 PetscCall(PetscArrayzero(apa, apnz));
146 AProw_scalable(i, ad, ao, p_loc, p_oth, api, apj, apa);
147 }
148 PetscCall(ISGlobalToLocalMappingApply(ptap->ltog, IS_GTOLM_DROP, api[AP_loc->rmap->n], apj, &nout, apj));
149 PetscCheck(api[AP_loc->rmap->n] == nout, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incorrect mapping %" PetscInt_FMT " != %" PetscInt_FMT, api[AP_loc->rmap->n], nout);
150
151 /* 3) C_loc = Rd*AP_loc, C_oth = Ro*AP_loc */
152 /* Always use scalable version since we are in the MPI scalable version */
153 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(ptap->Rd, AP_loc, ptap->C_loc));
154 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(ptap->Ro, AP_loc, ptap->C_oth));
155
156 C_loc = ptap->C_loc;
157 C_oth = ptap->C_oth;
158
159 /* add C_loc and Co to C */
160 PetscCall(MatGetOwnershipRange(C, &rstart, &rend));
161
162 /* C_loc -> C */
163 cm = C_loc->rmap->N;
164 c_seq = (Mat_SeqAIJ *)C_loc->data;
165 cols = c_seq->j;
166 vals = c_seq->a;
167 PetscCall(ISLocalToGlobalMappingApply(ptap->ltog, c_seq->i[C_loc->rmap->n], c_seq->j, c_seq->j));
168
169 /* The (fast) MatSetValues_MPIAIJ_CopyFromCSRFormat function can only be used when C->was_assembled is PETSC_FALSE and */
170 /* when there are no off-processor parts. */
171 /* If was_assembled is true, then the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; in MatSetValues_MPIAIJ_CopyFromCSRFormat */
172 /* is no longer true. Then the more complex function MatSetValues_MPIAIJ() has to be used, where the column index is looked up from */
173 /* a table, and other, more complex stuff has to be done. */
174 if (C->assembled) {
175 C->was_assembled = PETSC_TRUE;
176 C->assembled = PETSC_FALSE;
177 }
178 if (C->was_assembled) {
179 for (i = 0; i < cm; i++) {
180 ncols = c_seq->i[i + 1] - c_seq->i[i];
181 row = rstart + i;
182 PetscCall(MatSetValues_MPIAIJ(C, 1, &row, ncols, cols, vals, ADD_VALUES));
183 cols += ncols;
184 vals += ncols;
185 }
186 } else {
187 PetscCall(MatSetValues_MPIAIJ_CopyFromCSRFormat(C, c_seq->j, c_seq->i, c_seq->a));
188 }
189 PetscCall(ISGlobalToLocalMappingApply(ptap->ltog, IS_GTOLM_DROP, c_seq->i[C_loc->rmap->n], c_seq->j, &nout, c_seq->j));
190 PetscCheck(c_seq->i[C_loc->rmap->n] == nout, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incorrect mapping %" PetscInt_FMT " != %" PetscInt_FMT, c_seq->i[C_loc->rmap->n], nout);
191
192 /* Co -> C, off-processor part */
193 cm = C_oth->rmap->N;
194 c_seq = (Mat_SeqAIJ *)C_oth->data;
195 cols = c_seq->j;
196 vals = c_seq->a;
197 PetscCall(ISLocalToGlobalMappingApply(ptap->ltog, c_seq->i[C_oth->rmap->n], c_seq->j, c_seq->j));
198 for (i = 0; i < cm; i++) {
199 ncols = c_seq->i[i + 1] - c_seq->i[i];
200 row = p->garray[i];
201 PetscCall(MatSetValues(C, 1, &row, ncols, cols, vals, ADD_VALUES));
202 cols += ncols;
203 vals += ncols;
204 }
205 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
206 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
207
208 ptap->reuse = MAT_REUSE_MATRIX;
209
210 PetscCall(ISGlobalToLocalMappingApply(ptap->ltog, IS_GTOLM_DROP, c_seq->i[C_oth->rmap->n], c_seq->j, &nout, c_seq->j));
211 PetscCheck(c_seq->i[C_oth->rmap->n] == nout, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incorrect mapping %" PetscInt_FMT " != %" PetscInt_FMT, c_seq->i[C_loc->rmap->n], nout);
212 PetscFunctionReturn(PETSC_SUCCESS);
213 }
214
/* Symbolic phase of the memory-scalable C = P^T * A * P for MPIAIJ matrices.

   Creates the MatProductCtx_APMPI context attached to Cmpi->product and every
   structure reused by MatPtAPNumeric_MPIAIJ_MPIAIJ_scalable():
     (0) Rd = Pd^T, Ro = Po^T
     (1) symbolic AP_loc = A_loc*P = Ad*P_loc + Ao*P_oth (api,apj)
     (2) symbolic C_oth = Ro*AP_loc and C_loc = Rd*AP_loc
     (3)-(4) communication of the i- and j-structure of the off-process rows
             of C_oth to their owning ranks
     (5) preallocation of Cmpi from C_loc plus the received rows

   Cmpi is only sized and preallocated here; assembly is done by the numeric
   phase. `fill` is the user-supplied fill-ratio estimate used to size the
   initial free-space buffers. */
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_scalable(Mat A, Mat P, PetscReal fill, Mat Cmpi)
{
  MatProductCtx_APMPI *ptap;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  MPI_Comm comm;
  PetscMPIInt size, rank;
  Mat P_loc, P_oth;
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  PetscInt am = A->rmap->n, pm = P->rmap->n, pN = P->cmap->N, pn = P->cmap->n;
  PetscInt *lnk, i, k, pnz, row;
  PetscMPIInt tagi, tagj, *len_si, *len_s, *len_ri, nrecv, nsend, proc;
  PETSC_UNUSED PetscMPIInt icompleted = 0;
  PetscInt **buf_rj, **buf_ri, **buf_ri_k;
  const PetscInt *owners;
  PetscInt len, *dnz, *onz, nzi, nspacedouble;
  PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextci;
  MPI_Request *swaits, *rwaits;
  MPI_Status *sstatus, rstatus;
  PetscLayout rowmap;
  PetscInt *owners_co, *coi, *coj; /* i and j array of (p->B)^T*A*P - used in the communication */
  PetscMPIInt *len_r, *id_r; /* array of length of comm->size, store send/recv matrix values */
  PetscInt *api, *apj, *Jptr, apnz, *prmap = p->garray, con, j, Crmax, *aj, *ai, *pi, nout;
  Mat_SeqAIJ *p_loc, *p_oth = NULL, *ad = (Mat_SeqAIJ *)a->A->data, *ao = NULL, *c_loc, *c_oth;
  PetscScalar *apv;
  PetscHMapI ta;
  MatType mtype;
  const char *prefix;
#if defined(PETSC_USE_INFO)
  PetscReal apfill;
#endif

  PetscFunctionBegin;
  MatCheckProduct(Cmpi, 4);
  PetscCheck(!Cmpi->product->data, PetscObjectComm((PetscObject)Cmpi), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* ao stays NULL on a single rank: the off-diagonal block is empty then */
  if (size > 1) ao = (Mat_SeqAIJ *)a->B->data;

  /* create symbolic parallel matrix Cmpi */
  PetscCall(MatGetType(A, &mtype));
  PetscCall(MatSetType(Cmpi, mtype));

  /* create struct MatProductCtx_APMPI and attached it to C later */
  PetscCall(PetscNew(&ptap));
  ptap->reuse = MAT_INITIAL_MATRIX;
  ptap->algType = 0; /* 0 = scalable algorithm, see MatView_MPIAIJ_PtAP() */

  /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
  PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &ptap->startsj_s, &ptap->startsj_r, &ptap->bufa, &P_oth));
  /* get P_loc by taking all local rows of P */
  PetscCall(MatMPIAIJGetLocalMat(P, MAT_INITIAL_MATRIX, &P_loc));

  ptap->P_loc = P_loc;
  ptap->P_oth = P_oth;

  /* (0) compute Rd = Pd^T, Ro = Po^T */
  PetscCall(MatTranspose(p->A, MAT_INITIAL_MATRIX, &ptap->Rd));
  PetscCall(MatTranspose(p->B, MAT_INITIAL_MATRIX, &ptap->Ro));

  /* (1) compute symbolic AP = A_loc*P = Ad*P_loc + Ao*P_oth (api,apj) */
  p_loc = (Mat_SeqAIJ *)P_loc->data;
  if (P_oth) p_oth = (Mat_SeqAIJ *)P_oth->data;

  /* create and initialize a linked list */
  PetscCall(PetscHMapICreateWithSize(pn, &ta)); /* for compute AP_loc and Cmpi */
  MatRowMergeMax_SeqAIJ(p_loc, P_loc->rmap->N, ta);
  /* NOTE(review): P_oth->rmap is dereferenced unconditionally here while the
     assignment above guards on P_oth != NULL — presumably P_oth is always
     non-NULL on this path; confirm for the size == 1 case */
  MatRowMergeMax_SeqAIJ(p_oth, P_oth->rmap->N, ta);
  PetscCall(PetscHMapIGetSize(ta, &Crmax)); /* Crmax = nnz(sum of Prows) */

  PetscCall(PetscLLCondensedCreate_Scalable(Crmax, &lnk));

  /* Initial FreeSpace size is fill*(nnz(A) + nnz(P)) */
  if (ao) {
    PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ad->i[am], PetscIntSumTruncate(ao->i[am], p_loc->i[pm]))), &free_space));
  } else {
    PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ad->i[am], p_loc->i[pm])), &free_space));
  }
  current_space = free_space;
  nspacedouble = 0;

  PetscCall(PetscMalloc1(am + 1, &api));
  api[0] = 0;
  for (i = 0; i < am; i++) {
    /* diagonal portion: Ad[i,:]*P */
    ai = ad->i;
    pi = p_loc->i;
    nzi = ai[i + 1] - ai[i];
    aj = PetscSafePointerPlusOffset(ad->j, ai[i]);
    for (j = 0; j < nzi; j++) {
      row = aj[j];
      pnz = pi[row + 1] - pi[row];
      Jptr = p_loc->j + pi[row];
      /* add non-zero cols of P into the sorted linked list lnk */
      PetscCall(PetscLLCondensedAddSorted_Scalable(pnz, Jptr, lnk));
    }
    /* off-diagonal portion: Ao[i,:]*P */
    if (ao) {
      ai = ao->i;
      pi = p_oth->i;
      nzi = ai[i + 1] - ai[i];
      aj = PetscSafePointerPlusOffset(ao->j, ai[i]);
      for (j = 0; j < nzi; j++) {
        row = aj[j];
        pnz = pi[row + 1] - pi[row];
        Jptr = p_oth->j + pi[row];
        PetscCall(PetscLLCondensedAddSorted_Scalable(pnz, Jptr, lnk));
      }
    }
    apnz = lnk[0]; /* lnk[0] holds the number of entries collected for this row */
    api[i + 1] = api[i] + apnz;

    /* if free space is not available, double the total space in the list */
    if (current_space->local_remaining < apnz) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(apnz, current_space->total_array_size), &current_space));
      nspacedouble++;
    }

    /* Copy data into free space, then initialize lnk */
    PetscCall(PetscLLCondensedClean_Scalable(apnz, current_space->array, lnk));

    current_space->array += apnz;
    current_space->local_used += apnz;
    current_space->local_remaining -= apnz;
  }
  /* Allocate space for apj and apv, initialize apj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscCalloc2(api[am], &apj, api[am], &apv));
  PetscCall(PetscFreeSpaceContiguous(&free_space, apj));
  PetscCall(PetscLLCondensedDestroy_Scalable(lnk));

  /* Create AP_loc for reuse; api/apj/apv are owned by the context (freed in
     MatProductCtxDestroy_MPIAIJ_PtAP(), not by the matrix) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, pN, api, apj, apv, &ptap->AP_loc));
  /* compact the column space; ptap->ltog maps compacted (local) column
     indices of AP_loc/C_loc/C_oth back to global indices */
  PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(ptap->AP_loc, &ptap->ltog));

#if defined(PETSC_USE_INFO)
  if (ao) {
    apfill = (PetscReal)api[am] / (ad->i[am] + ao->i[am] + p_loc->i[pm] + 1);
  } else {
    apfill = (PetscReal)api[am] / (ad->i[am] + p_loc->i[pm] + 1);
  }
  ptap->AP_loc->info.mallocs = nspacedouble;
  ptap->AP_loc->info.fill_ratio_given = fill;
  ptap->AP_loc->info.fill_ratio_needed = apfill;

  if (api[am]) {
    PetscCall(PetscInfo(ptap->AP_loc, "Scalable algorithm, AP_loc reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", nspacedouble, (double)fill, (double)apfill));
    PetscCall(PetscInfo(ptap->AP_loc, "Use MatPtAP(A,B,MatReuse,%g,&C) for best AP_loc performance.;\n", (double)apfill));
  } else {
    PetscCall(PetscInfo(ptap->AP_loc, "Scalable algorithm, AP_loc is empty \n"));
  }
#endif

  /* (2-1) compute symbolic Co = Ro*AP_loc */
  PetscCall(MatProductCreate(ptap->Ro, ptap->AP_loc, NULL, &ptap->C_oth));
  PetscCall(MatGetOptionsPrefix(A, &prefix));
  PetscCall(MatSetOptionsPrefix(ptap->C_oth, prefix));
  PetscCall(MatAppendOptionsPrefix(ptap->C_oth, "inner_offdiag_"));

  PetscCall(MatProductSetType(ptap->C_oth, MATPRODUCT_AB));
  PetscCall(MatProductSetAlgorithm(ptap->C_oth, "sorted"));
  PetscCall(MatProductSetFill(ptap->C_oth, fill));
  PetscCall(MatProductSetFromOptions(ptap->C_oth));
  PetscCall(MatProductSymbolic(ptap->C_oth));

  /* (3) send coj of C_oth to other processors */
  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &rowmap));
  PetscCall(PetscLayoutSetLocalSize(rowmap, pn));
  PetscCall(PetscLayoutSetBlockSize(rowmap, 1));
  PetscCall(PetscLayoutSetUp(rowmap));
  PetscCall(PetscLayoutGetRanges(rowmap, &owners));

  /* determine the number of messages to send, their lengths */
  PetscCall(PetscMalloc4(size, &len_s, size, &len_si, size, &sstatus, size + 2, &owners_co));
  PetscCall(PetscArrayzero(len_s, size));
  PetscCall(PetscArrayzero(len_si, size));

  c_oth = (Mat_SeqAIJ *)ptap->C_oth->data;
  coi = c_oth->i;
  coj = c_oth->j;
  con = ptap->C_oth->rmap->n;
  proc = 0;
  /* compacted column indices of C_oth -> global before sending to other ranks;
     mapped back near the end of this function */
  PetscCall(ISLocalToGlobalMappingApply(ptap->ltog, coi[con], coj, coj));
  for (i = 0; i < con; i++) {
    while (prmap[i] >= owners[proc + 1]) proc++;
    len_si[proc]++; /* num of rows in Co(=Pt*AP) to be sent to [proc] */
    len_s[proc] += coi[i + 1] - coi[i]; /* num of nonzeros in Co to be sent to [proc] */
  }

  len = 0; /* max length of buf_si[], see (4) */
  owners_co[0] = 0;
  nsend = 0;
  for (proc = 0; proc < size; proc++) {
    owners_co[proc + 1] = owners_co[proc] + len_si[proc];
    if (len_s[proc]) {
      nsend++;
      len_si[proc] = 2 * (len_si[proc] + 1); /* length of buf_si to be sent to [proc] */
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for coi and coj */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, nsend, nrecv, len_s, len_si, &id_r, &len_r, &len_ri));

  /* post the Irecv and Isend of coj */
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, nrecv, id_r, len_r, &buf_rj, &rwaits));
  PetscCall(PetscMalloc1(nsend + 1, &swaits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners_co[proc];
    PetscCallMPI(MPIU_Isend(coj + coi[i], len_s[proc], MPIU_INT, proc, tagj, comm, swaits + k));
    k++;
  }

  /* (2-2) compute symbolic C_loc = Rd*AP_loc
     (done here so the sends above can overlap with this local work) */
  PetscCall(MatProductCreate(ptap->Rd, ptap->AP_loc, NULL, &ptap->C_loc));
  PetscCall(MatProductSetType(ptap->C_loc, MATPRODUCT_AB));
  PetscCall(MatProductSetAlgorithm(ptap->C_loc, "default"));
  PetscCall(MatProductSetFill(ptap->C_loc, fill));

  PetscCall(MatSetOptionsPrefix(ptap->C_loc, prefix));
  PetscCall(MatAppendOptionsPrefix(ptap->C_loc, "inner_diag_"));

  PetscCall(MatProductSetFromOptions(ptap->C_loc));
  PetscCall(MatProductSymbolic(ptap->C_loc));

  c_loc = (Mat_SeqAIJ *)ptap->C_loc->data;
  PetscCall(ISLocalToGlobalMappingApply(ptap->ltog, c_loc->i[ptap->C_loc->rmap->n], c_loc->j, c_loc->j));

  /* receives coj are complete */
  for (i = 0; i < nrecv; i++) PetscCallMPI(MPI_Waitany(nrecv, rwaits, &icompleted, &rstatus));
  PetscCall(PetscFree(rwaits));
  if (nsend) PetscCallMPI(MPI_Waitall(nsend, swaits, sstatus));

  /* add received column indices into ta to update Crmax */
  for (k = 0; k < nrecv; k++) { /* k-th received message */
    Jptr = buf_rj[k];
    for (j = 0; j < len_r[k]; j++) PetscCall(PetscHMapISet(ta, *(Jptr + j) + 1, 1));
  }
  PetscCall(PetscHMapIGetSize(ta, &Crmax));
  PetscCall(PetscHMapIDestroy(&ta));

  /* (4) send and recv coi */
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, nrecv, id_r, len_ri, &buf_ri, &rwaits));
  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    nrows = len_si[proc] / 2 - 1; /* num of rows in Co to be sent to [proc] */
    buf_si_i = buf_si + nrows + 1;
    buf_si[0] = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i = owners_co[proc]; i < owners_co[proc + 1]; i++) {
      nzi = coi[i + 1] - coi[i];
      buf_si_i[nrows + 1] = buf_si_i[nrows] + nzi; /* i-structure */
      buf_si[nrows + 1] = prmap[i] - owners[proc]; /* local row index */
      nrows++;
    }
    PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, swaits + k));
    k++;
    buf_si += len_si[proc];
  }
  for (i = 0; i < nrecv; i++) PetscCallMPI(MPI_Waitany(nrecv, rwaits, &icompleted, &rstatus));
  PetscCall(PetscFree(rwaits));
  if (nsend) PetscCallMPI(MPI_Waitall(nsend, swaits, sstatus));

  PetscCall(PetscFree4(len_s, len_si, sstatus, owners_co));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(swaits));
  PetscCall(PetscFree(buf_s));

  /* (5) compute the local portion of Cmpi */
  /* set initial free space to be Crmax, sufficient for holding nonzeros in each row of Cmpi */
  PetscCall(PetscFreeSpaceGet(Crmax, &free_space));
  current_space = free_space;

  PetscCall(PetscMalloc3(nrecv, &buf_ri_k, nrecv, &nextrow, nrecv, &nextci));
  for (k = 0; k < nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
    nextci[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm, pn, pn, dnz, onz);
  PetscCall(PetscLLCondensedCreate_Scalable(Crmax, &lnk));
  for (i = 0; i < pn; i++) {
    /* add C_loc into Cmpi */
    nzi = c_loc->i[i + 1] - c_loc->i[i];
    Jptr = c_loc->j + c_loc->i[i];
    PetscCall(PetscLLCondensedAddSorted_Scalable(nzi, Jptr, lnk));

    /* add received col data into lnk */
    for (k = 0; k < nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        nzi = *(nextci[k] + 1) - *nextci[k];
        Jptr = buf_rj[k] + *nextci[k];
        PetscCall(PetscLLCondensedAddSorted_Scalable(nzi, Jptr, lnk));
        nextrow[k]++;
        nextci[k]++;
      }
    }
    nzi = lnk[0];

    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLCondensedClean_Scalable(nzi, current_space->array, lnk));
    PetscCall(MatPreallocateSet(i + owners[rank], nzi, current_space->array, dnz, onz));
  }
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextci));
  PetscCall(PetscLLCondensedDestroy_Scalable(lnk));
  PetscCall(PetscFreeSpaceDestroy(free_space));

  /* local sizes and preallocation */
  PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));
  if (P->cmap->bs > 0) {
    PetscCall(PetscLayoutSetBlockSize(Cmpi->rmap, P->cmap->bs));
    PetscCall(PetscLayoutSetBlockSize(Cmpi->cmap, P->cmap->bs));
  }
  PetscCall(MatMPIAIJSetPreallocation(Cmpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  /* members in merge */
  PetscCall(PetscFree(id_r));
  PetscCall(PetscFree(len_r));
  PetscCall(PetscFree(buf_ri[0]));
  PetscCall(PetscFree(buf_ri));
  PetscCall(PetscFree(buf_rj[0]));
  PetscCall(PetscFree(buf_rj));
  PetscCall(PetscLayoutDestroy(&rowmap));

  /* map the column indices of C_oth and C_loc back to the compacted
     numbering; nout must match or IS_GTOLM_DROP lost entries */
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(ptap->ltog, IS_GTOLM_DROP, c_oth->i[ptap->C_oth->rmap->n], c_oth->j, &nout, c_oth->j));
  PetscCheck(c_oth->i[ptap->C_oth->rmap->n] == nout, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incorrect mapping %" PetscInt_FMT " != %" PetscInt_FMT, c_oth->i[ptap->C_oth->rmap->n], nout);
  PetscCall(ISGlobalToLocalMappingApply(ptap->ltog, IS_GTOLM_DROP, c_loc->i[ptap->C_loc->rmap->n], c_loc->j, &nout, c_loc->j));
  PetscCheck(c_loc->i[ptap->C_loc->rmap->n] == nout, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Incorrect mapping %" PetscInt_FMT " != %" PetscInt_FMT, c_loc->i[ptap->C_loc->rmap->n], nout);

  /* attach the supporting struct to Cmpi for reuse */
  Cmpi->product->data = ptap;
  Cmpi->product->view = MatView_MPIAIJ_PtAP;
  Cmpi->product->destroy = MatProductCtxDestroy_MPIAIJ_PtAP;

  /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */
  Cmpi->assembled = PETSC_FALSE;
  Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ_scalable;
  PetscFunctionReturn(PETSC_SUCCESS);
}
572
MatPtAPSymbolicComputeOneRowOfAP_private(Mat A,Mat P,Mat P_oth,const PetscInt * map,PetscInt dof,PetscInt i,PetscHSetI dht,PetscHSetI oht)573 static inline PetscErrorCode MatPtAPSymbolicComputeOneRowOfAP_private(Mat A, Mat P, Mat P_oth, const PetscInt *map, PetscInt dof, PetscInt i, PetscHSetI dht, PetscHSetI oht)
574 {
575 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
576 Mat_SeqAIJ *ad = (Mat_SeqAIJ *)a->A->data, *ao = (Mat_SeqAIJ *)a->B->data, *p_oth = (Mat_SeqAIJ *)P_oth->data, *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data;
577 PetscInt *ai, nzi, j, *aj, row, col, *pi, *pj, pnz, nzpi, *p_othcols, k;
578 PetscInt pcstart, pcend, column, offset;
579
580 PetscFunctionBegin;
581 pcstart = P->cmap->rstart;
582 pcstart *= dof;
583 pcend = P->cmap->rend;
584 pcend *= dof;
585 /* diagonal portion: Ad[i,:]*P */
586 ai = ad->i;
587 nzi = ai[i + 1] - ai[i];
588 aj = ad->j + ai[i];
589 for (j = 0; j < nzi; j++) {
590 row = aj[j];
591 offset = row % dof;
592 row /= dof;
593 nzpi = pd->i[row + 1] - pd->i[row];
594 pj = pd->j + pd->i[row];
595 for (k = 0; k < nzpi; k++) PetscCall(PetscHSetIAdd(dht, pj[k] * dof + offset + pcstart));
596 }
597 /* off-diagonal P */
598 for (j = 0; j < nzi; j++) {
599 row = aj[j];
600 offset = row % dof;
601 row /= dof;
602 nzpi = po->i[row + 1] - po->i[row];
603 pj = PetscSafePointerPlusOffset(po->j, po->i[row]);
604 for (k = 0; k < nzpi; k++) PetscCall(PetscHSetIAdd(oht, p->garray[pj[k]] * dof + offset));
605 }
606
607 /* off-diagonal part: Ao[i, :]*P_oth */
608 if (ao) {
609 ai = ao->i;
610 pi = p_oth->i;
611 nzi = ai[i + 1] - ai[i];
612 aj = ao->j + ai[i];
613 for (j = 0; j < nzi; j++) {
614 row = aj[j];
615 offset = a->garray[row] % dof;
616 row = map[row];
617 pnz = pi[row + 1] - pi[row];
618 p_othcols = p_oth->j + pi[row];
619 for (col = 0; col < pnz; col++) {
620 column = p_othcols[col] * dof + offset;
621 if (column >= pcstart && column < pcend) {
622 PetscCall(PetscHSetIAdd(dht, column));
623 } else {
624 PetscCall(PetscHSetIAdd(oht, column));
625 }
626 }
627 }
628 } /* end if (ao) */
629 PetscFunctionReturn(PETSC_SUCCESS);
630 }
631
/* Computes row i of AP = A*[P_loc, P_oth] numerically, accumulating
   (global column -> value) pairs into the hash map hmap.

   Numeric counterpart of MatPtAPSymbolicComputeOneRowOfAP_private():
   map maps a local column of Ao (index into a->garray) to the matching row of
   P_oth; dof is the number of dofs per node, so nodal index ii corresponds to
   point ii/dof with component offset ii%dof. Flops are logged per inner loop
   (one multiply + one add per product entry). */
static inline PetscErrorCode MatPtAPNumericComputeOneRowOfAP_private(Mat A, Mat P, Mat P_oth, const PetscInt *map, PetscInt dof, PetscInt i, PetscHMapIV hmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *ad = (Mat_SeqAIJ *)a->A->data, *ao = (Mat_SeqAIJ *)a->B->data, *p_oth = (Mat_SeqAIJ *)P_oth->data, *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data;
  PetscInt *ai, nzi, j, *aj, row, col, *pi, pnz, *p_othcols, pcstart, *pj, k, nzpi, offset;
  PetscScalar ra, *aa, *pa;

  PetscFunctionBegin;
  /* start of this rank's global column range of P, scaled by dof */
  pcstart = P->cmap->rstart;
  pcstart *= dof;

  /* diagonal portion: Ad[i,:]*P */
  ai = ad->i;
  nzi = ai[i + 1] - ai[i];
  aj = ad->j + ai[i];
  aa = ad->a + ai[i];
  for (j = 0; j < nzi; j++) {
    ra = aa[j];
    row = aj[j];
    offset = row % dof; /* component within the node */
    row /= dof;         /* point (nodal) row of P */
    nzpi = pd->i[row + 1] - pd->i[row];
    pj = pd->j + pd->i[row];
    pa = pd->a + pd->i[row];
    /* local column of Pd -> global column: pj[k]*dof + offset + pcstart */
    for (k = 0; k < nzpi; k++) PetscCall(PetscHMapIVAddValue(hmap, pj[k] * dof + offset + pcstart, ra * pa[k]));
    PetscCall(PetscLogFlops(2.0 * nzpi));
  }
  /* same A row against the off-diagonal block Po (global column via garray) */
  for (j = 0; j < nzi; j++) {
    ra = aa[j];
    row = aj[j];
    offset = row % dof;
    row /= dof;
    nzpi = po->i[row + 1] - po->i[row];
    pj = PetscSafePointerPlusOffset(po->j, po->i[row]);
    pa = PetscSafePointerPlusOffset(po->a, po->i[row]);
    for (k = 0; k < nzpi; k++) PetscCall(PetscHMapIVAddValue(hmap, p->garray[pj[k]] * dof + offset, ra * pa[k]));
    PetscCall(PetscLogFlops(2.0 * nzpi));
  }

  /* off-diagonal part: Ao[i, :]*P_oth */
  if (ao) {
    ai = ao->i;
    pi = p_oth->i;
    nzi = ai[i + 1] - ai[i];
    aj = ao->j + ai[i];
    aa = ao->a + ai[i];
    for (j = 0; j < nzi; j++) {
      row = aj[j];
      offset = a->garray[row] % dof;
      row = map[row]; /* row of P_oth matching this off-diagonal A column */
      ra = aa[j];
      pnz = pi[row + 1] - pi[row];
      p_othcols = p_oth->j + pi[row];
      pa = p_oth->a + pi[row];
      for (col = 0; col < pnz; col++) PetscCall(PetscHMapIVAddValue(hmap, p_othcols[col] * dof + offset, ra * pa[col]));
      PetscCall(PetscLogFlops(2.0 * pnz));
    }
  } /* end if (ao) */
  PetscFunctionReturn(PETSC_SUCCESS);
}
692
693 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat, Mat, PetscInt dof, MatReuse, Mat *);
694
MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat A,Mat P,PetscInt dof,Mat C)695 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat A, Mat P, PetscInt dof, Mat C)
696 {
697 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data, *c = (Mat_MPIAIJ *)C->data;
698 Mat_SeqAIJ *cd, *co, *po = (Mat_SeqAIJ *)p->B->data, *pd = (Mat_SeqAIJ *)p->A->data;
699 MatProductCtx_APMPI *ptap;
700 PetscHMapIV hmap;
701 PetscInt i, j, jj, kk, nzi, *c_rmtj, voff, *c_othj, pn, pon, pcstart, pcend, ccstart, ccend, row, am, *poj, *pdj, *apindices, cmaxr, *c_rmtc, *c_rmtjj, *dcc, *occ, loc;
702 PetscScalar *c_rmta, *c_otha, *poa, *pda, *apvalues, *apvaluestmp, *c_rmtaa;
703 PetscInt offset, ii, pocol;
704 const PetscInt *mappingindices;
705 IS map;
706
707 PetscFunctionBegin;
708 MatCheckProduct(C, 4);
709 ptap = (MatProductCtx_APMPI *)C->product->data;
710 PetscCheck(ptap, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be computed. Missing data");
711 PetscCheck(ptap->P_oth, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be reused. Do not call MatProductClear()");
712
713 PetscCall(MatZeroEntries(C));
714
715 /* Get P_oth = ptap->P_oth and P_loc = ptap->P_loc */
716 if (ptap->reuse == MAT_REUSE_MATRIX) {
717 /* P_oth and P_loc are obtained in MatPtASymbolic() when reuse == MAT_INITIAL_MATRIX */
718 PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, dof, MAT_REUSE_MATRIX, &ptap->P_oth));
719 }
720 PetscCall(PetscObjectQuery((PetscObject)ptap->P_oth, "aoffdiagtopothmapping", (PetscObject *)&map));
721
722 PetscCall(MatGetLocalSize(p->B, NULL, &pon));
723 pon *= dof;
724 PetscCall(PetscCalloc2(ptap->c_rmti[pon], &c_rmtj, ptap->c_rmti[pon], &c_rmta));
725 PetscCall(MatGetLocalSize(A, &am, NULL));
726 cmaxr = 0;
727 for (i = 0; i < pon; i++) cmaxr = PetscMax(cmaxr, ptap->c_rmti[i + 1] - ptap->c_rmti[i]);
728 PetscCall(PetscCalloc4(cmaxr, &apindices, cmaxr, &apvalues, cmaxr, &apvaluestmp, pon, &c_rmtc));
729 PetscCall(PetscHMapIVCreateWithSize(cmaxr, &hmap));
730 PetscCall(ISGetIndices(map, &mappingindices));
731 for (i = 0; i < am && pon; i++) {
732 PetscCall(PetscHMapIVClear(hmap));
733 offset = i % dof;
734 ii = i / dof;
735 nzi = po->i[ii + 1] - po->i[ii];
736 if (!nzi) continue;
737 PetscCall(MatPtAPNumericComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, hmap));
738 voff = 0;
739 PetscCall(PetscHMapIVGetPairs(hmap, &voff, apindices, apvalues));
740 if (!voff) continue;
741
742 /* Form C(ii, :) */
743 poj = po->j + po->i[ii];
744 poa = po->a + po->i[ii];
745 for (j = 0; j < nzi; j++) {
746 pocol = poj[j] * dof + offset;
747 c_rmtjj = c_rmtj + ptap->c_rmti[pocol];
748 c_rmtaa = c_rmta + ptap->c_rmti[pocol];
749 for (jj = 0; jj < voff; jj++) {
750 apvaluestmp[jj] = apvalues[jj] * poa[j];
751 /* If the row is empty */
752 if (!c_rmtc[pocol]) {
753 c_rmtjj[jj] = apindices[jj];
754 c_rmtaa[jj] = apvaluestmp[jj];
755 c_rmtc[pocol]++;
756 } else {
757 PetscCall(PetscFindInt(apindices[jj], c_rmtc[pocol], c_rmtjj, &loc));
758 if (loc >= 0) { /* hit */
759 c_rmtaa[loc] += apvaluestmp[jj];
760 PetscCall(PetscLogFlops(1.0));
761 } else { /* new element */
762 loc = -(loc + 1);
763 /* Move data backward */
764 for (kk = c_rmtc[pocol]; kk > loc; kk--) {
765 c_rmtjj[kk] = c_rmtjj[kk - 1];
766 c_rmtaa[kk] = c_rmtaa[kk - 1];
767 } /* End kk */
768 c_rmtjj[loc] = apindices[jj];
769 c_rmtaa[loc] = apvaluestmp[jj];
770 c_rmtc[pocol]++;
771 }
772 }
773 PetscCall(PetscLogFlops(voff));
774 } /* End jj */
775 } /* End j */
776 } /* End i */
777
778 PetscCall(PetscFree4(apindices, apvalues, apvaluestmp, c_rmtc));
779 PetscCall(PetscHMapIVDestroy(&hmap));
780
781 PetscCall(MatGetLocalSize(P, NULL, &pn));
782 pn *= dof;
783 PetscCall(PetscCalloc2(ptap->c_othi[pn], &c_othj, ptap->c_othi[pn], &c_otha));
784
785 PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
786 PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_SCALAR, c_rmta, c_otha, MPI_REPLACE));
787 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, &pcend));
788 pcstart = pcstart * dof;
789 pcend = pcend * dof;
790 cd = (Mat_SeqAIJ *)c->A->data;
791 co = (Mat_SeqAIJ *)c->B->data;
792
793 cmaxr = 0;
794 for (i = 0; i < pn; i++) cmaxr = PetscMax(cmaxr, (cd->i[i + 1] - cd->i[i]) + (co->i[i + 1] - co->i[i]));
795 PetscCall(PetscCalloc5(cmaxr, &apindices, cmaxr, &apvalues, cmaxr, &apvaluestmp, pn, &dcc, pn, &occ));
796 PetscCall(PetscHMapIVCreateWithSize(cmaxr, &hmap));
797 for (i = 0; i < am && pn; i++) {
798 PetscCall(PetscHMapIVClear(hmap));
799 offset = i % dof;
800 ii = i / dof;
801 nzi = pd->i[ii + 1] - pd->i[ii];
802 if (!nzi) continue;
803 PetscCall(MatPtAPNumericComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, hmap));
804 voff = 0;
805 PetscCall(PetscHMapIVGetPairs(hmap, &voff, apindices, apvalues));
806 if (!voff) continue;
807 /* Form C(ii, :) */
808 pdj = pd->j + pd->i[ii];
809 pda = pd->a + pd->i[ii];
810 for (j = 0; j < nzi; j++) {
811 row = pcstart + pdj[j] * dof + offset;
812 for (jj = 0; jj < voff; jj++) apvaluestmp[jj] = apvalues[jj] * pda[j];
813 PetscCall(PetscLogFlops(voff));
814 PetscCall(MatSetValues(C, 1, &row, voff, apindices, apvaluestmp, ADD_VALUES));
815 }
816 }
817 PetscCall(ISRestoreIndices(map, &mappingindices));
818 PetscCall(MatGetOwnershipRangeColumn(C, &ccstart, &ccend));
819 PetscCall(PetscFree5(apindices, apvalues, apvaluestmp, dcc, occ));
820 PetscCall(PetscHMapIVDestroy(&hmap));
821 PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
822 PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_SCALAR, c_rmta, c_otha, MPI_REPLACE));
823 PetscCall(PetscFree2(c_rmtj, c_rmta));
824
825 /* Add contributions from remote */
826 for (i = 0; i < pn; i++) {
827 row = i + pcstart;
828 PetscCall(MatSetValues(C, 1, &row, ptap->c_othi[i + 1] - ptap->c_othi[i], PetscSafePointerPlusOffset(c_othj, ptap->c_othi[i]), PetscSafePointerPlusOffset(c_otha, ptap->c_othi[i]), ADD_VALUES));
829 }
830 PetscCall(PetscFree2(c_othj, c_otha));
831
832 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
833 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
834
835 ptap->reuse = MAT_REUSE_MATRIX;
836 PetscFunctionReturn(PETSC_SUCCESS);
837 }
838
/* Numeric phase of C = P^T*A*P for plain MPIAIJ matrices using the all-at-once
   algorithm; thin wrapper that calls the dof-aware kernel with dof = 1 */
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce(Mat A, Mat P, Mat C)
{
  PetscFunctionBegin;
  /* dof = 1: every row/column corresponds to a single degree of freedom */
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, P, 1, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
845
/*
  Numeric phase of C = P^T*A*P using the "merged" all-at-once algorithm.

  Unlike the non-merged variant, a single sweep over the local rows of A forms one
  row of AP (accumulated in the hash map hmap) and immediately distributes it into
  BOTH destinations:
    - the remote buffer (c_rmtj/c_rmta), for rows of C owned by other ranks and
      reached through the off-diagonal part of P (po), and
    - the locally owned rows of C, reached through the diagonal part of P (pd),
      via MatSetValues(..., ADD_VALUES).
  The remote buffer is then reduced onto its owners with the PetscSF built in the
  symbolic phase and added into C.

  Input parameters:
+ A   - MPIAIJ matrix
. P   - MPIAIJ projector
- dof - number of degrees of freedom per node; rows i of A map to row ii = i/dof
        of P with sub-index offset = i%dof

  Output parameter:
. C - the product; its sparsity pattern and the support data in C->product->data
      (MatProductCtx_APMPI: c_rmti/c_othi offsets, sf, P_oth) must have been built
      by MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged()
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat A, Mat P, PetscInt dof, Mat C)
{
  Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data, *c = (Mat_MPIAIJ *)C->data;
  Mat_SeqAIJ *cd, *co, *po = (Mat_SeqAIJ *)p->B->data, *pd = (Mat_SeqAIJ *)p->A->data;
  MatProductCtx_APMPI *ptap;
  PetscHMapIV hmap; /* column -> value map holding one row of AP */
  PetscInt i, j, jj, kk, nzi, dnzi, *c_rmtj, voff, *c_othj, pn, pon, pcstart, pcend, row, am, *poj, *pdj, *apindices, cmaxr, *c_rmtc, *c_rmtjj, loc;
  PetscScalar *c_rmta, *c_otha, *poa, *pda, *apvalues, *apvaluestmp, *c_rmtaa;
  PetscInt offset, ii, pocol;
  const PetscInt *mappingindices;
  IS map;

  PetscFunctionBegin;
  MatCheckProduct(C, 4);
  ptap = (MatProductCtx_APMPI *)C->product->data;
  PetscCheck(ptap, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be computed. Missing data");
  PetscCheck(ptap->P_oth, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be reused. Do not call MatProductClear()");

  PetscCall(MatZeroEntries(C));

  /* Get P_oth = ptap->P_oth and P_loc = ptap->P_loc */
  if (ptap->reuse == MAT_REUSE_MATRIX) {
    /* P_oth and P_loc are obtained in MatPtASymbolic() when reuse == MAT_INITIAL_MATRIX */
    PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, dof, MAT_REUSE_MATRIX, &ptap->P_oth));
  }
  /* map translates off-diagonal column indices of A into row indices of P_oth */
  PetscCall(PetscObjectQuery((PetscObject)ptap->P_oth, "aoffdiagtopothmapping", (PetscObject *)&map));
  /* pon: number of off-diagonal columns of P (rows of C owned remotely we contribute to) */
  PetscCall(MatGetLocalSize(p->B, NULL, &pon));
  pon *= dof;
  /* pn: number of locally owned rows of C */
  PetscCall(MatGetLocalSize(P, NULL, &pn));
  pn *= dof;

  /* Buffers for the rows destined to remote owners; sized by the symbolic offsets c_rmti */
  PetscCall(PetscCalloc2(ptap->c_rmti[pon], &c_rmtj, ptap->c_rmti[pon], &c_rmta));
  PetscCall(MatGetLocalSize(A, &am, NULL));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, &pcend));
  pcstart *= dof;
  pcend *= dof;
  /* cmaxr: maximum row length over both the remote rows and the local (diag+offdiag) rows of C */
  cmaxr = 0;
  for (i = 0; i < pon; i++) cmaxr = PetscMax(cmaxr, ptap->c_rmti[i + 1] - ptap->c_rmti[i]);
  cd = (Mat_SeqAIJ *)c->A->data;
  co = (Mat_SeqAIJ *)c->B->data;
  for (i = 0; i < pn; i++) cmaxr = PetscMax(cmaxr, (cd->i[i + 1] - cd->i[i]) + (co->i[i + 1] - co->i[i]));
  PetscCall(PetscCalloc4(cmaxr, &apindices, cmaxr, &apvalues, cmaxr, &apvaluestmp, pon, &c_rmtc));
  PetscCall(PetscHMapIVCreateWithSize(cmaxr, &hmap));
  PetscCall(ISGetIndices(map, &mappingindices));
  /* Single merged pass over the local rows of A (skipped entirely when this rank owns nothing) */
  for (i = 0; i < am && (pon || pn); i++) {
    PetscCall(PetscHMapIVClear(hmap));
    offset = i % dof;
    ii = i / dof;
    nzi = po->i[ii + 1] - po->i[ii];  /* nonzeros of P row ii in the off-diagonal part */
    dnzi = pd->i[ii + 1] - pd->i[ii]; /* nonzeros of P row ii in the diagonal part */
    if (!nzi && !dnzi) continue;
    /* Accumulate row i of AP = A*P into hmap */
    PetscCall(MatPtAPNumericComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, hmap));
    voff = 0;
    PetscCall(PetscHMapIVGetPairs(hmap, &voff, apindices, apvalues));
    if (!voff) continue;

    /* Form remote C(ii, :) */
    poj = PetscSafePointerPlusOffset(po->j, po->i[ii]);
    poa = PetscSafePointerPlusOffset(po->a, po->i[ii]);
    for (j = 0; j < nzi; j++) {
      pocol = poj[j] * dof + offset;
      c_rmtjj = c_rmtj + ptap->c_rmti[pocol];
      c_rmtaa = c_rmta + ptap->c_rmti[pocol];
      /* Merge the scaled AP row into the sorted remote row pocol */
      for (jj = 0; jj < voff; jj++) {
        apvaluestmp[jj] = apvalues[jj] * poa[j];
        /* If the row is empty */
        if (!c_rmtc[pocol]) {
          c_rmtjj[jj] = apindices[jj];
          c_rmtaa[jj] = apvaluestmp[jj];
          c_rmtc[pocol]++;
        } else {
          /* binary search for the column in the already-stored (sorted) row */
          PetscCall(PetscFindInt(apindices[jj], c_rmtc[pocol], c_rmtjj, &loc));
          if (loc >= 0) { /* hit */
            c_rmtaa[loc] += apvaluestmp[jj];
            PetscCall(PetscLogFlops(1.0));
          } else { /* new element */
            loc = -(loc + 1); /* decode insertion point */
            /* Move data backward */
            for (kk = c_rmtc[pocol]; kk > loc; kk--) {
              c_rmtjj[kk] = c_rmtjj[kk - 1];
              c_rmtaa[kk] = c_rmtaa[kk - 1];
            } /* End kk */
            c_rmtjj[loc] = apindices[jj];
            c_rmtaa[loc] = apvaluestmp[jj];
            c_rmtc[pocol]++;
          }
        }
      } /* End jj */
      PetscCall(PetscLogFlops(voff));
    } /* End j */

    /* Form local C(ii, :) */
    pdj = pd->j + pd->i[ii];
    pda = pd->a + pd->i[ii];
    for (j = 0; j < dnzi; j++) {
      row = pcstart + pdj[j] * dof + offset;
      for (jj = 0; jj < voff; jj++) apvaluestmp[jj] = apvalues[jj] * pda[j]; /* scale AP row by the local P entry */
      PetscCall(PetscLogFlops(voff));
      PetscCall(MatSetValues(C, 1, &row, voff, apindices, apvaluestmp, ADD_VALUES));
    } /* End j */
  } /* End i */

  PetscCall(ISRestoreIndices(map, &mappingindices));
  PetscCall(PetscFree4(apindices, apvalues, apvaluestmp, c_rmtc));
  PetscCall(PetscHMapIVDestroy(&hmap));
  PetscCall(PetscCalloc2(ptap->c_othi[pn], &c_othj, ptap->c_othi[pn], &c_otha));

  /* Ship the remote rows to their owners (c_rmt* on leaves -> c_oth* on roots) */
  PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
  PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_SCALAR, c_rmta, c_otha, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_SCALAR, c_rmta, c_otha, MPI_REPLACE));
  PetscCall(PetscFree2(c_rmtj, c_rmta));

  /* Add contributions from remote */
  for (i = 0; i < pn; i++) {
    row = i + pcstart;
    PetscCall(MatSetValues(C, 1, &row, ptap->c_othi[i + 1] - ptap->c_othi[i], PetscSafePointerPlusOffset(c_othj, ptap->c_othi[i]), PetscSafePointerPlusOffset(c_otha, ptap->c_othi[i]), ADD_VALUES));
  }
  PetscCall(PetscFree2(c_othj, c_otha));

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

  /* Subsequent calls reuse P_oth and the communication pattern */
  ptap->reuse = MAT_REUSE_MATRIX;
  PetscFunctionReturn(PETSC_SUCCESS);
}
972
/* Numeric phase of C = P^T*A*P for plain MPIAIJ matrices using the merged
   all-at-once algorithm; thin wrapper that calls the dof-aware kernel with dof = 1 */
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  PetscFunctionBegin;
  /* dof = 1: every row/column corresponds to a single degree of freedom */
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, P, 1, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
979
980 /* TODO: move algorithm selection to MatProductSetFromOptions */
/* TODO: move algorithm selection to MatProductSetFromOptions */
/*
  Symbolic phase of C = P^T*A*P using the all-at-once algorithm.

  Builds everything the numeric phase needs:
    - ptap->P_oth: the rows of P matching the off-diagonal columns of A, fetched
      from other ranks;
    - ptap->c_rmti / ptap->c_othi: CSR-style offsets for, respectively, the rows
      of C this rank computes for remote owners and the rows it receives;
    - ptap->sf: a PetscSF mapping each remote-contribution slot to its owner;
    - the preallocated (but unassembled) parallel matrix Cmpi.
  Hash sets are used to count distinct columns per row of C.

  Input parameters:
+ A    - MPIAIJ matrix
. P    - MPIAIJ projector
. dof  - degrees of freedom per node (see the numeric routine)
- fill - expected fill ratio (currently not used by this algorithm)

  Output parameter:
. Cmpi - symbolic product; Cmpi->ops->ptapnumeric is set according to the
         -matptap_allatonce_via option ("overlapping" or "merged")
*/
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat A, Mat P, PetscInt dof, PetscReal fill, Mat Cmpi)
{
  MatProductCtx_APMPI *ptap;
  Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data;
  MPI_Comm comm;
  Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data;
  MatType mtype;
  PetscSF sf;
  PetscSFNode *iremote;
  PetscInt rootspacesize, *rootspace, *rootspaceoffsets, nleaves;
  const PetscInt *rootdegrees;
  PetscHSetI ht, oht, *hta, *hto; /* per-row column sets: one scratch pair + one per row of C */
  PetscInt pn, pon, *c_rmtc, i, j, nzi, htsize, htosize, *c_rmtj, off, *c_othj, rcvncols, sendncols, *c_rmtoffsets;
  PetscInt lidx, *rdj, col, pcstart, pcend, *dnz, *onz, am, arstart, arend, *poj, *pdj;
  PetscInt nalg = 2, alg = 0, offset, ii;
  PetscMPIInt owner;
  const PetscInt *mappingindices;
  PetscBool flg;
  const char *algTypes[2] = {"overlapping", "merged"};
  IS map;

  PetscFunctionBegin;
  MatCheckProduct(Cmpi, 5);
  PetscCheck(!Cmpi->product->data, PetscObjectComm((PetscObject)Cmpi), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  /* Create symbolic parallel matrix Cmpi */
  PetscCall(MatGetLocalSize(P, NULL, &pn));
  pn *= dof;
  PetscCall(MatGetType(A, &mtype));
  PetscCall(MatSetType(Cmpi, mtype));
  PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));

  PetscCall(PetscNew(&ptap));
  ptap->reuse = MAT_INITIAL_MATRIX;
  ptap->algType = 2; /* reported by MatView_MPIAIJ_PtAP as "allatonce" */

  /* Get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
  PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, dof, MAT_INITIAL_MATRIX, &ptap->P_oth));
  PetscCall(PetscObjectQuery((PetscObject)ptap->P_oth, "aoffdiagtopothmapping", (PetscObject *)&map));
  /* This equals to the number of offdiag columns in P */
  PetscCall(MatGetLocalSize(p->B, NULL, &pon));
  pon *= dof;
  /* offsets */
  PetscCall(PetscMalloc1(pon + 1, &ptap->c_rmti));
  /* The number of columns we will send to remote ranks */
  PetscCall(PetscMalloc1(pon, &c_rmtc));
  PetscCall(PetscMalloc1(pon, &hta));
  for (i = 0; i < pon; i++) PetscCall(PetscHSetICreate(&hta[i]));
  PetscCall(MatGetLocalSize(A, &am, NULL));
  PetscCall(MatGetOwnershipRange(A, &arstart, &arend));
  /* Create hash table to merge all columns for C(i, :) */
  PetscCall(PetscHSetICreate(&ht));

  PetscCall(ISGetIndices(map, &mappingindices));
  ptap->c_rmti[0] = 0;
  /* 2) Pass 1: calculate the size for C_rmt (a matrix need to be sent to other processors) */
  for (i = 0; i < am && pon; i++) {
    /* Form one row of AP */
    PetscCall(PetscHSetIClear(ht));
    offset = i % dof;
    ii = i / dof;
    /* If the off-diagonal is empty, do not do any calculation */
    nzi = po->i[ii + 1] - po->i[ii];
    if (!nzi) continue;

    /* Both diag and offdiag AP columns go into the same set ht here */
    PetscCall(MatPtAPSymbolicComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, ht, ht));
    PetscCall(PetscHSetIGetSize(ht, &htsize));
    /* If AP is empty, just continue */
    if (!htsize) continue;
    /* Form C(ii, :) */
    for (j = 0; j < nzi; j++) PetscCall(PetscHSetIUpdate(hta[poj[j] * dof + offset], ht));
  }

  /* Convert per-row set sizes into offsets c_rmti and counts c_rmtc */
  for (i = 0; i < pon; i++) {
    PetscCall(PetscHSetIGetSize(hta[i], &htsize));
    ptap->c_rmti[i + 1] = ptap->c_rmti[i] + htsize;
    c_rmtc[i] = htsize;
  }

  PetscCall(PetscMalloc1(ptap->c_rmti[pon], &c_rmtj));

  /* Flatten the per-row sets into the send buffer c_rmtj */
  for (i = 0; i < pon; i++) {
    off = 0;
    PetscCall(PetscHSetIGetElems(hta[i], &off, c_rmtj + ptap->c_rmti[i]));
    PetscCall(PetscHSetIDestroy(&hta[i]));
  }
  PetscCall(PetscFree(hta));

  /* One leaf per remote row: locate the owning rank and local index of each offdiag column of P */
  PetscCall(PetscMalloc1(pon, &iremote));
  for (i = 0; i < pon; i++) {
    owner = 0;
    lidx = 0;
    offset = i % dof;
    ii = i / dof;
    PetscCall(PetscLayoutFindOwnerIndex(P->cmap, p->garray[ii], &owner, &lidx));
    iremote[i].index = lidx * dof + offset;
    iremote[i].rank = owner;
  }

  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetGraph(sf, pn, pon, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  /* Reorder ranks properly so that the data handled by gather and scatter have the same order */
  PetscCall(PetscSFSetRankOrder(sf, PETSC_TRUE));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));
  /* How many neighbors have contributions to my rows? */
  PetscCall(PetscSFComputeDegreeBegin(sf, &rootdegrees));
  PetscCall(PetscSFComputeDegreeEnd(sf, &rootdegrees));
  rootspacesize = 0;
  for (i = 0; i < pn; i++) rootspacesize += rootdegrees[i];
  PetscCall(PetscMalloc1(rootspacesize, &rootspace));
  PetscCall(PetscMalloc1(rootspacesize + 1, &rootspaceoffsets));
  /* Gather from the leaves the number of columns each contributor sends per row */
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, c_rmtc, rootspace));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, c_rmtc, rootspace));
  PetscCall(PetscFree(c_rmtc));
  PetscCall(PetscCalloc1(pn + 1, &ptap->c_othi));
  /* The number of columns is received for each row */
  ptap->c_othi[0] = 0;
  rootspacesize = 0;
  rootspaceoffsets[0] = 0;
  for (i = 0; i < pn; i++) {
    rcvncols = 0;
    for (j = 0; j < rootdegrees[i]; j++) {
      rcvncols += rootspace[rootspacesize];
      rootspaceoffsets[rootspacesize + 1] = rootspaceoffsets[rootspacesize] + rootspace[rootspacesize];
      rootspacesize++;
    }
    ptap->c_othi[i + 1] = ptap->c_othi[i] + rcvncols;
  }
  PetscCall(PetscFree(rootspace));

  /* Tell every leaf the offset at which its data lands on the root */
  PetscCall(PetscMalloc1(pon, &c_rmtoffsets));
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, rootspaceoffsets, c_rmtoffsets));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, rootspaceoffsets, c_rmtoffsets));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(rootspaceoffsets));

  /* Build the entry-wise SF used by the numeric phase: one leaf per sent column */
  PetscCall(PetscCalloc1(ptap->c_rmti[pon], &iremote));
  nleaves = 0;
  for (i = 0; i < pon; i++) {
    owner = 0;
    ii = i / dof;
    PetscCall(PetscLayoutFindOwnerIndex(P->cmap, p->garray[ii], &owner, NULL));
    sendncols = ptap->c_rmti[i + 1] - ptap->c_rmti[i];
    for (j = 0; j < sendncols; j++) {
      iremote[nleaves].rank = owner;
      iremote[nleaves++].index = c_rmtoffsets[i] + j;
    }
  }
  PetscCall(PetscFree(c_rmtoffsets));
  PetscCall(PetscCalloc1(ptap->c_othi[pn], &c_othj));

  PetscCall(PetscSFCreate(comm, &ptap->sf));
  PetscCall(PetscSFSetGraph(ptap->sf, ptap->c_othi[pn], nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(ptap->sf));
  /* One to one map; start the column-index exchange, overlap it with local counting below */
  PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));

  PetscCall(PetscMalloc2(pn, &dnz, pn, &onz));
  PetscCall(PetscHSetICreate(&oht));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, &pcend));
  pcstart *= dof;
  pcend *= dof;
  PetscCall(PetscMalloc2(pn, &hta, pn, &hto)); /* hta: diag columns, hto: offdiag columns per local row */
  for (i = 0; i < pn; i++) {
    PetscCall(PetscHSetICreate(&hta[i]));
    PetscCall(PetscHSetICreate(&hto[i]));
  }
  /* Work on local part */
  /* 4) Pass 1: Estimate memory for C_loc */
  for (i = 0; i < am && pn; i++) {
    PetscCall(PetscHSetIClear(ht));
    PetscCall(PetscHSetIClear(oht));
    offset = i % dof;
    ii = i / dof;
    nzi = pd->i[ii + 1] - pd->i[ii];
    if (!nzi) continue;

    PetscCall(MatPtAPSymbolicComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, ht, oht));
    PetscCall(PetscHSetIGetSize(ht, &htsize));
    PetscCall(PetscHSetIGetSize(oht, &htosize));
    if (!(htsize + htosize)) continue;
    /* Form C(ii, :) */
    pdj = pd->j + pd->i[ii];
    for (j = 0; j < nzi; j++) {
      PetscCall(PetscHSetIUpdate(hta[pdj[j] * dof + offset], ht));
      PetscCall(PetscHSetIUpdate(hto[pdj[j] * dof + offset], oht));
    }
  }

  PetscCall(ISRestoreIndices(map, &mappingindices));

  PetscCall(PetscHSetIDestroy(&ht));
  PetscCall(PetscHSetIDestroy(&oht));

  /* Get remote data */
  PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
  PetscCall(PetscFree(c_rmtj));

  /* Merge received column indices into the per-row sets and extract dnz/onz counts */
  for (i = 0; i < pn; i++) {
    nzi = ptap->c_othi[i + 1] - ptap->c_othi[i];
    rdj = PetscSafePointerPlusOffset(c_othj, ptap->c_othi[i]);
    for (j = 0; j < nzi; j++) {
      col = rdj[j];
      /* diag part */
      if (col >= pcstart && col < pcend) {
        PetscCall(PetscHSetIAdd(hta[i], col));
      } else { /* off-diagonal */
        PetscCall(PetscHSetIAdd(hto[i], col));
      }
    }
    PetscCall(PetscHSetIGetSize(hta[i], &htsize));
    dnz[i] = htsize;
    PetscCall(PetscHSetIDestroy(&hta[i]));
    PetscCall(PetscHSetIGetSize(hto[i], &htsize));
    onz[i] = htsize;
    PetscCall(PetscHSetIDestroy(&hto[i]));
  }

  PetscCall(PetscFree2(hta, hto));
  PetscCall(PetscFree(c_othj));

  /* local sizes and preallocation */
  PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetBlockSizes(Cmpi, dof > 1 ? dof : P->cmap->bs, dof > 1 ? dof : P->cmap->bs));
  PetscCall(MatMPIAIJSetPreallocation(Cmpi, 0, dnz, 0, onz));
  PetscCall(MatSetUp(Cmpi));
  PetscCall(PetscFree2(dnz, onz));

  /* attach the supporting struct to Cmpi for reuse */
  Cmpi->product->data = ptap;
  Cmpi->product->destroy = MatProductCtxDestroy_MPIAIJ_PtAP;
  Cmpi->product->view = MatView_MPIAIJ_PtAP;

  /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */
  Cmpi->assembled = PETSC_FALSE;
  /* pick an algorithm */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatPtAP", "Mat");
  alg = 0;
  PetscCall(PetscOptionsEList("-matptap_allatonce_via", "PtAP allatonce numeric approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  switch (alg) {
  case 0:
    Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce;
    break;
  case 1:
    Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce_merged;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, " Unsupported allatonce numerical algorithm ");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1239
/* Symbolic phase of C = P^T*A*P for plain MPIAIJ matrices using the all-at-once
   algorithm; thin wrapper that calls the dof-aware kernel with dof = 1 */
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  /* dof = 1: every row/column corresponds to a single degree of freedom */
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, P, 1, fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1246
/*
  Symbolic phase of C = P^T*A*P using the "merged" all-at-once algorithm.

  Differs from MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce() in that the remote
  (off-diagonal P^T) and local (diagonal P^T) column sets are counted in one
  sweep over the local rows of A, and the remote column-index exchange is not
  overlapped with local work. Builds the same support data: P_oth, the offset
  arrays c_rmti/c_othi, the entry-wise PetscSF, and the preallocated Cmpi.

  Input parameters:
+ A    - MPIAIJ matrix
. P    - MPIAIJ projector
. dof  - degrees of freedom per node (see the numeric routine)
- fill - expected fill ratio (currently not used by this algorithm)

  Output parameter:
. Cmpi - symbolic product; Cmpi->ops->ptapnumeric is set according to the
         -matptap_allatonce_via option (default "merged" here)
*/
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat A, Mat P, PetscInt dof, PetscReal fill, Mat Cmpi)
{
  MatProductCtx_APMPI *ptap;
  Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data;
  MPI_Comm comm;
  Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data;
  MatType mtype;
  PetscSF sf;
  PetscSFNode *iremote;
  PetscInt rootspacesize, *rootspace, *rootspaceoffsets, nleaves;
  const PetscInt *rootdegrees;
  PetscHSetI ht, oht, *hta, *hto, *htd; /* hta: remote rows; htd/hto: diag/offdiag columns of local rows */
  PetscInt pn, pon, *c_rmtc, i, j, nzi, dnzi, htsize, htosize, *c_rmtj, off, *c_othj, rcvncols, sendncols, *c_rmtoffsets;
  PetscInt lidx, *rdj, col, pcstart, pcend, *dnz, *onz, am, arstart, arend, *poj, *pdj;
  PetscInt nalg = 2, alg = 0, offset, ii;
  PetscMPIInt owner;
  PetscBool flg;
  const char *algTypes[2] = {"merged", "overlapping"};
  const PetscInt *mappingindices;
  IS map;

  PetscFunctionBegin;
  MatCheckProduct(Cmpi, 5);
  PetscCheck(!Cmpi->product->data, PetscObjectComm((PetscObject)Cmpi), PETSC_ERR_PLIB, "Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  /* Create symbolic parallel matrix Cmpi */
  PetscCall(MatGetLocalSize(P, NULL, &pn));
  pn *= dof;
  PetscCall(MatGetType(A, &mtype));
  PetscCall(MatSetType(Cmpi, mtype));
  PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));

  PetscCall(PetscNew(&ptap));
  ptap->reuse = MAT_INITIAL_MATRIX;
  ptap->algType = 3; /* reported by MatView_MPIAIJ_PtAP as "merged allatonce" */

  /* 0) Get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
  PetscCall(MatGetBrowsOfAcols_MPIXAIJ(A, P, dof, MAT_INITIAL_MATRIX, &ptap->P_oth));
  PetscCall(PetscObjectQuery((PetscObject)ptap->P_oth, "aoffdiagtopothmapping", (PetscObject *)&map));

  /* This equals to the number of offdiag columns in P */
  PetscCall(MatGetLocalSize(p->B, NULL, &pon));
  pon *= dof;
  /* offsets */
  PetscCall(PetscMalloc1(pon + 1, &ptap->c_rmti));
  /* The number of columns we will send to remote ranks */
  PetscCall(PetscMalloc1(pon, &c_rmtc));
  PetscCall(PetscMalloc1(pon, &hta));
  for (i = 0; i < pon; i++) PetscCall(PetscHSetICreate(&hta[i]));
  PetscCall(MatGetLocalSize(A, &am, NULL));
  PetscCall(MatGetOwnershipRange(A, &arstart, &arend));
  /* Create hash table to merge all columns for C(i, :) */
  PetscCall(PetscHSetICreate(&ht));
  PetscCall(PetscHSetICreate(&oht));
  PetscCall(PetscMalloc2(pn, &htd, pn, &hto));
  for (i = 0; i < pn; i++) {
    PetscCall(PetscHSetICreate(&htd[i]));
    PetscCall(PetscHSetICreate(&hto[i]));
  }

  PetscCall(ISGetIndices(map, &mappingindices));
  ptap->c_rmti[0] = 0;
  /* 2) Pass 1: calculate the size for C_rmt (a matrix need to be sent to other processors) */
  for (i = 0; i < am && (pon || pn); i++) {
    /* Form one row of AP */
    PetscCall(PetscHSetIClear(ht));
    PetscCall(PetscHSetIClear(oht));
    offset = i % dof;
    ii = i / dof;
    /* If the off-diagonal is empty, do not do any calculation */
    nzi = po->i[ii + 1] - po->i[ii];
    dnzi = pd->i[ii + 1] - pd->i[ii];
    if (!nzi && !dnzi) continue;

    /* ht collects diag AP columns, oht collects offdiag AP columns */
    PetscCall(MatPtAPSymbolicComputeOneRowOfAP_private(A, P, ptap->P_oth, mappingindices, dof, i, ht, oht));
    PetscCall(PetscHSetIGetSize(ht, &htsize));
    PetscCall(PetscHSetIGetSize(oht, &htosize));
    /* If AP is empty, just continue */
    if (!(htsize + htosize)) continue;

    /* Form remote C(ii, :) */
    poj = PetscSafePointerPlusOffset(po->j, po->i[ii]);
    for (j = 0; j < nzi; j++) {
      PetscCall(PetscHSetIUpdate(hta[poj[j] * dof + offset], ht));
      PetscCall(PetscHSetIUpdate(hta[poj[j] * dof + offset], oht));
    }

    /* Form local C(ii, :) */
    pdj = pd->j + pd->i[ii];
    for (j = 0; j < dnzi; j++) {
      PetscCall(PetscHSetIUpdate(htd[pdj[j] * dof + offset], ht));
      PetscCall(PetscHSetIUpdate(hto[pdj[j] * dof + offset], oht));
    }
  }

  PetscCall(ISRestoreIndices(map, &mappingindices));

  PetscCall(PetscHSetIDestroy(&ht));
  PetscCall(PetscHSetIDestroy(&oht));

  /* Convert per-row set sizes into offsets c_rmti and counts c_rmtc */
  for (i = 0; i < pon; i++) {
    PetscCall(PetscHSetIGetSize(hta[i], &htsize));
    ptap->c_rmti[i + 1] = ptap->c_rmti[i] + htsize;
    c_rmtc[i] = htsize;
  }

  PetscCall(PetscMalloc1(ptap->c_rmti[pon], &c_rmtj));

  /* Flatten the per-row sets into the send buffer c_rmtj */
  for (i = 0; i < pon; i++) {
    off = 0;
    PetscCall(PetscHSetIGetElems(hta[i], &off, c_rmtj + ptap->c_rmti[i]));
    PetscCall(PetscHSetIDestroy(&hta[i]));
  }
  PetscCall(PetscFree(hta));

  /* One leaf per remote row: locate the owning rank and local index of each offdiag column of P */
  PetscCall(PetscMalloc1(pon, &iremote));
  for (i = 0; i < pon; i++) {
    owner = 0;
    lidx = 0;
    offset = i % dof;
    ii = i / dof;
    PetscCall(PetscLayoutFindOwnerIndex(P->cmap, p->garray[ii], &owner, &lidx));
    iremote[i].index = lidx * dof + offset;
    iremote[i].rank = owner;
  }

  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetGraph(sf, pn, pon, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  /* Reorder ranks properly so that the data handled by gather and scatter have the same order */
  PetscCall(PetscSFSetRankOrder(sf, PETSC_TRUE));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));
  /* How many neighbors have contributions to my rows? */
  PetscCall(PetscSFComputeDegreeBegin(sf, &rootdegrees));
  PetscCall(PetscSFComputeDegreeEnd(sf, &rootdegrees));
  rootspacesize = 0;
  for (i = 0; i < pn; i++) rootspacesize += rootdegrees[i];
  PetscCall(PetscMalloc1(rootspacesize, &rootspace));
  PetscCall(PetscMalloc1(rootspacesize + 1, &rootspaceoffsets));
  /* Gather from the leaves the number of columns each contributor sends per row */
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, c_rmtc, rootspace));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, c_rmtc, rootspace));
  PetscCall(PetscFree(c_rmtc));
  PetscCall(PetscMalloc1(pn + 1, &ptap->c_othi));
  /* The number of columns is received for each row */
  ptap->c_othi[0] = 0;
  rootspacesize = 0;
  rootspaceoffsets[0] = 0;
  for (i = 0; i < pn; i++) {
    rcvncols = 0;
    for (j = 0; j < rootdegrees[i]; j++) {
      rcvncols += rootspace[rootspacesize];
      rootspaceoffsets[rootspacesize + 1] = rootspaceoffsets[rootspacesize] + rootspace[rootspacesize];
      rootspacesize++;
    }
    ptap->c_othi[i + 1] = ptap->c_othi[i] + rcvncols;
  }
  PetscCall(PetscFree(rootspace));

  /* Tell every leaf the offset at which its data lands on the root */
  PetscCall(PetscMalloc1(pon, &c_rmtoffsets));
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, rootspaceoffsets, c_rmtoffsets));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, rootspaceoffsets, c_rmtoffsets));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(rootspaceoffsets));

  /* Build the entry-wise SF used by the numeric phase: one leaf per sent column */
  PetscCall(PetscCalloc1(ptap->c_rmti[pon], &iremote));
  nleaves = 0;
  for (i = 0; i < pon; i++) {
    owner = 0;
    ii = i / dof;
    PetscCall(PetscLayoutFindOwnerIndex(P->cmap, p->garray[ii], &owner, NULL));
    sendncols = ptap->c_rmti[i + 1] - ptap->c_rmti[i];
    for (j = 0; j < sendncols; j++) {
      iremote[nleaves].rank = owner;
      iremote[nleaves++].index = c_rmtoffsets[i] + j;
    }
  }
  PetscCall(PetscFree(c_rmtoffsets));
  PetscCall(PetscCalloc1(ptap->c_othi[pn], &c_othj));

  PetscCall(PetscSFCreate(comm, &ptap->sf));
  PetscCall(PetscSFSetGraph(ptap->sf, ptap->c_othi[pn], nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(ptap->sf));
  /* One to one map */
  PetscCall(PetscSFReduceBegin(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
  /* Get remote data */
  PetscCall(PetscSFReduceEnd(ptap->sf, MPIU_INT, c_rmtj, c_othj, MPI_REPLACE));
  PetscCall(PetscFree(c_rmtj));
  PetscCall(PetscMalloc2(pn, &dnz, pn, &onz));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, &pcend));
  pcstart *= dof;
  pcend *= dof;
  /* Merge received column indices into the per-row sets and extract dnz/onz counts */
  for (i = 0; i < pn; i++) {
    nzi = ptap->c_othi[i + 1] - ptap->c_othi[i];
    rdj = PetscSafePointerPlusOffset(c_othj, ptap->c_othi[i]);
    for (j = 0; j < nzi; j++) {
      col = rdj[j];
      /* diagonal part */
      if (col >= pcstart && col < pcend) {
        PetscCall(PetscHSetIAdd(htd[i], col));
      } else { /* off-diagonal */
        PetscCall(PetscHSetIAdd(hto[i], col));
      }
    }
    PetscCall(PetscHSetIGetSize(htd[i], &htsize));
    dnz[i] = htsize;
    PetscCall(PetscHSetIDestroy(&htd[i]));
    PetscCall(PetscHSetIGetSize(hto[i], &htsize));
    onz[i] = htsize;
    PetscCall(PetscHSetIDestroy(&hto[i]));
  }

  PetscCall(PetscFree2(htd, hto));
  PetscCall(PetscFree(c_othj));

  /* local sizes and preallocation */
  PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetBlockSizes(Cmpi, dof > 1 ? dof : P->cmap->bs, dof > 1 ? dof : P->cmap->bs));
  PetscCall(MatMPIAIJSetPreallocation(Cmpi, 0, dnz, 0, onz));
  PetscCall(PetscFree2(dnz, onz));

  /* attach the supporting struct to Cmpi for reuse */
  Cmpi->product->data = ptap;
  Cmpi->product->destroy = MatProductCtxDestroy_MPIAIJ_PtAP;
  Cmpi->product->view = MatView_MPIAIJ_PtAP;

  /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */
  Cmpi->assembled = PETSC_FALSE;
  /* pick an algorithm */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "MatPtAP", "Mat");
  alg = 0;
  PetscCall(PetscOptionsEList("-matptap_allatonce_via", "PtAP allatonce numeric approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  PetscOptionsEnd();
  switch (alg) {
  case 0:
    Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce_merged;
    break;
  case 1:
    Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ_allatonce;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, " Unsupported allatonce numerical algorithm ");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1495
/* Symbolic phase of C = P^T*A*P for plain MPIAIJ matrices using the merged
   all-at-once algorithm; thin wrapper that calls the dof-aware kernel with dof = 1 */
PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  /* dof = 1: every row/column corresponds to a single degree of freedom */
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, P, 1, fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1502
MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat Cmpi)1503 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A, Mat P, PetscReal fill, Mat Cmpi)
1504 {
1505 MatProductCtx_APMPI *ptap;
1506 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
1507 MPI_Comm comm;
1508 PetscMPIInt size, rank, nsend, proc;
1509 PetscFreeSpaceList free_space = NULL, current_space = NULL;
1510 PetscInt am = A->rmap->n, pm = P->rmap->n, pN = P->cmap->N, pn = P->cmap->n;
1511 PetscInt *lnk, i, k, pnz, row;
1512 PetscBT lnkbt;
1513 PetscMPIInt tagi, tagj, *len_si, *len_s, *len_ri, nrecv;
1514 PETSC_UNUSED PetscMPIInt icompleted = 0;
1515 PetscInt **buf_rj, **buf_ri, **buf_ri_k;
1516 PetscInt len, *dnz, *onz, *owners, nzi, nspacedouble;
1517 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextci;
1518 MPI_Request *swaits, *rwaits;
1519 MPI_Status *sstatus, rstatus;
1520 PetscLayout rowmap;
1521 PetscInt *owners_co, *coi, *coj; /* i and j array of (p->B)^T*A*P - used in the communication */
1522 PetscMPIInt *len_r, *id_r; /* array of length of comm->size, store send/recv matrix values */
1523 PetscInt *api, *apj, *Jptr, apnz, *prmap = p->garray, con, j, ap_rmax = 0, Crmax, *aj, *ai, *pi;
1524 Mat_SeqAIJ *p_loc, *p_oth = NULL, *ad = (Mat_SeqAIJ *)a->A->data, *ao = NULL, *c_loc, *c_oth;
1525 PetscScalar *apv;
1526 PetscHMapI ta;
1527 MatType mtype;
1528 const char *prefix;
1529 #if defined(PETSC_USE_INFO)
1530 PetscReal apfill;
1531 #endif
1532
1533 PetscFunctionBegin;
1534 MatCheckProduct(Cmpi, 4);
1535 PetscCheck(!Cmpi->product->data, PetscObjectComm((PetscObject)Cmpi), PETSC_ERR_PLIB, "Product data not empty");
1536 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1537 PetscCallMPI(MPI_Comm_size(comm, &size));
1538 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1539
1540 if (size > 1) ao = (Mat_SeqAIJ *)a->B->data;
1541
1542 /* create symbolic parallel matrix Cmpi */
1543 PetscCall(MatGetType(A, &mtype));
1544 PetscCall(MatSetType(Cmpi, mtype));
1545
1546 /* Do dense axpy in MatPtAPNumeric_MPIAIJ_MPIAIJ() */
1547 Cmpi->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIAIJ;
1548
1549 /* create struct MatProductCtx_APMPI and attached it to C later */
1550 PetscCall(PetscNew(&ptap));
1551 ptap->reuse = MAT_INITIAL_MATRIX;
1552 ptap->algType = 1;
1553
1554 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
1555 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &ptap->startsj_s, &ptap->startsj_r, &ptap->bufa, &ptap->P_oth));
1556 /* get P_loc by taking all local rows of P */
1557 PetscCall(MatMPIAIJGetLocalMat(P, MAT_INITIAL_MATRIX, &ptap->P_loc));
1558
1559 /* (0) compute Rd = Pd^T, Ro = Po^T */
1560 PetscCall(MatTranspose(p->A, MAT_INITIAL_MATRIX, &ptap->Rd));
1561 PetscCall(MatTranspose(p->B, MAT_INITIAL_MATRIX, &ptap->Ro));
1562
1563 /* (1) compute symbolic AP = A_loc*P = Ad*P_loc + Ao*P_oth (api,apj) */
1564 p_loc = (Mat_SeqAIJ *)ptap->P_loc->data;
1565 if (ptap->P_oth) p_oth = (Mat_SeqAIJ *)ptap->P_oth->data;
1566
1567 /* create and initialize a linked list */
1568 PetscCall(PetscHMapICreateWithSize(pn, &ta)); /* for compute AP_loc and Cmpi */
1569 MatRowMergeMax_SeqAIJ(p_loc, ptap->P_loc->rmap->N, ta);
1570 MatRowMergeMax_SeqAIJ(p_oth, ptap->P_oth->rmap->N, ta);
1571 PetscCall(PetscHMapIGetSize(ta, &Crmax)); /* Crmax = nnz(sum of Prows) */
1572 /* printf("[%d] est %d, Crmax %d; pN %d\n",rank,5*(p_loc->rmax+p_oth->rmax + (PetscInt)(1.e-2*pN)),Crmax,pN); */
1573
1574 PetscCall(PetscLLCondensedCreate(Crmax, pN, &lnk, &lnkbt));
1575
1576 /* Initial FreeSpace size is fill*(nnz(A) + nnz(P)) */
1577 if (ao) {
1578 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ad->i[am], PetscIntSumTruncate(ao->i[am], p_loc->i[pm]))), &free_space));
1579 } else {
1580 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ad->i[am], p_loc->i[pm])), &free_space));
1581 }
1582 current_space = free_space;
1583 nspacedouble = 0;
1584
1585 PetscCall(PetscMalloc1(am + 1, &api));
1586 api[0] = 0;
1587 for (i = 0; i < am; i++) {
1588 /* diagonal portion: Ad[i,:]*P */
1589 ai = ad->i;
1590 pi = p_loc->i;
1591 nzi = ai[i + 1] - ai[i];
1592 aj = PetscSafePointerPlusOffset(ad->j, ai[i]);
1593 for (j = 0; j < nzi; j++) {
1594 row = aj[j];
1595 pnz = pi[row + 1] - pi[row];
1596 Jptr = p_loc->j + pi[row];
1597 /* add non-zero cols of P into the sorted linked list lnk */
1598 PetscCall(PetscLLCondensedAddSorted(pnz, Jptr, lnk, lnkbt));
1599 }
1600 /* off-diagonal portion: Ao[i,:]*P */
1601 if (ao) {
1602 ai = ao->i;
1603 pi = p_oth->i;
1604 nzi = ai[i + 1] - ai[i];
1605 aj = PetscSafePointerPlusOffset(ao->j, ai[i]);
1606 for (j = 0; j < nzi; j++) {
1607 row = aj[j];
1608 pnz = pi[row + 1] - pi[row];
1609 Jptr = PetscSafePointerPlusOffset(p_oth->j, pi[row]);
1610 PetscCall(PetscLLCondensedAddSorted(pnz, Jptr, lnk, lnkbt));
1611 }
1612 }
1613 apnz = lnk[0];
1614 api[i + 1] = api[i] + apnz;
1615 if (ap_rmax < apnz) ap_rmax = apnz;
1616
1617 /* if free space is not available, double the total space in the list */
1618 if (current_space->local_remaining < apnz) {
1619 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(apnz, current_space->total_array_size), ¤t_space));
1620 nspacedouble++;
1621 }
1622
1623 /* Copy data into free space, then initialize lnk */
1624 PetscCall(PetscLLCondensedClean(pN, apnz, current_space->array, lnk, lnkbt));
1625
1626 current_space->array = PetscSafePointerPlusOffset(current_space->array, apnz);
1627 current_space->local_used += apnz;
1628 current_space->local_remaining -= apnz;
1629 }
1630 /* Allocate space for apj and apv, initialize apj, and */
1631 /* destroy list of free space and other temporary array(s) */
1632 PetscCall(PetscMalloc2(api[am], &apj, api[am], &apv));
1633 PetscCall(PetscFreeSpaceContiguous(&free_space, apj));
1634 PetscCall(PetscLLDestroy(lnk, lnkbt));
1635
1636 /* Create AP_loc for reuse */
1637 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, pN, api, apj, apv, &ptap->AP_loc));
1638 PetscCall(MatSetType(ptap->AP_loc, ((PetscObject)p->A)->type_name));
1639 #if defined(PETSC_USE_INFO)
1640 if (ao) {
1641 apfill = (PetscReal)api[am] / (ad->i[am] + ao->i[am] + p_loc->i[pm] + 1);
1642 } else {
1643 apfill = (PetscReal)api[am] / (ad->i[am] + p_loc->i[pm] + 1);
1644 }
1645 ptap->AP_loc->info.mallocs = nspacedouble;
1646 ptap->AP_loc->info.fill_ratio_given = fill;
1647 ptap->AP_loc->info.fill_ratio_needed = apfill;
1648
1649 if (api[am]) {
1650 PetscCall(PetscInfo(ptap->AP_loc, "Nonscalable algorithm, AP_loc reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", nspacedouble, (double)fill, (double)apfill));
1651 PetscCall(PetscInfo(ptap->AP_loc, "Use MatPtAP(A,B,MatReuse,%g,&C) for best AP_loc performance.;\n", (double)apfill));
1652 } else {
1653 PetscCall(PetscInfo(ptap->AP_loc, "Nonscalable algorithm, AP_loc is empty \n"));
1654 }
1655 #endif
1656
1657 /* (2-1) compute symbolic Co = Ro*AP_loc */
1658 PetscCall(MatGetOptionsPrefix(A, &prefix));
1659 PetscCall(MatSetOptionsPrefix(ptap->Ro, prefix));
1660 PetscCall(MatAppendOptionsPrefix(ptap->Ro, "inner_offdiag_"));
1661 PetscCall(MatProductCreate(ptap->Ro, ptap->AP_loc, NULL, &ptap->C_oth));
1662 PetscCall(MatGetOptionsPrefix(Cmpi, &prefix));
1663 PetscCall(MatSetOptionsPrefix(ptap->C_oth, prefix));
1664 PetscCall(MatAppendOptionsPrefix(ptap->C_oth, "inner_C_oth_"));
1665 PetscCall(MatProductSetType(ptap->C_oth, MATPRODUCT_AB));
1666 PetscCall(MatProductSetAlgorithm(ptap->C_oth, "default"));
1667 PetscCall(MatProductSetFill(ptap->C_oth, fill));
1668 PetscCall(MatProductSetFromOptions(ptap->C_oth));
1669 PetscCall(MatProductSymbolic(ptap->C_oth));
1670
1671 /* (3) send coj of C_oth to other processors */
1672 /* determine row ownership */
1673 PetscCall(PetscLayoutCreate(comm, &rowmap));
1674 rowmap->n = pn;
1675 rowmap->bs = 1;
1676 PetscCall(PetscLayoutSetUp(rowmap));
1677 owners = rowmap->range;
1678
1679 /* determine the number of messages to send, their lengths */
1680 PetscCall(PetscMalloc4(size, &len_s, size, &len_si, size, &sstatus, size + 2, &owners_co));
1681 PetscCall(PetscArrayzero(len_s, size));
1682 PetscCall(PetscArrayzero(len_si, size));
1683
1684 c_oth = (Mat_SeqAIJ *)ptap->C_oth->data;
1685 coi = c_oth->i;
1686 coj = c_oth->j;
1687 con = ptap->C_oth->rmap->n;
1688 proc = 0;
1689 for (i = 0; i < con; i++) {
1690 while (prmap[i] >= owners[proc + 1]) proc++;
1691 len_si[proc]++; /* num of rows in Co(=Pt*AP) to be sent to [proc] */
1692 len_s[proc] += coi[i + 1] - coi[i]; /* num of nonzeros in Co to be sent to [proc] */
1693 }
1694
1695 len = 0; /* max length of buf_si[], see (4) */
1696 owners_co[0] = 0;
1697 nsend = 0;
1698 for (proc = 0; proc < size; proc++) {
1699 owners_co[proc + 1] = owners_co[proc] + len_si[proc];
1700 if (len_s[proc]) {
1701 nsend++;
1702 len_si[proc] = 2 * (len_si[proc] + 1); /* length of buf_si to be sent to [proc] */
1703 len += len_si[proc];
1704 }
1705 }
1706
1707 /* determine the number and length of messages to receive for coi and coj */
1708 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrecv));
1709 PetscCall(PetscGatherMessageLengths2(comm, nsend, nrecv, len_s, len_si, &id_r, &len_r, &len_ri));
1710
1711 /* post the Irecv and Isend of coj */
1712 PetscCall(PetscCommGetNewTag(comm, &tagj));
1713 PetscCall(PetscPostIrecvInt(comm, tagj, nrecv, id_r, len_r, &buf_rj, &rwaits));
1714 PetscCall(PetscMalloc1(nsend + 1, &swaits));
1715 for (proc = 0, k = 0; proc < size; proc++) {
1716 if (!len_s[proc]) continue;
1717 i = owners_co[proc];
1718 PetscCallMPI(MPIU_Isend(coj + coi[i], len_s[proc], MPIU_INT, proc, tagj, comm, swaits + k));
1719 k++;
1720 }
1721
1722 /* (2-2) compute symbolic C_loc = Rd*AP_loc */
1723 PetscCall(MatSetOptionsPrefix(ptap->Rd, prefix));
1724 PetscCall(MatAppendOptionsPrefix(ptap->Rd, "inner_diag_"));
1725 PetscCall(MatProductCreate(ptap->Rd, ptap->AP_loc, NULL, &ptap->C_loc));
1726 PetscCall(MatGetOptionsPrefix(Cmpi, &prefix));
1727 PetscCall(MatSetOptionsPrefix(ptap->C_loc, prefix));
1728 PetscCall(MatAppendOptionsPrefix(ptap->C_loc, "inner_C_loc_"));
1729 PetscCall(MatProductSetType(ptap->C_loc, MATPRODUCT_AB));
1730 PetscCall(MatProductSetAlgorithm(ptap->C_loc, "default"));
1731 PetscCall(MatProductSetFill(ptap->C_loc, fill));
1732 PetscCall(MatProductSetFromOptions(ptap->C_loc));
1733 PetscCall(MatProductSymbolic(ptap->C_loc));
1734
1735 c_loc = (Mat_SeqAIJ *)ptap->C_loc->data;
1736
1737 /* receives coj are complete */
1738 for (i = 0; i < nrecv; i++) PetscCallMPI(MPI_Waitany(nrecv, rwaits, &icompleted, &rstatus));
1739 PetscCall(PetscFree(rwaits));
1740 if (nsend) PetscCallMPI(MPI_Waitall(nsend, swaits, sstatus));
1741
1742 /* add received column indices into ta to update Crmax */
1743 for (k = 0; k < nrecv; k++) { /* k-th received message */
1744 Jptr = buf_rj[k];
1745 for (j = 0; j < len_r[k]; j++) PetscCall(PetscHMapISet(ta, *(Jptr + j) + 1, 1));
1746 }
1747 PetscCall(PetscHMapIGetSize(ta, &Crmax));
1748 PetscCall(PetscHMapIDestroy(&ta));
1749
1750 /* (4) send and recv coi */
1751 PetscCall(PetscCommGetNewTag(comm, &tagi));
1752 PetscCall(PetscPostIrecvInt(comm, tagi, nrecv, id_r, len_ri, &buf_ri, &rwaits));
1753 PetscCall(PetscMalloc1(len + 1, &buf_s));
1754 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
1755 for (proc = 0, k = 0; proc < size; proc++) {
1756 if (!len_s[proc]) continue;
1757 /* form outgoing message for i-structure:
1758 buf_si[0]: nrows to be sent
1759 [1:nrows]: row index (global)
1760 [nrows+1:2*nrows+1]: i-structure index
1761 */
1762 nrows = len_si[proc] / 2 - 1; /* num of rows in Co to be sent to [proc] */
1763 buf_si_i = buf_si + nrows + 1;
1764 buf_si[0] = nrows;
1765 buf_si_i[0] = 0;
1766 nrows = 0;
1767 for (i = owners_co[proc]; i < owners_co[proc + 1]; i++) {
1768 nzi = coi[i + 1] - coi[i];
1769 buf_si_i[nrows + 1] = buf_si_i[nrows] + nzi; /* i-structure */
1770 buf_si[nrows + 1] = prmap[i] - owners[proc]; /* local row index */
1771 nrows++;
1772 }
1773 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, swaits + k));
1774 k++;
1775 buf_si += len_si[proc];
1776 }
1777 for (i = 0; i < nrecv; i++) PetscCallMPI(MPI_Waitany(nrecv, rwaits, &icompleted, &rstatus));
1778 PetscCall(PetscFree(rwaits));
1779 if (nsend) PetscCallMPI(MPI_Waitall(nsend, swaits, sstatus));
1780
1781 PetscCall(PetscFree4(len_s, len_si, sstatus, owners_co));
1782 PetscCall(PetscFree(len_ri));
1783 PetscCall(PetscFree(swaits));
1784 PetscCall(PetscFree(buf_s));
1785
1786 /* (5) compute the local portion of Cmpi */
1787 /* set initial free space to be Crmax, sufficient for holding nonzeros in each row of Cmpi */
1788 PetscCall(PetscFreeSpaceGet(Crmax, &free_space));
1789 current_space = free_space;
1790
1791 PetscCall(PetscMalloc3(nrecv, &buf_ri_k, nrecv, &nextrow, nrecv, &nextci));
1792 for (k = 0; k < nrecv; k++) {
1793 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
1794 nrows = *buf_ri_k[k];
1795 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
1796 nextci[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
1797 }
1798
1799 MatPreallocateBegin(comm, pn, pn, dnz, onz);
1800 PetscCall(PetscLLCondensedCreate(Crmax, pN, &lnk, &lnkbt));
1801 for (i = 0; i < pn; i++) {
1802 /* add C_loc into Cmpi */
1803 nzi = c_loc->i[i + 1] - c_loc->i[i];
1804 Jptr = PetscSafePointerPlusOffset(c_loc->j, c_loc->i[i]);
1805 PetscCall(PetscLLCondensedAddSorted(nzi, Jptr, lnk, lnkbt));
1806
1807 /* add received col data into lnk */
1808 for (k = 0; k < nrecv; k++) { /* k-th received message */
1809 if (i == *nextrow[k]) { /* i-th row */
1810 nzi = *(nextci[k] + 1) - *nextci[k];
1811 Jptr = buf_rj[k] + *nextci[k];
1812 PetscCall(PetscLLCondensedAddSorted(nzi, Jptr, lnk, lnkbt));
1813 nextrow[k]++;
1814 nextci[k]++;
1815 }
1816 }
1817 nzi = lnk[0];
1818
1819 /* copy data into free space, then initialize lnk */
1820 PetscCall(PetscLLCondensedClean(pN, nzi, current_space->array, lnk, lnkbt));
1821 PetscCall(MatPreallocateSet(i + owners[rank], nzi, current_space->array, dnz, onz));
1822 }
1823 PetscCall(PetscFree3(buf_ri_k, nextrow, nextci));
1824 PetscCall(PetscLLDestroy(lnk, lnkbt));
1825 PetscCall(PetscFreeSpaceDestroy(free_space));
1826
1827 /* local sizes and preallocation */
1828 PetscCall(MatSetSizes(Cmpi, pn, pn, PETSC_DETERMINE, PETSC_DETERMINE));
1829 if (P->cmap->bs > 0) {
1830 PetscCall(PetscLayoutSetBlockSize(Cmpi->rmap, P->cmap->bs));
1831 PetscCall(PetscLayoutSetBlockSize(Cmpi->cmap, P->cmap->bs));
1832 }
1833 PetscCall(MatMPIAIJSetPreallocation(Cmpi, 0, dnz, 0, onz));
1834 MatPreallocateEnd(dnz, onz);
1835
1836 /* members in merge */
1837 PetscCall(PetscFree(id_r));
1838 PetscCall(PetscFree(len_r));
1839 PetscCall(PetscFree(buf_ri[0]));
1840 PetscCall(PetscFree(buf_ri));
1841 PetscCall(PetscFree(buf_rj[0]));
1842 PetscCall(PetscFree(buf_rj));
1843 PetscCall(PetscLayoutDestroy(&rowmap));
1844
1845 PetscCall(PetscCalloc1(pN, &ptap->apa));
1846
1847 /* attach the supporting struct to Cmpi for reuse */
1848 Cmpi->product->data = ptap;
1849 Cmpi->product->destroy = MatProductCtxDestroy_MPIAIJ_PtAP;
1850 Cmpi->product->view = MatView_MPIAIJ_PtAP;
1851
1852 /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */
1853 Cmpi->assembled = PETSC_FALSE;
1854 PetscFunctionReturn(PETSC_SUCCESS);
1855 }
1856
/*
  MatPtAPNumeric_MPIAIJ_MPIAIJ - Numeric phase of C = P^T*A*P for the nonscalable algorithm.

  Uses the symbolic data attached to C->product->data by MatPtAPSymbolic_MPIAIJ_MPIAIJ():
  AP_loc (structure of A_loc*P), Rd/Ro (local transposes of P's diagonal/off-diagonal
  blocks), P_loc/P_oth, and the dense pN-length accumulator ptap->apa.

  Steps: (1) refresh transposes/P parts on reuse, (2) compute AP_loc values row by row via
  the dense accumulator, (3) C_loc = Rd*AP_loc and C_oth = Ro*AP_loc, then scatter both
  into C with ADD_VALUES and assemble.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A, Mat P, Mat C)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *ad = (Mat_SeqAIJ *)a->A->data, *ao = (Mat_SeqAIJ *)a->B->data;
  Mat_SeqAIJ *ap, *p_loc, *p_oth = NULL, *c_seq;
  MatProductCtx_APMPI *ptap;
  Mat AP_loc, C_loc, C_oth;
  PetscInt i, rstart, rend, cm, ncols, row;
  PetscInt *api, *apj, am = A->rmap->n, j, col, apnz;
  PetscScalar *apa;
  const PetscInt *cols;
  const PetscScalar *vals, *array, *dummy1, *dummy2, *dummy3, *dummy4;

  PetscFunctionBegin;
  MatCheckProduct(C, 3);
  ptap = (MatProductCtx_APMPI *)C->product->data;
  PetscCheck(ptap, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be computed. Missing data");
  PetscCheck(ptap->AP_loc, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "PtAP cannot be reused. Do not call MatProductClear()");

  PetscCall(MatZeroEntries(C));
  /* 1) get R = Pd^T,Ro = Po^T */
  if (ptap->reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatTranspose(p->A, MAT_REUSE_MATRIX, &ptap->Rd));
    PetscCall(MatTranspose(p->B, MAT_REUSE_MATRIX, &ptap->Ro));
  }

  /* 2) get AP_loc */
  AP_loc = ptap->AP_loc;
  ap = (Mat_SeqAIJ *)AP_loc->data;

  /* 2-1) get P_oth = ptap->P_oth  and P_loc = ptap->P_loc */
  if (ptap->reuse == MAT_REUSE_MATRIX) {
    /* P_oth and P_loc are obtained in MatPtASymbolic() when reuse == MAT_INITIAL_MATRIX */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_REUSE_MATRIX, &ptap->startsj_s, &ptap->startsj_r, &ptap->bufa, &ptap->P_oth));
    PetscCall(MatMPIAIJGetLocalMat(P, MAT_REUSE_MATRIX, &ptap->P_loc));
  }

  /* 2-2) compute numeric A_loc*P - dominating part */
  /* get data from symbolic products */
  p_loc = (Mat_SeqAIJ *)ptap->P_loc->data;
  if (ptap->P_oth) p_oth = (Mat_SeqAIJ *)ptap->P_oth->data;
  api = ap->i;
  apj = ap->j;

  /* Use MatSeqAIJGetArrayXXX to sync the matrix on host before accessing its values in the style of (Mat_SeqAIJ *)dat->a on host.
     The dummy reads exist only to trigger that host sync for A, P_loc and P_oth. */
  PetscCall(MatSeqAIJGetArrayWrite(AP_loc, &apa));
  PetscCall(MatSeqAIJGetArrayRead(a->A, &dummy1));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &dummy2));
  PetscCall(MatSeqAIJGetArrayRead(ptap->P_loc, &dummy3));
  if (ptap->P_oth) PetscCall(MatSeqAIJGetArrayRead(ptap->P_oth, &dummy4));
  for (i = 0; i < am; i++) {
    /* AP[i,:] = A[i,:]*P = Ad*P_loc + Ao*P_oth */
    AProw_nonscalable(i, ad, ao, p_loc, p_oth, ptap->apa); /* Directly access the value arrays from the Mat_SeqAIJ structs */
    apnz = api[i + 1] - api[i];
    /* gather the dense accumulator into AP_loc's value array and zero it for the next row */
    for (j = 0; j < apnz; j++) {
      col = apj[j + api[i]];
      apa[j + ap->i[i]] = ptap->apa[col];
      ptap->apa[col] = 0.0;
    }
  }
  PetscCall(MatSeqAIJRestoreArrayWrite(AP_loc, &apa));
  PetscCall(MatSeqAIJRestoreArrayRead(a->A, &dummy1));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &dummy2));
  PetscCall(MatSeqAIJRestoreArrayRead(ptap->P_loc, &dummy3));
  if (ptap->P_oth) PetscCall(MatSeqAIJRestoreArrayRead(ptap->P_oth, &dummy4));

  /* We have modified the contents of local matrix AP_loc and must increase its ObjectState, since we are not doing AssemblyBegin/End on it. */
  PetscCall(PetscObjectStateIncrease((PetscObject)AP_loc));

  /* 3) C_loc = Rd*AP_loc, C_oth = Ro*AP_loc */
  PetscCall(MatProductNumeric(ptap->C_loc));
  PetscCall(MatProductNumeric(ptap->C_oth));
  C_loc = ptap->C_loc;
  C_oth = ptap->C_oth;

  /* add C_loc and Co to C */
  PetscCall(MatGetOwnershipRange(C, &rstart, &rend));

  /* C_loc -> C */
  cm = C_loc->rmap->N;
  c_seq = (Mat_SeqAIJ *)C_loc->data;
  cols = c_seq->j;

  PetscCall(MatSeqAIJGetArrayRead(C_loc, &array));
  vals = array;

  /* The (fast) MatSetValues_MPIAIJ_CopyFromCSRFormat function can only be used when C->was_assembled is PETSC_FALSE and */
  /* when there are no off-processor parts.  */
  /* If was_assembled is true, then the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; in MatSetValues_MPIAIJ_CopyFromCSRFormat */
  /* is no longer true. Then the more complex function MatSetValues_MPIAIJ() has to be used, where the column index is looked up from */
  /* a table, and other, more complex stuff has to be done. */
  if (C->assembled) {
    C->was_assembled = PETSC_TRUE;
    C->assembled = PETSC_FALSE;
  }
  if (C->was_assembled) {
    for (i = 0; i < cm; i++) {
      ncols = c_seq->i[i + 1] - c_seq->i[i];
      row = rstart + i;
      PetscCall(MatSetValues_MPIAIJ(C, 1, &row, ncols, cols, vals, ADD_VALUES));
      cols = PetscSafePointerPlusOffset(cols, ncols);
      vals += ncols;
    }
  } else {
    PetscCall(MatSetValues_MPIAIJ_CopyFromCSRFormat(C, c_seq->j, c_seq->i, vals));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(C_loc, &array));

  /* Co -> C, off-processor part */
  cm = C_oth->rmap->N;
  c_seq = (Mat_SeqAIJ *)C_oth->data;
  cols = c_seq->j;
  PetscCall(MatSeqAIJGetArrayRead(C_oth, &array));
  vals = array;
  for (i = 0; i < cm; i++) {
    ncols = c_seq->i[i + 1] - c_seq->i[i];
    row = p->garray[i]; /* global row index of the i-th off-process row */
    PetscCall(MatSetValues(C, 1, &row, ncols, cols, vals, ADD_VALUES));
    cols += ncols;
    vals += ncols;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(C_oth, &array));

  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

  /* subsequent calls take the cheaper reuse paths above */
  ptap->reuse = MAT_REUSE_MATRIX;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1986
MatProductSymbolic_PtAP_MPIAIJ_MPIAIJ(Mat C)1987 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIAIJ(Mat C)
1988 {
1989 Mat_Product *product = C->product;
1990 Mat A = product->A, P = product->B;
1991 MatProductAlgorithm alg = product->alg;
1992 PetscReal fill = product->fill;
1993 PetscBool flg;
1994
1995 PetscFunctionBegin;
1996 /* scalable: do R=P^T locally, then C=R*A*P */
1997 PetscCall(PetscStrcmp(alg, "scalable", &flg));
1998 if (flg) {
1999 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIAIJ_scalable(A, P, product->fill, C));
2000 C->ops->productnumeric = MatProductNumeric_PtAP;
2001 goto next;
2002 }
2003
2004 /* nonscalable: do R=P^T locally, then C=R*A*P */
2005 PetscCall(PetscStrcmp(alg, "nonscalable", &flg));
2006 if (flg) {
2007 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIAIJ(A, P, fill, C));
2008 goto next;
2009 }
2010
2011 /* allatonce */
2012 PetscCall(PetscStrcmp(alg, "allatonce", &flg));
2013 if (flg) {
2014 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIAIJ_allatonce(A, P, fill, C));
2015 goto next;
2016 }
2017
2018 /* allatonce_merged */
2019 PetscCall(PetscStrcmp(alg, "allatonce_merged", &flg));
2020 if (flg) {
2021 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIAIJ_allatonce_merged(A, P, fill, C));
2022 goto next;
2023 }
2024
2025 /* backend general code */
2026 PetscCall(PetscStrcmp(alg, "backend", &flg));
2027 if (flg) {
2028 PetscCall(MatProductSymbolic_MPIAIJBACKEND(C));
2029 PetscFunctionReturn(PETSC_SUCCESS);
2030 }
2031
2032 /* hypre */
2033 #if defined(PETSC_HAVE_HYPRE)
2034 PetscCall(PetscStrcmp(alg, "hypre", &flg));
2035 if (flg) {
2036 PetscCall(MatPtAPSymbolic_AIJ_AIJ_wHYPRE(A, P, fill, C));
2037 C->ops->productnumeric = MatProductNumeric_PtAP;
2038 PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 #endif
2041 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
2042
2043 next:
2044 C->ops->productnumeric = MatProductNumeric_PtAP;
2045 PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047