/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

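/*
   Background for the routine below: a MATMPIAIJ matrix stores its local rows as two
   sequential matrices, aij->A holding the "diagonal" block (columns owned by this rank)
   and aij->B holding the "off-diagonal" block (all other columns). B's column indices
   are compacted to 0..ec-1, and garray[lid] = gid records the global column each
   compacted index refers to. MatSetUpMultiply_MPIAIJ() builds garray, compacts B's
   columns, and creates the work vector aij->lvec and the scatter aij->Mvctx that
   gather the needed off-process vector entries for the multiply.
*/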
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* Number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a hash table so we never need an array as long as the global number of columns */
    PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
        if (!data) {
          /* the table is one-based so a retrieved value of 0 means "not yet seen" */
          PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscHashIterBegin(gid1_lid1, tpos);
    while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
      PetscHashIterGetKey(gid1_lid1, tpos, gid);
      PetscHashIterGetVal(gid1_lid1, tpos, lid);
      PetscHashIterNext(gid1_lid1, tpos);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort, then rebuild the table with the sorted local ids */
    PetscCall(PetscHMapIClear(gid1_lid1));
    for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) indices[garray[i]] = i;

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary index sets used to build the scatter-gather context */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create a temporary global vector to generate the scatter context;
     it is given no array, so no vector memory is allocated and this is cheap */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  aij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}

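/*
   The objects built above are what the parallel matrix-vector product consumes.
   A minimal sketch of that use, in the spirit of MatMult_MPIAIJ() (variable names
   follow this file; xx is the global input vector, yy the output):

     PetscCall(VecScatterBegin(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(MatMult(aij->A, xx, yy));                  [diagonal block; overlaps the communication]
     PetscCall(VecScatterEnd(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(MatMultAdd(aij->B, aij->lvec, yy, yy));    [add contributions of off-process columns]
*/
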
/* Disassemble the off-diagonal portion of the MPIAIJXxx matrix.
   In other words, convert B from the reduced format, which uses local col ids,
   to the expanded format, which uses global col ids; this makes it easier to
   insert new nonzeros (with global col ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
   use_preallocation determines whether to use the preallocation or the
   existing nonzero structure when creating the global form of B.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  Mat         B = aij->B, Bnew = NULL;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
#endif
  }

  if (B) {
    Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
    PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
    PetscScalar        v;
    const PetscScalar *ba;

    /* make sure that B is assembled so we can access its values */
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(m + 1, &nz));
    if (use_preallocation)
      for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
    else
      for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
    PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */
    PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

    if (Baij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
      ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
    }

    /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
     */
    Bnew->nonzerostate = B->nonzerostate;

    PetscCall(PetscFree(nz));
    PetscCall(MatSeqAIJGetArrayRead(B, &ba));
    for (i = 0; i < m; i++) {
      for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
        col = garray[Baij->j[ct]];
        v   = ba[ct++];
        PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
      }
    }
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
    PetscCall(MatDestroy(&B));
  }
  PetscCall(PetscFree(aij->garray));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

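/*
   A small worked example of the conversion above: suppose this rank's B needs the
   two global columns garray[] = {3, 7}. While assembled, a row of B stores the local
   column ids {0, 1} (indices into aij->lvec); after MatDisAssemble_MPIAIJ() the same
   entries live in Bnew under their global ids {3, 7}, the form MatSetValues() uses,
   so new nonzeros with arbitrary global column ids can now be inserted.
*/
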
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

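/*
   Builds the file-static maps used by MatDiagonalScaleLocal_MPIAIJ() below:
   auglyrmapd[k] gives, for column k of the "diagonal" block ina->A, the position in
   the local (ISLocalToGlobalMapping) ordering whose scale entry applies to it, and
   auglyrmapo[k] does the same for the compacted columns of the "off-diagonal" block
   ina->B; auglydd and auglyoo are the work vectors that receive the permuted scale values.
*/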
static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /* access the private part of the matrix */
  PetscInt    i, j, n, nt, cstart, cend, no, *garray = ina->garray, *lindices, bs = inA->rmap->bs;
  PetscInt   *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) auglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  /* This loop handling the setup for the off-diagonal local portion of the matrix is extremely similar to
     its counterpart in the MPIBAIJ version of this code.
     The tricky difference is that lindices[] uses block-based indexing (just as in the BAIJ code),
     but garray[] uses element-based indexing; hence the multiplications / divisions involving bs. */
  PetscCall(PetscCalloc1(inA->cmap->N / bs, &lindices));
  for (i = 0; i < ina->B->cmap->n / bs; i++) lindices[garray[i * bs] / bs] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) auglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /* access the private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscCall(PetscFree(auglyrmapd));
  PetscCall(PetscFree(auglyrmapo));
  PetscCall(VecDestroy(&auglydd));
  PetscCall(VecDestroy(&auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
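
/*
   A minimal usage sketch (an assumption about the typical call path, not shown in
   this file): the public interface routine MatDiagonalScaleLocal() dispatches to the
   routine above for MATMPIAIJ matrices, with scale supplied in the local ordering
   of the matrix's ISLocalToGlobalMapping:

     Vec scale;                                   [one entry per local (mapping) index]
     PetscCall(MatDiagonalScaleLocal(A, scale));  [column-scales both a->A and a->B]
*/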