xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision 7e1a0bbe36d2be40a00a95404ece00db4857f70d)
/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

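/*
   Background (a summary of what the code below relies on): an MPIAIJ matrix stores, on each
   process, a "diagonal" SeqAIJ block aij->A (the columns owned by this process) and an
   "off-diagonal" SeqAIJ block aij->B whose columns are compressed: B keeps only the nonzero
   external columns, numbered 0..ec-1, and aij->garray maps those local column ids back to
   global column ids. MatSetUpMultiply_MPIAIJ() builds garray, compacts B, and creates the
   work vector aij->lvec and the scatter aij->Mvctx used by the parallel multiply to gather
   the needed off-process vector entries.
*/
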
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* Number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a hash table to find the distinct external (off-process) columns */
    PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
        if (!data) {
          /* the table is one-based so that a value of 0 means "not found" */
          PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscHashIterBegin(gid1_lid1, tpos);
    while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
      PetscHashIterGetKey(gid1_lid1, tpos, gid);
      PetscHashIterGetVal(gid1_lid1, tpos, lid);
      PetscHashIterNext(gid1_lid1, tpos);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort the global column ids, then rebuild the table with the sorted local ids */
    PetscCall(PetscHMapIClear(gid1_lid1));
    for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) indices[garray[i]] = i;

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary index sets used to build the scatter */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create a temporary global vector to generate the scatter context */
  /* this does not allocate the array's memory, so it is cheap */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  aij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
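
/*
   For orientation (a sketch only; the actual routine lives in mpiaij.c and may differ in
   detail): the objects built above are used by the parallel multiply roughly as

     VecScatterBegin(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD); // start gathering off-process entries of xx
     // multiply by the diagonal block aij->A while the communication proceeds
     VecScatterEnd(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD);
     // then multiply-add the off-diagonal block aij->B against aij->lvec
*/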

/* Disassemble the off-diagonal portion B of the MPIAIJXxx matrix.
   In other words, change B from the reduced format, which uses local column ids,
   to the expanded format, which uses global column ids; this makes it easier to
   insert new nonzeros (given with global column ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
   use_preallocation determines whether the preallocation or the existing nonzero
   structure is used when creating the global form of B.
*/
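/* Small example of the two formats (values are illustrative): if garray = {3, 9, 17}, then in
   the reduced format a local column id 1 stored in B refers to global column 9; after
   disassembly that entry is stored under the global column id 9 and garray is freed. */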
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  Mat         B = aij->B, Bnew = NULL;

  PetscFunctionBegin;
  /* free the objects related to the matrix-vector multiply */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
#endif
  }

  if (B) {
    Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
    PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
    PetscScalar        v;
    const PetscScalar *ba;

    /* make sure that B is assembled so we can access its values */
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

    /* create the new B and copy the values over */
    PetscCall(PetscMalloc1(m + 1, &nz));
    if (use_preallocation)
      for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
    else
      for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
    PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its column size */
    PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

    if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
    }

    /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
     */
    Bnew->nonzerostate = B->nonzerostate;

    PetscCall(PetscFree(nz));
    PetscCall(MatSeqAIJGetArrayRead(B, &ba));
    for (i = 0; i < m; i++) {
      for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
        col = garray[Baij->j[ct]];
        v   = ba[ct++];
        PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
      }
    }
    PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
    PetscCall(MatDestroy(&B));
  }
  PetscCall(PetscFree(aij->garray));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
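
/* Note: when the matrix is reassembled, the expanded B is compacted again and garray, lvec and
   Mvctx are rebuilt by MatSetUpMultiply_MPIAIJ() above; in that sense the two routines are
   inverses of each other. */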

/* Ugly stuff added for Glenn; someday we should fix this up. */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /* access private part of matrix */
  PetscInt    i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt   *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
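
/*
   Usage sketch (illustrative only; consult the MatDiagonalScaleLocal() man page): the routine
   above is reached through the public interface roughly as

     MatSetLocalToGlobalMapping(A, ltog, ltog);   // A is MPIAIJ; ltog supplies the rmap->mapping used above
     // ... assemble A ...
     MatDiagonalScaleLocal(A, scale);             // scale is a sequential vector in the local (ghosted) ordering

   The entries of scale are split into the "diagonal" and "off-diagonal" work vectors and each
   block of the local matrix is column-scaled separately.
*/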