xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 901f93825bbaa60582af604c6700caf57884a2e1)
1 /*
2    Support for the parallel BAIJ matrix vector multiply
3 */
4 #include <../src/mat/impls/baij/mpi/mpibaij.h>
5 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6 
/*
   Builds the data structures needed for the parallel BAIJ matrix-vector product:
   the local work vector baij->lvec, the scatter context baij->Mvctx, and the
   global-to-local column map baij->garray.  As a side effect the column indices
   of the off-diagonal block B (baij->B) are compacted from global block-column
   numbering to a dense local numbering 0..ec-1, and B's column layout is shrunk
   accordingly.
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, *aj = B->j, ec = 0, *garray; /* ec counts the distinct nonzero block columns of B */
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* maps 1-based global block column -> 1-based local id */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      /* keys and values are stored 1-based so that 0 can mean "not present" */
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--; /* convert both key and value back to 0-based */
    lid--;
    garray[lid] = gid;
  }
  /* sort so local ids follow ascending global block-column order, then rebuild the map */
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid; /* replace global block column with its local id */
    }
  }
  B->nbs = ec;
  /* shrink B's column layout to the compacted width of ec block columns */
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i; /* garray is automatically sorted since i is ascending */
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  /* shrink B's column layout to the compacted width of ec block columns */
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": the dense local positions 0..ec-1 */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  /* NULL array: only the layout matters, no values are ever read from gvec here */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray; /* ownership of garray transfers to the matrix */

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
118 
119 /*
120      Takes the local part of an already assembled MPIBAIJ matrix
121    and disassembles it. This is to allow new nonzeros into the matrix
122    that require more communication in the matrix vector multiply.
123    Thus certain data-structures must be rebuilt.
124 
125    Kind of slow! But that's what application programmers get when
126    they are sloppy.
127 */
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat          B    = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij;
  PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
  MatScalar   *a, *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  if (B) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    Bbaij = (Mat_SeqBAIJ *)B->data;
    mbs   = Bbaij->mbs;
    a     = Bbaij->a;

    /* invent new B and copy stuff over */
    /* nz[i] = number of nonzero blocks in block row i of the old B */
    PetscCall(PetscMalloc1(mbs, &nz));
    for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
    /* the new B is m x N: columns revert to global numbering (see garray use below) */
    PetscCall(MatSetSizes(Bnew, m, n, m, n));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
    /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
     */
    Bnew->nonzerostate = B->nonzerostate;
    if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
    }

    /* the stored blocks are column-major, so insert them column-oriented */
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
    for (i = 0; i < mbs; i++) {
      for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
        col  = garray[Bbaij->j[j]]; /* translate local block column back to global */
        atmp = a + j * bs2;
        PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
      }
    }
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, Bbaij->roworiented));

    PetscCall(PetscFree(nz));
    PetscCall(MatDestroy(&B));

    baij->B = Bnew;
  }
  PetscCall(PetscFree(baij->garray));
  /* mark the matrix unassembled so new (communication-changing) entries may be inserted */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
193 
194 /*      ugly stuff added for Glenn someday we should fix this up */
195 
196 static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
197 static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
198 
MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)199 static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
200 {
201   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
202   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
203   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
204   PetscInt    *r_rmapd, *r_rmapo;
205 
206   PetscFunctionBegin;
207   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
208   PetscCall(MatGetSize(ina->A, NULL, &n));
209   PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
210   nt = 0;
211   for (i = 0; i < inA->rmap->mapping->n; i++) {
212     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
213       nt++;
214       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
215     }
216   }
217   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
218   PetscCall(PetscMalloc1(n, &uglyrmapd));
219   for (i = 0; i < inA->rmap->mapping->n; i++) {
220     if (r_rmapd[i]) {
221       for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
222     }
223   }
224   PetscCall(PetscFree(r_rmapd));
225   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
226 
227   PetscCall(PetscCalloc1(ina->Nbs, &lindices));
228   for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
229   no = inA->rmap->mapping->n - nt;
230   PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
231   nt = 0;
232   for (i = 0; i < inA->rmap->mapping->n; i++) {
233     if (lindices[inA->rmap->mapping->indices[i]]) {
234       nt++;
235       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
236     }
237   }
238   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
239   PetscCall(PetscFree(lindices));
240   PetscCall(PetscMalloc1(nt * bs, &uglyrmapo));
241   for (i = 0; i < inA->rmap->mapping->n; i++) {
242     if (r_rmapo[i]) {
243       for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
244     }
245   }
246   PetscCall(PetscFree(r_rmapo));
247   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
/*
   Column-scales the two local pieces of the MPIBAIJ matrix by a locally-ordered
   scaling vector: the entries of scale are routed through the uglyrmapd/uglyrmapo
   maps into the work vectors uglydd/uglyoo, which then scale a->A and a->B
   respectively.  The static setup data is freed at the end, so the maps are
   rebuilt on every call.
*/
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  /* lazily build the index maps and work vectors on first use */
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  /* free the static setup data; it will be recreated on the next call */
  PetscCall(PetscFree(uglyrmapd));
  PetscCall(PetscFree(uglyrmapo));
  PetscCall(VecDestroy(&uglydd));
  PetscCall(VecDestroy(&uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
283