xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)

/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

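/*
   MatSetUpMultiply_MPIBAIJ - builds the data structures needed to apply the off-diagonal part B in the
   parallel matrix-vector product: the list garray of global block columns that have nonzeros in B, a
   compacted local numbering for B's columns, the sequential work vector lvec, and the scatter Mvctx
   that gathers the needed ghost values from the global vector into lvec.
*/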
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
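  /* the hash table is traversed in arbitrary order, so sort garray and then rebuild the table so that it
     maps each global block column (1-based) to its final, sorted local index (1-based) */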
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the global number of block columns */
  /* mark those block columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

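  /* keep garray: entry i is the global block column corresponding to local block column i of B */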
  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat          B    = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij;
  PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
  MatScalar   *a, *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  if (B) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    Bbaij = (Mat_SeqBAIJ *)B->data;
    mbs   = Bbaij->mbs;
    a     = Bbaij->a;

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(mbs, &nz));
    for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
    /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
     */
    Bnew->nonzerostate = B->nonzerostate;
    if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
    }

    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
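    /* copy each block of B into Bnew, translating the compacted local block column back to its global
       block column through garray; row orientation is switched off so the raw bs*bs blocks in Bbaij->a
       can be passed directly to MatSetValuesBlocked_SeqBAIJ() */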
    for (i = 0; i < mbs; i++) {
      for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
        col  = garray[Bbaij->j[j]];
        atmp = a + j * bs2;
        PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
      }
    }
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

    PetscCall(PetscFree(nz));
    PetscCall(MatDestroy(&B));

    baij->B = Bnew;
  }
  PetscCall(PetscFree(baij->garray));
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
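  /* build the maps used by MatDiagonalScaleLocal_MPIBAIJ(): uglyrmapd[i] (resp. uglyrmapo[i]) is the entry
     of the local scale vector that scales column i of the "diagonal" part ina->A (resp. the "off-diagonal"
     part ina->B); uglydd and uglyoo are work vectors of the matching sizes */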
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

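  /* lindices[g] holds 1 + the local position of global block column g in garray, or 0 if B has no nonzeros in that block column */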
  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
289