xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1 /*
2    Support for the parallel SBAIJ matrix vector multiply
3 */
4 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
5 
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication machinery for the parallel
   SBAIJ matrix-vector product.

   Steps performed:
     1. Determine the distinct (block) columns referenced by the off-diagonal part B
        and record their global block indices in garray (sorted ascending).
     2. Renumber B's column indices in place into the compact local numbering
        0..ec-1 (so B becomes an mbs x ec block matrix).
     3. Create the sequential work vector lvec and the scatter Mvctx (used by, e.g.,
        MatDiagonalScale_MPISBAIJ), plus the combined slvec0/slvec1 vectors and the
        symmetric-multiply scatter sMvctx used by MatMult_MPISBAIJ.

   Collective on mat. Input is the partially assembled MPISBAIJ matrix; on output
   sbaij->garray, lvec, Mvctx, slvec0/0b, slvec1/1a/1b, and sMvctx are set.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank   = sbaij->rank;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Hash-table path: map each one-based global block column (gid1) that appears in B
     to a one-based local id; ec counts the distinct columns found so far. */
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      /* a zero value means the key was absent: assign it the next local id */
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--; /* back to zero-based global block column */
    lid--; /* back to zero-based local id, used as the slot in garray */
    garray[lid] = gid;
  }
  /* sort so local ids are assigned in increasing global order, then rebuild the
     table to reflect the sorted ordering */
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid; /* B now uses local column numbering 0..ec-1 */
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  /* record, for each needed column, which rank owns it (owners[] holds the block-row
     ownership ranges; garray is sorted so j only advances) */
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  /* walk the ownership ranges in order so garray comes out sorted and the owning
     rank of each entry is known as a by-product */
  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  /* B has been compacted: rebuild its column layout with ec block columns */
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp)); /* sized 2*ec: reused below for the sto index set */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray; /* ownership of garray transfers to the matrix */

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each rank's local piece holds mbs "x" blocks followed by ec "b" blocks */
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), (mbs + ec) * bs, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom: shift each needed global block column into the
     slvec0 numbering of its owning rank j (sowners is in scalar units, hence /bs) */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom: this rank's own b-block region starts after its mbs x-blocks */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  /* slvec1a/slvec1b (and slvec0b) are views that alias the storage of slvec1/slvec0:
     the first bs*mbs entries are the "a" (diagonal) part, the remaining nt - bs*mbs
     (= ec*bs) entries are the "b" (off-process) part */
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}
171 
172 /*
173      Takes the local part of an already assembled MPISBAIJ matrix
174    and disassembles it. This is to allow new nonzeros into the matrix
175    that require more communication in the matrix vector multiply.
176    Thus certain data-structures must be rebuilt.
177 
178    Kind of slow! But that's what application programmers get when
179    they are sloppy.
180 */
/*
   MatDisAssemble_MPISBAIJ - Converts the compacted off-diagonal part B back to
   global column numbering and tears down the matrix-vector-multiply structures,
   so that new nonzeros requiring wider communication can be inserted.

   Effects: destroys lvec, Mvctx, the slvec* work vectors, and colmap; replaces
   baij->B with a freshly preallocated matrix whose entries are re-inserted with
   global column indices (via garray); frees and NULLs baij->garray; clears
   A->was_assembled. Collective on A.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  /* single-precision MatScalar: values must be widened into a PetscScalar buffer
     before MatSetValues; otherwise atmp aliases directly into Bbaij->a below */
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over: preallocate exactly the per-block-row
     nonzero counts of the old B */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  /* Bnew gets the full global column width n, since columns revert to global numbering */
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    /* NOTE(review): cast uses Mat_SeqSBAIJ while Bbaij above uses Mat_SeqBAIJ;
       presumably the nonew field lives at a compatible offset — verify if touched */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    /* rvals = the bs consecutive point rows of block row i */
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      /* translate the compact local block column back to a global point column */
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
#else
        /* point atmp at column k of the bs x bs block stored at a[j*bs2] */
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  /* force MatSetUpMultiply to run again at the next assembly */
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
270