/*
  Routines to compute overlapping regions of a parallel MPI matrix.
  Used for finding submatrices that are shared across processors.
*/
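
/*
  A minimal usage sketch (the names `A` and `isets` below are hypothetical):

    Mat A;        // an assembled MATMPISBAIJ matrix
    IS  isets[2]; // index sets describing the initial subdomains
    ...
    PetscCall(MatIncreaseOverlap(A, 2, isets, 1)); // grow each subdomain by one level of overlap

  MatIncreaseOverlap() dispatches to MatIncreaseOverlap_MPISBAIJ() below for
  MATMPISBAIJ matrices.
*/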
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <petscbt.h>

static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *);
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *);

PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov)
{
  PetscInt i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov;
  IS *is_new, *is_row;
  Mat *submats;
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *asub_i;
  PetscBT table;
  PetscInt *ai, brow, nz, nis, l, nmax, nstages, max_no, pos;
  const PetscInt *idx;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(is_max, &is_new));
  /* Convert the indices into block format */
  PetscCall(ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new));
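  /* Illustration (assumed values): with bs = 2, scalar indices {0, 1, 4, 5} in is[i]
     compress to block indices {0, 2} in is_new[i], since scalar rows 2k and 2k+1
     both belong to block row k */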
  PetscCheck(ov >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap specified");

  /* previous non-scalable implementation */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg));
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i = 0; i < ov; ++i) PetscCall(MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new));
  } else { /* implementation using modified BAIJ routines */

    PetscCall(PetscMalloc1(Mbs + 1, &nidx));
    PetscCall(PetscBTCreate(Mbs, &table)); /* for column search */

    /* Create is_row */
    PetscCall(PetscMalloc1(is_max, &is_row));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0]));

    for (i = 1; i < is_max; i++) is_row[i] = is_row[0]; /* reuse is_row[0] */

    /* Allocate memory to hold all the submatrices - Modified from MatCreateSubMatrices_MPIBAIJ() */
    PetscCall(PetscMalloc1(is_max + 1, &submats));

    /* Determine the number of stages through which submatrices are done */
    nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt));
    if (!nmax) nmax = 1;
    nstages = is_max / nmax + ((is_max % nmax) ? 1 : 0);
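    /* Worked example (assumed sizes): with c->Nbs = 1000000 block columns and an
       8-byte PetscInt, nmax = 20*1000000/(1000000*8) = 2, so is_max = 5 index sets
       would be processed in nstages = 5/2 + 1 = 3 stages of at most nmax submatrices each */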

    /* Make sure every processor loops through the nstages */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C)));

    {
      const PetscObject obj = (PetscObject)c->A;
      size_t new_len, cur_len, max_len;

      PetscCall(PetscStrlen(MATSEQBAIJ, &new_len));
      PetscCall(PetscStrlen(MATSEQSBAIJ, &cur_len));
      max_len = PetscMax(cur_len, new_len) + 1;
      PetscCall(PetscRealloc(max_len * sizeof(*obj->type_name), &obj->type_name));
      /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to
         trigger that */
      for (iov = 0; iov < ov; ++iov) {
        /* 1) Get submats for column search */
        PetscCall(PetscStrncpy(obj->type_name, MATSEQBAIJ, max_len));
        for (i = 0, pos = 0; i < nstages; i++) {
          if (pos + nmax <= is_max) max_no = nmax;
          else if (pos == is_max) max_no = 0;
          else max_no = is_max - pos;
          c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */

          PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos, PETSC_TRUE));
          pos += max_no;
        }
        PetscCall(PetscStrncpy(obj->type_name, MATSEQSBAIJ, max_len));

        /* 2) Row search */
        PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new));

        /* 3) Column search */
        for (i = 0; i < is_max; i++) {
          asub_i = (Mat_SeqSBAIJ *)submats[i]->data;
          ai = asub_i->i;

          /* put is_new[i], obtained from MatIncreaseOverlap_MPIBAIJ_Once(), into table */
          PetscCall(PetscBTMemzero(Mbs, table));

          PetscCall(ISGetIndices(is_new[i], &idx));
          PetscCall(ISGetLocalSize(is_new[i], &nis));
          for (l = 0; l < nis; l++) {
            PetscCall(PetscBTSet(table, idx[l]));
            nidx[l] = idx[l];
          }
          isz = nis;

          /* column search: add every block row that has a nonzero in the columns of is_new[i] */
          for (brow = 0; brow < Mbs; brow++) {
            nz = ai[brow + 1] - ai[brow];
            if (nz) {
              if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow;
            }
          }
          PetscCall(ISRestoreIndices(is_new[i], &idx));
          PetscCall(ISDestroy(&is_new[i]));

          /* create updated is_new */
          PetscCall(ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i));
        }

        /* Free tmp spaces */
        for (i = 0; i < is_max; i++) PetscCall(MatDestroy(&submats[i]));
      }

      PetscCall(PetscBTDestroy(&table));
      PetscCall(PetscFree(submats));
      PetscCall(ISDestroy(&is_row[0]));
      PetscCall(PetscFree(is_row));
      PetscCall(PetscFree(nidx));
    }
  }
  for (i = 0; i < is_max; i++) {
    PetscCall(ISDestroy(&is[i]));
    PetscCall(ISGetLocalSize(is_new[i], &nis));
    PetscCall(ISGetIndices(is_new[i], &idx));
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)is_new[i]), bs, nis, idx, PETSC_COPY_VALUES, &is[i]));
    PetscCall(ISRestoreIndices(is_new[i], &idx)); /* release the borrowed indices before destroying is_new[i] */
    PetscCall(ISDestroy(&is_new[i]));
  }
  PetscCall(PetscFree(is_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}

typedef enum {
  MINE,
  OTHER
} WhoseOwner;
/* data1, odata1 and odata2 are packed in the format (for communication):
   data[0]          = is_max, no. of is
   data[1]          = size of is[0]
        ...
   data[is_max]     = size of is[is_max-1]
   data[is_max + 1] = data(is[0])
        ...
   data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
        ...
   data2 is packed in the format (for creating output is[]):
   data[0]          = is_max, no. of is
   data[1]          = size of is[0]
        ...
   data[is_max]     = size of is[is_max-1]
   data[is_max + 1] = data(is[0])
        ...
   data[is_max + 1 + Mbs*i] = data(is[i])
        ...
*/
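/* A sketch of walking the data1/odata1 layout above (loop variables are illustrative):

     PetscInt nis  = data[0];         // number of index sets
     PetscInt *val = data + 1 + nis;  // entries of is[0] start here
     for (PetscInt ii = 0; ii < nis; ii++) {
       PetscInt sz = data[1 + ii];    // size of is[ii]
       // val[0..sz-1] are the block indices of is[ii]
       val += sz;                     // is[ii+1] follows immediately
     }

   In the data2 layout the sets are instead padded to a fixed stride, so is[ii]
   always starts at data + 1 + nis + Mbs*ii regardless of the actual sizes. */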
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  PetscMPIInt proc_end = 0, size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, *iwork;
  const PetscInt *idx_i;
  PetscInt idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i, len;
  PetscInt Mbs, i, j, k, *odata1, *odata2;
  PetscInt **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
  PetscInt len_unused, nodata2;
  PetscInt ois_max; /* max no. of is[] over all processors */
  PetscByte *t_p;
  MPI_Comm comm;
  MPI_Request *s_waits1, *s_waits2, r_req;
  MPI_Status *s_status, r_status;
  PetscBT *table; /* mark indices of this processor's is[] */
  PetscBT table_i;
  PetscBT otable; /* mark indices of other processors' is[] */
  PetscInt bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
  IS garray_local, garray_gl;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  size = c->size;
  rank = c->rank;
  Mbs = c->Mbs;

  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag1));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag2));

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  PetscCall(PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p));
  for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;
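  /* Each bit table occupies Mbs/PETSC_BITS_PER_BYTE + 1 bytes; carving all of them out
     of the single allocation t_p avoids len separate mallocs. E.g. (illustrative)
     Mbs = 100 with 8 bits per byte gives 13 bytes per table, so table[i] = t_p + 13*i */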

  PetscCallMPI(MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm));

  /* 1. Send this processor's is[] to other processors */
  /* allocate spaces */
  PetscCall(PetscMalloc1(is_max, &n));
  len = 0;
  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetLocalSize(is[i], &n[i]));
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  PetscCall(PetscMalloc1(size * len + 1, &data1));
  PetscCall(PetscMalloc1(size, &data1_start));
  for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;

  PetscCall(PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners));

  /* gather c->garray from all processors */
  PetscCall(ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local));
  PetscCall(ISAllGather(garray_local, &garray_gl));
  PetscCall(ISDestroy(&garray_local));
  PetscCallMPI(MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm));

  Bowners[0] = 0;
  for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];
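  /* Bowners is now a prefix sum: rank i's garray entries occupy
     garray_gl[Bowners[i] .. Bowners[i+1]-1]. E.g. (illustrative) Bnbs = {3, 2, 4}
     on 3 ranks gives Bowners = {0, 3, 5, 9} */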

  if (is_max) {
    /* hash table ctable maps each block row to its owner proc_id */
    PetscCall(PetscMalloc1(Mbs, &ctable));
    j = 0;
    for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
      for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
    }

    /* hash tables marking c->garray */
    PetscCall(ISGetIndices(garray_gl, &idx_i));
    for (i = 0; i < size; i++) {
      table_i = table[i];
      PetscCall(PetscBTMemzero(Mbs, table_i));
      for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i] */
        PetscCall(PetscBTSet(table_i, idx_i[j]));
      }
    }
    PetscCall(ISRestoreIndices(garray_gl, &idx_i));
  } /* if (is_max) */
  PetscCall(ISDestroy(&garray_gl));

  /* evaluate communication - message to whom, length, and buffer space */
  for (i = 0; i < size; i++) len_s[i] = 0;

  /* header of data1 */
  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
    iwork[proc_id] = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j = 0; j < is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetIndices(is[i], &idx_i));
    for (j = 0; j < n[i]; j++) {
      idx = idx_i[j];
      *data1_start[rank] = idx;
      data1_start[rank]++; /* for local processing */
      PetscCall(PetscMPIIntCast(ctable[idx], &proc_end));
      for (PetscMPIInt proc_id = 0; proc_id <= proc_end; proc_id++) { /* for others to process */
        if (proc_id == rank) continue;                                /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx;
        data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id] = len_s[proc_id];
    }
    PetscCall(ISRestoreIndices(is[i], &idx_i));
  }

  nrqs = 0;
  nrqr = 0;
  for (i = 0; i < size; i++) {
    data1_start[i] = data1 + i * len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add no. of header msg */
    }
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(PetscFree(n));
  PetscCall(PetscFree(ctable));

  /* Determine the number of messages to expect, their lengths, and from-ids */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr));
  PetscCall(PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1));
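
  /* Communication pattern from here on (a sketch of steps 1-4 below):
       phase 1 (tag1): Isend this processor's packed is[] to each processor that may need it;
       phase 2 (tag2): each receiver runs MatIncreaseOverlap_MPISBAIJ_Local() with
         whose=OTHER on the incoming data and Isends the newly found indices back;
       the originating processor then merges the replies into its own tables (step 4) */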

  /* Now post the sends */
  PetscCall(PetscMalloc2(size, &s_waits1, size, &s_waits2));
  k = 0;
  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      PetscCallMPI(MPIU_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k));
      k++;
    }
  }

  /* 2. Receive other processors' is[], process them, then send the results back */
  len = 0;
  for (i = 0; i < nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  PetscCall(PetscFree(len_r1));
  PetscCall(PetscFree(id_r1));

  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  PetscCall(PetscMalloc1(len + 1, &odata1));
  PetscCall(PetscMalloc1(size, &odata2_ptr));
  PetscCall(PetscBTCreate(Mbs, &otable));

  len_max = ois_max * (Mbs + 1); /* max space storing all is[] for each receive */
  len_est = 2 * len_max;         /* estimated space of storing is[] for all receiving messages */
  PetscCall(PetscMalloc1(len_est + 1, &odata2));
  nodata2 = 0; /* nodata2+1 = number of chunks allocated so far for odata2 */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the current chunk odata2_ptr[nodata2]; must stay >= len_max */

  k = 0;
  while (k < nrqr) {
    PetscMPIInt ilen;

    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status));
    if (flag) {
      PetscMPIInt proc_id;

      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPIU_Irecv(odata1, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));

      /* Process messages */
      /* make sure there is enough unused space in odata2 array */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        PetscCall(PetscMalloc1(len_est + 1, &odata2));
        odata2_ptr[++nodata2] = odata2;
        len_unused = len_est;
      }

      PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable));
      len = 1 + odata2[0];
      for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      PetscCallMPI(MPIU_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k));
      k++;
      odata2 += len;
      len_unused -= len;
      PetscCall(PetscMPIIntCast(len, &len_s[proc_id])); /* length of message sending back to proc_id */
    }
  }
  PetscCall(PetscFree(odata1));
  PetscCall(PetscBTDestroy(&otable));

  /* 3. Do local work on this processor's is[] */
  /* make sure there is enough unused space in odata2 (= data) array */
  len_max = is_max * (Mbs + 1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) {   /* allocate more space for odata2 */
    PetscCall(PetscMalloc1(len_est + 1, &odata2));

    odata2_ptr[++nodata2] = odata2;
  }

  data = odata2;
  PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table));
  PetscCall(PetscFree(data1_start));

  /* 4. Receive work done on other processors, then merge */
  /* get the max length of the reply messages this processor expects to receive */
  PetscCallMPI(MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm));
  PetscCall(PetscMalloc1(iwork[rank] + 1, &data2));
  PetscCall(PetscFree4(len_s, btable, iwork, Bowners));

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status));
    if (flag) {
      PetscMPIInt proc_id, ilen;
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPIU_Irecv(data2, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));
      if (ilen > 1 + is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i = 0; i < is_max; i++) {
          table_i = table[i];
          data_i = data + 1 + is_max + Mbs * i;
          isz = data[1 + i];
          for (j = 0; j < data2[1 + i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col;
          }
          data[1 + i] = isz;
          if (i < is_max - 1) data2_i += data2[1 + i];
        }
      }
      k++;
    }
  }
  PetscCall(PetscFree(data2));
  PetscCall(PetscFree2(table, t_p));

  /* phase 1 sends are complete */
  PetscCall(PetscMalloc1(size, &s_status));
  if (nrqs) PetscCallMPI(MPI_Waitall(nrqs, s_waits1, s_status));
  PetscCall(PetscFree(data1));

  /* phase 2 sends are complete */
  if (nrqr) PetscCallMPI(MPI_Waitall(nrqr, s_waits2, s_status));
  PetscCall(PetscFree2(s_waits1, s_waits2));
  PetscCall(PetscFree(s_status));

  /* 5. Create new is[] */
  for (i = 0; i < is_max; i++) {
    data_i = data + 1 + is_max + Mbs * i;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i));
  }
  for (k = 0; k <= nodata2; k++) PetscCall(PetscFree(odata2_ptr[k]));
  PetscCall(PetscFree(odata2_ptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap_MPISBAIJ_Once() to do
   the work on the local processor.

   Inputs:
    C     - MAT_MPISBAIJ;
    data  - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
    whose - whose is[] to be processed,
            MINE:  this processor's is[]
            OTHER: another processor's is[]
   Output:
    nidx  - whose = MINE:
                 holds input and newly found indices in the same format as data
            whose = OTHER:
                 only holds the newly found indices
    table - table[i]: marks the indices of is[i], i=0,...,is_max-1. Used only when whose=MINE.
*/
/* Would computation be reduced by swapping the loop 'for each is' and 'for each row'? */
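/* Why both searches are needed (illustrative example): SBAIJ stores only the upper
   triangle, so a block row on is[i] reaches its neighbors at or above the diagonal
   through the row search, while neighbors below the diagonal must be recovered by the
   column search: a row not on is[i] is added when one of its stored columns lies on
   is[i], which by symmetry means the unstored entry (col,row) connects them. E.g. if
   only entry (1,3) is stored and is[i] = {3}, the column search over row 1 finds
   column 3 and adds row 1. */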
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)c->A->data;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)c->B->data;
  PetscInt row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
  PetscInt a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
  PetscBT table0;  /* mark the indices of input is[] for look up */
  PetscBT table_i; /* points to i-th table. When whose=OTHER, a single table is used for all is[] */

  PetscFunctionBegin;
  Mbs = c->Mbs;
  mbs = a->mbs;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  garray = c->garray;
  rstart = c->rstartbs;
  is_max = data[0];

  PetscCall(PetscBTCreate(Mbs, &table0));

  nidx[0] = is_max;
  idx_i = data + is_max + 1;     /* ptr to input is[0] array */
  nidx_i = nidx + is_max + 1;    /* ptr to output is[0] array */
  for (i = 0; i < is_max; i++) { /* for each is */
    isz = 0;
    n = data[1 + i]; /* size of input is[i] */

    /* initialize and set table_i (mark idx and nidx) and table0 (only mark idx) */
    if (whose == MINE) { /* process this processor's is[] */
      table_i = table[i];
      nidx_i = nidx + 1 + is_max + Mbs * i;
    } else { /* process other processor's is[] - only use one temp table */
      table_i = table[0];
    }
    PetscCall(PetscBTMemzero(Mbs, table_i));
    PetscCall(PetscBTMemzero(Mbs, table0));
    if (n == 0) {
      nidx[1 + i] = 0; /* size of new is[i] */
      continue;
    }

    isz0 = 0;
    col_max = 0;
    for (j = 0; j < n; j++) {
      col = idx_i[j];
      PetscCheck(col < Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT, col, Mbs);
      if (!PetscBTLookupSet(table_i, col)) {
        PetscCall(PetscBTSet(table0, col));
        if (whose == MINE) nidx_i[isz0] = col;
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) isz = isz0;
    k = 0; /* no. of indices from input is[i] that have been examined */
    for (row = 0; row < mbs; row++) {
      a_start = ai[row];
      a_end = ai[row + 1];
      b_start = bi[row];
      b_end = bi[row + 1];
      if (PetscBTLookup(table0, row + rstart)) { /* row is on input is[i]:
                                                    do row search: collect all col in this row */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        k++;
        if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
      } else { /* row is not on input is[i]:
                  do col search: add row onto nidx_i if one of its stored cols is on input is[i] */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = a_start; l < a_end; l++) */
          }
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = b_start; l < b_end; l++) */
          }
        }
      }
    }

    if (i < is_max - 1) {
      idx_i += n;    /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1 + i] = isz; /* size of new is[i] */
  } /* for each is */
  PetscCall(PetscBTDestroy(&table0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
