xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision f3964e098c30ec7459ab60de3d6852f79d6c4e13)
1 
2 /*
3   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4           C = A * B
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
8 #include <../src/mat/utils/freespace.h>
9 #include <petscbt.h>
10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/
11 /*
12 #define DEBUG_MATMATMULT
13  */
14 EXTERN_C_BEGIN
15 #undef __FUNCT__
16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
18 {
19   PetscErrorCode ierr;
20   PetscBool      scalable=PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   if (scall == MAT_INITIAL_MATRIX){
24     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
25     ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_scalable",&scalable,PETSC_NULL);CHKERRQ(ierr);
26     if (scalable){
27       ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr);
28     } else {
29       ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
30     }
31     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
32   }
33   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
34   ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr);
35   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 EXTERN_C_END
39 
40 /*
41  MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B
42   Input Parameter:
43 .    am, Ai, Aj - number of rows and structure of A
44 .    bm, bn, Bi, Bj - number of rows, columns, and structure of B
45 .    fill - filll ratio See MatMatMult()
46 
47   Output Parameter:
48 .    Ci, Cj - structure of C = A*B
49 .    nspacedouble - number of extra mallocs
50  */
51 #undef __FUNCT__
52 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ"
53 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble)
54 {
55   PetscErrorCode     ierr;
56   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
57   PetscInt           *ai=Ai,*aj=Aj,*bi=Bi,*bj=Bj,*bjj,*ci,*cj;
58   PetscInt           i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0;
59   PetscBT            lnkbt;
60 
61   PetscFunctionBegin;
62   /* Allocate ci array, arrays for fill computation and */
63   /* free space for accumulating nonzero column info */
64   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
65   ci[0] = 0;
66 
67   /* create and initialize a linked list */
68   nlnk = bn+1;
69   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
70 
71   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
72   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
73   current_space = free_space;
74 
75   /* Determine symbolic info for each row of the product: */
76   for (i=0; i<am; i++) {
77     anzi = ai[i+1] - ai[i];
78     cnzi = 0;
79     aj   = Aj + ai[i];
80     for (j=0; j<anzi; j++){
81       brow = aj[j];
82       bnzj = bi[brow+1] - bi[brow];
83       bjj  = bj + bi[brow];
84       /* add non-zero cols of B into the sorted linked list lnk */
85       ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
86       cnzi += nlnk;
87     }
88 
89     /* If free space is not available, make more free space */
90     /* Double the amount of total space in the list */
91     if (current_space->local_remaining<cnzi) {
92       ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
93       ndouble++;
94     }
95 
96     /* Copy data into free space, then initialize lnk */
97     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
98     current_space->array           += cnzi;
99     current_space->local_used      += cnzi;
100     current_space->local_remaining -= cnzi;
101     ci[i+1] = ci[i] + cnzi;
102   }
103 
104   /* Column indices are in the list of free space */
105   /* Allocate space for cj, initialize cj, and */
106   /* destroy list of free space and other temporary array(s) */
107   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
108   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
109   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
110 
111   *Ci           = ci;
112   *Cj           = cj;
113   *nspacedouble = ndouble;
114   PetscFunctionReturn(0);
115 }
116 
117 /*
118  MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable - same as MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ()
119    but replaces O(bn) array 'lnkbt' with a scalable array 'abj' of size Crmax.
120  */
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable"
123 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble)
124 {
125   PetscErrorCode ierr;
126   PetscInt       *aj=Aj,*bi=Bi,*bj,*ci,*cj,rmax=0,*abj,*cj_tmp,nextabj;
127   PetscInt       i,j,anzi,brow,bnzj,cnzi,k;
128   PetscBT        bt;
129 #if defined(DEBUG_MATMATMULT)
130   PetscLogDouble t0,tf,time_rmax=0.0,time_sym=0.0,time_sort=0.0,t0_sort,tf_sort;
131 #endif
132 
133   PetscFunctionBegin;
134   /* Allocate ci array and bt */
135   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
136   ci[0] = 0;
137 
138   /* Get ci and count rmax of C - Crmax <= max(Armax*Brmax, bn) */
139 #if defined(DEBUG_MATMATMULT)
140   ierr = PetscGetTime(&t0);CHKERRQ(ierr);
141 #endif
142   ierr = PetscBTCreate(bn,bt);CHKERRQ(ierr);
143   ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr);
144   for (i=0; i<am; i++) {
145     anzi = Ai[i+1] - Ai[i];
146     cnzi = 0;
147     aj   = Aj + Ai[i];
148     for (j=0; j<anzi; j++){
149       brow = aj[j];
150       bnzj = bi[brow+1] - bi[brow];
151       bj   = Bj + bi[brow];
152       for (k=0; k<bnzj; k++){
153         if (!PetscBTLookupSet(bt,bj[k])) cnzi++;  /* new entry */
154       }
155     }
156     ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr);
157     ci[i+1] = ci[i] + cnzi;
158     if (rmax < cnzi) rmax = cnzi;
159   }
160 #if defined(DEBUG_MATMATMULT)
161   ierr = PetscGetTime(&tf);CHKERRQ(ierr);
162   time_rmax += tf - t0;
163   ierr = PetscGetTime(&t0);CHKERRQ(ierr);
164 #endif
165   /* Allocate space for cj */
166   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
167 
168   /* Allocate a temp array for storing column indices of A*B */
169   ierr = PetscMalloc((rmax+1)*sizeof(PetscInt),&abj);CHKERRQ(ierr);
170 
171   /* Determine cj */
172   for (i=0; i<am; i++) {
173     anzi    = Ai[i+1] - Ai[i];
174     cnzi    = 0;
175     nextabj = 0;
176     aj      = Aj + Ai[i];
177     for (j=0; j<anzi; j++){
178       brow = aj[j];
179       bnzj = bi[brow+1] - bi[brow];
180       bj   = Bj + bi[brow];
181       for (k=0; k<bnzj; k++){
182         if (!PetscBTLookupSet(bt,bj[k])){  /* new entry */
183           abj[nextabj] = bj[k]; nextabj++;
184         }
185       }
186     }
187 
188     /* Sort abj, then copy it to cj */
189     cnzi = ci[i+1] - ci[i];
190 #if defined(DEBUG_MATMATMULT)
191     ierr = PetscGetTime(&t0_sort);CHKERRQ(ierr);
192 #endif
193     ierr = PetscSortInt(cnzi,abj);CHKERRQ(ierr);
194 #if defined(DEBUG_MATMATMULT)
195     ierr = PetscGetTime(&tf_sort);CHKERRQ(ierr);
196     time_sort += tf_sort - t0_sort;
197 #endif
198 
199     cj_tmp = cj + ci[i];
200     for (k=0; k< cnzi; k++){
201       cj_tmp[k] = abj[k];
202       ierr = PetscBTClear(bt,abj[k]);CHKERRQ(ierr);
203     }
204   }
205 #if defined(DEBUG_MATMATMULT)
206   ierr = PetscGetTime(&tf);CHKERRQ(ierr);
207   time_sym += tf - t0;
208 #endif
209 
210   ierr = PetscBTDestroy(bt);CHKERRQ(ierr);
211   ierr = PetscFree(abj);CHKERRQ(ierr);
212 
213   *Ci           = ci;
214   *Cj           = cj;
215   *nspacedouble = 0;
216 #if defined(DEBUG_MATMATMULT)
217   printf("Time of GetSymbolic_Scalable: rmax %g + sym %g (sort %g) = %g; Crmax %d, pN %d\n",time_rmax,time_sym,time_sort,time_rmax+time_sym,rmax,bn);
218 #endif
219   PetscFunctionReturn(0);
220 }
221 
222 #undef __FUNCT__
223 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
224 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
225 {
226   PetscErrorCode ierr;
227   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
228   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj;
229   PetscInt       am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble;
230   MatScalar      *ca;
231   PetscReal      afill;
232 
233   PetscFunctionBegin;
234   /* Get ci and cj */
235   ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr);
236 #if defined(DEBUG_MATMATMULT)
237   ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ() is done \n");CHKERRQ(ierr);
238 #endif
239 
240   /* Allocate space for ca */
241   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
242   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
243 
244   /* put together the new symbolic matrix */
245   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
246 
247   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
248   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
249   c = (Mat_SeqAIJ *)((*C)->data);
250   c->free_a   = PETSC_TRUE;
251   c->free_ij  = PETSC_TRUE;
252   c->nonew    = 0;
253   (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ;
254 
255   ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr);
256   ierr = PetscMemzero(c->matmult_abdense,bn*sizeof(PetscScalar));CHKERRQ(ierr);
257   (*C)->ops->matmultnumeric =  MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, takes additional dense_axpy*bn*sizeof(PetscScalar) space */
258 
259   /* set MatInfo */
260   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
261   if (afill < 1.0) afill = 1.0;
262   c->maxnz                     = ci[am];
263   c->nz                        = ci[am];
264   (*C)->info.mallocs           = nspacedouble;
265   (*C)->info.fill_ratio_given  = fill;
266   (*C)->info.fill_ratio_needed = afill;
267 
268 #if defined(PETSC_USE_INFO)
269   if (ci[am]) {
270     ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
271     ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr);
272   } else {
273     ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
274   }
275 #endif
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
281 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
282 {
283   PetscErrorCode ierr;
284   PetscLogDouble flops=0.0;
285   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
286   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
287   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
288   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
289   PetscInt       am=A->rmap->n,cm=C->rmap->n;
290   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
291   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
292   PetscScalar    *ab_dense=c->matmult_abdense;
293 
294   PetscFunctionBegin;
295   /* clean old values in C */
296   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
297   /* Traverse A row-wise. */
298   /* Build the ith row in C by summing over nonzero columns in A, */
299   /* the rows of B corresponding to nonzeros of A. */
300   for (i=0; i<am; i++) {
301     anzi = ai[i+1] - ai[i];
302     for (j=0; j<anzi; j++) {
303       brow = aj[j];
304       bnzi = bi[brow+1] - bi[brow];
305       bjj  = bj + bi[brow];
306       baj  = ba + bi[brow];
307       /* perform dense axpy */
308       valtmp = aa[j];
309       for (k=0; k<bnzi; k++) {
310         ab_dense[bjj[k]] += valtmp*baj[k];
311       }
312       flops += 2*bnzi;
313     }
314     aj += anzi; aa += anzi;
315 
316     cnzi = ci[i+1] - ci[i];
317     for (k=0; k<cnzi; k++) {
318       ca[k]          += ab_dense[cj[k]];
319       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
320     }
321     flops += cnzi;
322     cj += cnzi; ca += cnzi;
323   }
324   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
325   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
326   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable"
332 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
333 {
334   PetscErrorCode ierr;
335   PetscLogDouble flops=0.0;
336   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data;
337   Mat_SeqAIJ     *b = (Mat_SeqAIJ *)B->data;
338   Mat_SeqAIJ     *c = (Mat_SeqAIJ *)C->data;
339   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
340   PetscInt       am=A->rmap->N,cm=C->rmap->N;
341   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
342   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
343   PetscInt       nextb;
344 
345   PetscFunctionBegin;
346 #if defined(DEBUG_MATMATMULT)
347   //ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr);
348 #endif
349   /* clean old values in C */
350   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
351   /* Traverse A row-wise. */
352   /* Build the ith row in C by summing over nonzero columns in A, */
353   /* the rows of B corresponding to nonzeros of A. */
354   for (i=0;i<am;i++) {
355     anzi = ai[i+1] - ai[i];
356     cnzi = ci[i+1] - ci[i];
357     for (j=0;j<anzi;j++) {
358       brow = aj[j];
359       bnzi = bi[brow+1] - bi[brow];
360       bjj  = bj + bi[brow];
361       baj  = ba + bi[brow];
362       /* perform sparse axpy */
363       valtmp = aa[j];
364       nextb  = 0;
365       for (k=0; nextb<bnzi; k++) {
366         if (cj[k] == bjj[nextb]){ /* ccol == bcol */
367           ca[k] += valtmp*baj[nextb++];
368         }
369       }
370       flops += 2*bnzi;
371     }
372     aj += anzi; aa += anzi;
373     cj += cnzi; ca += cnzi;
374   }
375 
376   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
378   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable"
384 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C)
385 {
386   PetscErrorCode ierr;
387   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
388   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj;
389   PetscInt       am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble;
390   MatScalar      *ca;
391   PetscReal      afill;
392 
393   PetscFunctionBegin;
394 #if defined(DEBUG_MATMATMULT)
395   ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr);
396 #endif
397   /* Get ci and cj */
398   ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr);
399 #if defined(DEBUG_MATMATMULT)
400   ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable() is done \n");CHKERRQ(ierr);
401 #endif
402 
403   /* Allocate space for ca */
404   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
405   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
406 
407   /* put together the new symbolic matrix */
408   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
409 
410   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
411   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
412   c = (Mat_SeqAIJ *)((*C)->data);
413   c->free_a   = PETSC_TRUE;
414   c->free_ij  = PETSC_TRUE;
415   c->nonew    = 0;
416   (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */
417 
418   /* set MatInfo */
419   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
420   if (afill < 1.0) afill = 1.0;
421   c->maxnz                     = ci[am];
422   c->nz                        = ci[am];
423   (*C)->info.mallocs           = nspacedouble;
424   (*C)->info.fill_ratio_given  = fill;
425   (*C)->info.fill_ratio_needed = afill;
426 
427 #if defined(PETSC_USE_INFO)
428   if (ci[am]) {
429     ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
430     ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr);
431   } else {
432     ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
433   }
434 #endif
435   PetscFunctionReturn(0);
436 }
437 
438 
439 /* This routine is not used. Should be removed! */
440 #undef __FUNCT__
441 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ"
442 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
443 {
444   PetscErrorCode ierr;
445 
446   PetscFunctionBegin;
447   if (scall == MAT_INITIAL_MATRIX){
448     ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
449   }
450   ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult"
456 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr)
457 {
458   PetscErrorCode      ierr;
459   Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr;
460 
461   PetscFunctionBegin;
462   ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr);
463   ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr);
464   ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr);
465   ierr = PetscFree(multtrans);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans"
471 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
472 {
473   PetscErrorCode      ierr;
474   PetscContainer      container;
475   Mat_MatMatTransMult *multtrans=PETSC_NULL;
476 
477   PetscFunctionBegin;
478   ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr);
479   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
480   ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr);
481   A->ops->destroy   = multtrans->destroy;
482   if (A->ops->destroy) {
483     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
484   }
485   ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ"
491 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
492 {
493   PetscErrorCode      ierr;
494   Mat                 Bt;
495   PetscInt            *bti,*btj;
496   Mat_MatMatTransMult *multtrans;
497   PetscContainer      container;
498   PetscLogDouble      t0,tf,etime2=0.0;
499 
500   PetscFunctionBegin;
501   ierr = PetscGetTime(&t0);CHKERRQ(ierr);
502    /* create symbolic Bt */
503   ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
504   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr);
505 
506   /* get symbolic C=A*Bt */
507   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr);
508 
509   /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
510   ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr);
511 
512   /* attach the supporting struct to C */
513   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
514   ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr);
515   ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr);
516   ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr);
517   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
518 
519   multtrans->usecoloring = PETSC_FALSE;
520   multtrans->destroy = (*C)->ops->destroy;
521   (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
522 
523   ierr = PetscGetTime(&tf);CHKERRQ(ierr);
524   etime2 += tf - t0;
525 
526   ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr);
527   if (multtrans->usecoloring){
528     /* Create MatTransposeColoring from symbolic C=A*B^T */
529     MatTransposeColoring matcoloring;
530     ISColoring           iscoloring;
531     Mat                  Bt_dense,C_dense;
532     PetscLogDouble       etime0=0.0,etime01=0.0,etime1=0.0;
533 
534     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
535     ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr);
536     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
537     etime0 += tf - t0;
538 
539     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
540     ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr);
541     multtrans->matcoloring = matcoloring;
542     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
543     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
544     etime01 += tf - t0;
545 
546     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
547     /* Create Bt_dense and C_dense = A*Bt_dense */
548     ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr);
549     ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr);
550     ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr);
551     ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr);
552     Bt_dense->assembled = PETSC_TRUE;
553     multtrans->Bt_den = Bt_dense;
554 
555     ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr);
556     ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr);
557     ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr);
558     ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr);
559     Bt_dense->assembled = PETSC_TRUE;
560     multtrans->ABt_den = C_dense;
561     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
562     etime1 += tf - t0;
563 
564 #if defined(PETSC_USE_INFO)
565     {
566     Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data;
567     ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
568     ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2);
569     }
570 #endif
571   }
572   /* clean up */
573   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
574   ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr);
575 
576 
577 
578 #if defined(INEFFICIENT_ALGORITHM)
579   /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */
580   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
581   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
582   PetscInt           *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol;
583   PetscInt           am=A->rmap->N,bm=B->rmap->N;
584   PetscInt           i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1];
585   MatScalar          *ca;
586   PetscBT            lnkbt;
587   PetscReal          afill;
588 
589   /* Allocate row pointer array ci  */
590   ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
591   ci[0] = 0;
592 
593   /* Create and initialize a linked list for C columns */
594   nlnk = bm+1;
595   ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr);
596 
597   /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */
598   ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
599   current_space = free_space;
600 
601   /* Determine symbolic info for each row of the product A*B^T: */
602   for (i=0; i<am; i++) {
603     anzi = ai[i+1] - ai[i];
604     cnzi = 0;
605     acol = aj + ai[i];
606     for (j=0; j<bm; j++){
607       bnzj = bi[j+1] - bi[j];
608       bcol= bj + bi[j];
609       /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
610       ka = 0; kb = 0;
611       while (ka < anzi && kb < bnzj){
612         while (acol[ka] < bcol[kb] && ka < anzi) ka++;
613         if (ka == anzi) break;
614         while (acol[ka] > bcol[kb] && kb < bnzj) kb++;
615         if (kb == bnzj) break;
616         if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */
617           index[0] = j;
618           ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr);
619           cnzi++;
620           break;
621         }
622       }
623     }
624 
625     /* If free space is not available, make more free space */
626     /* Double the amount of total space in the list */
627     if (current_space->local_remaining<cnzi) {
628       ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
629       nspacedouble++;
630     }
631 
632     /* Copy data into free space, then initialize lnk */
633     ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
634     current_space->array           += cnzi;
635     current_space->local_used      += cnzi;
636     current_space->local_remaining -= cnzi;
637 
638     ci[i+1] = ci[i] + cnzi;
639   }
640 
641 
642   /* Column indices are in the list of free space.
643      Allocate array cj, copy column indices to cj, and destroy list of free space */
644   ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
645   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
646   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
647 
648   /* Allocate space for ca */
649   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
650   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
651 
652   /* put together the new symbolic matrix */
653   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr);
654 
655   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
656   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
657   c = (Mat_SeqAIJ *)((*C)->data);
658   c->free_a   = PETSC_TRUE;
659   c->free_ij  = PETSC_TRUE;
660   c->nonew    = 0;
661 
662   /* set MatInfo */
663   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
664   if (afill < 1.0) afill = 1.0;
665   c->maxnz                     = ci[am];
666   c->nz                        = ci[am];
667   (*C)->info.mallocs           = nspacedouble;
668   (*C)->info.fill_ratio_given  = fill;
669   (*C)->info.fill_ratio_needed = afill;
670 
671 #if defined(PETSC_USE_INFO)
672   if (ci[am]) {
673     ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
674     ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr);
675   } else {
676     ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
677   }
678 #endif
679 #endif
680   PetscFunctionReturn(0);
681 }
682 
683 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */
684 #undef __FUNCT__
685 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ"
686 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
687 {
688   PetscErrorCode ierr;
689   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
690   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
691   PetscInt       cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
692   PetscLogDouble flops=0.0;
693   MatScalar      *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval;
694   Mat_MatMatTransMult *multtrans;
695   PetscContainer      container;
696 #if defined(USE_ARRAY)
697   MatScalar      *spdot;
698 #endif
699 
700   PetscFunctionBegin;
701   ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr);
702   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
703   ierr  = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr);
704   if (multtrans->usecoloring){
705     MatTransposeColoring  matcoloring = multtrans->matcoloring;
706     Mat                   Bt_dense;
707     PetscInt              m,n;
708     PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0;
709     Mat C_dense = multtrans->ABt_den;
710 
711     Bt_dense = multtrans->Bt_den;
712     ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr);
713 
714     /* Get Bt_dense by Apply MatTransposeColoring to B */
715     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
716     ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr);
717     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
718     etime0 += tf - t0;
719 
720     /* C_dense = A*Bt_dense */
721     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
722     ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr);
723     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
724     etime2 += tf - t0;
725 
726     /* Recover C from C_dense */
727     ierr = PetscGetTime(&t0);CHKERRQ(ierr);
728     ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr);
729     ierr = PetscGetTime(&tf);CHKERRQ(ierr);
730     etime1 += tf - t0;
731 #if defined(PETSC_USE_INFO)
732     ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2);
733 #endif
734     PetscFunctionReturn(0);
735   }
736 
737 #if defined(USE_ARRAY)
738   /* allocate an array for implementing sparse inner-product */
739   ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr);
740   ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr);
741 #endif
742 
743   /* clear old values in C */
744   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
745 
746   for (i=0; i<cm; i++) {
747     anzi = ai[i+1] - ai[i];
748     acol = aj + ai[i];
749     aval = aa + ai[i];
750     cnzi = ci[i+1] - ci[i];
751     ccol = cj + ci[i];
752     cval = ca + ci[i];
753     for (j=0; j<cnzi; j++){
754       brow = ccol[j];
755       bnzj = bi[brow+1] - bi[brow];
756       bcol = bj + bi[brow];
757       bval = ba + bi[brow];
758 
759       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
760 #if defined(USE_ARRAY)
761       /* put ba to spdot array */
762       for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb];
763       /* c(i,j)=A[i,:]*B[j,:]^T */
764       for (nexta=0; nexta<anzi; nexta++){
765         cval[j] += spdot[acol[nexta]]*aval[nexta];
766       }
767       /* zero spdot array */
768       for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0;
769 #else
770       nexta = 0; nextb = 0;
771       while (nexta<anzi && nextb<bnzj){
772         while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++;
773         if (nexta == anzi) break;
774         while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++;
775         if (nextb == bnzj) break;
776         if (acol[nexta] == bcol[nextb]){
777           cval[j] += aval[nexta]*bval[nextb];
778           nexta++; nextb++;
779           flops += 2;
780         }
781       }
782 #endif
783     }
784   }
785   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
786   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
787   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
788 #if defined(USE_ARRAY)
789   ierr = PetscFree(spdot);
790 #endif
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ"
796 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
797   PetscErrorCode ierr;
798 
799   PetscFunctionBegin;
800   if (scall == MAT_INITIAL_MATRIX){
801     ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
802   }
803   ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ"
809 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
810 {
811   PetscErrorCode ierr;
812   Mat            At;
813   PetscInt       *ati,*atj;
814 
815   PetscFunctionBegin;
816   /* create symbolic At */
817   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
818   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
819 
820   /* get symbolic C=At*B */
821   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
822 
823   /* clean up */
824   ierr = MatDestroy(&At);CHKERRQ(ierr);
825   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ"
831 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
832 {
833   PetscErrorCode ierr;
834   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
835   PetscInt       am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
836   PetscInt       cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
837   PetscLogDouble flops=0.0;
838   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
839 
840   PetscFunctionBegin;
841   /* clear old values in C */
842   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
843 
844   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
845   for (i=0;i<am;i++) {
846     bj   = b->j + bi[i];
847     ba   = b->a + bi[i];
848     bnzi = bi[i+1] - bi[i];
849     anzi = ai[i+1] - ai[i];
850     for (j=0; j<anzi; j++) {
851       nextb = 0;
852       crow  = *aj++;
853       cjj   = cj + ci[crow];
854       caj   = ca + ci[crow];
855       /* perform sparse axpy operation.  Note cjj includes bj. */
856       for (k=0; nextb<bnzi; k++) {
857         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
858           caj[k] += (*aa)*(*(ba+nextb));
859           nextb++;
860         }
861       }
862       flops += 2*bnzi;
863       aa++;
864     }
865   }
866 
867   /* Assemble the final matrix and clean up */
868   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
869   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
870   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 EXTERN_C_BEGIN
875 #undef __FUNCT__
876 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense"
877 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
878 {
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   if (scall == MAT_INITIAL_MATRIX){
883     ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr);
884   }
885   ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 EXTERN_C_END
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense"
892 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C)
893 {
894   PetscErrorCode ierr;
895 
896   PetscFunctionBegin;
897   ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr);
898   (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense;
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense"
904 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
905 {
906   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
907   PetscErrorCode ierr;
908   PetscScalar    *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
909   MatScalar      *aa;
910   PetscInt       cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n;
911   PetscInt       am2 = 2*am, am3 = 3*am,  bm4 = 4*bm,colam;
912 
913   PetscFunctionBegin;
914   if (!cm || !cn) PetscFunctionReturn(0);
915   if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm);
916   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
917   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
918   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
919   ierr = MatGetArray(C,&c);CHKERRQ(ierr);
920   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
921   for (col=0; col<cn-4; col += 4){  /* over columns of C */
922     colam = col*am;
923     for (i=0; i<am; i++) {        /* over rows of C in those columns */
924       r1 = r2 = r3 = r4 = 0.0;
925       n   = a->i[i+1] - a->i[i];
926       aj  = a->j + a->i[i];
927       aa  = a->a + a->i[i];
928       for (j=0; j<n; j++) {
929         r1 += (*aa)*b1[*aj];
930         r2 += (*aa)*b2[*aj];
931         r3 += (*aa)*b3[*aj];
932         r4 += (*aa++)*b4[*aj++];
933       }
934       c[colam + i]       = r1;
935       c[colam + am + i]  = r2;
936       c[colam + am2 + i] = r3;
937       c[colam + am3 + i] = r4;
938     }
939     b1 += bm4;
940     b2 += bm4;
941     b3 += bm4;
942     b4 += bm4;
943   }
944   for (;col<cn; col++){     /* over extra columns of C */
945     for (i=0; i<am; i++) {  /* over rows of C in those columns */
946       r1 = 0.0;
947       n   = a->i[i+1] - a->i[i];
948       aj  = a->j + a->i[i];
949       aa  = a->a + a->i[i];
950 
951       for (j=0; j<n; j++) {
952         r1 += (*aa++)*b1[*aj++];
953       }
954       c[col*am + i]     = r1;
955     }
956     b1 += bm;
957   }
958   ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr);
959   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
960   ierr = MatRestoreArray(C,&c);CHKERRQ(ierr);
961   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 /*
967    Note very similar to MatMult_SeqAIJ(), should generate both codes from same base
968 */
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense"
971 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
972 {
973   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
974   PetscErrorCode ierr;
975   PetscScalar    *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4;
976   MatScalar      *aa;
977   PetscInt       cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm;
978   PetscInt       am2 = 2*am, am3 = 3*am,  bm4 = 4*bm,colam,*ridx;
979 
980   PetscFunctionBegin;
981   if (!cm || !cn) PetscFunctionReturn(0);
982   ierr = MatGetArray(B,&b);CHKERRQ(ierr);
983   ierr = MatGetArray(C,&c);CHKERRQ(ierr);
984   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
985 
986   if (a->compressedrow.use){ /* use compressed row format */
987     for (col=0; col<cn-4; col += 4){  /* over columns of C */
988       colam = col*am;
989       arm   = a->compressedrow.nrows;
990       ii    = a->compressedrow.i;
991       ridx  = a->compressedrow.rindex;
992       for (i=0; i<arm; i++) {        /* over rows of C in those columns */
993 	r1 = r2 = r3 = r4 = 0.0;
994 	n   = ii[i+1] - ii[i];
995 	aj  = a->j + ii[i];
996 	aa  = a->a + ii[i];
997 	for (j=0; j<n; j++) {
998 	  r1 += (*aa)*b1[*aj];
999 	  r2 += (*aa)*b2[*aj];
1000 	  r3 += (*aa)*b3[*aj];
1001 	  r4 += (*aa++)*b4[*aj++];
1002 	}
1003 	c[colam       + ridx[i]] += r1;
1004 	c[colam + am  + ridx[i]] += r2;
1005 	c[colam + am2 + ridx[i]] += r3;
1006 	c[colam + am3 + ridx[i]] += r4;
1007       }
1008       b1 += bm4;
1009       b2 += bm4;
1010       b3 += bm4;
1011       b4 += bm4;
1012     }
1013     for (;col<cn; col++){     /* over extra columns of C */
1014       colam = col*am;
1015       arm   = a->compressedrow.nrows;
1016       ii    = a->compressedrow.i;
1017       ridx  = a->compressedrow.rindex;
1018       for (i=0; i<arm; i++) {  /* over rows of C in those columns */
1019 	r1 = 0.0;
1020 	n   = ii[i+1] - ii[i];
1021 	aj  = a->j + ii[i];
1022 	aa  = a->a + ii[i];
1023 
1024 	for (j=0; j<n; j++) {
1025 	  r1 += (*aa++)*b1[*aj++];
1026 	}
1027 	c[col*am + ridx[i]] += r1;
1028       }
1029       b1 += bm;
1030     }
1031   } else {
1032     for (col=0; col<cn-4; col += 4){  /* over columns of C */
1033       colam = col*am;
1034       for (i=0; i<am; i++) {        /* over rows of C in those columns */
1035 	r1 = r2 = r3 = r4 = 0.0;
1036 	n   = a->i[i+1] - a->i[i];
1037 	aj  = a->j + a->i[i];
1038 	aa  = a->a + a->i[i];
1039 	for (j=0; j<n; j++) {
1040 	  r1 += (*aa)*b1[*aj];
1041 	  r2 += (*aa)*b2[*aj];
1042 	  r3 += (*aa)*b3[*aj];
1043 	  r4 += (*aa++)*b4[*aj++];
1044 	}
1045 	c[colam + i]       += r1;
1046 	c[colam + am + i]  += r2;
1047 	c[colam + am2 + i] += r3;
1048 	c[colam + am3 + i] += r4;
1049       }
1050       b1 += bm4;
1051       b2 += bm4;
1052       b3 += bm4;
1053       b4 += bm4;
1054     }
1055     for (;col<cn; col++){     /* over extra columns of C */
1056       for (i=0; i<am; i++) {  /* over rows of C in those columns */
1057 	r1 = 0.0;
1058 	n   = a->i[i+1] - a->i[i];
1059 	aj  = a->j + a->i[i];
1060 	aa  = a->a + a->i[i];
1061 
1062 	for (j=0; j<n; j++) {
1063 	  r1 += (*aa++)*b1[*aj++];
1064 	}
1065 	c[col*am + i]     += r1;
1066       }
1067       b1 += bm;
1068     }
1069   }
1070   ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr);
1071   ierr = MatRestoreArray(B,&b);CHKERRQ(ierr);
1072   ierr = MatRestoreArray(C,&c);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ"
1078 PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1079 {
1080   PetscErrorCode ierr;
1081   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
1082   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
1083   PetscInt       *bi=b->i,*bj=b->j;
1084   PetscInt       m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1085   MatScalar      *btval,*btval_den,*ba=b->a;
1086   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1087 
1088   PetscFunctionBegin;
1089   btval_den=btdense->v;
1090   ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr);
1091   for (k=0; k<ncolors; k++) {
1092     ncolumns = coloring->ncolumns[k];
1093     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1094       col   = *(columns + colorforcol[k] + l);
1095       btcol = bj + bi[col];
1096       btval = ba + bi[col];
1097       anz   = bi[col+1] - bi[col];
1098       for (j=0; j<anz; j++){
1099         brow            = btcol[j];
1100         btval_den[brow] = btval[j];
1101       }
1102     }
1103     btval_den += m;
1104   }
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ"
1110 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1111 {
1112   PetscErrorCode ierr;
1113   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)Csp->data;
1114   PetscInt       k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows;
1115   PetscScalar    *ca_den,*cp_den,*ca=csp->a;
1116   PetscInt       *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow;
1117 
1118   PetscFunctionBegin;
1119   ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr);
1120   ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr);
1121   cp_den = ca_den;
1122   for (k=0; k<ncolors; k++) {
1123     nrows = matcoloring->nrows[k];
1124     row   = rows  + colorforrow[k];
1125     idx   = spidx + colorforrow[k];
1126     for (l=0; l<nrows; l++){
1127       ca[idx[l]] = cp_den[row[l]];
1128     }
1129     cp_den += m;
1130   }
1131   ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 /*
1136  MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from
1137  MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output
1138  spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ().
1139  */
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color"
1142 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
1143 {
1144   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1145   PetscErrorCode ierr;
1146   PetscInt       i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n;
1147   PetscInt       nz = a->i[m],row,*jj,mr,col;
1148   PetscInt       *cspidx;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric");
1155     ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
1156   } else {
1157     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
1158     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
1159     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
1160     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
1161     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr);
1162     jj = a->j;
1163     for (i=0; i<nz; i++) {
1164       collengths[jj[i]]++;
1165     }
1166     cia[0] = oshift;
1167     for (i=0; i<n; i++) {
1168       cia[i+1] = cia[i] + collengths[i];
1169     }
1170     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
1171     jj   = a->j;
1172     for (row=0; row<m; row++) {
1173       mr = a->i[row+1] - a->i[row];
1174       for (i=0; i<mr; i++) {
1175         col = *jj++;
1176         cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
1177         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
1178       }
1179     }
1180     ierr = PetscFree(collengths);CHKERRQ(ierr);
1181     *ia = cia; *ja = cja;
1182     *spidx = cspidx;
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color"
1189 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   if (!ia) PetscFunctionReturn(0);
1195 
1196   ierr = PetscFree(*ia);CHKERRQ(ierr);
1197   ierr = PetscFree(*ja);CHKERRQ(ierr);
1198   ierr = PetscFree(*spidx);CHKERRQ(ierr);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ"
1204 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1205 {
1206   PetscErrorCode ierr;
1207   PetscInt       i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm;
1208   const PetscInt *is;
1209   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1210   IS             *isa;
1211   PetscBool      done;
1212   PetscBool      flg1,flg2;
1213   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1214   PetscInt       *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx;
1215   PetscInt       *colorforcol,*columns,*columns_i;
1216 
1217   PetscFunctionBegin;
1218   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
1219 
1220   /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */
1221   ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr);
1222   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
1223   if (flg1 || flg2) {
1224     ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1225   }
1226 
1227   N         = mat->cmap->N/bs;
1228   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1229   c->N      = mat->cmap->N/bs;
1230   c->m      = mat->rmap->N/bs;
1231   c->rstart = 0;
1232 
1233   c->ncolors = nis;
1234   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
1235   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
1236   ierr       = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr);
1237   ierr       = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr);
1238   colorforrow[0]    = 0;
1239   rows_i            = rows;
1240   columnsforspidx_i = columnsforspidx;
1241 
1242   ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr);
1243   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr);
1244   colorforcol[0] = 0;
1245   columns_i      = columns;
1246 
1247   ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */
1248   if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name);
1249 
1250   cm = c->m;
1251   ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
1252   ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr);
1253   for (i=0; i<nis; i++) {
1254     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
1255     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
1256     c->ncolumns[i] = n;
1257     if (n) {
1258       ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr);
1259     }
1260     colorforcol[i+1] = colorforcol[i] + n;
1261     columns_i       += n;
1262 
1263     /* fast, crude version requires O(N*N) work */
1264     ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr);
1265 
1266     /* loop over columns*/
1267     for (j=0; j<n; j++) {
1268       col     = is[j];
1269       row_idx = cj + ci[col];
1270       m       = ci[col+1] - ci[col];
1271       /* loop over columns marking them in rowhit */
1272       for (k=0; k<m; k++) {
1273         idxhit[*row_idx]   = spidx[ci[col] + k];
1274         rowhit[*row_idx++] = col + 1;
1275       }
1276     }
1277     /* count the number of hits */
1278     nrows = 0;
1279     for (j=0; j<cm; j++) {
1280       if (rowhit[j]) nrows++;
1281     }
1282     c->nrows[i]      = nrows;
1283     colorforrow[i+1] = colorforrow[i] + nrows;
1284 
1285     nrows       = 0;
1286     for (j=0; j<cm; j++) {
1287       if (rowhit[j]) {
1288         rows_i[nrows]            = j;
1289         columnsforspidx_i[nrows] = idxhit[j];
1290         nrows++;
1291       }
1292     }
1293     ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr);
1294     rows_i += nrows; columnsforspidx_i += nrows;
1295   }
1296   ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr);
1297   ierr = PetscFree(rowhit);CHKERRQ(ierr);
1298   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
1299 #if defined(PETSC_USE_DEBUG)
1300   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
1301 #endif
1302 
1303   c->colorforrow     = colorforrow;
1304   c->rows            = rows;
1305   c->columnsforspidx = columnsforspidx;
1306   c->colorforcol     = colorforcol;
1307   c->columns         = columns;
1308   ierr = PetscFree(idxhit);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311