xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision e005ede52eafe2fed2813abb7a7eb3df04d5f886)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_PtAPMPI          *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90 
91     ierr = MatDestroyMatrices(1,&ptap->ploc);CHKERRQ(ierr);
92     ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr);
93 
94     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
95     ierr = PetscObjectCompose((PetscObject)A,"MatPtApMPI",0);CHKERRQ(ierr);
96   }
97   ierr = PetscFree(ptap);CHKERRQ(ierr);
98 
99   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
105 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
106 {
107   PetscErrorCode       ierr;
108   Mat                  P_loc,P_oth,*ploc;
109   PetscInt             prstart,low;
110   IS                   isrow,iscol;
111   Mat_PtAPMPI          *ptap;
112   PetscObjectContainer container;
113 
114   PetscFunctionBegin;
115   if (scall == MAT_INITIAL_MATRIX){
116     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
117     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
118     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr);
119 
120     /* get P_loc by taking all local rows of P */
121     ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
122     ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
123     ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
124     ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr);
125     P_loc = ploc[0];
126     ierr = ISDestroy(isrow);CHKERRQ(ierr);
127     ierr = ISDestroy(iscol);CHKERRQ(ierr);
128 
129     /* attach the supporting struct to P for reuse */
130     ierr = PetscNew(Mat_PtAPMPI,&ptap);CHKERRQ(ierr);
131     ptap->ploc    = ploc;
132     ptap->P_oth   = P_oth;
133     ptap->prstart = prstart;
134     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
135     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
136     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
137     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
138 
139     /* now, compute symbolic product */
140     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
141     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
142   } else if (scall == MAT_REUSE_MATRIX){
143     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
144     if (container) {
145       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
146     } else {
147       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
148     }
149 
150     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
151     /* improve! */
152     ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr);
153     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&ptap->P_oth);CHKERRQ(ierr);
154 
155     /* get P_loc by taking all local rows of P */
156     ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
157     ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
158     ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
159     ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_REUSE_MATRIX,&ptap->ploc);CHKERRQ(ierr);
160     ierr = ISDestroy(isrow);CHKERRQ(ierr);
161     ierr = ISDestroy(iscol);CHKERRQ(ierr);
162   } else {
163     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
164   }
165   /* now, compute numeric product */
166   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
167   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
168   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
169 
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
175 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
176 {
177   PetscErrorCode       ierr;
178   Mat                  C_seq;
179   Mat_PtAPMPI          *ptap;
180   PetscObjectContainer container;
181 
182   PetscFunctionBegin;
183 
184   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
185   if (container) {
186     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
187   } else {
188     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
189   }
190   /* compute C_seq = P_loc^T * A_loc * P_seq */
191   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,fill,&C_seq);CHKERRQ(ierr);
192 
193   /* add C_seq into mpi C */
194   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
195 
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
201 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
202 {
203   PetscErrorCode       ierr;
204   Mat_Merge_SeqsToMPI  *merge;
205   Mat_PtAPMPI          *ptap;
206   PetscObjectContainer cont_merge,cont_ptap;
207 
208   PetscFunctionBegin;
209   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
210   if (cont_merge) {
211     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
212   } else {
213     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
214   }
215   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
216   if (cont_ptap) {
217     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
218   } else {
219     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
220   }
221   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,merge->C_seq);CHKERRQ(ierr);
222   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
228 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
229 {
230   PetscErrorCode ierr;
231 
232   PetscFunctionBegin;
233   if (scall == MAT_INITIAL_MATRIX){
234     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
235     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
236     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
237   }
238   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
239   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
240   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
244 /*
245    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
246 
247    Collective on Mat
248 
249    Input Parameters:
250 +  A - the matrix
251 -  P - the projection matrix
252 
253    Output Parameters:
254 .  C - the (i,j) structure of the product matrix
255 
256    Notes:
257    C will be created and must be destroyed by the user with MatDestroy().
258 
259    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
260    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
261    this (i,j) structure by calling MatPtAPNumeric().
262 
263    Level: intermediate
264 
265 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
266 */
267 #undef __FUNCT__
268 #define __FUNCT__ "MatPtAPSymbolic"
269 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
270 {
271   PetscErrorCode ierr;
272   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
273   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
274 
275   PetscFunctionBegin;
276 
277   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
278   PetscValidType(A,1);
279   MatPreallocated(A);
280   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
281   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
282 
283   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
284   PetscValidType(P,2);
285   MatPreallocated(P);
286   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
287   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
288 
289   PetscValidPointer(C,3);
290 
291   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
292   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
293 
294   /* For now, we do not dispatch based on the type of A and P */
295   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
296   fA = A->ops->ptapsymbolic;
297   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
298   fP = P->ops->ptapsymbolic;
299   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
300   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
301 
302   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
303   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
304   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
305 
306   PetscFunctionReturn(0);
307 }
308 
309 typedef struct {
310   Mat    symAP;
311 } Mat_PtAPstruct;
312 
313 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
317 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
318 {
319   PetscErrorCode    ierr;
320   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
321 
322   PetscFunctionBegin;
323   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
324   ierr = PetscFree(ptap);CHKERRQ(ierr);
325   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
331 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
332 {
333   PetscErrorCode ierr;
334   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
335   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
336   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
337   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
338   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
339   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
340   MatScalar      *ca;
341   PetscBT        lnkbt;
342 
343   PetscFunctionBegin;
344   /* Get ij structure of P^T */
345   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
346   ptJ=ptj;
347 
348   /* Allocate ci array, arrays for fill computation and */
349   /* free space for accumulating nonzero column info */
350   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
351   ci[0] = 0;
352 
353   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
354   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
355   ptasparserow = ptadenserow  + an;
356 
357   /* create and initialize a linked list */
358   nlnk = pn+1;
359   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
360 
361   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
362   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
363   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
364   current_space = free_space;
365 
366   /* Determine symbolic info for each row of C: */
367   for (i=0;i<pn;i++) {
368     ptnzi  = pti[i+1] - pti[i];
369     ptanzi = 0;
370     /* Determine symbolic row of PtA: */
371     for (j=0;j<ptnzi;j++) {
372       arow = *ptJ++;
373       anzj = ai[arow+1] - ai[arow];
374       ajj  = aj + ai[arow];
375       for (k=0;k<anzj;k++) {
376         if (!ptadenserow[ajj[k]]) {
377           ptadenserow[ajj[k]]    = -1;
378           ptasparserow[ptanzi++] = ajj[k];
379         }
380       }
381     }
382       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
383     ptaj = ptasparserow;
384     cnzi   = 0;
385     for (j=0;j<ptanzi;j++) {
386       prow = *ptaj++;
387       pnzj = pi[prow+1] - pi[prow];
388       pjj  = pj + pi[prow];
389       /* add non-zero cols of P into the sorted linked list lnk */
390       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
391       cnzi += nlnk;
392     }
393 
394     /* If free space is not available, make more free space */
395     /* Double the amount of total space in the list */
396     if (current_space->local_remaining<cnzi) {
397       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
398     }
399 
400     /* Copy data into free space, and zero out denserows */
401     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
402     current_space->array           += cnzi;
403     current_space->local_used      += cnzi;
404     current_space->local_remaining -= cnzi;
405 
406     for (j=0;j<ptanzi;j++) {
407       ptadenserow[ptasparserow[j]] = 0;
408     }
409     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
410     /*        For now, we will recompute what is needed. */
411     ci[i+1] = ci[i] + cnzi;
412   }
413   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
414   /* Allocate space for cj, initialize cj, and */
415   /* destroy list of free space and other temporary array(s) */
416   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
417   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
418   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
419   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
420 
421   /* Allocate space for ca */
422   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
423   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
424 
425   /* put together the new matrix */
426   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
427 
428   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
429   /* Since these are PETSc arrays, change flags to free them as necessary. */
430   c = (Mat_SeqAIJ *)((*C)->data);
431   c->freedata = PETSC_TRUE;
432   c->nonew    = 0;
433 
434   /* Clean up. */
435   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
436 
437   PetscFunctionReturn(0);
438 }
439 
440 #include "src/mat/impls/maij/maij.h"
441 EXTERN_C_BEGIN
442 #undef __FUNCT__
443 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
444 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
445 {
446   /* This routine requires testing -- I don't think it works. */
447   PetscErrorCode ierr;
448   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
449   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
450   Mat            P=pp->AIJ;
451   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
452   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
453   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
454   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
455   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
456   MatScalar      *ca;
457 
458   PetscFunctionBegin;
459   /* Start timer */
460   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
461 
462   /* Get ij structure of P^T */
463   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
464 
465   /* Allocate ci array, arrays for fill computation and */
466   /* free space for accumulating nonzero column info */
467   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
468   ci[0] = 0;
469 
470   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
471   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
472   ptasparserow = ptadenserow  + an;
473   denserow     = ptasparserow + an;
474   sparserow    = denserow     + pn;
475 
476   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
477   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
478   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
479   current_space = free_space;
480 
481   /* Determine symbolic info for each row of C: */
482   for (i=0;i<pn/ppdof;i++) {
483     ptnzi  = pti[i+1] - pti[i];
484     ptanzi = 0;
485     ptJ    = ptj + pti[i];
486     for (dof=0;dof<ppdof;dof++) {
487     /* Determine symbolic row of PtA: */
488       for (j=0;j<ptnzi;j++) {
489         arow = ptJ[j] + dof;
490         anzj = ai[arow+1] - ai[arow];
491         ajj  = aj + ai[arow];
492         for (k=0;k<anzj;k++) {
493           if (!ptadenserow[ajj[k]]) {
494             ptadenserow[ajj[k]]    = -1;
495             ptasparserow[ptanzi++] = ajj[k];
496           }
497         }
498       }
499       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
500       ptaj = ptasparserow;
501       cnzi   = 0;
502       for (j=0;j<ptanzi;j++) {
503         pdof = *ptaj%dof;
504         prow = (*ptaj++)/dof;
505         pnzj = pi[prow+1] - pi[prow];
506         pjj  = pj + pi[prow];
507         for (k=0;k<pnzj;k++) {
508           if (!denserow[pjj[k]+pdof]) {
509             denserow[pjj[k]+pdof] = -1;
510             sparserow[cnzi++]     = pjj[k]+pdof;
511           }
512         }
513       }
514 
515       /* sort sparserow */
516       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
517 
518       /* If free space is not available, make more free space */
519       /* Double the amount of total space in the list */
520       if (current_space->local_remaining<cnzi) {
521         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
522       }
523 
524       /* Copy data into free space, and zero out denserows */
525       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
526       current_space->array           += cnzi;
527       current_space->local_used      += cnzi;
528       current_space->local_remaining -= cnzi;
529 
530       for (j=0;j<ptanzi;j++) {
531         ptadenserow[ptasparserow[j]] = 0;
532       }
533       for (j=0;j<cnzi;j++) {
534         denserow[sparserow[j]] = 0;
535       }
536       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
537       /*        For now, we will recompute what is needed. */
538       ci[i+1+dof] = ci[i+dof] + cnzi;
539     }
540   }
541   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
542   /* Allocate space for cj, initialize cj, and */
543   /* destroy list of free space and other temporary array(s) */
544   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
545   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
546   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
547 
548   /* Allocate space for ca */
549   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
550   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
551 
552   /* put together the new matrix */
553   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
554 
555   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
556   /* Since these are PETSc arrays, change flags to free them as necessary. */
557   c = (Mat_SeqAIJ *)((*C)->data);
558   c->freedata = PETSC_TRUE;
559   c->nonew    = 0;
560 
561   /* Clean up. */
562   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
563 
564   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
565   PetscFunctionReturn(0);
566 }
567 EXTERN_C_END
568 
569 /*
570    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
571 
572    Collective on Mat
573 
574    Input Parameters:
575 +  A - the matrix
576 -  P - the projection matrix
577 
578    Output Parameters:
579 .  C - the product matrix
580 
581    Notes:
582    C must have been created by calling MatPtAPSymbolic and must be destroyed by
583    the user using MatDeatroy().
584 
585    This routine is currently only implemented for pairs of AIJ matrices and classes
586    which inherit from AIJ.  C will be of type MATAIJ.
587 
588    Level: intermediate
589 
590 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
591 */
592 #undef __FUNCT__
593 #define __FUNCT__ "MatPtAPNumeric"
594 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
595 {
596   PetscErrorCode ierr;
597   PetscErrorCode (*fA)(Mat,Mat,Mat);
598   PetscErrorCode (*fP)(Mat,Mat,Mat);
599 
600   PetscFunctionBegin;
601 
602   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
603   PetscValidType(A,1);
604   MatPreallocated(A);
605   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
606   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
607 
608   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
609   PetscValidType(P,2);
610   MatPreallocated(P);
611   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
612   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
613 
614   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
615   PetscValidType(C,3);
616   MatPreallocated(C);
617   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
618   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
619 
620   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
621   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
622   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
623   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
624 
625   /* For now, we do not dispatch based on the type of A and P */
626   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
627   fA = A->ops->ptapnumeric;
628   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
629   fP = P->ops->ptapnumeric;
630   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
631   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
632 
633   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
634   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
635   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
636 
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
642 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
643 {
644   PetscErrorCode ierr;
645   PetscInt       flops=0;
646   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
647   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
648   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
649   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
650   PetscInt       *ci=c->i,*cj=c->j,*cjj;
651   PetscInt       am=A->M,cn=C->N,cm=C->M;
652   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
653   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
654 
655   PetscFunctionBegin;
656   /* Allocate temporary array for storage of one row of A*P */
657   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
658   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
659 
660   apj      = (PetscInt *)(apa + cn);
661   apjdense = apj + cn;
662 
663   /* Clear old values in C */
664   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
665 
666   for (i=0;i<am;i++) {
667     /* Form sparse row of A*P */
668     anzi  = ai[i+1] - ai[i];
669     apnzj = 0;
670     for (j=0;j<anzi;j++) {
671       prow = *aj++;
672       pnzj = pi[prow+1] - pi[prow];
673       pjj  = pj + pi[prow];
674       paj  = pa + pi[prow];
675       for (k=0;k<pnzj;k++) {
676         if (!apjdense[pjj[k]]) {
677           apjdense[pjj[k]] = -1;
678           apj[apnzj++]     = pjj[k];
679         }
680         apa[pjj[k]] += (*aa)*paj[k];
681       }
682       flops += 2*pnzj;
683       aa++;
684     }
685 
686     /* Sort the j index array for quick sparse axpy. */
687     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
688 
689     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
690     pnzi = pi[i+1] - pi[i];
691     for (j=0;j<pnzi;j++) {
692       nextap = 0;
693       crow   = *pJ++;
694       cjj    = cj + ci[crow];
695       caj    = ca + ci[crow];
696       /* Perform sparse axpy operation.  Note cjj includes apj. */
697       for (k=0;nextap<apnzj;k++) {
698         if (cjj[k]==apj[nextap]) {
699           caj[k] += (*pA)*apa[apj[nextap++]];
700         }
701       }
702       flops += 2*apnzj;
703       pA++;
704     }
705 
706     /* Zero the current row info for A*P */
707     for (j=0;j<apnzj;j++) {
708       apa[apj[j]]      = 0.;
709       apjdense[apj[j]] = 0;
710     }
711   }
712 
713   /* Assemble the final matrix and clean up */
714   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
716   ierr = PetscFree(apa);CHKERRQ(ierr);
717   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
718 
719   PetscFunctionReturn(0);
720 }
721 
722 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
723 static PetscEvent logkey_matptapnumeric_local = 0;
724 #undef __FUNCT__
725 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
726 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
727 {
728   PetscErrorCode ierr;
729   PetscInt       flops=0;
730   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
731   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
732   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
733   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
734   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
735   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
736   PetscInt       *pJ=pj_loc,*pjj;
737   PetscInt       *ci=c->i,*cj=c->j,*cjj;
738   PetscInt       am=A->m,cn=C->N,cm=C->M;
739   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
740   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
741   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
742 
743   PetscFunctionBegin;
744   if (!logkey_matptapnumeric_local) {
745     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
746   }
747   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
748 
749   /* Allocate temporary array for storage of one row of A*P */
750   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
751   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
752   apj      = (PetscInt *)(apa + cn);
753   apjdense = apj + cn;
754 
755   /* Clear old values in C */
756   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
757 
758   for (i=0;i<am;i++) {
759     /* Form i-th sparse row of A*P */
760      apnzj = 0;
761     /* diagonal portion of A */
762     anzi  = adi[i+1] - adi[i];
763     for (j=0;j<anzi;j++) {
764       prow = *adj;
765       adj++;
766       pnzj = pi_loc[prow+1] - pi_loc[prow];
767       pjj  = pj_loc + pi_loc[prow];
768       paj  = pa_loc + pi_loc[prow];
769       for (k=0;k<pnzj;k++) {
770         if (!apjdense[pjj[k]]) {
771           apjdense[pjj[k]] = -1;
772           apj[apnzj++]     = pjj[k];
773         }
774         apa[pjj[k]] += (*ada)*paj[k];
775       }
776       flops += 2*pnzj;
777       ada++;
778     }
779     /* off-diagonal portion of A */
780     anzi  = aoi[i+1] - aoi[i];
781     for (j=0;j<anzi;j++) {
782       col = a->garray[*aoj];
783       if (col < cstart){
784         prow = *aoj;
785       } else if (col >= cend){
786         prow = *aoj;
787       } else {
788         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
789       }
790       aoj++;
791       pnzj = pi_oth[prow+1] - pi_oth[prow];
792       pjj  = pj_oth + pi_oth[prow];
793       paj  = pa_oth + pi_oth[prow];
794       for (k=0;k<pnzj;k++) {
795         if (!apjdense[pjj[k]]) {
796           apjdense[pjj[k]] = -1;
797           apj[apnzj++]     = pjj[k];
798         }
799         apa[pjj[k]] += (*aoa)*paj[k];
800       }
801       flops += 2*pnzj;
802       aoa++;
803     }
804     /* Sort the j index array for quick sparse axpy. */
805     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
806 
807     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
808     pnzi = pi_loc[i+1] - pi_loc[i];
809     for (j=0;j<pnzi;j++) {
810       nextap = 0;
811       crow   = *pJ++;
812       cjj    = cj + ci[crow];
813       caj    = ca + ci[crow];
814       /* Perform sparse axpy operation.  Note cjj includes apj. */
815       for (k=0;nextap<apnzj;k++) {
816         if (cjj[k]==apj[nextap]) {
817           caj[k] += (*pA)*apa[apj[nextap++]];
818         }
819       }
820       flops += 2*apnzj;
821       pA++;
822     }
823 
824     /* Zero the current row info for A*P */
825     for (j=0;j<apnzj;j++) {
826       apa[apj[j]]      = 0.;
827       apjdense[apj[j]] = 0;
828     }
829   }
830 
831   /* Assemble the final matrix and clean up */
832   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
834   ierr = PetscFree(apa);CHKERRQ(ierr);
835   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
836   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
837   PetscFunctionReturn(0);
838 }
839 static PetscEvent logkey_matptapsymbolic_local = 0;
840 #undef __FUNCT__
841 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
842 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
843 {
844   PetscErrorCode ierr;
845   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
846   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
847   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
848   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
849   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
850   PetscInt       *ci,*cj,*ptaj;
851   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
852   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
853   PetscInt       pm=P_loc->m,nlnk,*lnk;
854   MatScalar      *ca;
855   PetscBT        lnkbt;
856   PetscInt       prend,nprow_loc,nprow_oth;
857   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
858   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
859   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
860 
861   PetscFunctionBegin;
862   if (!logkey_matptapsymbolic_local) {
863     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
864   }
865   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
866 
867   prend = prstart + pm;
868 
869   /* get ij structure of P_loc^T */
870   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
871   ptJ=ptj;
872 
873   /* Allocate ci array, arrays for fill computation and */
874   /* free space for accumulating nonzero column info */
875   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
876   ci[0] = 0;
877 
878   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
879   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
880   ptasparserow_loc = ptadenserow_loc + an;
881   ptadenserow_oth  = ptasparserow_loc + an;
882   ptasparserow_oth = ptadenserow_oth + an;
883 
884   /* create and initialize a linked list */
885   nlnk = pN+1;
886   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
887 
888   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
889   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
890   nnz           = adi[am] + aoi[am];
891   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
892   current_space = free_space;
893 
894   /* determine symbolic info for each row of C: */
895   for (i=0;i<pN;i++) {
896     ptnzi  = pti[i+1] - pti[i];
897     nprow_loc = 0; nprow_oth = 0;
898     /* i-th row of symbolic P_loc^T*A_loc: */
899     for (j=0;j<ptnzi;j++) {
900       arow = *ptJ++;
901       /* diagonal portion of A */
902       anzj = adi[arow+1] - adi[arow];
903       ajj  = adj + adi[arow];
904       for (k=0;k<anzj;k++) {
905         col = ajj[k]+prstart;
906         if (!ptadenserow_loc[col]) {
907           ptadenserow_loc[col]    = -1;
908           ptasparserow_loc[nprow_loc++] = col;
909         }
910       }
911       /* off-diagonal portion of A */
912       anzj = aoi[arow+1] - aoi[arow];
913       ajj  = aoj + aoi[arow];
914       for (k=0;k<anzj;k++) {
915         col = a->garray[ajj[k]];  /* global col */
916         if (col < cstart){
917           col = ajj[k];
918         } else if (col >= cend){
919           col = ajj[k] + pm;
920         } else {
921           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
922         }
923         if (!ptadenserow_oth[col]) {
924           ptadenserow_oth[col]    = -1;
925           ptasparserow_oth[nprow_oth++] = col;
926         }
927       }
928     }
929 
930     /* using symbolic info of local PtA, determine symbolic info for row of C: */
931     cnzi   = 0;
932     /* rows of P_loc */
933     ptaj = ptasparserow_loc;
934     for (j=0; j<nprow_loc; j++) {
935       prow = *ptaj++;
936       prow -= prstart; /* rm */
937       pnzj = pi_loc[prow+1] - pi_loc[prow];
938       pjj  = pj_loc + pi_loc[prow];
939       /* add non-zero cols of P into the sorted linked list lnk */
940       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
941       cnzi += nlnk;
942     }
943     /* rows of P_oth */
944     ptaj = ptasparserow_oth;
945     for (j=0; j<nprow_oth; j++) {
946       prow = *ptaj++;
947       if (prow >= prend) prow -= pm; /* rm */
948       pnzj = pi_oth[prow+1] - pi_oth[prow];
949       pjj  = pj_oth + pi_oth[prow];
950       /* add non-zero cols of P into the sorted linked list lnk */
951       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
952       cnzi += nlnk;
953     }
954 
955     /* If free space is not available, make more free space */
956     /* Double the amount of total space in the list */
957     if (current_space->local_remaining<cnzi) {
958       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
959     }
960 
961     /* Copy data into free space, and zero out denserows */
962     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
963     current_space->array           += cnzi;
964     current_space->local_used      += cnzi;
965     current_space->local_remaining -= cnzi;
966 
967     for (j=0;j<nprow_loc; j++) {
968       ptadenserow_loc[ptasparserow_loc[j]] = 0;
969     }
970     for (j=0;j<nprow_oth; j++) {
971       ptadenserow_oth[ptasparserow_oth[j]] = 0;
972     }
973 
974     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
975     /*        For now, we will recompute what is needed. */
976     ci[i+1] = ci[i] + cnzi;
977   }
978   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
979   /* Allocate space for cj, initialize cj, and */
980   /* destroy list of free space and other temporary array(s) */
981   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
982   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
983   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
984   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
985 
986   /* Allocate space for ca */
987   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
988   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
989 
990   /* put together the new matrix */
991   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
992 
993   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
994   /* Since these are PETSc arrays, change flags to free them as necessary. */
995   c = (Mat_SeqAIJ *)((*C)->data);
996   c->freedata = PETSC_TRUE;
997   c->nonew    = 0;
998 
999   /* Clean up. */
1000   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1001 
1002   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005