xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 4bf112e79d3f87ddad6dbf7b300a6f889c1e7d34)
1 /*
2   Defines projective product routines where A is an AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
75 #undef __FUNCT__
76 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
77 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
78 {
79   PetscErrorCode    ierr;
80   Mat               AP,AP_seq,P_seq,A_loc,C_seq;
81   /* Mat_MPIAIJ        *p = (Mat_MPIAIJ*)P->data; */
82   Mat_MatMatMultMPI *mult;
83 
84   int               prstart,prend,m=P->m;
85   int               rank,prid=10;
86   IS  isrow,iscol;
87   Mat P_subseq,*psubseq;
88 
89   PetscFunctionBegin;
90   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
91 
92   /* compute symbolic and numeric AP = A*P */
93   ierr = MatMatMult_MPIAIJ_MPIAIJ(A,P,scall,fill,&AP);CHKERRQ(ierr);
94   mult = (Mat_MatMatMultMPI*)AP->spptr;
95   P_seq = mult->bseq[0]; /* = submatrix of P by taking rows of P that equal to nonzero col of A */
96   A_loc = mult->aseq[0]; /* = submatrix of A by taking all local rows of A */
97   AP_seq  = mult->C_seq;
98 
99   if (rank == prid){
100     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_loc: %d, %d\n",rank,A_loc->m,A_loc->n);CHKERRQ(ierr);
101     ierr = MatView(A_loc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
102     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] P_seq: %d, %d\n",rank,P_seq->m,P_seq->n);CHKERRQ(ierr);
103     ierr = MatView(P_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
104     ierr = PetscPrintf(PETSC_COMM_SELF," [%d]  AP_seq: %d, %d\n",rank,AP_seq->m,AP_seq->n);
105     ierr = MatView(AP_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
106   }
107 
108   prstart = mult->brstart;
109   prend   = prstart + m;
110   /*
111   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] prstart: %d, prend: %d, dim of P_seq: %d, %d\n",rank,prstart,prend,P_seq->m,P_seq->n);
112   */
113 
114     /* get seq matrix P_subseq by taking local rows of P */
115   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
116   ierr = ISCreateStride(PETSC_COMM_SELF,P_seq->n,0,1,&iscol);CHKERRQ(ierr);
117   ierr = MatGetSubMatrices(P_seq,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psubseq);CHKERRQ(ierr);
118   P_subseq = psubseq[0];
119   ierr = ISDestroy(isrow);CHKERRQ(ierr);
120   ierr = ISDestroy(iscol);CHKERRQ(ierr);
121 
122   /* compute P_subseq^T*AP_seq */
123   ierr = MatMatMultTranspose_SeqAIJ_SeqAIJ(P_subseq,AP_seq,scall,fill,&C_seq);CHKERRQ(ierr);
124   if (rank == prid){
125     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] dim of P_subseq: %d, %d; AP_seq: %d, %d\n",rank,P_subseq->m,P_subseq->n,AP_seq->m,AP_seq->n);
126     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] C_seq: %d, %d\n",rank,C_seq->m,C_seq->n);CHKERRQ(ierr);
127     ierr = MatView(C_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
128   }
129   ierr = MatDestroyMatrices(1,&psubseq);CHKERRQ(ierr);
130 
131   /* ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr); */
132   /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] C_seq dim: %d, %d\n",rank,C_seq->m,C_seq->n); */
133 
134   /* add C_seq into C */
135   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr);
136 
137   /* clean up */
138   ierr = MatDestroy(AP);CHKERRQ(ierr);
139   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
140 
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
146 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
147 {
148 
149   PetscFunctionBegin;
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
155 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
156 {
157 
158   PetscFunctionBegin;
159   PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
164 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
165 {
166   PetscErrorCode ierr;
167   PetscFunctionBegin;
168   if (scall == MAT_INITIAL_MATRIX){
169     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
170     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
171     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
172   }
173   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
174   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
175   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatPtAPSymbolic"
181 /*
182    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
183 
184    Collective on Mat
185 
186    Input Parameters:
187 +  A - the matrix
188 -  P - the projection matrix
189 
190    Output Parameters:
191 .  C - the (i,j) structure of the product matrix
192 
193    Notes:
194    C will be created and must be destroyed by the user with MatDestroy().
195 
196    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
197    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
198    this (i,j) structure by calling MatPtAPNumeric().
199 
200    Level: intermediate
201 
202 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
203 */
204 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
205   PetscErrorCode ierr;
206   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
207   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
208 
209   PetscFunctionBegin;
210 
211   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
212   PetscValidType(A,1);
213   MatPreallocated(A);
214   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
215   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
216 
217   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
218   PetscValidType(P,2);
219   MatPreallocated(P);
220   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
221   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
222 
223   PetscValidPointer(C,3);
224 
225   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
226   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
227 
228   /* For now, we do not dispatch based on the type of A and P */
229   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
230   fA = A->ops->ptapsymbolic;
231   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
232   fP = P->ops->ptapsymbolic;
233   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
234   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
235 
236   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
237   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
238   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
239 
240   PetscFunctionReturn(0);
241 }
242 
/* Per-matrix PtAP context, stored in Mat->spptr (presumably attached by a
   PtAP setup routine not visible in this file — see MatDestroy_SeqAIJ_PtAP,
   which reads it back and frees it). */
typedef struct {
  Mat    symAP;   /* saved A*P product; destroyed with the owning matrix */
} Mat_PtAPstruct;

/* Base SeqAIJ destroy routine; chained to from MatDestroy_SeqAIJ_PtAP. */
EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
251 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
252 {
253   PetscErrorCode    ierr;
254   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
255 
256   PetscFunctionBegin;
257   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
258   ierr = PetscFree(ptap);CHKERRQ(ierr);
259   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
265 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
266   PetscErrorCode ierr;
267   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
268   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
269   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
270   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
271   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
272   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
273   MatScalar      *ca;
274 
275   PetscFunctionBegin;
276   /* Get ij structure of P^T */
277   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
278   ptJ=ptj;
279 
280   /* Allocate ci array, arrays for fill computation and */
281   /* free space for accumulating nonzero column info */
282   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
283   ci[0] = 0;
284 
285   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
286   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
287   ptasparserow = ptadenserow  + an;
288   denserow     = ptasparserow + an;
289   sparserow    = denserow     + pn;
290 
291   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
292   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
293   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
294   current_space = free_space;
295 
296   /* Determine symbolic info for each row of C: */
297   for (i=0;i<pn;i++) {
298     ptnzi  = pti[i+1] - pti[i];
299     ptanzi = 0;
300     /* Determine symbolic row of PtA: */
301     for (j=0;j<ptnzi;j++) {
302       arow = *ptJ++;
303       anzj = ai[arow+1] - ai[arow];
304       ajj  = aj + ai[arow];
305       for (k=0;k<anzj;k++) {
306         if (!ptadenserow[ajj[k]]) {
307           ptadenserow[ajj[k]]    = -1;
308           ptasparserow[ptanzi++] = ajj[k];
309         }
310       }
311     }
312       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
313     ptaj = ptasparserow;
314     cnzi   = 0;
315     for (j=0;j<ptanzi;j++) {
316       prow = *ptaj++;
317       pnzj = pi[prow+1] - pi[prow];
318       pjj  = pj + pi[prow];
319       for (k=0;k<pnzj;k++) {
320         if (!denserow[pjj[k]]) {
321             denserow[pjj[k]]  = -1;
322             sparserow[cnzi++] = pjj[k];
323         }
324       }
325     }
326 
327     /* sort sparserow */
328     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
329 
330     /* If free space is not available, make more free space */
331     /* Double the amount of total space in the list */
332     if (current_space->local_remaining<cnzi) {
333       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
334     }
335 
336     /* Copy data into free space, and zero out denserows */
337     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
338     current_space->array           += cnzi;
339     current_space->local_used      += cnzi;
340     current_space->local_remaining -= cnzi;
341 
342     for (j=0;j<ptanzi;j++) {
343       ptadenserow[ptasparserow[j]] = 0;
344     }
345     for (j=0;j<cnzi;j++) {
346       denserow[sparserow[j]] = 0;
347     }
348       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
349       /*        For now, we will recompute what is needed. */
350     ci[i+1] = ci[i] + cnzi;
351   }
352   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
353   /* Allocate space for cj, initialize cj, and */
354   /* destroy list of free space and other temporary array(s) */
355   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
356   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
357   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
358 
359   /* Allocate space for ca */
360   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
361   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
362 
363   /* put together the new matrix */
364   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
365 
366   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
367   /* Since these are PETSc arrays, change flags to free them as necessary. */
368   c = (Mat_SeqAIJ *)((*C)->data);
369   c->freedata = PETSC_TRUE;
370   c->nonew    = 0;
371 
372   /* Clean up. */
373   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
374 
375   PetscFunctionReturn(0);
376 }
377 
378 #include "src/mat/impls/maij/maij.h"
379 EXTERN_C_BEGIN
380 #undef __FUNCT__
381 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
382 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
383   /* This routine requires testing -- I don't think it works. */
384   PetscErrorCode ierr;
385   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
386   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
387   Mat            P=pp->AIJ;
388   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
389   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
390   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
391   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
392   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
393   MatScalar      *ca;
394 
395   PetscFunctionBegin;
396   /* Start timer */
397   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
398 
399   /* Get ij structure of P^T */
400   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
401 
402   /* Allocate ci array, arrays for fill computation and */
403   /* free space for accumulating nonzero column info */
404   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
405   ci[0] = 0;
406 
407   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
408   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
409   ptasparserow = ptadenserow  + an;
410   denserow     = ptasparserow + an;
411   sparserow    = denserow     + pn;
412 
413   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
414   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
415   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
416   current_space = free_space;
417 
418   /* Determine symbolic info for each row of C: */
419   for (i=0;i<pn/ppdof;i++) {
420     ptnzi  = pti[i+1] - pti[i];
421     ptanzi = 0;
422     ptJ    = ptj + pti[i];
423     for (dof=0;dof<ppdof;dof++) {
424     /* Determine symbolic row of PtA: */
425       for (j=0;j<ptnzi;j++) {
426         arow = ptJ[j] + dof;
427         anzj = ai[arow+1] - ai[arow];
428         ajj  = aj + ai[arow];
429         for (k=0;k<anzj;k++) {
430           if (!ptadenserow[ajj[k]]) {
431             ptadenserow[ajj[k]]    = -1;
432             ptasparserow[ptanzi++] = ajj[k];
433           }
434         }
435       }
436       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
437       ptaj = ptasparserow;
438       cnzi   = 0;
439       for (j=0;j<ptanzi;j++) {
440         pdof = *ptaj%dof;
441         prow = (*ptaj++)/dof;
442         pnzj = pi[prow+1] - pi[prow];
443         pjj  = pj + pi[prow];
444         for (k=0;k<pnzj;k++) {
445           if (!denserow[pjj[k]+pdof]) {
446             denserow[pjj[k]+pdof] = -1;
447             sparserow[cnzi++]     = pjj[k]+pdof;
448           }
449         }
450       }
451 
452       /* sort sparserow */
453       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
454 
455       /* If free space is not available, make more free space */
456       /* Double the amount of total space in the list */
457       if (current_space->local_remaining<cnzi) {
458         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
459       }
460 
461       /* Copy data into free space, and zero out denserows */
462       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
463       current_space->array           += cnzi;
464       current_space->local_used      += cnzi;
465       current_space->local_remaining -= cnzi;
466 
467       for (j=0;j<ptanzi;j++) {
468         ptadenserow[ptasparserow[j]] = 0;
469       }
470       for (j=0;j<cnzi;j++) {
471         denserow[sparserow[j]] = 0;
472       }
473       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
474       /*        For now, we will recompute what is needed. */
475       ci[i+1+dof] = ci[i+dof] + cnzi;
476     }
477   }
478   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
479   /* Allocate space for cj, initialize cj, and */
480   /* destroy list of free space and other temporary array(s) */
481   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
482   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
483   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
484 
485   /* Allocate space for ca */
486   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
487   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
488 
489   /* put together the new matrix */
490   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
491 
492   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
493   /* Since these are PETSc arrays, change flags to free them as necessary. */
494   c = (Mat_SeqAIJ *)((*C)->data);
495   c->freedata = PETSC_TRUE;
496   c->nonew    = 0;
497 
498   /* Clean up. */
499   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
500 
501   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 EXTERN_C_END
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatPtAPNumeric"
508 /*
509    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
510 
511    Collective on Mat
512 
513    Input Parameters:
514 +  A - the matrix
515 -  P - the projection matrix
516 
517    Output Parameters:
518 .  C - the product matrix
519 
520    Notes:
521    C must have been created by calling MatPtAPSymbolic and must be destroyed by
522    the user using MatDestroy().
523 
524    This routine is currently only implemented for pairs of AIJ matrices and classes
525    which inherit from AIJ.  C will be of type MATAIJ.
526 
527    Level: intermediate
528 
529 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
530 */
531 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
532   PetscErrorCode ierr;
533   PetscErrorCode (*fA)(Mat,Mat,Mat);
534   PetscErrorCode (*fP)(Mat,Mat,Mat);
535 
536   PetscFunctionBegin;
537 
538   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
539   PetscValidType(A,1);
540   MatPreallocated(A);
541   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
542   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
543 
544   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
545   PetscValidType(P,2);
546   MatPreallocated(P);
547   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
548   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549 
550   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
551   PetscValidType(C,3);
552   MatPreallocated(C);
553   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
554   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
555 
556   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
557   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
558   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
559   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
560 
561   /* For now, we do not dispatch based on the type of A and P */
562   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
563   fA = A->ops->ptapnumeric;
564   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
565   fP = P->ops->ptapnumeric;
566   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
567   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
568 
569   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
570   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
571   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
572 
573   PetscFunctionReturn(0);
574 }
575 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Fills the numerical values of C = P^T*A*P for SeqAIJ A and P.

   C's nonzero pattern must already exist (built by
   MatPtAPSymbolic_SeqAIJ_SeqAIJ); the inner sparse-axpy assumes every
   column of the current A*P row appears in the target row of C.

   Per row i of A: accumulate the sparse row (A*P)[i,:] into a dense work
   array, then scatter P[i,:]^T * (A*P)[i,:] into the C rows indexed by the
   nonzero columns of row i of P (outer-product accumulation).
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj/aa and pJ/pA are walked sequentially across all rows; CSR data is contiguous */
  int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P:
     apa = values (cn scalars), apj = column list, apjdense = occupancy flags */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        /* first time this column appears in the row: record it */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj,
         so the merge below terminates once all apj entries are matched. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
657 
658 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
662 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
663 {
664   PetscErrorCode ierr;
665   int            flops=0;
666   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
667   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
668   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
669   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
670   int            *ci=c->i,*cj=c->j,*cjj;
671   int            am=A->M,cn=C->N,cm=C->M;
672   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
673   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
674 
675   PetscFunctionBegin;
676   pA=p->a+pi[prstart];
677   /* Allocate temporary array for storage of one row of A*P */
678   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
679   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
680 
681   apj      = (int *)(apa + cn);
682   apjdense = apj + cn;
683 
684   /* Clear old values in C */
685   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
686 
687   for (i=0;i<am;i++) {
688     /* Form sparse row of A*P */
689     anzi  = ai[i+1] - ai[i];
690     apnzj = 0;
691     for (j=0;j<anzi;j++) {
692       prow = *aj++;
693       pnzj = pi[prow+1] - pi[prow];
694       pjj  = pj + pi[prow];
695       paj  = pa + pi[prow];
696       for (k=0;k<pnzj;k++) {
697         if (!apjdense[pjj[k]]) {
698           apjdense[pjj[k]] = -1;
699           apj[apnzj++]     = pjj[k];
700         }
701         apa[pjj[k]] += (*aa)*paj[k];
702       }
703       flops += 2*pnzj;
704       aa++;
705     }
706 
707     /* Sort the j index array for quick sparse axpy. */
708     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
709 
710     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
711     pnzi = pi[i+1+prstart] - pi[i+prstart];
712     for (j=0;j<pnzi;j++) {
713       nextap = 0;
714       crow   = *pJ++;
715       cjj    = cj + ci[crow];
716       caj    = ca + ci[crow];
717       /* Perform sparse axpy operation.  Note cjj includes apj. */
718       for (k=0;nextap<apnzj;k++) {
719         if (cjj[k]==apj[nextap]) {
720           caj[k] += (*pA)*apa[apj[nextap++]];
721         }
722       }
723       flops += 2*apnzj;
724       pA++;
725     }
726 
727     /* Zero the current row info for A*P */
728     for (j=0;j<apnzj;j++) {
729       apa[apj[j]]      = 0.;
730       apjdense[apj[j]] = 0;
731     }
732   }
733 
734   /* Assemble the final matrix and clean up */
735   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737   ierr = PetscFree(apa);CHKERRQ(ierr);
738   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
739 
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
745 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
746   PetscErrorCode ierr;
747   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
748   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
749   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
750   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
751   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
752   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
753   MatScalar      *ca;
754   Mat *psub,P_sub;
755   IS  isrow,iscol;
756   int m = prend - prstart;
757 
758   PetscFunctionBegin;
759   /* Get ij structure of P[rstart:rend,:]^T */
760   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
761   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
762   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
763   ierr = ISDestroy(isrow);CHKERRQ(ierr);
764   ierr = ISDestroy(iscol);CHKERRQ(ierr);
765   P_sub = psub[0];
766   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
767   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
768   ptJ=ptj;
769 
770   /* Allocate ci array, arrays for fill computation and */
771   /* free space for accumulating nonzero column info */
772   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
773   ci[0] = 0;
774 
775   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
776   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
777   ptasparserow = ptadenserow  + an;
778   denserow     = ptasparserow + an;
779   sparserow    = denserow     + pn;
780 
781   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
782   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
783   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
784   current_space = free_space;
785 
786   /* Determine symbolic info for each row of C: */
787   for (i=0;i<pn;i++) {
788     ptnzi  = pti[i+1] - pti[i];
789     ptanzi = 0;
790     /* Determine symbolic row of PtA_reduced: */
791     for (j=0;j<ptnzi;j++) {
792       arow = *ptJ++;
793       anzj = ai[arow+1] - ai[arow];
794       ajj  = aj + ai[arow];
795       for (k=0;k<anzj;k++) {
796         if (!ptadenserow[ajj[k]]) {
797           ptadenserow[ajj[k]]    = -1;
798           ptasparserow[ptanzi++] = ajj[k];
799         }
800       }
801     }
802       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
803     ptaj = ptasparserow;
804     cnzi   = 0;
805     for (j=0;j<ptanzi;j++) {
806       prow = *ptaj++;
807       pnzj = pi[prow+1] - pi[prow];
808       pjj  = pj + pi[prow];
809       for (k=0;k<pnzj;k++) {
810         if (!denserow[pjj[k]]) {
811             denserow[pjj[k]]  = -1;
812             sparserow[cnzi++] = pjj[k];
813         }
814       }
815     }
816 
817     /* sort sparserow */
818     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
819 
820     /* If free space is not available, make more free space */
821     /* Double the amount of total space in the list */
822     if (current_space->local_remaining<cnzi) {
823       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
824     }
825 
826     /* Copy data into free space, and zero out denserows */
827     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
828     current_space->array           += cnzi;
829     current_space->local_used      += cnzi;
830     current_space->local_remaining -= cnzi;
831 
832     for (j=0;j<ptanzi;j++) {
833       ptadenserow[ptasparserow[j]] = 0;
834     }
835     for (j=0;j<cnzi;j++) {
836       denserow[sparserow[j]] = 0;
837     }
838       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
839       /*        For now, we will recompute what is needed. */
840     ci[i+1] = ci[i] + cnzi;
841   }
842   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
843   /* Allocate space for cj, initialize cj, and */
844   /* destroy list of free space and other temporary array(s) */
845   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
846   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
847   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
848 
849   /* Allocate space for ca */
850   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
851   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
852 
853   /* put together the new matrix */
854   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
855 
856   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
857   /* Since these are PETSc arrays, change flags to free them as necessary. */
858   c = (Mat_SeqAIJ *)((*C)->data);
859   c->freedata = PETSC_TRUE;
860   c->nonew    = 0;
861 
862   /* Clean up. */
863   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
864 
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
870 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
871 {
872   PetscErrorCode ierr;
873   PetscFunctionBegin;
874   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
875   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
876   if (scall == MAT_INITIAL_MATRIX){
877     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
878   }
879 
880   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
881 
882   PetscFunctionReturn(0);
883 }
884 
885