xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 84df9cb40eca90ea9b18a456fab7a4ecc7f6c1a4)
1 
2 /*
3   Defines projective product routines where A is a SeqAIJ matrix
4           C = P^T * A * P
5 */
6 
7 #include <../src/mat/impls/aij/seq/aij.h>   /*I "petscmat.h" I*/
8 #include <../src/mat/utils/freespace.h>
9 #include <petscbt.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ"
13 PetscErrorCode MatPtAPSymbolic_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
14 {
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   if (!P->ops->ptapsymbolic_seqaij) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not implemented for A=%s and P=%s",((PetscObject)A)->type_name,((PetscObject)P)->type_name);
19   ierr = (*P->ops->ptapsymbolic_seqaij)(A,P,fill,C);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ"
25 PetscErrorCode MatPtAPNumeric_SeqAIJ(Mat A,Mat P,Mat C)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30   if (!P->ops->ptapnumeric_seqaij) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not implemented for A=%s and P=%s",((PetscObject)A)->type_name,((PetscObject)P)->type_name);
31   ierr = (*P->ops->ptapnumeric_seqaij)(A,P,C);CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
37 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode     ierr;
40   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
41   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
42   PetscInt           *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
43   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*ptaj,nspacedouble=0;
44   PetscInt           an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N;
45   PetscInt           i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
46   MatScalar          *ca;
47   PetscBT            lnkbt;
48 
49   PetscFunctionBegin;
50   /* Get ij structure of P^T */
51   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
52   ptJ=ptj;
53 
54   /* Allocate ci array, arrays for fill computation and */
55   /* free space for accumulating nonzero column info */
56   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
57   ci[0] = 0;
58 
59   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
60   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
61   ptasparserow = ptadenserow  + an;
62 
63   /* create and initialize a linked list */
64   nlnk = pn+1;
65   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
66 
67   /* Set initial free space to be fill*nnz(A). */
68   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
69   ierr          = PetscFreeSpaceGet((PetscInt)(fill*ai[am]),&free_space);
70   current_space = free_space;
71 
72   /* Determine symbolic info for each row of C: */
73   for (i=0;i<pn;i++) {
74     ptnzi  = pti[i+1] - pti[i];
75     ptanzi = 0;
76     /* Determine symbolic row of PtA: */
77     for (j=0;j<ptnzi;j++) {
78       arow = *ptJ++;
79       anzj = ai[arow+1] - ai[arow];
80       ajj  = aj + ai[arow];
81       for (k=0;k<anzj;k++) {
82         if (!ptadenserow[ajj[k]]) {
83           ptadenserow[ajj[k]]    = -1;
84           ptasparserow[ptanzi++] = ajj[k];
85         }
86       }
87     }
88     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
89     ptaj = ptasparserow;
90     cnzi   = 0;
91     for (j=0;j<ptanzi;j++) {
92       prow = *ptaj++;
93       pnzj = pi[prow+1] - pi[prow];
94       pjj  = pj + pi[prow];
95       /* add non-zero cols of P into the sorted linked list lnk */
96       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
97       cnzi += nlnk;
98     }
99 
100     /* If free space is not available, make more free space */
101     /* Double the amount of total space in the list */
102     if (current_space->local_remaining<cnzi) {
103       ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
104       nspacedouble++;
105     }
106 
107     /* Copy data into free space, and zero out denserows */
108     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
109     current_space->array           += cnzi;
110     current_space->local_used      += cnzi;
111     current_space->local_remaining -= cnzi;
112 
113     for (j=0;j<ptanzi;j++) {
114       ptadenserow[ptasparserow[j]] = 0;
115     }
116     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
117     /*        For now, we will recompute what is needed. */
118     ci[i+1] = ci[i] + cnzi;
119   }
120   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
121   /* Allocate space for cj, initialize cj, and */
122   /* destroy list of free space and other temporary array(s) */
123   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
124   ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
125   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
126   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
127 
128   /* Allocate space for ca */
129   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
130   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
131 
132   /* put together the new matrix */
133   ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
134 
135   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
136   /* Since these are PETSc arrays, change flags to free them as necessary. */
137   c = (Mat_SeqAIJ *)((*C)->data);
138   c->free_a  = PETSC_TRUE;
139   c->free_ij = PETSC_TRUE;
140   c->nonew   = 0;
141 
142   /* Clean up. */
143   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
144 #if defined(PETSC_USE_INFO)
145   if (ci[pn] != 0) {
146     PetscReal afill = ((PetscReal)ci[pn])/ai[am];
147     if (afill < 1.0) afill = 1.0;
148     ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr);
149     ierr = PetscInfo1((*C),"Use MatPtAP(A,P,MatReuse,%G,&C) for best performance.\n",afill);CHKERRQ(ierr);
150   } else {
151     ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
152   }
153 #endif
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
159 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
160 {
161   PetscErrorCode ierr;
162   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
163   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
164   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
165   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
166   PetscInt       *ci=c->i,*cj=c->j,*cjj;
167   PetscInt       am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N;
168   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
169   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
170 
171   PetscFunctionBegin;
172   /* Allocate temporary array for storage of one row of A*P */
173   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
174   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
175 
176   apj      = (PetscInt *)(apa + cn);
177   apjdense = apj + cn;
178 
179   /* Clear old values in C */
180   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
181 
182   for (i=0;i<am;i++) {
183     /* Form sparse row of A*P */
184     anzi  = ai[i+1] - ai[i];
185     apnzj = 0;
186     for (j=0;j<anzi;j++) {
187       prow = *aj++;
188       pnzj = pi[prow+1] - pi[prow];
189       pjj  = pj + pi[prow];
190       paj  = pa + pi[prow];
191       for (k=0;k<pnzj;k++) {
192         if (!apjdense[pjj[k]]) {
193           apjdense[pjj[k]] = -1;
194           apj[apnzj++]     = pjj[k];
195         }
196         apa[pjj[k]] += (*aa)*paj[k];
197       }
198       ierr = PetscLogFlops(2.0*pnzj);CHKERRQ(ierr);
199       aa++;
200     }
201 
202     /* Sort the j index array for quick sparse axpy. */
203     /* Note: a array does not need sorting as it is in dense storage locations. */
204     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
205 
206     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
207     pnzi = pi[i+1] - pi[i];
208     for (j=0;j<pnzi;j++) {
209       nextap = 0;
210       crow   = *pJ++;
211       cjj    = cj + ci[crow];
212       caj    = ca + ci[crow];
213       /* Perform sparse axpy operation.  Note cjj includes apj. */
214       for (k=0;nextap<apnzj;k++) {
215 #if defined(PETSC_USE_DEBUG)
216         if (k >= ci[crow+1] - ci[crow]) {
217           SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"k too large k %d, crow %d",k,crow);
218         }
219 #endif
220         if (cjj[k]==apj[nextap]) {
221           caj[k] += (*pA)*apa[apj[nextap++]];
222         }
223       }
224       ierr = PetscLogFlops(2.0*apnzj);CHKERRQ(ierr);
225       pA++;
226     }
227 
228     /* Zero the current row info for A*P */
229     for (j=0;j<apnzj;j++) {
230       apa[apj[j]]      = 0.;
231       apjdense[apj[j]] = 0;
232     }
233   }
234 
235   /* Assemble the final matrix and clean up */
236   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
237   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238   ierr = PetscFree(apa);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241