xref: /petsc/src/ksp/pc/impls/redistribute/redistribute.c (revision a7fac7c2b47dbcd8ece0cc2dfdfe0e63be1bb7b5)
1 
2 /*
3   This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner.
4 */
5 #include <petsc/private/pcimpl.h>     /*I "petscksp.h" I*/
6 #include <petscksp.h>
7 
8 typedef struct {
9   KSP         ksp;
10   Vec         x,b;
11   VecScatter  scatter;
12   IS          is;
13   PetscInt    dcnt,*drows;    /* these are the local rows that have only diagonal entry */
14   PetscScalar *diag;
15   Vec         work;
16 } PC_Redistribute;
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "PCView_Redistribute"
20 static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
21 {
22   PC_Redistribute *red = (PC_Redistribute*)pc->data;
23   PetscErrorCode  ierr;
24   PetscBool       iascii,isstring;
25   PetscInt        ncnt,N;
26 
27   PetscFunctionBegin;
28   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
29   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
30   if (iascii) {
31     ierr = MPI_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
32     ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
33     ierr = PetscViewerASCIIPrintf(viewer,"    Number rows eliminated %D Percentage rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner: \n");CHKERRQ(ierr);
35     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
36   } else if (isstring) {
37     ierr = PetscViewerStringSPrintf(viewer," Redistribute preconditioner");CHKERRQ(ierr);
38     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCSetUp_Redistribute"
45 static PetscErrorCode PCSetUp_Redistribute(PC pc)
46 {
47   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
48   PetscErrorCode    ierr;
49   MPI_Comm          comm;
50   PetscInt          rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
51   PetscLayout       map,nmap;
52   PetscMPIInt       size,imdex,tag,n;
53   PetscInt          *source = NULL;
54   PetscMPIInt       *sizes = NULL,nrecvs;
55   PetscInt          j,nsends;
56   PetscInt          *owner = NULL,*starts = NULL,count,slen;
57   PetscInt          *rvalues,*svalues,recvtotal;
58   PetscMPIInt       *onodes1,*olengths1;
59   MPI_Request       *send_waits = NULL,*recv_waits = NULL;
60   MPI_Status        recv_status,*send_status;
61   Vec               tvec,diag;
62   Mat               tmat;
63   const PetscScalar *d;
64 
65   PetscFunctionBegin;
66   if (pc->setupcalled) {
67     ierr = KSPGetOperators(red->ksp,NULL,&tmat);CHKERRQ(ierr);
68     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);CHKERRQ(ierr);
69     ierr = KSPSetOperators(red->ksp,tmat,tmat);CHKERRQ(ierr);
70   } else {
71     PetscInt NN;
72 
73     ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
74     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
75     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
76 
77     /* count non-diagonal rows on process */
78     ierr = MatGetOwnershipRange(pc->mat,&rstart,&rend);CHKERRQ(ierr);
79     cnt  = 0;
80     for (i=rstart; i<rend; i++) {
81       ierr = MatGetRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr);
82       if (nz > 1) cnt++;
83       ierr = MatRestoreRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr);
84     }
85     ierr = PetscMalloc1(cnt,&rows);CHKERRQ(ierr);
86     ierr = PetscMalloc1(rend - rstart - cnt,&drows);CHKERRQ(ierr);
87 
88     /* list non-diagonal rows on process */
89     cnt = 0; dcnt = 0;
90     for (i=rstart; i<rend; i++) {
91       ierr = MatGetRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr);
92       if (nz > 1) rows[cnt++] = i;
93       else drows[dcnt++] = i - rstart;
94       ierr = MatRestoreRow(pc->mat,i,&nz,NULL,NULL);CHKERRQ(ierr);
95     }
96 
97     /* create PetscLayout for non-diagonal rows on each process */
98     ierr   = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
99     ierr   = PetscLayoutSetLocalSize(map,cnt);CHKERRQ(ierr);
100     ierr   = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
101     ierr   = PetscLayoutSetUp(map);CHKERRQ(ierr);
102     rstart = map->rstart;
103     rend   = map->rend;
104 
105     /* create PetscLayout for load-balanced non-diagonal rows on each process */
106     ierr = PetscLayoutCreate(comm,&nmap);CHKERRQ(ierr);
107     ierr = MPI_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
108     ierr = PetscLayoutSetSize(nmap,ncnt);CHKERRQ(ierr);
109     ierr = PetscLayoutSetBlockSize(nmap,1);CHKERRQ(ierr);
110     ierr = PetscLayoutSetUp(nmap);CHKERRQ(ierr);
111 
112     ierr = MatGetSize(pc->pmat,&NN,NULL);CHKERRQ(ierr);
113     ierr = PetscInfo2(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));CHKERRQ(ierr);
114     /*
115         this code is taken from VecScatterCreate_PtoS()
116         Determines what rows need to be moved where to
117         load balance the non-diagonal rows
118     */
119     /*  count number of contributors to each processor */
120     ierr   = PetscMalloc2(size,&sizes,cnt,&owner);CHKERRQ(ierr);
121     ierr   = PetscMemzero(sizes,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
122     j      = 0;
123     nsends = 0;
124     for (i=rstart; i<rend; i++) {
125       if (i < nmap->range[j]) j = 0;
126       for (; j<size; j++) {
127         if (i < nmap->range[j+1]) {
128           if (!sizes[j]++) nsends++;
129           owner[i-rstart] = j;
130           break;
131         }
132       }
133     }
134     /* inform other processors of number of messages and max length*/
135     ierr      = PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs);CHKERRQ(ierr);
136     ierr      = PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1);CHKERRQ(ierr);
137     ierr      = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
138     recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
139 
140     /* post receives:  rvalues - rows I will own; count - nu */
141     ierr  = PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);CHKERRQ(ierr);
142     count = 0;
143     for (i=0; i<nrecvs; i++) {
144       ierr   = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
145       count += olengths1[i];
146     }
147 
148     /* do sends:
149        1) starts[i] gives the starting index in svalues for stuff going to
150        the ith processor
151     */
152     ierr      = PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts);CHKERRQ(ierr);
153     starts[0] = 0;
154     for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
155     for (i=0; i<cnt; i++)  svalues[starts[owner[i]]++] = rows[i];
156     for (i=0; i<cnt; i++)  rows[i] = rows[i] - rstart;
157     red->drows = drows;
158     red->dcnt  = dcnt;
159     ierr       = PetscFree(rows);CHKERRQ(ierr);
160 
161     starts[0] = 0;
162     for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
163     count = 0;
164     for (i=0; i<size; i++) {
165       if (sizes[i]) {
166         ierr = MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
167       }
168     }
169 
170     /*  wait on receives */
171     count = nrecvs;
172     slen  = 0;
173     while (count) {
174       ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
175       /* unpack receives into our local space */
176       ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
177       slen += n;
178       count--;
179     }
180     if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
181 
182     ierr = ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);CHKERRQ(ierr);
183 
184     /* free up all work space */
185     ierr = PetscFree(olengths1);CHKERRQ(ierr);
186     ierr = PetscFree(onodes1);CHKERRQ(ierr);
187     ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
188     ierr = PetscFree2(sizes,owner);CHKERRQ(ierr);
189     if (nsends) {   /* wait on sends */
190       ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
191       ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
192       ierr = PetscFree(send_status);CHKERRQ(ierr);
193     }
194     ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
195     ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
196     ierr = PetscLayoutDestroy(&nmap);CHKERRQ(ierr);
197 
198     ierr = VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);CHKERRQ(ierr);
199     ierr = VecDuplicate(red->b,&red->x);CHKERRQ(ierr);
200     ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr);
201     ierr = VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter);CHKERRQ(ierr);
202     ierr = VecDestroy(&tvec);CHKERRQ(ierr);
203     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
204     ierr = KSPSetOperators(red->ksp,tmat,tmat);CHKERRQ(ierr);
205     ierr = MatDestroy(&tmat);CHKERRQ(ierr);
206   }
207 
208   /* get diagonal portion of matrix */
209   ierr = PetscFree(red->diag);CHKERRQ(ierr);
210   ierr = PetscMalloc1(red->dcnt,&red->diag);CHKERRQ(ierr);
211   ierr = MatCreateVecs(pc->pmat,&diag,NULL);CHKERRQ(ierr);
212   ierr = MatGetDiagonal(pc->pmat,diag);CHKERRQ(ierr);
213   ierr = VecGetArrayRead(diag,&d);CHKERRQ(ierr);
214   for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
215   ierr = VecRestoreArrayRead(diag,&d);CHKERRQ(ierr);
216   ierr = VecDestroy(&diag);CHKERRQ(ierr);
217   ierr = KSPSetUp(red->ksp);CHKERRQ(ierr);
218   PetscFunctionReturn(0);
219 }
220 
221 #undef __FUNCT__
222 #define __FUNCT__ "PCApply_Redistribute"
223 static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
224 {
225   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
226   PetscErrorCode    ierr;
227   PetscInt          dcnt   = red->dcnt,i;
228   const PetscInt    *drows = red->drows;
229   PetscScalar       *xwork;
230   const PetscScalar *bwork,*diag = red->diag;
231 
232   PetscFunctionBegin;
233   if (!red->work) {
234     ierr = VecDuplicate(b,&red->work);CHKERRQ(ierr);
235   }
236   /* compute the rows of solution that have diagonal entries only */
237   ierr = VecSet(x,0.0);CHKERRQ(ierr);         /* x = diag(A)^{-1} b */
238   ierr = VecGetArray(x,&xwork);CHKERRQ(ierr);
239   ierr = VecGetArrayRead(b,&bwork);CHKERRQ(ierr);
240   for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
241   ierr = PetscLogFlops(dcnt);CHKERRQ(ierr);
242   ierr = VecRestoreArray(red->work,&xwork);CHKERRQ(ierr);
243   ierr = VecRestoreArrayRead(b,&bwork);CHKERRQ(ierr);
244   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
245   ierr = MatMult(pc->pmat,x,red->work);CHKERRQ(ierr);
246   ierr = VecAYPX(red->work,-1.0,b);CHKERRQ(ierr);   /* red->work = b - A x */
247 
248   ierr = VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
249   ierr = VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
250   ierr = KSPSolve(red->ksp,red->b,red->x);CHKERRQ(ierr);
251   ierr = VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
252   ierr = VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "PCDestroy_Redistribute"
258 static PetscErrorCode PCDestroy_Redistribute(PC pc)
259 {
260   PC_Redistribute *red = (PC_Redistribute*)pc->data;
261   PetscErrorCode  ierr;
262 
263   PetscFunctionBegin;
264   ierr = VecScatterDestroy(&red->scatter);CHKERRQ(ierr);
265   ierr = ISDestroy(&red->is);CHKERRQ(ierr);
266   ierr = VecDestroy(&red->b);CHKERRQ(ierr);
267   ierr = VecDestroy(&red->x);CHKERRQ(ierr);
268   ierr = KSPDestroy(&red->ksp);CHKERRQ(ierr);
269   ierr = VecDestroy(&red->work);CHKERRQ(ierr);
270   ierr = PetscFree(red->drows);CHKERRQ(ierr);
271   ierr = PetscFree(red->diag);CHKERRQ(ierr);
272   ierr = PetscFree(pc->data);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "PCSetFromOptions_Redistribute"
278 static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptions *PetscOptionsObject,PC pc)
279 {
280   PetscErrorCode  ierr;
281   PC_Redistribute *red = (PC_Redistribute*)pc->data;
282 
283   PetscFunctionBegin;
284   ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #undef __FUNCT__
289 #define __FUNCT__ "PCRedistributeGetKSP"
290 /*@
291    PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE
292 
293    Not Collective
294 
295    Input Parameter:
296 .  pc - the preconditioner context
297 
298    Output Parameter:
299 .  innerksp - the inner KSP
300 
301    Level: advanced
302 
303 .keywords: PC, redistribute solve
304 @*/
305 PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
306 {
307   PC_Redistribute *red = (PC_Redistribute*)pc->data;
308 
309   PetscFunctionBegin;
310   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
311   PetscValidPointer(innerksp,2);
312   *innerksp = red->ksp;
313   PetscFunctionReturn(0);
314 }
315 
316 /* -------------------------------------------------------------------------------------*/
317 /*MC
318      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that only have a diagonal entry and then applys a KSP to that new matrix
319 
320      Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>
321 
322      Notes:  Usually run this with -ksp_type preonly
323 
324      If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
325      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.
326 
327      This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.
328 
329      Developer Notes: Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.
330 
331    Level: intermediate
332 
333 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
334 M*/
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "PCCreate_Redistribute"
338 PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
339 {
340   PetscErrorCode  ierr;
341   PC_Redistribute *red;
342   const char      *prefix;
343 
344   PetscFunctionBegin;
345   ierr     = PetscNewLog(pc,&red);CHKERRQ(ierr);
346   pc->data = (void*)red;
347 
348   pc->ops->apply          = PCApply_Redistribute;
349   pc->ops->applytranspose = 0;
350   pc->ops->setup          = PCSetUp_Redistribute;
351   pc->ops->destroy        = PCDestroy_Redistribute;
352   pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
353   pc->ops->view           = PCView_Redistribute;
354 
355   ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp);CHKERRQ(ierr);
356   ierr = KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure);CHKERRQ(ierr);
357   ierr = PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);CHKERRQ(ierr);
358   ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp);CHKERRQ(ierr);
359   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
360   ierr = KSPSetOptionsPrefix(red->ksp,prefix);CHKERRQ(ierr);
361   ierr = KSPAppendOptionsPrefix(red->ksp,"redistribute_");CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364