xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 76be9ce4a233aaa47cda2bc7f5a27cd7faabecaa)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mmbaij.c,v 1.16 1997/11/03 04:46:15 bsmith Exp balay $";
3 #endif
4 
5 
6 /*
7    Support for the parallel BAIJ matrix vector multiply
8 */
9 #include "src/mat/impls/baij/mpi/mpibaij.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14 int MatSetUpMultiply_MPIBAIJ(Mat mat)
15 {
16   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19   int        col,bs = baij->bs,*tmp,*stmp;
20   IS         from,to;
21   Vec        gvec;
22 
23   PetscFunctionBegin;
24   /* For the first stab we make an array as long as the number of columns */
25   /* mark those columns that are in baij->B */
26   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
27   PetscMemzero(indices,Nbs*sizeof(int));
28   for ( i=0; i<B->mbs; i++ ) {
29     for ( j=0; j<B->ilen[i]; j++ ) {
30       if (!indices[aj[B->i[i] + j]]) ec++;
31       indices[aj[B->i[i] + j] ] = 1;
32     }
33   }
34 
35   /* form array of columns we need */
36   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
37   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
38   ec = 0;
39   for ( i=0; i<Nbs; i++ ) {
40     if (indices[i]) {
41       garray[ec++] = i;
42     }
43   }
44 
45   /* make indices now point into garray */
46   for ( i=0; i<ec; i++ ) {
47     indices[garray[i]] = i;
48   }
49 
50   /* compact out the extra columns in B */
51   for ( i=0; i<B->mbs; i++ ) {
52     for ( j=0; j<B->ilen[i]; j++ ) {
53       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
54     }
55   }
56   B->nbs = ec;
57   B->n   = ec*B->bs;
58   PetscFree(indices);
59 
60   for ( i=0,col=0; i<ec; i++ ) {
61     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
62   }
63   /* create local vector that is used to scatter into */
64   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
65 
66   /* create two temporary index sets for building scatter-gather */
67 
68   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
69   for ( i=0,col=0; i<ec; i++ ) {
70     garray[i] = bs*garray[i];
71   }
72   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
73   for ( i=0,col=0; i<ec; i++ ) {
74     garray[i] = garray[i]/bs;
75   }
76 
77   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
78   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
79   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
80   PetscFree(stmp);
81 
82   /* create temporary global vector to generate scatter context */
83   /* this is inefficient, but otherwise we must do either
84      1) save garray until the first actual scatter when the vector is known or
85      2) have another way of generating a scatter context without a vector.*/
86   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
87 
88   /* gnerate the scatter context */
89   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
90 
91   /*
92       Post the receives for the first matrix vector product. We sync-chronize after
93     this on the chance that the user immediately calls MatMult() after assemblying
94     the matrix.
95   */
96   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
97   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
98 
99   PLogObjectParent(mat,baij->Mvctx);
100   PLogObjectParent(mat,baij->lvec);
101   PLogObjectParent(mat,from);
102   PLogObjectParent(mat,to);
103   baij->garray = garray;
104   PLogObjectMemory(mat,(ec+1)*sizeof(int));
105   ierr = ISDestroy(from); CHKERRQ(ierr);
106   ierr = ISDestroy(to); CHKERRQ(ierr);
107   ierr = VecDestroy(gvec);
108   PetscFree(tmp);
109   PetscFunctionReturn(0);
110 }
111 
112 
113 /*
114      Takes the local part of an already assembled MPIBAIJ matrix
115    and disassembles it. This is to allow new nonzeros into the matrix
116    that require more communication in the matrix vector multiply.
117    Thus certain data-structures must be rebuilt.
118 
119    Kind of slow! But that's what application programmers get when
120    they are sloppy.
121 */
122 #undef __FUNC__
123 #define __FUNC__ "DisAssemble_MPIBAIJ"
124 int DisAssemble_MPIBAIJ(Mat A)
125 {
126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
127   Mat        B = baij->B,Bnew;
128   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
129   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
130   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
131   Scalar     *a=Bbaij->a;
132 
133   PetscFunctionBegin;
134   /* free stuff related to matrix-vec multiply */
135   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
136   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
137   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
138   if (baij->colmap) {
139     PetscFree(baij->colmap); baij->colmap = 0;
140     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
141   }
142 
143   /* make sure that B is assembled so we can access its values */
144   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
145   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
146 
147   /* invent new B and copy stuff over */
148   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
149   for ( i=0; i<mbs; i++ ) {
150     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
151   }
152   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
153   PetscFree(nz);
154 
155   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
156   for ( i=0; i<mbs; i++ ) {
157     rvals[0] = bs*i;
158     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
159     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
160       col = garray[Bbaij->j[j]]*bs;
161       for (k=0; k<bs; k++ ) {
162         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
163         col++;
164       }
165     }
166   }
167   PetscFree(baij->garray); baij->garray = 0;
168   PetscFree(rvals);
169   PLogObjectMemory(A,-ec*sizeof(int));
170   ierr = MatDestroy(B); CHKERRQ(ierr);
171   PLogObjectParent(A,Bnew);
172   baij->B = Bnew;
173   A->was_assembled = PETSC_FALSE;
174   PetscFunctionReturn(0);
175 }
176 
177 
178