1 /* 2 Support for the parallel AIJ matrix vector multiply 3 */ 4 #include "src/mat/impls/aij/mpi/mpiaij.h" 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 8 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9 { 10 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 12 PetscErrorCode ierr; 13 int i,j,*aj = B->j,ec = 0,*garray; 14 IS from,to; 15 Vec gvec; 16 #if defined (PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 int gid,lid; 20 #else 21 int N = mat->N,*indices; 22 23 #endif 24 25 PetscFunctionBegin; 26 27 #if defined (PETSC_USE_CTABLE) 28 /* use a table - Mark Adams (this has not been tested with "shift") */ 29 ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->m; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 int data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->m; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 int gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid --; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 aij->B->n = aij->B->N = ec; 64 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 65 /* Mark Adams */ 66 #else 67 /* For the first stab we make an array as long as the number of columns */ 68 /* mark those columns that are in aij->B */ 69 ierr = PetscMalloc((N+1)*sizeof(int),&indices);CHKERRQ(ierr); 70 ierr = PetscMemzero(indices,N*sizeof(int));CHKERRQ(ierr); 71 for (i=0; i<aij->B->m; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j] ]) ec++; 74 indices[aj[B->i[i] + j] ] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->m; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->n = aij->B->N = ec; 97 ierr = PetscFree(indices);CHKERRQ(ierr); 98 #endif 99 /* create local vector that is used to scatter into */ 100 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 101 102 /* create two temporary Index sets for build scatter gather */ 103 ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr); 104 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 105 106 /* create temporary global vector to generate scatter context */ 107 /* this is inefficient, but otherwise we must do either 108 1) save garray until the first actual scatter when the vector is known or 109 2) have another way of generating a scatter context without a vector.*/ 110 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 111 112 /* generate the scatter context */ 113 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 114 PetscLogObjectParent(mat,aij->Mvctx); 115 PetscLogObjectParent(mat,aij->lvec); 116 PetscLogObjectParent(mat,from); 117 PetscLogObjectParent(mat,to); 118 aij->garray = garray; 119 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 120 ierr = ISDestroy(from);CHKERRQ(ierr); 121 ierr = ISDestroy(to);CHKERRQ(ierr); 122 ierr = VecDestroy(gvec);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "DisAssemble_MPIAIJ" 129 /* 130 Takes the local part of an already assembled MPIAIJ matrix 131 and disassembles it. This is to allow new nonzeros into the matrix 132 that require more communication in the matrix vector multiply. 133 Thus certain data-structures must be rebuilt. 134 135 Kind of slow! But that's what application programmers get when 136 they are sloppy. 137 */ 138 PetscErrorCode DisAssemble_MPIAIJ(Mat A) 139 { 140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 141 Mat B = aij->B,Bnew; 142 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 143 PetscErrorCode ierr; 144 int i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray; 145 int *nz,ec; 146 PetscScalar v; 147 148 PetscFunctionBegin; 149 /* free stuff related to matrix-vec multiply */ 150 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 151 ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0; 152 ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0; 153 if (aij->colmap) { 154 #if defined (PETSC_USE_CTABLE) 155 ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr); 156 aij->colmap = 0; 157 #else 158 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 159 aij->colmap = 0; 160 PetscLogObjectMemory(A,-aij->B->n*sizeof(int)); 161 #endif 162 } 163 164 /* make sure that B is assembled so we can access its values */ 165 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167 168 /* invent new B and copy stuff over */ 169 ierr = PetscMalloc((m+1)*sizeof(int),&nz);CHKERRQ(ierr); 170 for (i=0; i<m; i++) { 171 nz[i] = Baij->i[i+1] - Baij->i[i]; 172 } 173 ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr); 174 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 175 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 176 ierr = PetscFree(nz);CHKERRQ(ierr); 177 for (i=0; i<m; i++) { 178 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 179 col = garray[Baij->j[ct]]; 180 v = Baij->a[ct++]; 181 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 182 } 183 } 184 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 185 aij->garray = 0; 186 PetscLogObjectMemory(A,-ec*sizeof(int)); 187 ierr = MatDestroy(B);CHKERRQ(ierr); 188 PetscLogObjectParent(A,Bnew); 189 aij->B = Bnew; 190 A->was_assembled = PETSC_FALSE; 191 PetscFunctionReturn(0); 192 } 193 194 /* ugly stuff added for Glenn someday we should fix this up */ 195 196 static int *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 197 parts of the local matrix */ 198 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 199 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 203 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 204 { 205 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 206 PetscErrorCode ierr; 207 int i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 208 int *r_rmapd,*r_rmapo; 209 210 PetscFunctionBegin; 211 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 212 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 213 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr); 214 ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 215 nt = 0; 216 for (i=0; i<inA->mapping->n; i++) { 217 if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) { 218 nt++; 219 r_rmapd[i] = inA->mapping->indices[i] + 1; 220 } 221 } 222 if (nt != n) SETERRQ2(1,"Hmm nt %d n %d",nt,n); 223 ierr = PetscMalloc((n+1)*sizeof(int),&auglyrmapd);CHKERRQ(ierr); 224 for (i=0; i<inA->mapping->n; i++) { 225 if (r_rmapd[i]){ 226 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 227 } 228 } 229 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 230 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 231 232 ierr = PetscMalloc((inA->N+1)*sizeof(int),&lindices);CHKERRQ(ierr); 233 ierr = PetscMemzero(lindices,inA->N*sizeof(int));CHKERRQ(ierr); 234 for (i=0; i<ina->B->n; i++) { 235 lindices[garray[i]] = i+1; 236 } 237 no = inA->mapping->n - nt; 238 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr); 239 ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 240 nt = 0; 241 for (i=0; i<inA->mapping->n; i++) { 242 if (lindices[inA->mapping->indices[i]]) { 243 nt++; 244 r_rmapo[i] = lindices[inA->mapping->indices[i]]; 245 } 246 } 247 if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n); 248 ierr = PetscFree(lindices);CHKERRQ(ierr); 249 ierr = PetscMalloc((nt+1)*sizeof(int),&auglyrmapo);CHKERRQ(ierr); 250 for (i=0; i<inA->mapping->n; i++) { 251 if (r_rmapo[i]){ 252 auglyrmapo[(r_rmapo[i]-1)] = i; 253 } 254 } 255 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 256 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNCT__ 262 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 263 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 264 { 265 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 266 PetscErrorCode ierr,(*f)(Mat,Vec); 267 268 PetscFunctionBegin; 269 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 270 if (f) { 271 ierr = (*f)(A,scale);CHKERRQ(ierr); 272 } 273 PetscFunctionReturn(0); 274 } 275 276 EXTERN_C_BEGIN 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 279 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 280 { 281 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 282 PetscErrorCode ierr; 283 int n,i; 284 PetscScalar *d,*o,*s; 285 286 PetscFunctionBegin; 287 if (!auglyrmapd) { 288 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 289 } 290 291 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 292 293 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 294 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 295 for (i=0; i<n; i++) { 296 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 297 } 298 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 299 /* column scale "diagonal" portion of local matrix */ 300 ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 301 302 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 303 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 304 for (i=0; i<n; i++) { 305 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 306 } 307 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 308 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 309 /* column scale "off-diagonal" portion of local matrix */ 310 ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 311 312 PetscFunctionReturn(0); 313 } 314 EXTERN_C_END 315 316 317