1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscErrorCode ierr; 14 PetscInt i,j,*aj = B->j,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 PetscBool useblockis; 18 #if defined(PETSC_USE_CTABLE) 19 PetscTable gid1_lid1; 20 PetscTablePosition tpos; 21 PetscInt gid,lid; 22 #else 23 PetscInt N = mat->cmap->N,*indices; 24 #endif 25 26 PetscFunctionBegin; 27 #if defined(PETSC_USE_CTABLE) 28 /* use a table */ 29 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->rmap->n; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->rmap->n; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid--; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 aij->B->cmap->n = aij->B->cmap->N = ec; 64 65 ierr = 
PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 71 ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr); 72 for (i=0; i<aij->B->rmap->n; i++) { 73 for (j=0; j<B->ilen[i]; j++) { 74 if (!indices[aj[B->i[i] + j]]) ec++; 75 indices[aj[B->i[i] + j]] = 1; 76 } 77 } 78 79 /* form array of columns we need */ 80 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 81 ec = 0; 82 for (i=0; i<N; i++) { 83 if (indices[i]) garray[ec++] = i; 84 } 85 86 /* make indices now point into garray */ 87 for (i=0; i<ec; i++) { 88 indices[garray[i]] = i; 89 } 90 91 /* compact out the extra columns in B */ 92 for (i=0; i<aij->B->rmap->n; i++) { 93 for (j=0; j<B->ilen[i]; j++) { 94 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 } 97 aij->B->cmap->n = aij->B->cmap->N = ec; 98 99 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 100 ierr = PetscFree(indices);CHKERRQ(ierr); 101 #endif 102 /* create local vector that is used to scatter into */ 103 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 104 105 /* create two temporary Index sets for build scatter gather */ 106 /* check for the special case where blocks are communicated for faster VecScatterXXX */ 107 useblockis = PETSC_FALSE; 108 if (mat->cmap->bs > 1) { 109 PetscInt bs = mat->cmap->bs,ibs,ga; 110 if (!(ec % bs)) { 111 useblockis = PETSC_TRUE; 112 for (i=0; i<ec/bs; i++) { 113 if ((ga = garray[ibs = i*bs]) % bs) { 114 useblockis = PETSC_FALSE; 115 break; 116 } 117 for (j=1; j<bs; j++) { 118 if (garray[ibs+j] != ga+j) { 119 useblockis = PETSC_FALSE; 120 break; 121 } 122 } 123 if (!useblockis) break; 124 } 125 } 126 } 127 #if defined(PETSC_USE_DEBUG) 128 i = (PetscInt)useblockis; 129 ierr = 
MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr); 130 if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)"); 131 #endif 132 133 if (useblockis) { 134 PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs; 135 if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs); 136 ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr); 137 ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr); 138 for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs; 139 ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr); 140 } else { 141 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 142 } 143 144 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 145 146 /* create temporary global vector to generate scatter context */ 147 /* This does not allocate the array's memory so is efficient */ 148 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 149 150 /* generate the scatter context */ 151 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 152 ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr); 153 ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr); 154 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 155 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 156 157 aij->garray = garray; 158 159 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 160 ierr = ISDestroy(&from);CHKERRQ(ierr); 161 ierr = ISDestroy(&to);CHKERRQ(ierr); 162 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 167 #undef __FUNCT__ 168 #define __FUNCT__ "MatDisAssemble_MPIAIJ" 169 /* 170 Takes the local part of an already assembled MPIAIJ matrix 171 and disassembles it. 
   This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over; preallocate with the old row lengths */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  /* Bnew gets the FULL global column width (n = A->cmap->N), undoing the
     column compression done in MatSetUpMultiply_MPIAIJ() */
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */

  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      /* ct walks B's nonzeros in storage order; garray maps a compressed
         column index back to its global column index */
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd     = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
/*
   Builds the auglyrmapd/auglyrmapo index maps and the auglydd/auglyoo
   work vectors used by MatDiagonalScaleLocal_MPIAIJ().
   NOTE(review): the results live in file-static variables, so this setup
   can serve only one matrix at a time -- presumably acceptable for the
   single use case it was added for; confirm before wider use.
*/
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  /* r_rmapd[i] = (global index)+1 when local-ordering entry i is owned by
     this process, else 0 (zero doubles as "not present") */
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0;
i<inA->rmap->mapping->n; i++) { 266 if (r_rmapd[i]) { 267 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 268 } 269 } 270 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 271 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 272 273 ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 274 ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 275 for (i=0; i<ina->B->cmap->n; i++) { 276 lindices[garray[i]] = i+1; 277 } 278 no = inA->rmap->mapping->n - nt; 279 ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 280 ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 281 nt = 0; 282 for (i=0; i<inA->rmap->mapping->n; i++) { 283 if (lindices[inA->rmap->mapping->indices[i]]) { 284 nt++; 285 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 286 } 287 } 288 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 289 ierr = PetscFree(lindices);CHKERRQ(ierr); 290 ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr); 291 for (i=0; i<inA->rmap->mapping->n; i++) { 292 if (r_rmapo[i]) { 293 auglyrmapo[(r_rmapo[i]-1)] = i; 294 } 295 } 296 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 297 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 303 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 304 { 305 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 306 PetscErrorCode ierr; 307 308 PetscFunctionBegin; 309 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 EXTERN_C_BEGIN 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 316 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 317 { 318 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 319 
PetscErrorCode ierr; 320 PetscInt n,i; 321 PetscScalar *d,*o,*s; 322 323 PetscFunctionBegin; 324 if (!auglyrmapd) { 325 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 326 } 327 328 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 329 330 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 331 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 332 for (i=0; i<n; i++) { 333 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 334 } 335 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 336 /* column scale "diagonal" portion of local matrix */ 337 ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 338 339 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 340 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 341 for (i=0; i<n; i++) { 342 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 343 } 344 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 345 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 346 /* column scale "off-diagonal" portion of local matrix */ 347 ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 EXTERN_C_END 351 352 353