1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the implementation also automatically switches over to using inodes when 18 enough of them exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j<nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for (j=0; j<na; j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 107 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 108 { 109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 110 PetscErrorCode ierr; 111 PetscInt i,rstart,nrows,*rows; 112 113
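  /* Find the rows of the local diagonal block whose diagonal entry is zero, then shift the
     local row numbers by this process's ownership start so that the returned index set
     holds global row numbers. */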
PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
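   Process 0 sends each other process its row lengths, column indices, and numerical values;
   every process then computes its diagonal/off-diagonal preallocation before the values are
   inserted. A sketch of the (internal) calling sequence, with gmat the sequential matrix on
   process 0 and m the number of local rows wanted on this process, is

      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);
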
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 204 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 205 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 206 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 207 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 208 rowners[0] = 0; 209 for (i=2; i<=size; i++) { 210 rowners[i] += rowners[i-1]; 211 } 212 rstart = rowners[rank]; 213 rend = rowners[rank+1]; 214 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 215 if (!rank) { 216 gmata = (Mat_SeqAIJ*) gmat->data; 217 /* send row lengths to all processors */ 218 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 219 for (i=1; i<size; i++) { 220 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 } 222 /* determine number diagonal and off-diagonal counts */ 223 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 224 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 225 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 226 jj = 0; 227 for (i=0; i<m; i++) { 228 for (j=0; j<dlens[i]; j++) { 229 if (gmata->j[jj] < rstart) ld[i]++; 230 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 231 jj++; 232 } 233 } 234 /* send column indices to other processes */ 235 for (i=1; i<size; i++) { 236 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 237 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 238 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 239 } 240 241 /* send numerical values to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 245 } 246 gmataa = gmata->a; 247 gmataj = gmata->j; 248 249 } else { 250 /* receive row lengths */ 251 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 252 /* receive column indices */ 253 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 255 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 /* determine number diagonal and off-diagonal counts */ 257 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 258 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 259 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 260 jj = 0; 261 for (i=0; i<m; i++) { 262 for 
(j=0; j<dlens[i]; j++) { 263 if (gmataj[jj] < rstart) ld[i]++; 264 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 265 jj++; 266 } 267 } 268 /* receive numerical values */ 269 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 270 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 271 } 272 /* set preallocation */ 273 for (i=0; i<m; i++) { 274 dlens[i] -= olens[i]; 275 } 276 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 277 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 278 279 for (i=0; i<m; i++) { 280 dlens[i] += olens[i]; 281 } 282 cnt = 0; 283 for (i=0; i<m; i++) { 284 row = rstart + i; 285 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 286 cnt += dlens[i]; 287 } 288 if (rank) { 289 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 290 } 291 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 292 ierr = PetscFree(rowners);CHKERRQ(ierr); 293 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 294 *inmat = mat; 295 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 296 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 297 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 298 mat = *inmat; 299 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 300 if (!rank) { 301 /* send numerical values to other processes */ 302 gmata = (Mat_SeqAIJ*) gmat->data; 303 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 304 gmataa = gmata->a; 305 for (i=1; i<size; i++) { 306 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 307 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 308 } 309 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 310 } else { 311 /* receive numerical values from process 0*/ 312 nz = Ad->nz + Ao->nz; 313 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 314 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 315 } 316 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 317 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 318 ad = Ad->a; 319 ao = Ao->a; 320 if (mat->rmap->n) { 321 i = 0; 322 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 323 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 324 } 325 for (i=1; i<mat->rmap->n; i++) { 326 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 327 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 328 } 329 i--; 330 if (mat->rmap->n) { 331 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 } 333 if (rank) { 334 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 335 } 336 } 337 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 338 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 339 CHKMEMQ; 340 PetscFunctionReturn(0); 341 } 342 343 /* 344 Local utility routine that creates a mapping from the global column 345 number to the local number in the off-diagonal part of the local 346 storage of the matrix. 
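   The map is stored shifted by one, that is colmap[global column] = local column + 1, so a
   value of zero means the global column does not appear in the off-diagonal block.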
When PETSC_USE_CTABLE is used this is scalable at 347 a slightly higher hash table cost; without it it is not scalable (each processor 348 has an order N integer array but is fast to acess. 349 */ 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 352 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 PetscErrorCode ierr; 356 PetscInt n = aij->B->cmap->n,i; 357 358 PetscFunctionBegin; 359 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 360 #if defined (PETSC_USE_CTABLE) 361 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 362 for (i=0; i<n; i++){ 363 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 364 } 365 #else 366 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 367 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 368 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 369 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 370 #endif 371 PetscFunctionReturn(0); 372 } 373 374 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 375 { \ 376 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 377 lastcol1 = col;\ 378 while (high1-low1 > 5) { \ 379 t = (low1+high1)/2; \ 380 if (rp1[t] > col) high1 = t; \ 381 else low1 = t; \ 382 } \ 383 for (_i=low1; _i<high1; _i++) { \ 384 if (rp1[_i] > col) break; \ 385 if (rp1[_i] == col) { \ 386 if (addv == ADD_VALUES) ap1[_i] += value; \ 387 else ap1[_i] = value; \ 388 goto a_noinsert; \ 389 } \ 390 } \ 391 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 392 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 393 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 394 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 395 N = nrow1++ - 1; a->nz++; high1++; \ 396 /* shift up all the later entries in this row */ \ 397 for (ii=N; ii>=_i; ii--) { \ 398 rp1[ii+1] = rp1[ii]; \ 399 ap1[ii+1] = ap1[ii]; \ 400 } \ 401 rp1[_i] = col; \ 402 ap1[_i] = value; \ 403 a_noinsert: ; \ 404 ailen[row] = nrow1; \ 405 } 406 407 408 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 409 { \ 410 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 411 lastcol2 = col;\ 412 while (high2-low2 > 5) { \ 413 t = (low2+high2)/2; \ 414 if (rp2[t] > col) high2 = t; \ 415 else low2 = t; \ 416 } \ 417 for (_i=low2; _i<high2; _i++) { \ 418 if (rp2[_i] > col) break; \ 419 if (rp2[_i] == col) { \ 420 if (addv == ADD_VALUES) ap2[_i] += value; \ 421 else ap2[_i] = value; \ 422 goto b_noinsert; \ 423 } \ 424 } \ 425 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 426 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 427 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 428 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 429 N = nrow2++ - 1; b->nz++; high2++; \ 430 /* shift up all the later entries in this row */ \ 431 for (ii=N; ii>=_i; ii--) { \ 432 rp2[ii+1] = rp2[ii]; \ 433 ap2[ii+1] = ap2[ii]; \ 434 } \ 435 rp2[_i] = col; \ 436 ap2[_i] = value; \ 437 b_noinsert: ; \ 438 bilen[row] = nrow2; \ 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 444 { 445 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 447 PetscErrorCode ierr; 448 PetscInt l,*garray = mat->garray,diag; 449 450 PetscFunctionBegin; 451 /* code only works for square matrices A */ 452 453 /* find size of row to the left of the diagonal part */ 454 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 455 row = row - diag; 456 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 457 if (garray[b->j[b->i[row]+l]] > diag) break; 458 } 459 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 460 461 /* diagonal part */ 462 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 463 464 /* right of diagonal part */ 465 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatSetValues_MPIAIJ" 471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 472 { 473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 474 PetscScalar value; 475 PetscErrorCode ierr; 476 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 477 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 478 PetscBool roworiented = aij->roworiented; 479 480 /* Some Variables required in the macro */ 481 Mat A = aij->A; 482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 483 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 484 MatScalar *aa = a->a; 485 PetscBool ignorezeroentries = a->ignorezeroentries; 486 Mat B = aij->B; 487 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 488 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 489 MatScalar *ba = b->a; 490 491 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 492 PetscInt nonew; 493 MatScalar *ap1,*ap2; 494 495 PetscFunctionBegin; 496 if (v) PetscValidScalarPointer(v,6); 497 for (i=0; i<m; i++) { 498 if (im[i] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 501 #endif 502 if (im[i] >= rstart && im[i] < rend) { 503 row = im[i] - rstart; 504 lastcol1 = -1; 505 rp1 = aj + ai[row]; 506 ap1 = aa + ai[row]; 507 rmax1 = aimax[row]; 508 nrow1 = ailen[row]; 509 low1 = 0; 510 high1 = nrow1; 511 lastcol2 = -1; 512 rp2 = bj + bi[row]; 513 ap2 = ba + bi[row]; 514 rmax2 = bimax[row]; 515 nrow2 = bilen[row]; 516 low2 = 0; 517 high2 = nrow2; 518 519 for (j=0; j<n; j++) { 520 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 521 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 522 if (in[j] >= cstart && in[j] < cend){ 523 col = in[j] - cstart; 524 nonew = a->nonew; 525 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 526 } else if (in[j] < 0) continue; 527 #if defined(PETSC_USE_DEBUG) 528 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 529 #endif 530 else { 531 if (mat->was_assembled) { 532 if (!aij->colmap) { 533 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 534 } 535 #if defined (PETSC_USE_CTABLE) 536 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 537 col--; 538 #else 539 col = aij->colmap[in[j]] - 1; 540 #endif 541 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 542 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 543 col = in[j]; 544 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 545 B = aij->B; 546 b = (Mat_SeqAIJ*)B->data; 547 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 548 rp2 = bj + bi[row]; 549 ap2 = ba + bi[row]; 550 rmax2 = bimax[row]; 551 nrow2 = bilen[row]; 552 low2 = 0; 553 high2 = nrow2; 554 bm = aij->B->rmap->n; 555 ba = b->a; 556 } 557 } else col = in[j]; 558 nonew = b->nonew; 559 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 560 } 561 } 562 } else { 563 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 564 if (!aij->donotstash) { 565 mat->assembled = PETSC_FALSE; 566 if (roworiented) { 567 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 568 } else { 569 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 570 } 571 } 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatGetValues_MPIAIJ" 579 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 580 { 581 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 582 PetscErrorCode ierr; 583 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 584 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 585 586 PetscFunctionBegin; 587 for (i=0; i<m; i++) { 588 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 589 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 590 if (idxm[i] >= rstart && idxm[i] < rend) { 591 row = idxm[i] - rstart; 592 for (j=0; j<n; j++) { 593 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 594 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 595 if (idxn[j] >= cstart && idxn[j] < cend){ 596 col = idxn[j] - cstart; 597 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 598 } else { 599 if (!aij->colmap) { 600 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 601 } 602 #if defined (PETSC_USE_CTABLE) 603 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 604 col --; 605 #else 606 col = aij->colmap[idxn[j]] - 1; 607 #endif 608 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 609 else { 610 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } 612 } 613 } 614 } else { 615 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 616 } 617 } 618 PetscFunctionReturn(0); 619 } 620 621 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 625 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 626 { 627 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 628 PetscErrorCode ierr; 629 PetscInt nstash,reallocs; 630 InsertMode addv; 631 632 
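  /* Start communicating any stashed entries that belong to other processes; the values are
     received and inserted into the matrix in MatAssemblyEnd_MPIAIJ(). */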
PetscFunctionBegin; 633 if (aij->donotstash || mat->nooffprocentries) { 634 PetscFunctionReturn(0); 635 } 636 637 /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */ 638 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 639 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 640 mat->insertmode = addv; /* in case this processor had no cache */ 641 642 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 643 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 644 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNCT__ 649 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 650 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 651 { 652 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 653 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 654 PetscErrorCode ierr; 655 PetscMPIInt n; 656 PetscInt i,j,rstart,ncols,flg; 657 PetscInt *row,*col; 658 PetscBool other_disassembled; 659 PetscScalar *val; 660 InsertMode addv = mat->insertmode; 661 662 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 663 PetscFunctionBegin; 664 if (!aij->donotstash && !mat->nooffprocentries) { 665 while (1) { 666 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 667 if (!flg) break; 668 669 for (i=0; i<n;) { 670 /* Now identify the consecutive values belonging to the same row */ 671 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 672 if (j < n) ncols = j-i; 673 else ncols = n-i; 674 /* Now assemble all these values with a single function call */ 675 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 676 i = j; 677 } 678 } 679 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 680 } 681 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 682 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 683 684 /* determine if any process has disassembled; if so, we must 685 also disassemble ourselves, in order that we may reassemble.
*/ 686 /* 687 if nonzero structure of submatrix B cannot change then we know that 688 no processor disassembled thus we can skip this stuff 689 */ 690 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 691 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 692 if (mat->was_assembled && !other_disassembled) { 693 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 694 } 695 } 696 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 697 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 698 } 699 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 700 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 701 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 702 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 703 704 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 705 aij->rowvalues = 0; 706 707 /* used by MatAXPY() */ 708 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 709 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 710 711 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 712 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 718 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 719 { 720 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 721 PetscErrorCode ierr; 722 723 PetscFunctionBegin; 724 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 725 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroRows_MPIAIJ" 731 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 736 PetscInt i,*owners = A->rmap->range; 737 PetscInt *nprocs,j,idx,nsends,row; 738 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 739 PetscInt *rvalues,count,base,slen,*source; 740 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 741 MPI_Comm comm = ((PetscObject)A)->comm; 742 MPI_Request *send_waits,*recv_waits; 743 MPI_Status recv_status,*send_status; 744 const PetscScalar *xx; 745 PetscScalar *bb; 746 #if defined(PETSC_DEBUG) 747 PetscBool found = PETSC_FALSE; 748 #endif 749 750 PetscFunctionBegin; 751 /* first count number of contributors to each processor */ 752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 755 j = 0; 756 for (i=0; i<N; i++) { 757 if (lastidx > (idx = rows[i])) j = 0; 758 lastidx = idx; 759 for (; j<size; j++) { 760 if (idx >= owners[j] && idx < owners[j+1]) { 761 nprocs[2*j]++; 762 nprocs[2*j+1] = 1; 763 owner[i] = j; 764 #if defined(PETSC_DEBUG) 765 found = PETSC_TRUE; 766 #endif 767 break; 768 } 769 } 770 #if defined(PETSC_DEBUG) 771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 772 found = PETSC_FALSE; 773 #endif 774 } 775 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 776 777 if (A->nooffproczerorows) { 778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 779 nrecvs = nsends; 780 nmax = N; 781 } 
else { 782 /* inform other processors of number of messages and max length*/ 783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 784 } 785 786 /* post receives: */ 787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 789 for (i=0; i<nrecvs; i++) { 790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 791 } 792 793 /* do sends: 794 1) starts[i] gives the starting index in svalues for stuff going to 795 the ith processor 796 */ 797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 799 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 800 starts[0] = 0; 801 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 802 for (i=0; i<N; i++) { 803 svalues[starts[owner[i]]++] = rows[i]; 804 } 805 806 starts[0] = 0; 807 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 808 count = 0; 809 for (i=0; i<size; i++) { 810 if (nprocs[2*i+1]) { 811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 812 } 813 } 814 ierr = PetscFree(starts);CHKERRQ(ierr); 815 816 base = owners[rank]; 817 818 /* wait on receives */ 819 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 820 count = nrecvs; slen = 0; 821 while (count) { 822 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 823 /* unpack receives into our local space */ 824 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 825 source[imdex] = recv_status.MPI_SOURCE; 826 lens[imdex] = n; 827 slen += n; 828 count--; 829 } 830 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 831 832 /* move the data into the send scatter */ 833 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 834 count = 0; 835 for (i=0; i<nrecvs; i++) { 836 values = rvalues + i*nmax; 837 for (j=0; j<lens[i]; j++) { 838 lrows[count++] = values[j] - base; 839 } 840 } 841 ierr = PetscFree(rvalues);CHKERRQ(ierr); 842 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 843 ierr = PetscFree(owner);CHKERRQ(ierr); 844 ierr = PetscFree(nprocs);CHKERRQ(ierr); 845 846 /* fix right hand side if needed */ 847 if (x && b) { 848 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 849 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 850 for (i=0; i<slen; i++) { 851 bb[lrows[i]] = diag*xx[lrows[i]]; 852 } 853 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 854 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 855 } 856 /* 857 Zero the required rows. If the "diagonal block" of the matrix 858 is square and the user wishes to set the diagonal we use separate 859 code so that MatSetValues() is not called for each diagonal allocating 860 new memory, thus calling lots of mallocs and slowing things down. 
861 862 */ 863 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 864 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 865 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 866 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 867 } else if (diag != 0.0) { 868 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 869 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 870 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 871 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 872 } 873 for (i = 0; i < slen; i++) { 874 row = lrows[i] + rstart; 875 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 876 } 877 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 878 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 879 } else { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 } 882 ierr = PetscFree(lrows);CHKERRQ(ierr); 883 884 /* wait on sends */ 885 if (nsends) { 886 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 887 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 888 ierr = PetscFree(send_status);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(send_waits);CHKERRQ(ierr); 891 ierr = PetscFree(svalues);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 897 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 898 { 899 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 900 PetscErrorCode ierr; 901 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 902 PetscInt i,*owners = A->rmap->range; 903 PetscInt *nprocs,j,idx,nsends; 904 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 905 PetscInt *rvalues,count,base,slen,*source; 906 PetscInt *lens,*lrows,*values,m; 907 MPI_Comm comm = ((PetscObject)A)->comm; 908 MPI_Request *send_waits,*recv_waits; 909 MPI_Status recv_status,*send_status; 910 const PetscScalar *xx; 911 PetscScalar *bb,*mask; 912 Vec xmask,lmask; 913 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 914 const PetscInt *aj, *ii,*ridx; 915 PetscScalar *aa; 916 #if defined(PETSC_DEBUG) 917 PetscBool found = PETSC_FALSE; 918 #endif 919 920 PetscFunctionBegin; 921 /* first count number of contributors to each processor */ 922 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 923 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 924 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 925 j = 0; 926 for (i=0; i<N; i++) { 927 if (lastidx > (idx = rows[i])) j = 0; 928 lastidx = idx; 929 for (; j<size; j++) { 930 if (idx >= owners[j] && idx < owners[j+1]) { 931 nprocs[2*j]++; 932 nprocs[2*j+1] = 1; 933 owner[i] = j; 934 #if defined(PETSC_DEBUG) 935 found = PETSC_TRUE; 936 #endif 937 break; 938 } 939 } 940 #if defined(PETSC_DEBUG) 941 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 942 found = PETSC_FALSE; 943 #endif 944 } 945 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 946 947 /* inform other processors of number of messages and max length*/ 948 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 949 950 /* post receives: */ 951 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 952 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 953 for (i=0; i<nrecvs; i++) { 954 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 955 } 956 957 /* do sends: 958 1) starts[i] gives the starting index in svalues for stuff going to 959 the ith processor 960 */ 961 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 963 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 964 starts[0] = 0; 965 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 966 for (i=0; i<N; i++) { 967 svalues[starts[owner[i]]++] = rows[i]; 968 } 969 970 starts[0] = 0; 971 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 972 count = 0; 973 for (i=0; i<size; i++) { 974 if (nprocs[2*i+1]) { 975 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 976 } 977 } 978 ierr = PetscFree(starts);CHKERRQ(ierr); 979 980 base = owners[rank]; 981 982 /* wait on receives */ 983 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 984 count = nrecvs; slen = 0; 985 while (count) { 986 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 987 /* unpack receives into our local space */ 988 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 989 source[imdex] = recv_status.MPI_SOURCE; 990 lens[imdex] = n; 991 slen += n; 992 count--; 993 } 994 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 995 996 /* move the data into the send scatter */ 997 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 998 count = 0; 999 for (i=0; i<nrecvs; i++) { 1000 values = rvalues + i*nmax; 1001 for (j=0; j<lens[i]; j++) { 1002 lrows[count++] = values[j] - base; 1003 } 1004 } 1005 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1006 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1007 ierr = PetscFree(owner);CHKERRQ(ierr); 1008 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1009 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1010 1011 /* zero diagonal part of matrix */ 1012 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1013 1014 /* handle off diagonal part of matrix */ 1015 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1016 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1017 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1018 for (i=0; i<slen; i++) { 1019 bb[lrows[i]] = 1; 1020 } 1021 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1022 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1025 if (x) { 1026 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1029 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1030 } 1031 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1032 1033 /* remove zeroed rows of off diagonal matrix */ 1034 ii = aij->i; 1035 for (i=0; i<slen; i++) { 1036 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1037 } 1038 1039 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1040 if (aij->compressedrow.use){ 1041 m = 
aij->compressedrow.nrows; 1042 ii = aij->compressedrow.i; 1043 ridx = aij->compressedrow.rindex; 1044 for (i=0; i<m; i++){ 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 1049 for (j=0; j<n; j++) { 1050 if (PetscAbsScalar(mask[*aj])) { 1051 if (b) bb[*ridx] -= *aa*xx[*aj]; 1052 *aa = 0.0; 1053 } 1054 aa++; 1055 aj++; 1056 } 1057 ridx++; 1058 } 1059 } else { /* do not use compressed row format */ 1060 m = l->B->rmap->n; 1061 for (i=0; i<m; i++) { 1062 n = ii[i+1] - ii[i]; 1063 aj = aij->j + ii[i]; 1064 aa = aij->a + ii[i]; 1065 for (j=0; j<n; j++) { 1066 if (PetscAbsScalar(mask[*aj])) { 1067 if (b) bb[i] -= *aa*xx[*aj]; 1068 *aa = 0.0; 1069 } 1070 aa++; 1071 aj++; 1072 } 1073 } 1074 } 1075 if (x) { 1076 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1077 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1078 } 1079 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1080 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1081 ierr = PetscFree(lrows);CHKERRQ(ierr); 1082 1083 /* wait on sends */ 1084 if (nsends) { 1085 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1086 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1087 ierr = PetscFree(send_status);CHKERRQ(ierr); 1088 } 1089 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1090 ierr = PetscFree(svalues);CHKERRQ(ierr); 1091 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatMult_MPIAIJ" 1097 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 PetscErrorCode ierr; 1101 PetscInt nt; 1102 1103 PetscFunctionBegin; 1104 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1105 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1106 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1107 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1108 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1109 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1115 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1116 { 1117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 1120 PetscFunctionBegin; 1121 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1127 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1128 { 1129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1130 PetscErrorCode ierr; 1131 1132 PetscFunctionBegin; 1133 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1135 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNCT__ 1141 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1142 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1145 PetscErrorCode ierr; 1146 PetscBool merged; 1147 1148 PetscFunctionBegin; 1149 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1150 /* do nondiagonal part */ 1151 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1152 if (!merged) { 1153 /* send it on its way */ 1154 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 /* do local part */ 1156 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1157 /* receive remote parts: note this assumes the values are not actually */ 1158 /* added in yy until the next line, */ 1159 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 } else { 1161 /* do local part */ 1162 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1163 /* send it on its way */ 1164 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 /* values actually were received in the Begin() but we need to call this nop */ 1166 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 EXTERN_C_BEGIN 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1174 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1175 { 1176 MPI_Comm comm; 1177 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1178 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1179 IS Me,Notme; 1180 PetscErrorCode ierr; 1181 PetscInt M,N,first,last,*notme,i; 1182 PetscMPIInt size; 1183 1184 PetscFunctionBegin; 1185 1186 /* Easy test: symmetric diagonal block */ 1187 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1188 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1189 if (!*f) PetscFunctionReturn(0); 1190 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1191 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1192 if (size == 1) PetscFunctionReturn(0); 1193 1194 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
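     We extract the block of A with the local rows and all non-local columns and the block of
     B with the non-local rows and the local columns, and test whether one block is the
     transpose of the other.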
*/ 1195 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1196 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1197 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1198 for (i=0; i<first; i++) notme[i] = i; 1199 for (i=last; i<M; i++) notme[i-last+first] = i; 1200 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1201 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1202 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1203 Aoff = Aoffs[0]; 1204 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1205 Boff = Boffs[0]; 1206 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1207 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1208 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1209 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1210 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1211 ierr = PetscFree(notme);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 EXTERN_C_END 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1219 { 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 /* do nondiagonal part */ 1225 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1226 /* send it on its way */ 1227 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 /* do local part */ 1229 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1230 /* receive remote parts */ 1231 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 /* 1236 This only works correctly for square matrices where the subblock A->A is the 1237 diagonal block 1238 */ 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1242 { 1243 PetscErrorCode ierr; 1244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1245 1246 PetscFunctionBegin; 1247 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1248 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1249 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatScale_MPIAIJ" 1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1256 { 1257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1262 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_MPIAIJ" 1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1276 #endif 1277 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1278 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1279 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1280 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1281 #if defined (PETSC_USE_CTABLE) 1282 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
1283 #else 1284 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1285 #endif 1286 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1287 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1288 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1289 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1290 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1291 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1292 1293 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1311 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1312 PetscErrorCode ierr; 1313 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1314 int fd; 1315 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1316 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1317 PetscScalar *column_values; 1318 PetscInt message_count,flowcontrolcount; 1319 1320 PetscFunctionBegin; 1321 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1322 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1323 nz = A->nz + B->nz; 1324 if (!rank) { 1325 header[0] = MAT_FILE_CLASSID; 1326 header[1] = mat->rmap->N; 1327 header[2] = mat->cmap->N; 1328 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1329 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1330 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1331 /* get largest number of rows any processor has */ 1332 rlen = mat->rmap->n; 1333 range = mat->rmap->range; 1334 for (i=1; i<size; i++) { 1335 rlen = PetscMax(rlen,range[i+1] - range[i]); 1336 } 1337 } else { 1338 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 rlen = mat->rmap->n; 1340 } 1341 1342 /* load up the local row counts */ 1343 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1344 for (i=0; i<mat->rmap->n; i++) { 1345 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1346 } 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* the 0th process needs as much space as the largest process needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ( (col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) { 1377 column_indices[cnt++] = A->j[k] + cstart; 1378 } 1379 for (; j<B->i[i+1]; j++) { 1380 column_indices[cnt++] = garray[B->j[j]]; 1381 } 1382 } 1383 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1384 1385 /* store the column indices to the file */ 1386 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1387 if (!rank) { 1388 MPI_Status status; 1389 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1390 for (i=1; i<size; i++) { 1391 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1392 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1393 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1394 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1395 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1396 } 1397 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1398 } else { 1399 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1400 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1401 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1402 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1403 } 1404 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1405 1406 /* load up the local column values */ 1407 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1408 cnt = 0; 1409 for (i=0; i<mat->rmap->n; i++) { 1410 for (j=B->i[i]; j<B->i[i+1]; j++) { 1411 if ( garray[B->j[j]] > cstart) break; 1412 column_values[cnt++] = B->a[j]; 1413 } 1414 for (k=A->i[i]; k<A->i[i+1]; k++) { 1415 column_values[cnt++] = A->a[k]; 1416 } 1417 for (; j<B->i[i+1]; j++) { 1418 column_values[cnt++] = B->a[j]; 1419 } 1420 } 1421 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1422 1423 /* store the column values to the file */ 1424 ierr =
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1425 if (!rank) { 1426 MPI_Status status; 1427 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1428 for (i=1; i<size; i++) { 1429 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1430 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1431 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1432 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1433 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1436 } else { 1437 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1438 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1439 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1440 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1441 } 1442 ierr = PetscFree(column_values);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1448 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1449 { 1450 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1451 PetscErrorCode ierr; 1452 PetscMPIInt rank = aij->rank,size = aij->size; 1453 PetscBool isdraw,iascii,isbinary; 1454 PetscViewer sviewer; 1455 PetscViewerFormat format; 1456 1457 PetscFunctionBegin; 1458 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1461 if (iascii) { 1462 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1463 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1464 MatInfo info; 1465 PetscBool inodes; 1466 1467 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1468 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1471 if (!inodes) { 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1473 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1474 } else { 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1476 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1477 } 1478 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1480 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1482 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1483 ierr = 
PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1484 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1485 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1488 PetscInt inodecount,inodelimit,*inodes; 1489 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1490 if (inodes) { 1491 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1492 } else { 1493 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1494 } 1495 PetscFunctionReturn(0); 1496 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1497 PetscFunctionReturn(0); 1498 } 1499 } else if (isbinary) { 1500 if (size == 1) { 1501 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1502 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } else if (isdraw) { 1508 PetscDraw draw; 1509 PetscBool isnull; 1510 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1511 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1512 } 1513 1514 if (size == 1) { 1515 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1516 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1517 } else { 1518 /* assemble the entire matrix onto first processor. */ 1519 Mat A; 1520 Mat_SeqAIJ *Aloc; 1521 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1522 MatScalar *a; 1523 1524 if (mat->rmap->N > 1024) { 1525 PetscBool flg = PETSC_FALSE; 1526 1527 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1528 if (!flg) { 1529 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1530 } 1531 } 1532 1533 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1534 if (!rank) { 1535 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1536 } else { 1537 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1538 } 1539 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1540 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1541 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1542 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1543 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1544 1545 /* copy over the A part */ 1546 Aloc = (Mat_SeqAIJ*)aij->A->data; 1547 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1548 row = mat->rmap->rstart; 1549 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1550 for (i=0; i<m; i++) { 1551 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1552 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1553 } 1554 aj = Aloc->j; 1555 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1556 1557 /* copy over the B part */ 1558 Aloc = (Mat_SeqAIJ*)aij->B->data; 1559 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 ierr = 
PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1562 ct = cols; 1563 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1564 for (i=0; i<m; i++) { 1565 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1566 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1567 } 1568 ierr = PetscFree(ct);CHKERRQ(ierr); 1569 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1570 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1571 /* 1572 Everyone has to call to draw the matrix since the graphics waits are 1573 synchronized across all processors that share the PetscDraw object 1574 */ 1575 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1576 if (!rank) { 1577 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1578 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1579 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1580 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1581 } 1582 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1583 ierr = MatDestroy(&A);CHKERRQ(ierr); 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatView_MPIAIJ" 1590 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1591 { 1592 PetscErrorCode ierr; 1593 PetscBool iascii,isdraw,issocket,isbinary; 1594 1595 PetscFunctionBegin; 1596 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1597 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1598 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1599 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1600 if (iascii || isdraw || isbinary || issocket) { 1601 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1602 } else { 1603 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1604 } 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatSOR_MPIAIJ" 1610 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1611 { 1612 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1613 PetscErrorCode ierr; 1614 Vec bb1 = 0; 1615 PetscBool hasop; 1616 1617 PetscFunctionBegin; 1618 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1619 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1620 } 1621 1622 if (flag == SOR_APPLY_UPPER) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 PetscFunctionReturn(0); 1625 } 1626 1627 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 1633 while (its--) { 1634 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 1637 /* update rhs: bb1 = bb - B*x */ 1638 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1639 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1640 1641 /* local sweep */ 
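/* Note: each outer iteration first scatters the current global solution into mat->lvec, then moves the off-diagonal coupling onto the right-hand side (bb1 = bb - B*x), so the sequential SOR kernel below only sweeps the diagonal block mat->A. */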
1642 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1643 } 1644 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1645 if (flag & SOR_ZERO_INITIAL_GUESS) { 1646 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1647 its--; 1648 } 1649 while (its--) { 1650 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 1653 /* update rhs: bb1 = bb - B*x */ 1654 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1655 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1656 1657 /* local sweep */ 1658 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1659 } 1660 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1661 if (flag & SOR_ZERO_INITIAL_GUESS) { 1662 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1663 its--; 1664 } 1665 while (its--) { 1666 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 1669 /* update rhs: bb1 = bb - B*x */ 1670 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1671 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1672 1673 /* local sweep */ 1674 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1675 } 1676 } else if (flag & SOR_EISENSTAT) { 1677 Vec xx1; 1678 1679 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1680 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1681 1682 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1683 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1684 if (!mat->diag) { 1685 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1686 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1687 } 1688 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1689 if (hasop) { 1690 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1691 } else { 1692 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1693 } 1694 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1695 1696 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1697 1698 /* local sweep */ 1699 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1700 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1701 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1702 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1703 1704 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatPermute_MPIAIJ" 1710 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1711 { 1712 MPI_Comm comm; 1713 PetscInt first,local_rowsize,local_colsize; 1714 const PetscInt *rows; 1715 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1720 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1721 ierr = 
ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1722 /* collect the global row permutation and invert it */ 1723 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1724 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1725 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1726 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1727 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1728 /* get the local target indices */ 1729 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1730 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1731 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1732 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1733 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1734 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1735 /* the column permutation is so much easier; 1736 make a local version of 'colp' and invert it */ 1737 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1738 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1739 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1740 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1741 /* now we just get the submatrix */ 1742 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1743 /* clean up */ 1744 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1745 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1751 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 Mat A = mat->A,B = mat->B; 1755 PetscErrorCode ierr; 1756 PetscReal isend[5],irecv[5]; 1757 1758 PetscFunctionBegin; 1759 info->block_size = 1.0; 1760 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1761 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1762 isend[3] = info->memory; isend[4] = info->mallocs; 1763 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1764 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1765 isend[3] += info->memory; isend[4] += info->mallocs; 1766 if (flag == MAT_LOCAL) { 1767 info->nz_used = isend[0]; 1768 info->nz_allocated = isend[1]; 1769 info->nz_unneeded = isend[2]; 1770 info->memory = isend[3]; 1771 info->mallocs = isend[4]; 1772 } else if (flag == MAT_GLOBAL_MAX) { 1773 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1781 info->nz_used = irecv[0]; 1782 info->nz_allocated = irecv[1]; 1783 info->nz_unneeded = irecv[2]; 1784 info->memory = irecv[3]; 1785 info->mallocs = irecv[4]; 1786 } 1787 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1788 info->fill_ratio_needed = 0; 1789 info->factor_mallocs = 0; 1790 1791 PetscFunctionReturn(0); 1792 } 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatSetOption_MPIAIJ" 1796 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1797 { 1798 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1799 PetscErrorCode ierr; 1800 1801 PetscFunctionBegin; 1802 switch (op) { 1803 case MAT_NEW_NONZERO_LOCATIONS: 
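  /* the options in this group are forwarded unchanged to both the diagonal (a->A) and off-diagonal (a->B) sequential blocks below */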
1804 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1805 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1806 case MAT_KEEP_NONZERO_PATTERN: 1807 case MAT_NEW_NONZERO_LOCATION_ERR: 1808 case MAT_USE_INODES: 1809 case MAT_IGNORE_ZERO_ENTRIES: 1810 MatCheckPreallocated(A,1); 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_ROW_ORIENTED: 1815 a->roworiented = flg; 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 case MAT_SPD: 1826 A->spd_set = PETSC_TRUE; 1827 A->spd = flg; 1828 if (flg) { 1829 A->symmetric = PETSC_TRUE; 1830 A->structurally_symmetric = PETSC_TRUE; 1831 A->symmetric_set = PETSC_TRUE; 1832 A->structurally_symmetric_set = PETSC_TRUE; 1833 } 1834 break; 1835 case MAT_SYMMETRIC: 1836 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1837 break; 1838 case MAT_STRUCTURALLY_SYMMETRIC: 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_HERMITIAN: 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 case MAT_SYMMETRY_ETERNAL: 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 default: 1848 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1849 } 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatGetRow_MPIAIJ" 1855 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1856 { 1857 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1858 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1859 PetscErrorCode ierr; 1860 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1861 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1862 PetscInt *cmap,*idx_p; 1863 1864 PetscFunctionBegin; 1865 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1866 mat->getrowactive = PETSC_TRUE; 1867 1868 if (!mat->rowvalues && (idx || v)) { 1869 /* 1870 allocate enough space to hold information from the longest row. 
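       The scratch arrays mat->rowvalues and mat->rowindices are sized to the largest combined row length over the diagonal (A) and off-diagonal (B) blocks, so a single allocation serves every subsequent MatGetRow() call on this matrix.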
1871 */ 1872 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1873 PetscInt max = 1,tmp; 1874 for (i=0; i<matin->rmap->n; i++) { 1875 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1876 if (max < tmp) { max = tmp; } 1877 } 1878 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1879 } 1880 1881 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1882 lrow = row - rstart; 1883 1884 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1885 if (!v) {pvA = 0; pvB = 0;} 1886 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1887 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1888 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1889 nztot = nzA + nzB; 1890 1891 cmap = mat->garray; 1892 if (v || idx) { 1893 if (nztot) { 1894 /* Sort by increasing column numbers, assuming A and B already sorted */ 1895 PetscInt imark = -1; 1896 if (v) { 1897 *v = v_p = mat->rowvalues; 1898 for (i=0; i<nzB; i++) { 1899 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1900 else break; 1901 } 1902 imark = i; 1903 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1904 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1905 } 1906 if (idx) { 1907 *idx = idx_p = mat->rowindices; 1908 if (imark > -1) { 1909 for (i=0; i<imark; i++) { 1910 idx_p[i] = cmap[cworkB[i]]; 1911 } 1912 } else { 1913 for (i=0; i<nzB; i++) { 1914 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1915 else break; 1916 } 1917 imark = i; 1918 } 1919 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1920 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1921 } 1922 } else { 1923 if (idx) *idx = 0; 1924 if (v) *v = 0; 1925 } 1926 } 1927 *nz = nztot; 1928 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1929 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1936 { 1937 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1938 1939 PetscFunctionBegin; 1940 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1941 aij->getrowactive = PETSC_FALSE; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatNorm_MPIAIJ" 1947 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1948 { 1949 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1950 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1951 PetscErrorCode ierr; 1952 PetscInt i,j,cstart = mat->cmap->rstart; 1953 PetscReal sum = 0.0; 1954 MatScalar *v; 1955 1956 PetscFunctionBegin; 1957 if (aij->size == 1) { 1958 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1959 } else { 1960 if (type == NORM_FROBENIUS) { 1961 v = amat->a; 1962 for (i=0; i<amat->nz; i++) { 1963 #if defined(PETSC_USE_COMPLEX) 1964 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1965 #else 1966 sum += (*v)*(*v); v++; 1967 #endif 1968 } 1969 v = bmat->a; 1970 for (i=0; i<bmat->nz; i++) { 1971 #if defined(PETSC_USE_COMPLEX) 1972 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1973 #else 1974 sum += (*v)*(*v); v++; 1975 #endif 1976 } 1977 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1978 *norm = PetscSqrtReal(*norm); 1979 } else 
if (type == NORM_1) { /* max column norm */ 1980 PetscReal *tmp,*tmp2; 1981 PetscInt *jj,*garray = aij->garray; 1982 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1983 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1984 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1985 *norm = 0.0; 1986 v = amat->a; jj = amat->j; 1987 for (j=0; j<amat->nz; j++) { 1988 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a; jj = bmat->j; 1991 for (j=0; j<bmat->nz; j++) { 1992 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1993 } 1994 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1995 for (j=0; j<mat->cmap->N; j++) { 1996 if (tmp2[j] > *norm) *norm = tmp2[j]; 1997 } 1998 ierr = PetscFree(tmp);CHKERRQ(ierr); 1999 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2000 } else if (type == NORM_INFINITY) { /* max row norm */ 2001 PetscReal ntemp = 0.0; 2002 for (j=0; j<aij->A->rmap->n; j++) { 2003 v = amat->a + amat->i[j]; 2004 sum = 0.0; 2005 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2006 sum += PetscAbsScalar(*v); v++; 2007 } 2008 v = bmat->a + bmat->i[j]; 2009 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2010 sum += PetscAbsScalar(*v); v++; 2011 } 2012 if (sum > ntemp) ntemp = sum; 2013 } 2014 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2015 } else { 2016 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2017 } 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatTranspose_MPIAIJ" 2024 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2025 { 2026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2027 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2028 PetscErrorCode ierr; 2029 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2030 PetscInt cstart=A->cmap->rstart,ncol; 2031 Mat B; 2032 MatScalar *array; 2033 2034 PetscFunctionBegin; 2035 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2036 2037 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2038 ai = Aloc->i; aj = Aloc->j; 2039 bi = Bloc->i; bj = Bloc->j; 2040 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2041 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2042 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2043 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2044 for (i=0; i<ai[ma]; i++){ 2045 d_nnz[aj[i]] ++; 2046 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2047 } 2048 2049 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2050 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2051 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2052 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2053 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2054 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2055 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2056 } else { 2057 B = *matout; 2058 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2059 for (i=0; i<ai[ma]; i++){ 2060 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2061 } 2062 } 2063 2064 /* copy over the A part */ 2065 
array = Aloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<ma; i++) { 2068 ncol = ai[i+1]-ai[i]; 2069 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2070 row++; array += ncol; aj += ncol; 2071 } 2072 aj = Aloc->j; 2073 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2074 2075 /* copy over the B part */ 2076 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2077 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2078 array = Bloc->a; 2079 row = A->rmap->rstart; 2080 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2081 cols_tmp = cols; 2082 for (i=0; i<mb; i++) { 2083 ncol = bi[i+1]-bi[i]; 2084 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2085 row++; array += ncol; cols_tmp += ncol; 2086 } 2087 ierr = PetscFree(cols);CHKERRQ(ierr); 2088 2089 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2090 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2091 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2092 *matout = B; 2093 } else { 2094 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2102 { 2103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2104 Mat a = aij->A,b = aij->B; 2105 PetscErrorCode ierr; 2106 PetscInt s1,s2,s3; 2107 2108 PetscFunctionBegin; 2109 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2110 if (rr) { 2111 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2112 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2113 /* Overlap communication with computation. */ 2114 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 } 2116 if (ll) { 2117 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2118 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2119 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2120 } 2121 /* scale the diagonal block */ 2122 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2123 2124 if (rr) { 2125 /* Do a scatter end and then right scale the off-diagonal block */ 2126 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2127 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2128 } 2129 2130 PetscFunctionReturn(0); 2131 } 2132 2133 #undef __FUNCT__ 2134 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatEqual_MPIAIJ" 2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2148 { 2149 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2150 Mat a,b,c,d; 2151 PetscBool flg; 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 a = matA->A; b = matA->B; 2156 c = matB->A; d = matB->B; 2157 2158 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2159 if (flg) { 2160 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2161 } 2162 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatCopy_MPIAIJ" 2168 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat 
B,MatStructure str) 2169 { 2170 PetscErrorCode ierr; 2171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2172 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2173 2174 PetscFunctionBegin; 2175 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2176 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2177 /* because of the column compression in the off-processor part of the matrix a->B, 2178 the number of columns in a->B and b->B may be different, hence we cannot call 2179 the MatCopy() directly on the two parts. If need be, we can provide a more 2180 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2181 then copying the submatrices */ 2182 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2183 } else { 2184 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2185 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2186 } 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatSetUp_MPIAIJ" 2192 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2193 { 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2203 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2204 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2205 { 2206 PetscInt i,m=Y->rmap->N; 2207 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2208 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2209 const PetscInt *xi = x->i,*yi = y->i; 2210 2211 PetscFunctionBegin; 2212 /* Set the number of nonzeros in the new matrix */ 2213 for(i=0; i<m; i++) { 2214 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2215 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2216 nnz[i] = 0; 2217 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2218 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2219 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2220 nnz[i]++; 2221 } 2222 for (; k<nzy; k++) nnz[i]++; 2223 } 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatAXPY_MPIAIJ" 2229 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2230 { 2231 PetscErrorCode ierr; 2232 PetscInt i; 2233 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2234 PetscBLASInt bnz,one=1; 2235 Mat_SeqAIJ *x,*y; 2236 2237 PetscFunctionBegin; 2238 if (str == SAME_NONZERO_PATTERN) { 2239 PetscScalar alpha = a; 2240 x = (Mat_SeqAIJ *)xx->A->data; 2241 y = (Mat_SeqAIJ *)yy->A->data; 2242 bnz = PetscBLASIntCast(x->nz); 2243 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2244 x = (Mat_SeqAIJ *)xx->B->data; 2245 y = (Mat_SeqAIJ *)yy->B->data; 2246 bnz = PetscBLASIntCast(x->nz); 2247 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2248 } else if (str == SUBSET_NONZERO_PATTERN) { 2249 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2250 2251 x = (Mat_SeqAIJ *)xx->B->data; 2252 y = (Mat_SeqAIJ *)yy->B->data; 2253 if (y->xtoy && y->XtoY != xx->B) { 2254 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2255 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2256 } 2257 if (!y->xtoy) { /* get xtoy */ 2258 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2259 y->XtoY = xx->B; 2260 ierr = 
PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2261 } 2262 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2263 } else { 2264 Mat B; 2265 PetscInt *nnz_d,*nnz_o; 2266 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2267 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2268 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2269 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2270 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2271 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2272 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2273 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2274 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2275 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2276 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2277 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2278 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2279 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2280 } 2281 PetscFunctionReturn(0); 2282 } 2283 2284 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2285 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatConjugate_MPIAIJ" 2288 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2289 { 2290 #if defined(PETSC_USE_COMPLEX) 2291 PetscErrorCode ierr; 2292 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2293 2294 PetscFunctionBegin; 2295 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2296 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2297 #else 2298 PetscFunctionBegin; 2299 #endif 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatRealPart_MPIAIJ" 2305 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2306 { 2307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2312 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2318 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2319 { 2320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2325 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #ifdef PETSC_HAVE_PBGL 2330 2331 #include <boost/parallel/mpi/bsp_process_group.hpp> 2332 #include <boost/graph/distributed/ilu_default_graph.hpp> 2333 #include <boost/graph/distributed/ilu_0_block.hpp> 2334 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2335 #include <boost/graph/distributed/petsc/interface.hpp> 2336 #include <boost/multi_array.hpp> 2337 #include <boost/parallel/distributed_property_map.hpp> 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2341 /* 2342 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2343 */ 2344 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2345 { 2346 namespace petsc = boost::distributed::petsc; 2347 2348 namespace graph_dist = boost::graph::distributed; 2349 using boost::graph::distributed::ilu_default::process_group_type; 2350 using boost::graph::ilu_permuted; 2351 2352 PetscBool row_identity, col_identity; 2353 PetscContainer c; 2354 PetscInt m, n, M, N; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 if (info->levels !=
0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2359 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2360 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2361 if (!row_identity || !col_identity) { 2362 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2363 } 2364 2365 process_group_type pg; 2366 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2367 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2368 lgraph_type& level_graph = *lgraph_p; 2369 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2370 2371 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2372 ilu_permuted(level_graph); 2373 2374 /* put together the new matrix */ 2375 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2376 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2377 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2378 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2379 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2380 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2381 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2382 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383 2384 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2385 ierr = PetscContainerSetPointer(c, lgraph_p); 2386 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2387 ierr = PetscContainerDestroy(&c); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2393 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2394 { 2395 PetscFunctionBegin; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatSolve_MPIAIJ" 2401 /* 2402 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2403 */ 2404 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2405 { 2406 namespace graph_dist = boost::graph::distributed; 2407 2408 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2409 lgraph_type* lgraph_p; 2410 PetscContainer c; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2415 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2416 ierr = VecCopy(b, x);CHKERRQ(ierr); 2417 2418 PetscScalar* array_x; 2419 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2420 PetscInt sx; 2421 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2422 2423 PetscScalar* array_b; 2424 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2425 PetscInt sb; 2426 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2427 2428 lgraph_type& level_graph = *lgraph_p; 2429 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2430 2431 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2432 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2433 ref_x(array_x, boost::extents[num_vertices(graph)]); 2434 2435 typedef boost::iterator_property_map<array_ref_type::iterator, 2436 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2437 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2438 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2439 2440 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2441 2442 
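  /* the Parallel BGL solve reads the right-hand side through vector_b and writes the solution through vector_x, which wraps the raw array of x obtained with VecGetArray() above */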
PetscFunctionReturn(0); 2443 } 2444 #endif 2445 2446 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2447 PetscInt nzlocal,nsends,nrecvs; 2448 PetscMPIInt *send_rank,*recv_rank; 2449 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2450 PetscScalar *sbuf_a,**rbuf_a; 2451 PetscErrorCode (*Destroy)(Mat); 2452 } Mat_Redundant; 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2456 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2457 { 2458 PetscErrorCode ierr; 2459 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2460 PetscInt i; 2461 2462 PetscFunctionBegin; 2463 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2464 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2465 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2466 for (i=0; i<redund->nrecvs; i++){ 2467 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2468 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2469 } 2470 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2471 ierr = PetscFree(redund);CHKERRQ(ierr); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatDestroy_MatRedundant" 2477 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2478 { 2479 PetscErrorCode ierr; 2480 PetscContainer container; 2481 Mat_Redundant *redund=PETSC_NULL; 2482 2483 PetscFunctionBegin; 2484 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2485 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2486 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2487 A->ops->destroy = redund->Destroy; 2488 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2489 if (A->ops->destroy) { 2490 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2491 } 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2497 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2498 { 2499 PetscMPIInt rank,size; 2500 MPI_Comm comm=((PetscObject)mat)->comm; 2501 PetscErrorCode ierr; 2502 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2503 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2504 PetscInt *rowrange=mat->rmap->range; 2505 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2506 Mat A=aij->A,B=aij->B,C=*matredundant; 2507 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2508 PetscScalar *sbuf_a; 2509 PetscInt nzlocal=a->nz+b->nz; 2510 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2511 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2512 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2513 MatScalar *aworkA,*aworkB; 2514 PetscScalar *vals; 2515 PetscMPIInt tag1,tag2,tag3,imdex; 2516 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2517 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2518 MPI_Status recv_status,*send_status; 2519 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2520 PetscInt **rbuf_j=PETSC_NULL; 2521 PetscScalar **rbuf_a=PETSC_NULL; 2522 Mat_Redundant *redund=PETSC_NULL; 2523 PetscContainer container; 2524 2525 PetscFunctionBegin; 2526 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2527 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2528 2529 if (reuse == MAT_REUSE_MATRIX) { 2530 ierr = 
MatGetSize(C,&M,&N);CHKERRQ(ierr); 2531 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2532 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2533 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2534 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2535 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2536 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2537 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2538 2539 nsends = redund->nsends; 2540 nrecvs = redund->nrecvs; 2541 send_rank = redund->send_rank; 2542 recv_rank = redund->recv_rank; 2543 sbuf_nz = redund->sbuf_nz; 2544 rbuf_nz = redund->rbuf_nz; 2545 sbuf_j = redund->sbuf_j; 2546 sbuf_a = redund->sbuf_a; 2547 rbuf_j = redund->rbuf_j; 2548 rbuf_a = redund->rbuf_a; 2549 } 2550 2551 if (reuse == MAT_INITIAL_MATRIX){ 2552 PetscMPIInt subrank,subsize; 2553 PetscInt nleftover,np_subcomm; 2554 /* get the destination processors' id send_rank, nsends and nrecvs */ 2555 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2556 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2557 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2558 np_subcomm = size/nsubcomm; 2559 nleftover = size - nsubcomm*np_subcomm; 2560 nsends = 0; nrecvs = 0; 2561 for (i=0; i<size; i++){ /* i=rank*/ 2562 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2563 send_rank[nsends] = i; nsends++; 2564 recv_rank[nrecvs++] = i; 2565 } 2566 } 2567 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2568 i = size-nleftover-1; 2569 j = 0; 2570 while (j < nsubcomm - nleftover){ 2571 send_rank[nsends++] = i; 2572 i--; j++; 2573 } 2574 } 2575 2576 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2577 for (i=0; i<nleftover; i++){ 2578 recv_rank[nrecvs++] = size-nleftover+i; 2579 } 2580 } 2581 2582 /* allocate sbuf_j, sbuf_a */ 2583 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2584 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2585 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2586 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2587 2588 /* copy mat's local entries into the buffers */ 2589 if (reuse == MAT_INITIAL_MATRIX){ 2590 rownz_max = 0; 2591 rptr = sbuf_j; 2592 cols = sbuf_j + rend-rstart + 1; 2593 vals = sbuf_a; 2594 rptr[0] = 0; 2595 for (i=0; i<rend-rstart; i++){ 2596 row = i + rstart; 2597 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2598 ncols = nzA + nzB; 2599 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2600 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2601 /* load the column indices for this row into cols */ 2602 lwrite = 0; 2603 for (l=0; l<nzB; l++) { 2604 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2605 vals[lwrite] = aworkB[l]; 2606 cols[lwrite++] = ctmp; 2607 } 2608 } 2609 for (l=0; l<nzA; l++){ 2610 vals[lwrite] = aworkA[l]; 2611 cols[lwrite++] = cstart + cworkA[l]; 2612 } 2613 for (l=0; l<nzB; l++) { 2614 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2615 vals[lwrite] = aworkB[l]; 2616 cols[lwrite++] = ctmp; 2617 } 2618 } 2619 vals += ncols; 2620 cols += ncols; 2621 rptr[i+1] = rptr[i] + ncols; 2622 if (rownz_max < ncols) rownz_max = ncols; 2623 } 2624 if 
(rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2625 } else { /* only copy matrix values into sbuf_a */ 2626 rptr = sbuf_j; 2627 vals = sbuf_a; 2628 rptr[0] = 0; 2629 for (i=0; i<rend-rstart; i++){ 2630 row = i + rstart; 2631 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2632 ncols = nzA + nzB; 2633 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2634 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2635 lwrite = 0; 2636 for (l=0; l<nzB; l++) { 2637 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2638 } 2639 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2640 for (l=0; l<nzB; l++) { 2641 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2642 } 2643 vals += ncols; 2644 rptr[i+1] = rptr[i] + ncols; 2645 } 2646 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2647 2648 /* send nzlocal to others, and recv other's nzlocal */ 2649 /*--------------------------------------------------*/ 2650 if (reuse == MAT_INITIAL_MATRIX){ 2651 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2652 s_waits2 = s_waits3 + nsends; 2653 s_waits1 = s_waits2 + nsends; 2654 r_waits1 = s_waits1 + nsends; 2655 r_waits2 = r_waits1 + nrecvs; 2656 r_waits3 = r_waits2 + nrecvs; 2657 } else { 2658 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2659 r_waits3 = s_waits3 + nsends; 2660 } 2661 2662 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2663 if (reuse == MAT_INITIAL_MATRIX){ 2664 /* get new tags to keep the communication clean */ 2665 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2666 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2667 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2668 2669 /* post receives of other's nzlocal */ 2670 for (i=0; i<nrecvs; i++){ 2671 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2672 } 2673 /* send nzlocal to others */ 2674 for (i=0; i<nsends; i++){ 2675 sbuf_nz[i] = nzlocal; 2676 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2679 count = nrecvs; 2680 while (count) { 2681 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2682 recv_rank[imdex] = recv_status.MPI_SOURCE; 2683 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2684 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2685 2686 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2687 rbuf_nz[imdex] += i + 2; 2688 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2689 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2690 count--; 2691 } 2692 /* wait on sends of nzlocal */ 2693 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2694 /* send mat->i,j to others, and recv from other's */ 2695 /*------------------------------------------------*/ 2696 for (i=0; i<nsends; i++){ 2697 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2698 ierr = 
MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2699 } 2700 /* wait on receives of mat->i,j */ 2701 /*------------------------------*/ 2702 count = nrecvs; 2703 while (count) { 2704 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2705 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2706 count--; 2707 } 2708 /* wait on sends of mat->i,j */ 2709 /*---------------------------*/ 2710 if (nsends) { 2711 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2712 } 2713 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2714 2715 /* post receives, send and receive mat->a */ 2716 /*----------------------------------------*/ 2717 for (imdex=0; imdex<nrecvs; imdex++) { 2718 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2719 } 2720 for (i=0; i<nsends; i++){ 2721 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2722 } 2723 count = nrecvs; 2724 while (count) { 2725 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2726 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2727 count--; 2728 } 2729 if (nsends) { 2730 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2731 } 2732 2733 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2734 2735 /* create redundant matrix */ 2736 /*-------------------------*/ 2737 if (reuse == MAT_INITIAL_MATRIX){ 2738 /* compute rownz_max for preallocation */ 2739 for (imdex=0; imdex<nrecvs; imdex++){ 2740 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2741 rptr = rbuf_j[imdex]; 2742 for (i=0; i<j; i++){ 2743 ncols = rptr[i+1] - rptr[i]; 2744 if (rownz_max < ncols) rownz_max = ncols; 2745 } 2746 } 2747 2748 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2749 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2750 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2751 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2752 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2753 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2754 } else { 2755 C = *matredundant; 2756 } 2757 2758 /* insert local matrix entries */ 2759 rptr = sbuf_j; 2760 cols = sbuf_j + rend-rstart + 1; 2761 vals = sbuf_a; 2762 for (i=0; i<rend-rstart; i++){ 2763 row = i + rstart; 2764 ncols = rptr[i+1] - rptr[i]; 2765 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2766 vals += ncols; 2767 cols += ncols; 2768 } 2769 /* insert received matrix entries */ 2770 for (imdex=0; imdex<nrecvs; imdex++){ 2771 rstart = rowrange[recv_rank[imdex]]; 2772 rend = rowrange[recv_rank[imdex]+1]; 2773 rptr = rbuf_j[imdex]; 2774 cols = rbuf_j[imdex] + rend-rstart + 1; 2775 vals = rbuf_a[imdex]; 2776 for (i=0; i<rend-rstart; i++){ 2777 row = i + rstart; 2778 ncols = rptr[i+1] - rptr[i]; 2779 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2780 vals += ncols; 2781 cols += ncols; 2782 } 2783 } 2784 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2787 if (M != mat->rmap->N || N != mat->cmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2788 if (reuse == MAT_INITIAL_MATRIX) { 2789 PetscContainer container; 2790 *matredundant = C; 2791 /* create a supporting struct and attach it to C for reuse */ 2792 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2793 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2794 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2795 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2796 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2797 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2798 2799 redund->nzlocal = nzlocal; 2800 redund->nsends = nsends; 2801 redund->nrecvs = nrecvs; 2802 redund->send_rank = send_rank; 2803 redund->recv_rank = recv_rank; 2804 redund->sbuf_nz = sbuf_nz; 2805 redund->rbuf_nz = rbuf_nz; 2806 redund->sbuf_j = sbuf_j; 2807 redund->sbuf_a = sbuf_a; 2808 redund->rbuf_j = rbuf_j; 2809 redund->rbuf_a = rbuf_a; 2810 2811 redund->Destroy = C->ops->destroy; 2812 C->ops->destroy = MatDestroy_MatRedundant; 2813 } 2814 PetscFunctionReturn(0); 2815 } 2816 2817 #undef __FUNCT__ 2818 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2819 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2820 { 2821 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2822 PetscErrorCode ierr; 2823 PetscInt i,*idxb = 0; 2824 PetscScalar *va,*vb; 2825 Vec vtmp; 2826 2827 PetscFunctionBegin; 2828 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2829 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2830 if (idx) { 2831 for (i=0; i<A->rmap->n; i++) { 2832 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2833 } 2834 } 2835 2836 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2837 if (idx) { 2838 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2839 } 2840 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2841 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2842 2843 for (i=0; i<A->rmap->n; i++){ 2844 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2845 va[i] = vb[i]; 2846 if (idx) idx[i] = a->garray[idxb[i]]; 2847 } 2848 } 2849 2850 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2851 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2852 ierr = PetscFree(idxb);CHKERRQ(ierr); 2853 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2859 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2860 { 2861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2862 PetscErrorCode ierr; 2863 PetscInt i,*idxb = 0; 2864 PetscScalar *va,*vb; 2865 Vec vtmp; 2866 2867 PetscFunctionBegin; 2868 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2869 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2870 if (idx) { 2871 for (i=0; i<A->rmap->n; i++) { 2872 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2873 } 2874 } 2875 2876 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2877 if (idx) { 2878 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2879 } 2880 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2881 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2882 2883 for (i=0; i<A->rmap->n; i++){ 2884 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2885 va[i] = vb[i]; 2886 if (idx) idx[i] = a->garray[idxb[i]]; 2887 } 2888 } 2889 2890 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2891 ierr =
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2892 ierr = PetscFree(idxb);CHKERRQ(ierr); 2893 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2894 PetscFunctionReturn(0); 2895 } 2896 2897 #undef __FUNCT__ 2898 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2899 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2900 { 2901 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2902 PetscInt n = A->rmap->n; 2903 PetscInt cstart = A->cmap->rstart; 2904 PetscInt *cmap = mat->garray; 2905 PetscInt *diagIdx, *offdiagIdx; 2906 Vec diagV, offdiagV; 2907 PetscScalar *a, *diagA, *offdiagA; 2908 PetscInt r; 2909 PetscErrorCode ierr; 2910 2911 PetscFunctionBegin; 2912 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2913 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2914 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2915 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2916 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2917 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2918 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2919 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2920 for(r = 0; r < n; ++r) { 2921 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2922 a[r] = diagA[r]; 2923 idx[r] = cstart + diagIdx[r]; 2924 } else { 2925 a[r] = offdiagA[r]; 2926 idx[r] = cmap[offdiagIdx[r]]; 2927 } 2928 } 2929 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2930 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2931 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2932 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2933 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2934 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2935 PetscFunctionReturn(0); 2936 } 2937 2938 #undef __FUNCT__ 2939 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2940 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2941 { 2942 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2943 PetscInt n = A->rmap->n; 2944 PetscInt cstart = A->cmap->rstart; 2945 PetscInt *cmap = mat->garray; 2946 PetscInt *diagIdx, *offdiagIdx; 2947 Vec diagV, offdiagV; 2948 PetscScalar *a, *diagA, *offdiagA; 2949 PetscInt r; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2954 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2955 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2956 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2957 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2958 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2959 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2960 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2961 for(r = 0; r < n; ++r) { 2962 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2963 a[r] = diagA[r]; 2964 idx[r] = cstart + diagIdx[r]; 2965 } else { 2966 a[r] = offdiagA[r]; 2967 idx[r] = cmap[offdiagIdx[r]]; 2968 } 2969 } 2970 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2971 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2972 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2973 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2974 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2975 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2976 PetscFunctionReturn(0); 2977 } 2978 2979 #undef __FUNCT__ 2980 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2981 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2982
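/* Gathers the nonzero structure of the entire parallel matrix onto each process as a sequential matrix; only the pattern is collected (MAT_DO_NOT_GET_VALUES), no numerical values are copied. */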
{ 2983 PetscErrorCode ierr; 2984 Mat *dummy; 2985 2986 PetscFunctionBegin; 2987 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2988 *newmat = *dummy; 2989 ierr = PetscFree(dummy);CHKERRQ(ierr); 2990 PetscFunctionReturn(0); 2991 } 2992 2993 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2994 2995 #undef __FUNCT__ 2996 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2997 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2998 { 2999 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3000 PetscErrorCode ierr; 3001 3002 PetscFunctionBegin; 3003 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 3007 3008 /* -------------------------------------------------------------------*/ 3009 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3010 MatGetRow_MPIAIJ, 3011 MatRestoreRow_MPIAIJ, 3012 MatMult_MPIAIJ, 3013 /* 4*/ MatMultAdd_MPIAIJ, 3014 MatMultTranspose_MPIAIJ, 3015 MatMultTransposeAdd_MPIAIJ, 3016 #ifdef PETSC_HAVE_PBGL 3017 MatSolve_MPIAIJ, 3018 #else 3019 0, 3020 #endif 3021 0, 3022 0, 3023 /*10*/ 0, 3024 0, 3025 0, 3026 MatSOR_MPIAIJ, 3027 MatTranspose_MPIAIJ, 3028 /*15*/ MatGetInfo_MPIAIJ, 3029 MatEqual_MPIAIJ, 3030 MatGetDiagonal_MPIAIJ, 3031 MatDiagonalScale_MPIAIJ, 3032 MatNorm_MPIAIJ, 3033 /*20*/ MatAssemblyBegin_MPIAIJ, 3034 MatAssemblyEnd_MPIAIJ, 3035 MatSetOption_MPIAIJ, 3036 MatZeroEntries_MPIAIJ, 3037 /*24*/ MatZeroRows_MPIAIJ, 3038 0, 3039 #ifdef PETSC_HAVE_PBGL 3040 0, 3041 #else 3042 0, 3043 #endif 3044 0, 3045 0, 3046 /*29*/ MatSetUp_MPIAIJ, 3047 #ifdef PETSC_HAVE_PBGL 3048 0, 3049 #else 3050 0, 3051 #endif 3052 0, 3053 0, 3054 0, 3055 /*34*/ MatDuplicate_MPIAIJ, 3056 0, 3057 0, 3058 0, 3059 0, 3060 /*39*/ MatAXPY_MPIAIJ, 3061 MatGetSubMatrices_MPIAIJ, 3062 MatIncreaseOverlap_MPIAIJ, 3063 MatGetValues_MPIAIJ, 3064 MatCopy_MPIAIJ, 3065 /*44*/ MatGetRowMax_MPIAIJ, 3066 MatScale_MPIAIJ, 3067 0, 3068 0, 3069 MatZeroRowsColumns_MPIAIJ, 3070 /*49*/ 0, 3071 0, 3072 0, 3073 0, 3074 0, 3075 /*54*/ MatFDColoringCreate_MPIAIJ, 3076 0, 3077 MatSetUnfactored_MPIAIJ, 3078 0, /* MatPermute_MPIAIJ, impl currently broken */ 3079 0, 3080 /*59*/ MatGetSubMatrix_MPIAIJ, 3081 MatDestroy_MPIAIJ, 3082 MatView_MPIAIJ, 3083 0, 3084 0, 3085 /*64*/ 0, 3086 0, 3087 0, 3088 0, 3089 0, 3090 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3091 MatGetRowMinAbs_MPIAIJ, 3092 0, 3093 MatSetColoring_MPIAIJ, 3094 #if defined(PETSC_HAVE_ADIC) 3095 MatSetValuesAdic_MPIAIJ, 3096 #else 3097 0, 3098 #endif 3099 MatSetValuesAdifor_MPIAIJ, 3100 /*75*/ MatFDColoringApply_AIJ, 3101 0, 3102 0, 3103 0, 3104 MatFindZeroDiagonals_MPIAIJ, 3105 /*80*/ 0, 3106 0, 3107 0, 3108 /*83*/ MatLoad_MPIAIJ, 3109 0, 3110 0, 3111 0, 3112 0, 3113 0, 3114 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3115 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3116 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3117 MatPtAP_Basic, 3118 MatPtAPSymbolic_MPIAIJ, 3119 /*94*/ MatPtAPNumeric_MPIAIJ, 3120 0, 3121 0, 3122 0, 3123 0, 3124 /*99*/ 0, 3125 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3126 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3127 MatConjugate_MPIAIJ, 3128 0, 3129 /*104*/MatSetValuesRow_MPIAIJ, 3130 MatRealPart_MPIAIJ, 3131 MatImaginaryPart_MPIAIJ, 3132 0, 3133 0, 3134 /*109*/0, 3135 MatGetRedundantMatrix_MPIAIJ, 3136 MatGetRowMin_MPIAIJ, 3137 0, 3138 0, 3139 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3140 0, 3141 0, 3142 0, 3143 0, 3144 /*119*/0, 3145 0, 3146 0, 3147 0, 3148 MatGetMultiProcBlock_MPIAIJ, 3149 /*124*/MatFindNonzeroRows_MPIAIJ, 3150 
MatGetColumnNorms_MPIAIJ, 3151 MatInvertBlockDiagonal_MPIAIJ, 3152 0, 3153 MatGetSubMatricesParallel_MPIAIJ, 3154 /*129*/0, 3155 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3156 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3157 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3158 0, 3159 /*134*/0, 3160 0, 3161 0, 3162 0, 3163 0 3164 }; 3165 3166 /* ----------------------------------------------------------------------------------------*/ 3167 3168 EXTERN_C_BEGIN 3169 #undef __FUNCT__ 3170 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3171 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3172 { 3173 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3174 PetscErrorCode ierr; 3175 3176 PetscFunctionBegin; 3177 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3178 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3179 PetscFunctionReturn(0); 3180 } 3181 EXTERN_C_END 3182 3183 EXTERN_C_BEGIN 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3186 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3187 { 3188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3193 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3194 PetscFunctionReturn(0); 3195 } 3196 EXTERN_C_END 3197 3198 EXTERN_C_BEGIN 3199 #undef __FUNCT__ 3200 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3201 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3202 { 3203 Mat_MPIAIJ *b; 3204 PetscErrorCode ierr; 3205 PetscInt i; 3206 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3207 3208 PetscFunctionBegin; 3209 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3210 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3211 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3212 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3213 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3214 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3215 3216 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3217 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3218 if (d_nnz) { 3219 for (i=0; i<B->rmap->n; i++) { 3220 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3221 } 3222 } 3223 if (o_nnz) { 3224 for (i=0; i<B->rmap->n; i++) { 3225 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3226 } 3227 } 3228 b = (Mat_MPIAIJ*)B->data; 3229 3230 if (!B->preallocated) { 3231 /* Explicitly create 2 MATSEQAIJ matrices. 
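   (A note on the two blocks, for orientation: b->A holds the local "diagonal" block, sized
   local-rows by local-columns, while b->B holds the off-diagonal block; b->B is created below
   with the full global column count N and is compacted to just the columns actually used
   once the matrix is assembled.)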
*/ 3232 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3233 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3234 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3235 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3236 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3237 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3238 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3239 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3240 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3241 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3242 } 3243 3244 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3245 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3246 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3247 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3248 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3249 B->preallocated = PETSC_TRUE; 3250 PetscFunctionReturn(0); 3251 } 3252 EXTERN_C_END 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3256 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3257 { 3258 Mat mat; 3259 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3260 PetscErrorCode ierr; 3261 3262 PetscFunctionBegin; 3263 *newmat = 0; 3264 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3265 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3266 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3267 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3268 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3269 a = (Mat_MPIAIJ*)mat->data; 3270 3271 mat->factortype = matin->factortype; 3272 mat->rmap->bs = matin->rmap->bs; 3273 mat->cmap->bs = matin->cmap->bs; 3274 mat->assembled = PETSC_TRUE; 3275 mat->insertmode = NOT_SET_VALUES; 3276 mat->preallocated = PETSC_TRUE; 3277 3278 a->size = oldmat->size; 3279 a->rank = oldmat->rank; 3280 a->donotstash = oldmat->donotstash; 3281 a->roworiented = oldmat->roworiented; 3282 a->rowindices = 0; 3283 a->rowvalues = 0; 3284 a->getrowactive = PETSC_FALSE; 3285 3286 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3287 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3288 3289 if (oldmat->colmap) { 3290 #if defined (PETSC_USE_CTABLE) 3291 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3292 #else 3293 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3294 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3295 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3296 #endif 3297 } else a->colmap = 0; 3298 if (oldmat->garray) { 3299 PetscInt len; 3300 len = oldmat->B->cmap->n; 3301 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3302 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3303 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3304 } else a->garray = 0; 3305 3306 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3307 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3308 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3309 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3310 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3311 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3312 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3314 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3315 *newmat = mat; 3316 PetscFunctionReturn(0); 3317 } 3318 3319 3320 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatLoad_MPIAIJ" 3323 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3324 { 3325 PetscScalar *vals,*svals; 3326 MPI_Comm comm = ((PetscObject)viewer)->comm; 3327 PetscErrorCode ierr; 3328 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3329 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3330 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3331 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3332 PetscInt cend,cstart,n,*rowners,sizesset=1; 3333 int fd; 3334 3335 PetscFunctionBegin; 3336 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3337 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3338 if (!rank) { 3339 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3340 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3341 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3342 } 3343 3344 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3345 3346 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3347 M = header[1]; N = header[2]; 3348 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3349 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3350 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3351 3352 /* If global sizes are set, check if they are consistent with that given in the file */ 3353 if (sizesset) { 3354 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3355 } 3356 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3357 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3358 3359 /* determine ownership of all rows */ 3360 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3361 else m = newMat->rmap->n; /* Set by user */ 3362 3363 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3364 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3365 3366 /* First process needs enough room for process with most rows */ 3367 if (!rank) { 3368 mmax = rowners[1]; 3369 for (i=2; i<size; i++) { 3370 mmax = PetscMax(mmax,rowners[i]); 3371 } 3372 } else mmax = m; 3373 3374 rowners[0] = 0; 3375 for (i=2; i<=size; i++) { 3376 rowners[i] += rowners[i-1]; 3377 } 3378 rstart = rowners[rank]; 3379 rend = rowners[rank+1]; 3380 3381 /* distribute row lengths to all processors */ 3382 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3383 if (!rank) { 3384 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3385 ierr = 
PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3386 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3387 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3388 for (j=0; j<m; j++) { 3389 procsnz[0] += ourlens[j]; 3390 } 3391 for (i=1; i<size; i++) { 3392 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3393 /* calculate the number of nonzeros on each processor */ 3394 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3395 procsnz[i] += rowlengths[j]; 3396 } 3397 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3398 } 3399 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3400 } else { 3401 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3402 } 3403 3404 if (!rank) { 3405 /* determine max buffer needed and allocate it */ 3406 maxnz = 0; 3407 for (i=0; i<size; i++) { 3408 maxnz = PetscMax(maxnz,procsnz[i]); 3409 } 3410 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3411 3412 /* read in my part of the matrix column indices */ 3413 nz = procsnz[0]; 3414 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3415 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3416 3417 /* read in every one elses and ship off */ 3418 for (i=1; i<size; i++) { 3419 nz = procsnz[i]; 3420 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3421 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3422 } 3423 ierr = PetscFree(cols);CHKERRQ(ierr); 3424 } else { 3425 /* determine buffer space needed for message */ 3426 nz = 0; 3427 for (i=0; i<m; i++) { 3428 nz += ourlens[i]; 3429 } 3430 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3431 3432 /* receive message of column indices*/ 3433 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3434 } 3435 3436 /* determine column ownership if matrix is not square */ 3437 if (N != M) { 3438 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3439 else n = newMat->cmap->n; 3440 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3441 cstart = cend - n; 3442 } else { 3443 cstart = rstart; 3444 cend = rend; 3445 n = cend - cstart; 3446 } 3447 3448 /* loop over local rows, determining number of off diagonal entries */ 3449 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3450 jj = 0; 3451 for (i=0; i<m; i++) { 3452 for (j=0; j<ourlens[i]; j++) { 3453 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3454 jj++; 3455 } 3456 } 3457 3458 for (i=0; i<m; i++) { 3459 ourlens[i] -= offlens[i]; 3460 } 3461 if (!sizesset) { 3462 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3463 } 3464 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3465 3466 for (i=0; i<m; i++) { 3467 ourlens[i] += offlens[i]; 3468 } 3469 3470 if (!rank) { 3471 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3472 3473 /* read in my part of the matrix numerical values */ 3474 nz = procsnz[0]; 3475 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3476 3477 /* insert into matrix */ 3478 jj = rstart; 3479 smycols = mycols; 3480 svals = vals; 3481 for (i=0; i<m; i++) { 3482 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3483 smycols += ourlens[i]; 3484 svals += ourlens[i]; 3485 jj++; 3486 } 3487 3488 /* read in other processors and ship out */ 3489 for (i=1; i<size; i++) { 3490 nz = procsnz[i]; 3491 ierr = 
PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3492 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3493 } 3494 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3495 } else { 3496 /* receive numeric values */ 3497 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3498 3499 /* receive message of values*/ 3500 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3501 3502 /* insert into matrix */ 3503 jj = rstart; 3504 smycols = mycols; 3505 svals = vals; 3506 for (i=0; i<m; i++) { 3507 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3508 smycols += ourlens[i]; 3509 svals += ourlens[i]; 3510 jj++; 3511 } 3512 } 3513 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3514 ierr = PetscFree(vals);CHKERRQ(ierr); 3515 ierr = PetscFree(mycols);CHKERRQ(ierr); 3516 ierr = PetscFree(rowners);CHKERRQ(ierr); 3517 3518 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3519 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 #undef __FUNCT__ 3524 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3525 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3526 { 3527 PetscErrorCode ierr; 3528 IS iscol_local; 3529 PetscInt csize; 3530 3531 PetscFunctionBegin; 3532 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3533 if (call == MAT_REUSE_MATRIX) { 3534 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3535 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3536 } else { 3537 PetscInt cbs; 3538 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3539 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3540 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3541 } 3542 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3543 if (call == MAT_INITIAL_MATRIX) { 3544 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3545 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3546 } 3547 PetscFunctionReturn(0); 3548 } 3549 3550 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3553 /* 3554 Not great since it makes two copies of the submatrix, first an SeqAIJ 3555 in local and then by concatenating the local matrices the end result. 3556 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3557 3558 Note: This requires a sequential iscol with all indices. 
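   (MatGetSubMatrix_MPIAIJ() above builds that sequential iscol by calling ISAllGather()
   on the distributed column IS before calling this routine.)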
3559 */ 3560 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3561 { 3562 PetscErrorCode ierr; 3563 PetscMPIInt rank,size; 3564 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3565 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3566 PetscBool allcolumns, colflag; 3567 Mat M,Mreuse; 3568 MatScalar *vwork,*aa; 3569 MPI_Comm comm = ((PetscObject)mat)->comm; 3570 Mat_SeqAIJ *aij; 3571 3572 3573 PetscFunctionBegin; 3574 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3575 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3576 3577 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3578 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3579 if (colflag && ncol == mat->cmap->N){ 3580 allcolumns = PETSC_TRUE; 3581 } else { 3582 allcolumns = PETSC_FALSE; 3583 } 3584 if (call == MAT_REUSE_MATRIX) { 3585 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3586 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3587 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3588 } else { 3589 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3590 } 3591 3592 /* 3593 m - number of local rows 3594 n - number of columns (same on all processors) 3595 rstart - first row in new global matrix generated 3596 */ 3597 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3598 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3599 if (call == MAT_INITIAL_MATRIX) { 3600 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3601 ii = aij->i; 3602 jj = aij->j; 3603 3604 /* 3605 Determine the number of non-zeros in the diagonal and off-diagonal 3606 portions of the matrix in order to do correct preallocation 3607 */ 3608 3609 /* first get start and end of "diagonal" columns */ 3610 if (csize == PETSC_DECIDE) { 3611 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3612 if (mglobal == n) { /* square matrix */ 3613 nlocal = m; 3614 } else { 3615 nlocal = n/size + ((n % size) > rank); 3616 } 3617 } else { 3618 nlocal = csize; 3619 } 3620 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3621 rstart = rend - nlocal; 3622 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3623 3624 /* next, compute all the lengths */ 3625 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3626 olens = dlens + m; 3627 for (i=0; i<m; i++) { 3628 jend = ii[i+1] - ii[i]; 3629 olen = 0; 3630 dlen = 0; 3631 for (j=0; j<jend; j++) { 3632 if (*jj < rstart || *jj >= rend) olen++; 3633 else dlen++; 3634 jj++; 3635 } 3636 olens[i] = olen; 3637 dlens[i] = dlen; 3638 } 3639 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3640 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3641 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3642 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3643 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3644 ierr = PetscFree(dlens);CHKERRQ(ierr); 3645 } else { 3646 PetscInt ml,nl; 3647 3648 M = *newmat; 3649 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3650 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3651 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3652 /* 3653 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3654 rather than the slower MatSetValues().
3655 */
3656 M->was_assembled = PETSC_TRUE;
3657 M->assembled = PETSC_FALSE;
3658 }
3659 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3660 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3661 ii = aij->i;
3662 jj = aij->j;
3663 aa = aij->a;
3664 for (i=0; i<m; i++) {
3665 row = rstart + i;
3666 nz = ii[i+1] - ii[i];
3667 cwork = jj; jj += nz;
3668 vwork = aa; aa += nz;
3669 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3670 }
3671
3672 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3674 *newmat = M;
3675
3676 /* save submatrix used in processor for next request */
3677 if (call == MAT_INITIAL_MATRIX) {
3678 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3679 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3680 }
3681
3682 PetscFunctionReturn(0);
3683 }
3684
3685 EXTERN_C_BEGIN
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3688 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3689 {
3690 PetscInt m,cstart, cend,j,nnz,i,d;
3691 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3692 const PetscInt *JJ;
3693 PetscScalar *values;
3694 PetscErrorCode ierr;
3695
3696 PetscFunctionBegin;
3697 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3698
3699 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3700 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3701 m = B->rmap->n;
3702 cstart = B->cmap->rstart;
3703 cend = B->cmap->rend;
3704 rstart = B->rmap->rstart;
3705
3706 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3707
3708 #if defined(PETSC_USE_DEBUG)
3709 for (i=0; i<m; i++) {
3710 nnz = Ii[i+1]- Ii[i];
3711 JJ = J + Ii[i];
3712 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3713 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3714 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3715 }
3716 #endif
3717
3718 for (i=0; i<m; i++) {
3719 nnz = Ii[i+1]- Ii[i];
3720 JJ = J + Ii[i];
3721 nnz_max = PetscMax(nnz_max,nnz);
3722 d = 0;
3723 for (j=0; j<nnz; j++) {
3724 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3725 }
3726 d_nnz[i] = d;
3727 o_nnz[i] = nnz - d;
3728 }
3729 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3730 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3731
3732 if (v) values = (PetscScalar*)v;
3733 else {
3734 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3735 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3736 }
3737
3738 for (i=0; i<m; i++) {
3739 ii = i + rstart;
3740 nnz = Ii[i+1]- Ii[i];
3741 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3742 }
3743 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3745
3746 if (!v) {
3747 ierr = PetscFree(values);CHKERRQ(ierr);
3748 }
3749 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3750 PetscFunctionReturn(0);
3751 }
3752 EXTERN_C_END
3753
3754 #undef __FUNCT__
3755 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3756 /*@
3757 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3758 (the default parallel PETSc format).
3759
3760 Collective on MPI_Comm
3761
3762 Input Parameters:
3763 + B - the matrix
3764 . i - the indices into j for the start of each local row (starts with zero)
3765 . j - the column indices for each local row (starts with zero)
3766 - v - optional values in the matrix
3767
3768 Level: developer
3769
3770 Notes:
3771 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3772 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3773 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3774
3775 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3776
3777 The format used for the sparse matrix input is equivalent to a
3778 row-major ordering, i.e., for the following matrix, the input data expected is
3779 as shown:
3780
3781 1 0 0
3782 2 0 3 P0
3783 -------
3784 4 5 6 P1
3785
3786 Process0 [P0]: rows_owned=[0,1]
3787 i = {0,1,3} [size = nrow+1 = 2+1]
3788 j = {0,0,2} [size = nz = 3]
3789 v = {1,2,3} [size = nz = 3]
3790
3791 Process1 [P1]: rows_owned=[2]
3792 i = {0,3} [size = nrow+1 = 1+1]
3793 j = {0,1,2} [size = nz = 3]
3794 v = {4,5,6} [size = nz = 3]
3795
3796 .keywords: matrix, aij, compressed row, sparse, parallel
3797
3798 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3799 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3800 @*/
3801 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3802 {
3803 PetscErrorCode ierr;
3804
3805 PetscFunctionBegin;
3806 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3807 PetscFunctionReturn(0);
3808 }
3809
3810 #undef __FUNCT__
3811 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3812 /*@C
3813 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3814 (the default parallel PETSc format). For good matrix assembly performance
3815 the user should preallocate the matrix storage by setting the parameters
3816 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3817 performance can be increased by more than a factor of 50.
3818
3819 Collective on MPI_Comm
3820
3821 Input Parameters:
3822 + B - the matrix
3823 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3824 (same value is used for all local rows)
3825 . d_nnz - array containing the number of nonzeros in the various rows of the
3826 DIAGONAL portion of the local submatrix (possibly different for each row)
3827 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3828 The size of this array is equal to the number of local rows, i.e. 'm'.
3829 For matrices that will be factored, you must leave room for (and set)
3830 the diagonal entry even if it is zero.
3831 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3832 submatrix (same value is used for all local rows).
3833 - o_nnz - array containing the number of nonzeros in the various rows of the
3834 OFF-DIAGONAL portion of the local submatrix (possibly different for
3835 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3836 structure. The size of this array is equal to the number
3837 of local rows, i.e. 'm'.
3838
3839 If the *_nnz parameter is given then the *_nz parameter is ignored.
3840
3841 The AIJ format (also called the Yale sparse matrix format or
3842 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3843 storage. The stored row and column indices begin with zero.
3844 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3845
3846 The parallel matrix is partitioned such that the first m0 rows belong to
3847 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3848 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3849
3850 The DIAGONAL portion of the local submatrix of a processor can be defined
3851 as the submatrix which is obtained by extracting the part corresponding to
3852 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3853 first row that belongs to the processor, r2 is the last row belonging to
3854 this processor, and c1-c2 is the range of indices of the local part of a
3855 vector suitable for applying the matrix to. This is an mxn matrix. In the
3856 common case of a square matrix, the row and column ranges are the same and
3857 the DIAGONAL part is also square. The remaining portion of the local
3858 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3859
3860 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3861
3862 You can call MatGetInfo() to get information on how effective the preallocation was;
3863 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3864 You can also run with the option -info and look for messages with the string
3865 malloc in them to see if additional memory allocation was needed.
3866
3867 Example usage:
3868
3869 Consider the following 8x8 matrix with 34 non-zero values, that is
3870 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3871 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3872 as follows:
3873
3874 .vb
3875 1 2 0 | 0 3 0 | 0 4
3876 Proc0 0 5 6 | 7 0 0 | 8 0
3877 9 0 10 | 11 0 0 | 12 0
3878 -------------------------------------
3879 13 0 14 | 15 16 17 | 0 0
3880 Proc1 0 18 0 | 19 20 21 | 0 0
3881 0 0 0 | 22 23 0 | 24 0
3882 -------------------------------------
3883 Proc2 25 26 27 | 0 0 28 | 29 0
3884 30 0 0 | 31 32 33 | 0 34
3885 .ve
3886
3887 This can be represented as a collection of submatrices as:
3888
3889 .vb
3890 A B C
3891 D E F
3892 G H I
3893 .ve
3894
3895 Where the submatrices A,B,C are owned by proc0, D,E,F are
3896 owned by proc1, G,H,I are owned by proc2.
3897
3898 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3899 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3900 The 'M','N' parameters are 8,8, and have the same values on all procs.
3901
3902 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3903 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices
3904 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3905 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3906 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3907 matrix, and [DF] as another SeqAIJ matrix.
3908
3909 When d_nz, o_nz parameters are specified, d_nz storage elements are
3910 allocated for every row of the local diagonal submatrix, and o_nz
3911 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3912 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3913 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3914 In this case, the values of d_nz,o_nz are:
3915 .vb
3916 proc0 : d_nz = 2, o_nz = 2
3917 proc1 : d_nz = 3, o_nz = 2
3918 proc2 : d_nz = 1, o_nz = 4
3919 .ve
3920 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3921 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3922 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3923 34 values.
3924
3925 When d_nnz, o_nnz parameters are specified, the storage is specified
3926 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3927 In the above case the values for d_nnz,o_nnz are:
3928 .vb
3929 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3930 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3931 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3932 .ve
3933 Here the space allocated is the sum of all the above values, i.e., 34, and
3934 hence pre-allocation is perfect.
3935
3936 Level: intermediate
3937
3938 .keywords: matrix, aij, compressed row, sparse, parallel
3939
3940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3941 MPIAIJ, MatGetInfo()
3942 @*/
3943 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3944 {
3945 PetscErrorCode ierr;
3946
3947 PetscFunctionBegin;
3948 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3949 PetscValidType(B,1);
3950 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3951 PetscFunctionReturn(0);
3952 }
3953
3954 #undef __FUNCT__
3955 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3956 /*@
3957 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3958 in standard CSR format.
3959
3960 Collective on MPI_Comm
3961
3962 Input Parameters:
3963 + comm - MPI communicator
3964 . m - number of local rows (Cannot be PETSC_DECIDE)
3965 . n - This value should be the same as the local size used in creating the
3966 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3967 it calculated if N is given) For square matrices n is almost always m.
3968 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3969 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3970 . i - row indices
3971 . j - column indices
3972 - a - matrix values
3973
3974 Output Parameter:
3975 . mat - the matrix
3976
3977 Level: intermediate
3978
3979 Notes:
3980 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3981 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3982 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
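 A minimal calling sketch (assuming m and n are this process's local row and column counts
 and i, j, a are its local CSR arrays; error handling abbreviated):

.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
     /* ... use A ..., then */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve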
3983
3984 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3985
3986 The format used for the sparse matrix input is equivalent to a
3987 row-major ordering, i.e., for the following matrix, the input data expected is
3988 as shown:
3989
3990 1 0 0
3991 2 0 3 P0
3992 -------
3993 4 5 6 P1
3994
3995 Process0 [P0]: rows_owned=[0,1]
3996 i = {0,1,3} [size = nrow+1 = 2+1]
3997 j = {0,0,2} [size = nz = 3]
3998 v = {1,2,3} [size = nz = 3]
3999
4000 Process1 [P1]: rows_owned=[2]
4001 i = {0,3} [size = nrow+1 = 1+1]
4002 j = {0,1,2} [size = nz = 3]
4003 v = {4,5,6} [size = nz = 3]
4004
4005 .keywords: matrix, aij, compressed row, sparse, parallel
4006
4007 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4008 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4009 @*/
4010 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4011 {
4012 PetscErrorCode ierr;
4013
4014 PetscFunctionBegin;
4015 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4016 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4017 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4018 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4019 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4020 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4021 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4022 PetscFunctionReturn(0);
4023 }
4024
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatCreateAIJ"
4027 /*@C
4028 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4029 (the default parallel PETSc format). For good matrix assembly performance
4030 the user should preallocate the matrix storage by setting the parameters
4031 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4032 performance can be increased by more than a factor of 50.
4033
4034 Collective on MPI_Comm
4035
4036 Input Parameters:
4037 + comm - MPI communicator
4038 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4039 This value should be the same as the local size used in creating the
4040 y vector for the matrix-vector product y = Ax.
4041 . n - This value should be the same as the local size used in creating the
4042 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4043 it calculated if N is given) For square matrices n is almost always m.
4044 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4045 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4046 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4047 (same value is used for all local rows)
4048 . d_nnz - array containing the number of nonzeros in the various rows of the
4049 DIAGONAL portion of the local submatrix (possibly different for each row)
4050 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4051 The size of this array is equal to the number of local rows, i.e. 'm'.
4052 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4053 submatrix (same value is used for all local rows).
4054 - o_nnz - array containing the number of nonzeros in the various rows of the
4055 OFF-DIAGONAL portion of the local submatrix (possibly different for
4056 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4057 structure. The size of this array is equal to the number
4058 of local rows, i.e. 'm'.
4059
4060 Output Parameter:
4061 . A - the matrix
4062
4063 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4064 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4065 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4066
4067 Notes:
4068 If the *_nnz parameter is given then the *_nz parameter is ignored.
4069
4070 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4071 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4072 storage requirements for this matrix.
4073
4074 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4075 processor then it must be used on all processors that share the object for
4076 that argument.
4077
4078 The user MUST specify either the local or global matrix dimensions
4079 (possibly both).
4080
4081 The parallel matrix is partitioned across processors such that the
4082 first m0 rows belong to process 0, the next m1 rows belong to
4083 process 1, the next m2 rows belong to process 2, etc., where
4084 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4085 values corresponding to an [m x N] submatrix.
4086
4087 The columns are logically partitioned with the n0 columns belonging
4088 to the 0th partition, the next n1 columns belonging to the next
4089 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4090
4091 The DIAGONAL portion of the local submatrix on any given processor
4092 is the submatrix corresponding to the rows and columns m,n
4093 corresponding to the given processor, i.e., the diagonal matrix on
4094 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4095 etc. The remaining portion of the local submatrix [m x (N-n)]
4096 constitutes the OFF-DIAGONAL portion. The example below better
4097 illustrates this concept.
4098
4099 For a square global matrix we define each processor's diagonal portion
4100 to be its local rows and the corresponding columns (a square submatrix);
4101 each processor's off-diagonal portion encompasses the remainder of the
4102 local matrix (a rectangular submatrix).
4103
4104 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4105
4106 When calling this routine with a single process communicator, a matrix of
4107 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4108 type of communicator, use the construction mechanism:
4109 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4110
4111 By default, this format uses inodes (identical nodes) when possible.
4112 We search for consecutive rows with the same nonzero structure, thereby
4113 reusing matrix information to achieve increased efficiency.
4114
4115 Options Database Keys:
4116 + -mat_no_inode - Do not use inodes
4117 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4118 - -mat_aij_oneindex - Internally use indexing starting at 1
4119 rather than 0. Note that when calling MatSetValues(),
4120 the user still MUST index entries starting at 0!
4121
4122
4123 Example usage:
4124
4125 Consider the following 8x8 matrix with 34 non-zero values, that is
4126 assembled across 3 processors.
Let's assume that proc0 owns 3 rows,
4127 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4128 as follows:
4129
4130 .vb
4131 1 2 0 | 0 3 0 | 0 4
4132 Proc0 0 5 6 | 7 0 0 | 8 0
4133 9 0 10 | 11 0 0 | 12 0
4134 -------------------------------------
4135 13 0 14 | 15 16 17 | 0 0
4136 Proc1 0 18 0 | 19 20 21 | 0 0
4137 0 0 0 | 22 23 0 | 24 0
4138 -------------------------------------
4139 Proc2 25 26 27 | 0 0 28 | 29 0
4140 30 0 0 | 31 32 33 | 0 34
4141 .ve
4142
4143 This can be represented as a collection of submatrices as:
4144
4145 .vb
4146 A B C
4147 D E F
4148 G H I
4149 .ve
4150
4151 Where the submatrices A,B,C are owned by proc0, D,E,F are
4152 owned by proc1, G,H,I are owned by proc2.
4153
4154 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4155 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4156 The 'M','N' parameters are 8,8, and have the same values on all procs.
4157
4158 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4159 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4160 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4161 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4162 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4163 matrix, and [DF] as another SeqAIJ matrix.
4164
4165 When d_nz, o_nz parameters are specified, d_nz storage elements are
4166 allocated for every row of the local diagonal submatrix, and o_nz
4167 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4168 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4169 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4170 In this case, the values of d_nz,o_nz are:
4171 .vb
4172 proc0 : d_nz = 2, o_nz = 2
4173 proc1 : d_nz = 3, o_nz = 2
4174 proc2 : d_nz = 1, o_nz = 4
4175 .ve
4176 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4177 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4178 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4179 34 values.
4180
4181 When d_nnz, o_nnz parameters are specified, the storage is specified
4182 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4183 In the above case the values for d_nnz,o_nnz are:
4184 .vb
4185 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4186 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4187 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4188 .ve
4189 Here the space allocated is the sum of all the above values, i.e., 34, and
4190 hence pre-allocation is perfect.
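 For instance, on proc0 of the example above the call might look like the following
 sketch (each process passes its own local sizes and nnz arrays; error handling abbreviated):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* proc0's rows */
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve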
4191 4192 Level: intermediate 4193 4194 .keywords: matrix, aij, compressed row, sparse, parallel 4195 4196 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4197 MPIAIJ, MatCreateMPIAIJWithArrays() 4198 @*/ 4199 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4200 { 4201 PetscErrorCode ierr; 4202 PetscMPIInt size; 4203 4204 PetscFunctionBegin; 4205 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4206 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4207 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4208 if (size > 1) { 4209 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4210 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4211 } else { 4212 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4213 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4214 } 4215 PetscFunctionReturn(0); 4216 } 4217 4218 #undef __FUNCT__ 4219 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4220 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4221 { 4222 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4223 4224 PetscFunctionBegin; 4225 *Ad = a->A; 4226 *Ao = a->B; 4227 *colmap = a->garray; 4228 PetscFunctionReturn(0); 4229 } 4230 4231 #undef __FUNCT__ 4232 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4233 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4234 { 4235 PetscErrorCode ierr; 4236 PetscInt i; 4237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4238 4239 PetscFunctionBegin; 4240 if (coloring->ctype == IS_COLORING_GLOBAL) { 4241 ISColoringValue *allcolors,*colors; 4242 ISColoring ocoloring; 4243 4244 /* set coloring for diagonal portion */ 4245 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4246 4247 /* set coloring for off-diagonal portion */ 4248 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4249 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4250 for (i=0; i<a->B->cmap->n; i++) { 4251 colors[i] = allcolors[a->garray[i]]; 4252 } 4253 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4254 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4255 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4256 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4257 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4258 ISColoringValue *colors; 4259 PetscInt *larray; 4260 ISColoring ocoloring; 4261 4262 /* set coloring for diagonal portion */ 4263 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4264 for (i=0; i<a->A->cmap->n; i++) { 4265 larray[i] = i + A->cmap->rstart; 4266 } 4267 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4268 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4269 for (i=0; i<a->A->cmap->n; i++) { 4270 colors[i] = coloring->colors[larray[i]]; 4271 } 4272 ierr = PetscFree(larray);CHKERRQ(ierr); 4273 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4274 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4275 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4276 4277 /* set coloring for off-diagonal portion */ 4278 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4279 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4280 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4281 for (i=0; i<a->B->cmap->n; i++) { 4282 colors[i] = coloring->colors[larray[i]]; 4283 } 4284 ierr = PetscFree(larray);CHKERRQ(ierr); 4285 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4286 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4287 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4288 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4289 4290 PetscFunctionReturn(0); 4291 } 4292 4293 #if defined(PETSC_HAVE_ADIC) 4294 #undef __FUNCT__ 4295 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4296 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4297 { 4298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4299 PetscErrorCode ierr; 4300 4301 PetscFunctionBegin; 4302 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4303 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4304 PetscFunctionReturn(0); 4305 } 4306 #endif 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4310 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4311 { 4312 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4313 PetscErrorCode ierr; 4314 4315 PetscFunctionBegin; 4316 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4317 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4323 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4324 { 4325 PetscErrorCode ierr; 4326 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4327 PetscInt *indx; 4328 4329 PetscFunctionBegin; 4330 /* This routine will ONLY return MPIAIJ type matrix */ 4331 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4332 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4333 if (n == PETSC_DECIDE){ 4334 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4335 } 4336 /* Check sum(n) = N */ 4337 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4338 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4339 4340 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4341 rstart -= m; 4342 4343 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4344 for (i=0;i<m;i++) { 4345 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4346 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4347 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4348 } 4349 4350 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4351 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4352 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4353 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4354 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4355 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 #undef __FUNCT__ 4360 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4361 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4362 { 4363 PetscErrorCode ierr; 4364 PetscInt 
m,N,i,rstart,nnz,Ii; 4365 PetscInt *indx; 4366 PetscScalar *values; 4367 4368 PetscFunctionBegin; 4369 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4370 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4371 for (i=0;i<m;i++) { 4372 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4373 Ii = i + rstart; 4374 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4375 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4376 } 4377 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4384 /*@ 4385 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4386 matrices from each processor 4387 4388 Collective on MPI_Comm 4389 4390 Input Parameters: 4391 + comm - the communicators the parallel matrix will live on 4392 . inmat - the input sequential matrices 4393 . n - number of local columns (or PETSC_DECIDE) 4394 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4395 4396 Output Parameter: 4397 . outmat - the parallel matrix generated 4398 4399 Level: advanced 4400 4401 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4402 4403 @*/ 4404 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4405 { 4406 PetscErrorCode ierr; 4407 4408 PetscFunctionBegin; 4409 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4410 if (scall == MAT_INITIAL_MATRIX){ 4411 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4412 } 4413 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4414 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4415 PetscFunctionReturn(0); 4416 } 4417 4418 #undef __FUNCT__ 4419 #define __FUNCT__ "MatFileSplit" 4420 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4421 { 4422 PetscErrorCode ierr; 4423 PetscMPIInt rank; 4424 PetscInt m,N,i,rstart,nnz; 4425 size_t len; 4426 const PetscInt *indx; 4427 PetscViewer out; 4428 char *name; 4429 Mat B; 4430 const PetscScalar *values; 4431 4432 PetscFunctionBegin; 4433 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4434 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4435 /* Should this be the type of the diagonal block of A? 
*/ 4436 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4437 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4438 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4439 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4440 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4441 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4442 for (i=0;i<m;i++) { 4443 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4444 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4446 } 4447 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4448 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4449 4450 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4451 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4452 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4453 sprintf(name,"%s.%d",outfile,rank); 4454 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4455 ierr = PetscFree(name); 4456 ierr = MatView(B,out);CHKERRQ(ierr); 4457 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4458 ierr = MatDestroy(&B);CHKERRQ(ierr); 4459 PetscFunctionReturn(0); 4460 } 4461 4462 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4463 #undef __FUNCT__ 4464 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4465 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4466 { 4467 PetscErrorCode ierr; 4468 Mat_Merge_SeqsToMPI *merge; 4469 PetscContainer container; 4470 4471 PetscFunctionBegin; 4472 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4473 if (container) { 4474 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4487 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4488 ierr = PetscFree(merge);CHKERRQ(ierr); 4489 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4490 } 4491 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #include <../src/mat/utils/freespace.h> 4496 #include <petscbt.h> 4497 4498 #undef __FUNCT__ 4499 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4500 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4501 { 4502 PetscErrorCode ierr; 4503 MPI_Comm comm=((PetscObject)mpimat)->comm; 4504 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4505 PetscMPIInt size,rank,taga,*len_s; 4506 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4507 PetscInt proc,m; 4508 PetscInt **buf_ri,**buf_rj; 4509 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4510 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4511 MPI_Request *s_waits,*r_waits; 4512 MPI_Status *status; 4513 MatScalar *aa=a->a; 4514 MatScalar **abuf_r,*ba_i; 4515 Mat_Merge_SeqsToMPI *merge; 4516 PetscContainer container; 4517 4518 PetscFunctionBegin; 4519 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4520 4521 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4522 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4523 4524 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4525 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4526 4527 bi = merge->bi; 4528 bj = merge->bj; 4529 buf_ri = merge->buf_ri; 4530 buf_rj = merge->buf_rj; 4531 4532 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4533 owners = merge->rowmap->range; 4534 len_s = merge->len_s; 4535 4536 /* send and recv matrix values */ 4537 /*-----------------------------*/ 4538 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4539 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4540 4541 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4542 for (proc=0,k=0; proc<size; proc++){ 4543 if (!len_s[proc]) continue; 4544 i = owners[proc]; 4545 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4546 k++; 4547 } 4548 4549 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4550 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4551 ierr = PetscFree(status);CHKERRQ(ierr); 4552 4553 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4554 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4555 4556 /* insert mat values of mpimat */ 4557 /*----------------------------*/ 4558 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4559 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4560 4561 for (k=0; k<merge->nrecv; k++){ 4562 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4563 nrows = *(buf_ri_k[k]); 4564 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4565 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4566 } 4567 4568 /* set values of ba */ 4569 m = merge->rowmap->n; 4570 for (i=0; i<m; i++) { 4571 arow = owners[rank] + i; 4572 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4573 bnzi = bi[i+1] - bi[i]; 4574 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4575 4576 /* add local non-zero vals of this proc's seqmat into ba */ 4577 anzi = ai[arow+1] - ai[arow]; 4578 aj = a->j + ai[arow]; 4579 aa = a->a + ai[arow]; 4580 nextaj = 0; 4581 for (j=0; nextaj<anzi; j++){ 4582 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4583 ba_i[j] += aa[nextaj++]; 4584 } 4585 } 4586 4587 /* add received vals into ba */ 4588 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4589 /* i-th row */ 4590 if (i == *nextrow[k]) { 4591 anzi = *(nextai[k]+1) - *nextai[k]; 4592 aj = buf_rj[k] + *(nextai[k]); 4593 aa = abuf_r[k] + *(nextai[k]); 4594 nextaj = 0; 4595 for (j=0; nextaj<anzi; j++){ 4596 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4597 ba_i[j] += aa[nextaj++]; 4598 } 4599 } 4600 nextrow[k]++; nextai[k]++; 4601 } 4602 } 4603 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4604 } 4605 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4606 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4607 4608 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4609 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4610 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4611 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4612 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4613 PetscFunctionReturn(0); 4614 } 4615 4616 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4617 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4620 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4621 { 4622 PetscErrorCode ierr; 4623 Mat B_mpi; 4624 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4625 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4626 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4627 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4628 PetscInt len,proc,*dnz,*onz,bs,cbs; 4629 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4630 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4631 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4632 MPI_Status *status; 4633 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4634 PetscBT lnkbt; 4635 Mat_Merge_SeqsToMPI *merge; 4636 PetscContainer container; 4637 4638 PetscFunctionBegin; 4639 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4640 4641 /* make sure it is a PETSc comm */ 4642 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4643 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4644 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4645 4646 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4647 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4648 4649 /* determine row ownership */ 4650 /*---------------------------------------------------------*/ 4651 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4652 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4654 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4655 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4656 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4657 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4658 4659 m = merge->rowmap->n; 4660 M = merge->rowmap->N; 4661 owners = merge->rowmap->range; 4662 4663 /* determine the number of messages to send, their lengths */ 4664 /*---------------------------------------------------------*/ 4665 len_s = merge->len_s; 4666 4667 len = 0; /* length of buf_si[] */ 4668 merge->nsend = 0; 4669 for (proc=0; proc<size; proc++){ 4670 len_si[proc] = 0; 4671 if (proc == rank){ 4672 len_s[proc] = 0; 4673 } else { 4674 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4675 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4676 } 4677 if (len_s[proc]) { 4678 merge->nsend++; 4679 nrows = 0; 4680 for (i=owners[proc]; i<owners[proc+1]; i++){ 4681 if (ai[i+1] > ai[i]) nrows++; 4682 } 4683 len_si[proc] = 2*(nrows+1); 4684 len += len_si[proc]; 4685 } 4686 } 4687 4688 /* determine the number and length of messages to receive for ij-structure */ 4689 /*-------------------------------------------------------------------------*/ 4690 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4691 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4692 4693 /* post the Irecv of j-structure */ 4694 /*-------------------------------*/ 4695 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4696 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4697 4698 /* post the Isend of j-structure */ 4699 /*--------------------------------*/ 4700 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4701 4702 for (proc=0, k=0; proc<size; proc++){ 4703 if (!len_s[proc]) continue; 4704 i = owners[proc]; 4705 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4706 k++; 4707 } 4708 4709 /* receives and sends of j-structure are complete */ 4710 /*------------------------------------------------*/ 4711 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4712 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4713 4714 /* send and recv i-structure */ 4715 /*---------------------------*/ 4716 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4717 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4718 4719 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4720 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4721 for (proc=0,k=0; proc<size; proc++){ 4722 if (!len_s[proc]) continue; 4723 /* form outgoing message for i-structure: 4724 buf_si[0]: nrows to be sent 4725 [1:nrows]: row index (global) 4726 [nrows+1:2*nrows+1]: i-structure index 4727 */ 4728 /*-------------------------------------------*/ 4729 nrows = len_si[proc]/2 - 1; 4730 buf_si_i = buf_si + nrows+1; 4731 buf_si[0] = nrows; 4732 buf_si_i[0] = 0; 4733 nrows = 0; 4734 for (i=owners[proc]; i<owners[proc+1]; i++){ 4735 anzi = ai[i+1] - ai[i]; 4736 if (anzi) { 4737 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4738 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4739 nrows++; 4740 } 4741 } 4742 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4743 k++; 4744 buf_si += len_si[proc]; 4745 } 4746 4747 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4748 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4749 4750 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4751 for (i=0; i<merge->nrecv; i++){ 4752 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4753 } 4754 4755 ierr = PetscFree(len_si);CHKERRQ(ierr); 4756 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4757 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4758 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4759 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4760 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4761 ierr = PetscFree(status);CHKERRQ(ierr); 4762 4763 /* compute a local seq matrix in each processor */ 4764 /*----------------------------------------------*/ 4765 /* allocate bi array and free space for accumulating nonzero column info */ 4766 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4767 bi[0] = 0; 4768 4769 /* create and initialize a linked list */ 4770 nlnk = N+1; 4771 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4772 4773 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4774 len = 0; 4775 len = ai[owners[rank+1]] - ai[owners[rank]]; 4776 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4777 current_space = free_space; 4778 4779 /* determine 
symbolic info for each local row */ 4780 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4781 4782 for (k=0; k<merge->nrecv; k++){ 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *buf_ri_k[k]; 4785 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4790 len = 0; 4791 for (i=0;i<m;i++) { 4792 bnzi = 0; 4793 /* add local non-zero cols of this proc's seqmat into lnk */ 4794 arow = owners[rank] + i; 4795 anzi = ai[arow+1] - ai[arow]; 4796 aj = a->j + ai[arow]; 4797 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4798 bnzi += nlnk; 4799 /* add received col data into lnk */ 4800 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4801 if (i == *nextrow[k]) { /* i-th row */ 4802 anzi = *(nextai[k]+1) - *nextai[k]; 4803 aj = buf_rj[k] + *nextai[k]; 4804 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4805 bnzi += nlnk; 4806 nextrow[k]++; nextai[k]++; 4807 } 4808 } 4809 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4810 4811 /* if free space is not available, make more free space */ 4812 if (current_space->local_remaining<bnzi) { 4813 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4814 nspacedouble++; 4815 } 4816 /* copy data into free space, then initialize lnk */ 4817 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4818 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4819 4820 current_space->array += bnzi; 4821 current_space->local_used += bnzi; 4822 current_space->local_remaining -= bnzi; 4823 4824 bi[i+1] = bi[i] + bnzi; 4825 } 4826 4827 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4828 4829 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4830 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4831 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4832 4833 /* create symbolic parallel matrix B_mpi */ 4834 /*---------------------------------------*/ 4835 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4836 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4837 if (n==PETSC_DECIDE) { 4838 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4839 } else { 4840 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4841 } 4842 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4843 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4844 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4845 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4846 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4847 4848 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4849 B_mpi->assembled = PETSC_FALSE; 4850 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4851 merge->bi = bi; 4852 merge->bj = bj; 4853 merge->buf_ri = buf_ri; 4854 merge->buf_rj = buf_rj; 4855 merge->coi = PETSC_NULL; 4856 merge->coj = PETSC_NULL; 4857 merge->owners_co = PETSC_NULL; 4858 4859 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4860 4861 /* attach the supporting struct to B_mpi for reuse */ 4862 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4863 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1){
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX){
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

   Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 4938 4939 Level: developer 4940 4941 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4942 4943 @*/ 4944 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4945 { 4946 PetscErrorCode ierr; 4947 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4948 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4949 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4950 MatScalar *aa=a->a,*ba=b->a,*cam; 4951 PetscScalar *ca; 4952 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4953 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4954 PetscBool match; 4955 4956 PetscFunctionBegin; 4957 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4958 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4959 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4960 if (scall == MAT_INITIAL_MATRIX){ 4961 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4962 ci[0] = 0; 4963 for (i=0; i<am; i++){ 4964 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4965 } 4966 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4967 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4968 k = 0; 4969 for (i=0; i<am; i++) { 4970 ncols_o = bi[i+1] - bi[i]; 4971 ncols_d = ai[i+1] - ai[i]; 4972 /* off-diagonal portion of A */ 4973 for (jo=0; jo<ncols_o; jo++) { 4974 col = cmap[*bj]; 4975 if (col >= cstart) break; 4976 cj[k] = col; bj++; 4977 ca[k++] = *ba++; 4978 } 4979 /* diagonal portion of A */ 4980 for (j=0; j<ncols_d; j++) { 4981 cj[k] = cstart + *aj++; 4982 ca[k++] = *aa++; 4983 } 4984 /* off-diagonal portion of A */ 4985 for (j=jo; j<ncols_o; j++) { 4986 cj[k] = cmap[*bj++]; 4987 ca[k++] = *ba++; 4988 } 4989 } 4990 /* put together the new matrix */ 4991 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4992 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4993 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4994 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4995 mat->free_a = PETSC_TRUE; 4996 mat->free_ij = PETSC_TRUE; 4997 mat->nonew = 0; 4998 } else if (scall == MAT_REUSE_MATRIX){ 4999 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5000 ci = mat->i; cj = mat->j; cam = mat->a; 5001 for (i=0; i<am; i++) { 5002 /* off-diagonal portion of A */ 5003 ncols_o = bi[i+1] - bi[i]; 5004 for (jo=0; jo<ncols_o; jo++) { 5005 col = cmap[*bj]; 5006 if (col >= cstart) break; 5007 *cam++ = *ba++; bj++; 5008 } 5009 /* diagonal portion of A */ 5010 ncols_d = ai[i+1] - ai[i]; 5011 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5012 /* off-diagonal portion of A */ 5013 for (j=jo; j<ncols_o; j++) { 5014 *cam++ = *ba++; bj++; 5015 } 5016 } 5017 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5018 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5019 PetscFunctionReturn(0); 5020 } 5021 5022 #undef __FUNCT__ 5023 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5024 /*@C 5025 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5026 5027 Not Collective 5028 5029 Input Parameters: 5030 + A - the matrix 5031 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5032 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5033 5034 Output Parameter: 5035 . 
A_loc - the local sequential matrix generated 5036 5037 Level: developer 5038 5039 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5040 5041 @*/ 5042 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5043 { 5044 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5045 PetscErrorCode ierr; 5046 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5047 IS isrowa,iscola; 5048 Mat *aloc; 5049 PetscBool match; 5050 5051 PetscFunctionBegin; 5052 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5053 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5054 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5055 if (!row){ 5056 start = A->rmap->rstart; end = A->rmap->rend; 5057 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5058 } else { 5059 isrowa = *row; 5060 } 5061 if (!col){ 5062 start = A->cmap->rstart; 5063 cmap = a->garray; 5064 nzA = a->A->cmap->n; 5065 nzB = a->B->cmap->n; 5066 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5067 ncols = 0; 5068 for (i=0; i<nzB; i++) { 5069 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5070 else break; 5071 } 5072 imark = i; 5073 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5074 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5075 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5076 } else { 5077 iscola = *col; 5078 } 5079 if (scall != MAT_INITIAL_MATRIX){ 5080 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5081 aloc[0] = *A_loc; 5082 } 5083 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5084 *A_loc = aloc[0]; 5085 ierr = PetscFree(aloc);CHKERRQ(ierr); 5086 if (!row){ 5087 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5088 } 5089 if (!col){ 5090 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5091 } 5092 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 #undef __FUNCT__ 5097 #define __FUNCT__ "MatGetBrowsOfAcols" 5098 /*@C 5099 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5100 5101 Collective on Mat 5102 5103 Input Parameters: 5104 + A,B - the matrices in mpiaij format 5105 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5106 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5107 5108 Output Parameter: 5109 + rowb, colb - index sets of rows and columns of B to extract 5110 - B_seq - the sequential matrix generated 5111 5112 Level: developer 5113 5114 @*/ 5115 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5116 { 5117 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5118 PetscErrorCode ierr; 5119 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5120 IS isrowb,iscolb; 5121 Mat *bseq=PETSC_NULL; 5122 5123 PetscFunctionBegin; 5124 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5125 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5126 } 5127 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5128 5129 if (scall == MAT_INITIAL_MATRIX){ 5130 start = A->cmap->rstart; 5131 cmap = a->garray; 5132 nzA = a->A->cmap->n; 5133 nzB = a->B->cmap->n; 5134 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5135 ncols = 0; 5136 for (i=0; i<nzB; i++) { /* row < local row index */ 5137 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5138 else break; 5139 } 5140 imark = i; 5141 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5142 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5143 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5144 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5145 } else { 5146 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5147 isrowb = *rowb; iscolb = *colb; 5148 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5149 bseq[0] = *B_seq; 5150 } 5151 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5152 *B_seq = bseq[0]; 5153 ierr = PetscFree(bseq);CHKERRQ(ierr); 5154 if (!rowb){ 5155 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5156 } else { 5157 *rowb = isrowb; 5158 } 5159 if (!colb){ 5160 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5161 } else { 5162 *colb = iscolb; 5163 } 5164 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5165 PetscFunctionReturn(0); 5166 } 5167 5168 #undef __FUNCT__ 5169 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5170 /* 5171 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5172 of the OFF-DIAGONAL portion of local A 5173 5174 Collective on Mat 5175 5176 Input Parameters: 5177 + A,B - the matrices in mpiaij format 5178 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5179 5180 Output Parameter: 5181 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5182 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5183 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5184 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5185 5186 Level: developer 5187 5188 */ 5189 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5190 { 5191 VecScatter_MPI_General *gen_to,*gen_from; 5192 PetscErrorCode ierr; 5193 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5194 Mat_SeqAIJ *b_oth; 5195 VecScatter ctx=a->Mvctx; 5196 MPI_Comm comm=((PetscObject)ctx)->comm; 5197 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5198 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5199 PetscScalar *rvalues,*svalues; 5200 MatScalar *b_otha,*bufa,*bufA; 5201 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5202 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5203 MPI_Status *sstatus,rstatus; 5204 PetscMPIInt jj; 5205 PetscInt *cols,sbs,rbs; 5206 PetscScalar *vals; 5207 5208 PetscFunctionBegin; 5209 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5211 } 5212 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5214 5215 gen_to = (VecScatter_MPI_General*)ctx->todata; 5216 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5217 rvalues = gen_from->values; /* holds the length of receiving row */ 5218 svalues = gen_to->values; /* holds the length of sending row */ 5219 nrecvs = gen_from->n; 5220 nsends = gen_to->n; 5221 5222 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5223 srow = gen_to->indices; /* local row index to be sent */ 5224 sstarts = gen_to->starts; 5225 sprocs = gen_to->procs; 5226 sstatus = gen_to->sstatus; 5227 sbs = gen_to->bs; 5228 rstarts = gen_from->starts; 5229 rprocs = gen_from->procs; 5230 rbs = gen_from->bs; 5231 5232 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5233 if (scall == MAT_INITIAL_MATRIX){ 5234 /* i-array */ 5235 /*---------*/ 5236 /* post receives */ 5237 for (i=0; i<nrecvs; i++){ 5238 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5239 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5240 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5241 } 5242 5243 /* pack the outgoing message */ 5244 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5245 sstartsj[0] = 0; rstartsj[0] = 0; 5246 len = 0; /* total length of j or a array to be sent */ 5247 k = 0; 5248 for (i=0; i<nsends; i++){ 5249 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5250 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5251 for (j=0; j<nrows; j++) { 5252 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5253 for (l=0; l<sbs; l++){ 5254 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5255 rowlen[j*sbs+l] = ncols; 5256 len += ncols; 5257 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5258 } 5259 k++; 5260 } 5261 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5262 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5263 } 5264 /* 
recvs and sends of i-array are completed */ 5265 i = nrecvs; 5266 while (i--) { 5267 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5268 } 5269 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5270 5271 /* allocate buffers for sending j and a arrays */ 5272 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5273 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5274 5275 /* create i-array of B_oth */ 5276 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5277 b_othi[0] = 0; 5278 len = 0; /* total length of j or a array to be received */ 5279 k = 0; 5280 for (i=0; i<nrecvs; i++){ 5281 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5282 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5283 for (j=0; j<nrows; j++) { 5284 b_othi[k+1] = b_othi[k] + rowlen[j]; 5285 len += rowlen[j]; k++; 5286 } 5287 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5288 } 5289 5290 /* allocate space for j and a arrrays of B_oth */ 5291 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5292 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5293 5294 /* j-array */ 5295 /*---------*/ 5296 /* post receives of j-array */ 5297 for (i=0; i<nrecvs; i++){ 5298 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5299 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5300 } 5301 5302 /* pack the outgoing message j-array */ 5303 k = 0; 5304 for (i=0; i<nsends; i++){ 5305 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5306 bufJ = bufj+sstartsj[i]; 5307 for (j=0; j<nrows; j++) { 5308 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5309 for (ll=0; ll<sbs; ll++){ 5310 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5311 for (l=0; l<ncols; l++){ 5312 *bufJ++ = cols[l]; 5313 } 5314 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5315 } 5316 } 5317 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5318 } 5319 5320 /* recvs and sends of j-array are completed */ 5321 i = nrecvs; 5322 while (i--) { 5323 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5324 } 5325 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5326 } else if (scall == MAT_REUSE_MATRIX){ 5327 sstartsj = *startsj_s; 5328 rstartsj = *startsj_r; 5329 bufa = *bufa_ptr; 5330 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5331 b_otha = b_oth->a; 5332 } else { 5333 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5334 } 5335 5336 /* a-array */ 5337 /*---------*/ 5338 /* post receives of a-array */ 5339 for (i=0; i<nrecvs; i++){ 5340 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5341 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5342 } 5343 5344 /* pack the outgoing message a-array */ 5345 k = 0; 5346 for (i=0; i<nsends; i++){ 5347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5348 bufA = bufa+sstartsj[i]; 5349 for (j=0; j<nrows; j++) { 5350 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5351 for (ll=0; ll<sbs; ll++){ 5352 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5353 for (l=0; l<ncols; l++){ 5354 *bufA++ = vals[l]; 5355 } 5356 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5357 } 5358 } 5359 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5360 } 5361 /* recvs and sends of a-array are completed */ 5362 i = nrecvs; 5363 while (i--) { 5364 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5365 } 5366 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5367 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5368 5369 if (scall == MAT_INITIAL_MATRIX){ 5370 /* put together the new matrix */ 5371 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5372 5373 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5374 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5375 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5376 b_oth->free_a = PETSC_TRUE; 5377 b_oth->free_ij = PETSC_TRUE; 5378 b_oth->nonew = 0; 5379 5380 ierr = PetscFree(bufj);CHKERRQ(ierr); 5381 if (!startsj_s || !bufa_ptr){ 5382 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5383 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5384 } else { 5385 *startsj_s = sstartsj; 5386 *startsj_r = rstartsj; 5387 *bufa_ptr = bufa; 5388 } 5389 } 5390 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5391 PetscFunctionReturn(0); 5392 } 5393 5394 #undef __FUNCT__ 5395 #define __FUNCT__ "MatGetCommunicationStructs" 5396 /*@C 5397 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5398 5399 Not Collective 5400 5401 Input Parameters: 5402 . A - The matrix in mpiaij format 5403 5404 Output Parameter: 5405 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5406 . 
colmap - A map from global column index to local index into lvec 5407 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5408 5409 Level: developer 5410 5411 @*/ 5412 #if defined (PETSC_USE_CTABLE) 5413 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5414 #else 5415 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5416 #endif 5417 { 5418 Mat_MPIAIJ *a; 5419 5420 PetscFunctionBegin; 5421 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5422 PetscValidPointer(lvec, 2); 5423 PetscValidPointer(colmap, 3); 5424 PetscValidPointer(multScatter, 4); 5425 a = (Mat_MPIAIJ *) A->data; 5426 if (lvec) *lvec = a->lvec; 5427 if (colmap) *colmap = a->colmap; 5428 if (multScatter) *multScatter = a->Mvctx; 5429 PetscFunctionReturn(0); 5430 } 5431 5432 EXTERN_C_BEGIN 5433 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5434 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5435 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5436 EXTERN_C_END 5437 5438 #undef __FUNCT__ 5439 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5440 /* 5441 Computes (B'*A')' since computing B*A directly is untenable 5442 5443 n p p 5444 ( ) ( ) ( ) 5445 m ( A ) * n ( B ) = m ( C ) 5446 ( ) ( ) ( ) 5447 5448 */ 5449 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5450 { 5451 PetscErrorCode ierr; 5452 Mat At,Bt,Ct; 5453 5454 PetscFunctionBegin; 5455 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5456 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5457 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5458 ierr = MatDestroy(&At);CHKERRQ(ierr); 5459 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5460 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5461 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5462 PetscFunctionReturn(0); 5463 } 5464 5465 #undef __FUNCT__ 5466 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5467 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5468 { 5469 PetscErrorCode ierr; 5470 PetscInt m=A->rmap->n,n=B->cmap->n; 5471 Mat Cmat; 5472 5473 PetscFunctionBegin; 5474 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5475 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5476 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5477 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5478 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5479 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5480 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5481 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5482 *C = Cmat; 5483 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5484 PetscFunctionReturn(0); 5485 } 5486 5487 /* ----------------------------------------------------------------*/ 5488 #undef __FUNCT__ 5489 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5490 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5491 { 5492 PetscErrorCode ierr; 5493 5494 PetscFunctionBegin; 5495 if (scall == MAT_INITIAL_MATRIX){ 5496 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5497 } 5498 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
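  /* For reference, a caller-level sketch of how the symbolic/numeric split above is normally driven
     (illustrative only, not code used by this file; A is an MPIDENSE matrix and B an MPIAIJ matrix):

        Mat C;
        ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);   symbolic + numeric
        ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);     numeric only, C is reused
        ierr = MatDestroy(&C);CHKERRQ(ierr);
  */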
5499 PetscFunctionReturn(0); 5500 } 5501 5502 EXTERN_C_BEGIN 5503 #if defined(PETSC_HAVE_MUMPS) 5504 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5505 #endif 5506 #if defined(PETSC_HAVE_PASTIX) 5507 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5508 #endif 5509 #if defined(PETSC_HAVE_SUPERLU_DIST) 5510 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5511 #endif 5512 #if defined(PETSC_HAVE_SPOOLES) 5513 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5514 #endif 5515 EXTERN_C_END 5516 5517 /*MC 5518 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5519 5520 Options Database Keys: 5521 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5522 5523 Level: beginner 5524 5525 .seealso: MatCreateAIJ() 5526 M*/ 5527 5528 EXTERN_C_BEGIN 5529 #undef __FUNCT__ 5530 #define __FUNCT__ "MatCreate_MPIAIJ" 5531 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5532 { 5533 Mat_MPIAIJ *b; 5534 PetscErrorCode ierr; 5535 PetscMPIInt size; 5536 5537 PetscFunctionBegin; 5538 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5539 5540 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5541 B->data = (void*)b; 5542 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5543 B->assembled = PETSC_FALSE; 5544 5545 B->insertmode = NOT_SET_VALUES; 5546 b->size = size; 5547 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5548 5549 /* build cache for off array entries formed */ 5550 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5551 b->donotstash = PETSC_FALSE; 5552 b->colmap = 0; 5553 b->garray = 0; 5554 b->roworiented = PETSC_TRUE; 5555 5556 /* stuff used for matrix vector multiply */ 5557 b->lvec = PETSC_NULL; 5558 b->Mvctx = PETSC_NULL; 5559 5560 /* stuff for MatGetRow() */ 5561 b->rowindices = 0; 5562 b->rowvalues = 0; 5563 b->getrowactive = PETSC_FALSE; 5564 5565 #if defined(PETSC_HAVE_SPOOLES) 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5567 "MatGetFactor_mpiaij_spooles", 5568 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5569 #endif 5570 #if defined(PETSC_HAVE_MUMPS) 5571 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5572 "MatGetFactor_aij_mumps", 5573 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5574 #endif 5575 #if defined(PETSC_HAVE_PASTIX) 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5577 "MatGetFactor_mpiaij_pastix", 5578 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5579 #endif 5580 #if defined(PETSC_HAVE_SUPERLU_DIST) 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5582 "MatGetFactor_mpiaij_superlu_dist", 5583 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5584 #endif 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5586 "MatStoreValues_MPIAIJ", 5587 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5589 "MatRetrieveValues_MPIAIJ", 5590 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5592 "MatGetDiagonalBlock_MPIAIJ", 5593 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5595 "MatIsTranspose_MPIAIJ", 5596 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - Creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
     must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.
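
       Example usage (an illustrative sketch, not taken from the PETSc examples; it follows the
     column-index convention of the MatCreateSeqAIJWithArrays() calls in the body of this routine,
     i.e. local column indices in j[] for the "diagonal" block and global column indices in oj[] for
     the "off-diagonal" block): two processes, one row and one column owned by each, assembling the
     2x2 matrix [2 -1; -1 2]
.vb
      PetscMPIInt rank;
      PetscInt    i[]  = {0,1}, j[]  = {0};   PetscScalar a[]  = {2.0};
      PetscInt    oi[] = {0,1}, oj[1];        PetscScalar oa[] = {-1.0};
      Mat         A;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 1;   /* global column of the single off-diagonal entry */
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve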
5668 5669 .keywords: matrix, aij, compressed row, sparse, parallel 5670 5671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5672 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5673 @*/ 5674 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5675 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5676 { 5677 PetscErrorCode ierr; 5678 Mat_MPIAIJ *maij; 5679 5680 PetscFunctionBegin; 5681 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5682 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5683 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5684 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5685 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5686 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5687 maij = (Mat_MPIAIJ*) (*mat)->data; 5688 maij->donotstash = PETSC_TRUE; 5689 (*mat)->preallocated = PETSC_TRUE; 5690 5691 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5692 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5693 5694 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5695 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5696 5697 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 5702 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 PetscFunctionReturn(0); 5705 } 5706 5707 /* 5708 Special version for direct calls from Fortran 5709 */ 5710 #include <petsc-private/fortranimpl.h> 5711 5712 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5713 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5714 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5715 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5716 #endif 5717 5718 /* Change these macros so can be used in void function */ 5719 #undef CHKERRQ 5720 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5721 #undef SETERRQ2 5722 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5723 #undef SETERRQ3 5724 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5725 #undef SETERRQ 5726 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5727 5728 EXTERN_C_BEGIN 5729 #undef __FUNCT__ 5730 #define __FUNCT__ "matsetvaluesmpiaij_" 5731 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5732 { 5733 Mat mat = *mmat; 5734 PetscInt m = *mm, n = *mn; 5735 InsertMode addv = *maddv; 5736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5737 PetscScalar value; 5738 PetscErrorCode ierr; 5739 5740 MatCheckPreallocated(mat,1); 5741 if (mat->insertmode == NOT_SET_VALUES) { 5742 mat->insertmode = addv; 5743 } 5744 #if defined(PETSC_USE_DEBUG) 5745 else if (mat->insertmode != addv) { 5746 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5747 } 5748 #endif 5749 { 5750 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5751 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5752 PetscBool roworiented = aij->roworiented; 5753 5754 /* Some Variables required in the macro */ 5755 Mat A = aij->A; 5756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5757 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5758 MatScalar *aa = a->a; 5759 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5760 Mat B = aij->B; 5761 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5762 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5763 MatScalar *ba = b->a; 5764 5765 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5766 PetscInt nonew = a->nonew; 5767 MatScalar *ap1,*ap2; 5768 5769 PetscFunctionBegin; 5770 for (i=0; i<m; i++) { 5771 if (im[i] < 0) continue; 5772 #if defined(PETSC_USE_DEBUG) 5773 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5774 #endif 5775 if (im[i] >= rstart && im[i] < rend) { 5776 row = im[i] - rstart; 5777 lastcol1 = -1; 5778 rp1 = aj + ai[row]; 5779 ap1 = aa + ai[row]; 5780 rmax1 = aimax[row]; 5781 nrow1 = ailen[row]; 5782 low1 = 0; 5783 high1 = nrow1; 5784 lastcol2 = -1; 5785 rp2 = bj + bi[row]; 5786 ap2 = ba + bi[row]; 5787 rmax2 = bimax[row]; 5788 nrow2 = bilen[row]; 5789 low2 = 0; 5790 high2 = nrow2; 5791 5792 for (j=0; j<n; j++) { 5793 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5794 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5795 if (in[j] >= cstart && in[j] < cend){ 5796 col = in[j] - cstart; 5797 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5798 } else if (in[j] < 0) continue; 5799 #if defined(PETSC_USE_DEBUG) 5800 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5801 #endif 5802 else { 5803 if (mat->was_assembled) { 5804 if (!aij->colmap) { 5805 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5806 } 5807 #if defined (PETSC_USE_CTABLE) 5808 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5809 col--; 5810 #else 5811 col = aij->colmap[in[j]] - 1; 5812 #endif 5813 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5814 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5815 col = in[j]; 5816 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5817 B = aij->B; 5818 b = (Mat_SeqAIJ*)B->data; 5819 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5820 rp2 = bj + bi[row]; 5821 ap2 = ba + bi[row]; 5822 rmax2 = bimax[row]; 5823 nrow2 = bilen[row]; 5824 low2 = 0; 5825 high2 = nrow2; 5826 bm = aij->B->rmap->n; 5827 ba = b->a; 5828 } 5829 } else col = in[j]; 5830 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5831 } 5832 } 5833 } else { 5834 if (!aij->donotstash) { 5835 if (roworiented) { 5836 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5837 } else { 5838 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5839 } 5840 } 5841 } 5842 }} 5843 PetscFunctionReturnVoid(); 5844 } 5845 EXTERN_C_END 5846 5847
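
/*
   For reference, a minimal caller-level sketch of the MatCreateMPIAIJSumSeqAIJ() workflow documented
   above (illustrative only; seqmat stands for a fully assembled SEQAIJ matrix with the same global
   dimensions on every process of PETSC_COMM_WORLD):

      Mat C;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...recompute the numerical values (but not the nonzero pattern) of seqmat...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/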