#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type
   also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt
i,rstart,nrows,*rows; 112 113 PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
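    A typical call, sketched here for illustration only (dmat and the other names are not taken
    from this file): each process supplies its desired local row count m, with the m values
    summing to the global size of gmat, and gmat is a MATSEQAIJ matrix referenced only on
    process 0,

       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);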
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal 
    part of the local storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each processor
    has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col;\
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

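/*
   Note on the two insertion macros above. The fragment below is an illustrative sketch with
   generic names (rp, ap, nrow, lastcol, ...); it is not code used elsewhere in this file. For
   one row stored as a sorted column array rp[] and value array ap[] with nrow current entries,
   both macros implement

       if (col <= lastcol) low = 0; else high = nrow;
       lastcol = col;
       while (high-low > 5) {t = (low+high)/2; if (rp[t] > col) high = t; else low = t;}
       for (i=low; i<high; i++) {
         if (rp[i] > col) break;
         if (rp[i] == col) {
           if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
           goto noinsert;
         }
       }

   that is, a coarse binary search that narrows the window to at most five entries followed by a
   linear scan. When the column is not already present the macros honor the ignorezeroentries
   and nonew (no new nonzero) options, grow the row through MatSeqXAIJReallocateAIJ() if it has
   no free slot, then shift rp[i..nrow-1] and ap[i..nrow-1] up one position so the new entry is
   inserted in sorted order.
*/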
446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend){ 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 
} 540 #if defined (PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend){ 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined (PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col --; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else { 620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 621 } 622 } 623 PetscFunctionReturn(0); 624 } 625 626 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 630 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat 
mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
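     (The reduction just below combines the was_assembled flags with MPI_PROD: the result is
     true only if every process is still assembled, so if any process has disassembled, each
     process that is still assembled calls MatDisAssemble_MPIAIJ() as well before reassembling.)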
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 706 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 707 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 708 709 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 723 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 724 { 725 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 726 PetscErrorCode ierr; 727 728 PetscFunctionBegin; 729 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 730 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 731 PetscFunctionReturn(0); 732 } 733 734 #undef __FUNCT__ 735 #define __FUNCT__ "MatZeroRows_MPIAIJ" 736 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 737 { 738 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 739 PetscErrorCode ierr; 740 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 741 PetscInt i,*owners = A->rmap->range; 742 PetscInt *nprocs,j,idx,nsends,row; 743 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 744 PetscInt *rvalues,count,base,slen,*source; 745 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 746 MPI_Comm comm = ((PetscObject)A)->comm; 747 MPI_Request *send_waits,*recv_waits; 748 MPI_Status recv_status,*send_status; 749 const PetscScalar *xx; 750 PetscScalar *bb; 751 #if defined(PETSC_DEBUG) 752 PetscBool found = PETSC_FALSE; 753 #endif 754 755 PetscFunctionBegin; 756 /* first count number of contributors to each processor */ 757 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 758 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 759 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 760 j = 0; 761 for (i=0; i<N; i++) { 762 if (lastidx > (idx = rows[i])) j = 0; 763 lastidx = idx; 764 for (; j<size; j++) { 765 if (idx >= owners[j] && idx < owners[j+1]) { 766 nprocs[2*j]++; 767 nprocs[2*j+1] = 1; 768 owner[i] = j; 769 #if defined(PETSC_DEBUG) 770 found = PETSC_TRUE; 771 #endif 772 break; 773 } 774 } 775 #if defined(PETSC_DEBUG) 776 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 777 found = PETSC_FALSE; 778 #endif 779 } 780 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 781 782 if (A->nooffproczerorows) { 783 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 784 nrecvs = nsends; 785 nmax = N; 786 } 
else { 787 /* inform other processors of number of messages and max length*/ 788 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 789 } 790 791 /* post receives: */ 792 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 793 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 794 for (i=0; i<nrecvs; i++) { 795 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 796 } 797 798 /* do sends: 799 1) starts[i] gives the starting index in svalues for stuff going to 800 the ith processor 801 */ 802 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 804 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 805 starts[0] = 0; 806 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 807 for (i=0; i<N; i++) { 808 svalues[starts[owner[i]]++] = rows[i]; 809 } 810 811 starts[0] = 0; 812 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 813 count = 0; 814 for (i=0; i<size; i++) { 815 if (nprocs[2*i+1]) { 816 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 817 } 818 } 819 ierr = PetscFree(starts);CHKERRQ(ierr); 820 821 base = owners[rank]; 822 823 /* wait on receives */ 824 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 825 count = nrecvs; slen = 0; 826 while (count) { 827 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 828 /* unpack receives into our local space */ 829 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 830 source[imdex] = recv_status.MPI_SOURCE; 831 lens[imdex] = n; 832 slen += n; 833 count--; 834 } 835 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 836 837 /* move the data into the send scatter */ 838 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 839 count = 0; 840 for (i=0; i<nrecvs; i++) { 841 values = rvalues + i*nmax; 842 for (j=0; j<lens[i]; j++) { 843 lrows[count++] = values[j] - base; 844 } 845 } 846 ierr = PetscFree(rvalues);CHKERRQ(ierr); 847 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 848 ierr = PetscFree(owner);CHKERRQ(ierr); 849 ierr = PetscFree(nprocs);CHKERRQ(ierr); 850 851 /* fix right hand side if needed */ 852 if (x && b) { 853 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 854 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 855 for (i=0; i<slen; i++) { 856 bb[lrows[i]] = diag*xx[lrows[i]]; 857 } 858 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 859 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 860 } 861 /* 862 Zero the required rows. If the "diagonal block" of the matrix 863 is square and the user wishes to set the diagonal we use separate 864 code so that MatSetValues() is not called for each diagonal allocating 865 new memory, thus calling lots of mallocs and slowing things down. 
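     For reference, a typical user-level call that reaches this code is (a sketch, not part of
     this file):

        ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

     which zeros the listed global rows of A, places 1.0 on their diagonal entries, and, because
     x and b are supplied, resets b[row] = 1.0*x[row] for each zeroed row as done above.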
866 867 */ 868 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 869 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 870 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 871 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 872 } else if (diag != 0.0) { 873 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 875 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 876 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 877 } 878 for (i = 0; i < slen; i++) { 879 row = lrows[i] + rstart; 880 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 881 } 882 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 } else { 885 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 886 } 887 ierr = PetscFree(lrows);CHKERRQ(ierr); 888 889 /* wait on sends */ 890 if (nsends) { 891 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 892 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 893 ierr = PetscFree(send_status);CHKERRQ(ierr); 894 } 895 ierr = PetscFree(send_waits);CHKERRQ(ierr); 896 ierr = PetscFree(svalues);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 902 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 903 { 904 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 905 PetscErrorCode ierr; 906 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 907 PetscInt i,*owners = A->rmap->range; 908 PetscInt *nprocs,j,idx,nsends; 909 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 910 PetscInt *rvalues,count,base,slen,*source; 911 PetscInt *lens,*lrows,*values,m; 912 MPI_Comm comm = ((PetscObject)A)->comm; 913 MPI_Request *send_waits,*recv_waits; 914 MPI_Status recv_status,*send_status; 915 const PetscScalar *xx; 916 PetscScalar *bb,*mask; 917 Vec xmask,lmask; 918 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 919 const PetscInt *aj, *ii,*ridx; 920 PetscScalar *aa; 921 #if defined(PETSC_DEBUG) 922 PetscBool found = PETSC_FALSE; 923 #endif 924 925 PetscFunctionBegin; 926 /* first count number of contributors to each processor */ 927 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 928 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 929 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 930 j = 0; 931 for (i=0; i<N; i++) { 932 if (lastidx > (idx = rows[i])) j = 0; 933 lastidx = idx; 934 for (; j<size; j++) { 935 if (idx >= owners[j] && idx < owners[j+1]) { 936 nprocs[2*j]++; 937 nprocs[2*j+1] = 1; 938 owner[i] = j; 939 #if defined(PETSC_DEBUG) 940 found = PETSC_TRUE; 941 #endif 942 break; 943 } 944 } 945 #if defined(PETSC_DEBUG) 946 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 947 found = PETSC_FALSE; 948 #endif 949 } 950 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 951 952 /* inform other processors of number of messages and max length*/ 953 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 954 955 /* post receives: */ 956 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 957 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 958 for (i=0; i<nrecvs; i++) { 959 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 960 } 961 962 /* do sends: 963 1) starts[i] gives the starting index in svalues for stuff going to 964 the ith processor 965 */ 966 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 967 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 968 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 969 starts[0] = 0; 970 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 971 for (i=0; i<N; i++) { 972 svalues[starts[owner[i]]++] = rows[i]; 973 } 974 975 starts[0] = 0; 976 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 977 count = 0; 978 for (i=0; i<size; i++) { 979 if (nprocs[2*i+1]) { 980 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 981 } 982 } 983 ierr = PetscFree(starts);CHKERRQ(ierr); 984 985 base = owners[rank]; 986 987 /* wait on receives */ 988 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 989 count = nrecvs; slen = 0; 990 while (count) { 991 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 992 /* unpack receives into our local space */ 993 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 994 source[imdex] = recv_status.MPI_SOURCE; 995 lens[imdex] = n; 996 slen += n; 997 count--; 998 } 999 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1000 1001 /* move the data into the send scatter */ 1002 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1003 count = 0; 1004 for (i=0; i<nrecvs; i++) { 1005 values = rvalues + i*nmax; 1006 for (j=0; j<lens[i]; j++) { 1007 lrows[count++] = values[j] - base; 1008 } 1009 } 1010 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1011 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1012 ierr = PetscFree(owner);CHKERRQ(ierr); 1013 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1014 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1015 1016 /* zero diagonal part of matrix */ 1017 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1018 1019 /* handle off diagonal part of matrix */ 1020 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1021 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1022 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1023 for (i=0; i<slen; i++) { 1024 bb[lrows[i]] = 1; 1025 } 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use){ 1046 m = 
aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++){ 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 EXTERN_C_BEGIN 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1179 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1180 { 1181 MPI_Comm comm; 1182 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1183 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1184 IS Me,Notme; 1185 PetscErrorCode ierr; 1186 PetscInt M,N,first,last,*notme,i; 1187 PetscMPIInt size; 1188 1189 PetscFunctionBegin; 1190 1191 /* Easy test: symmetric diagonal block */ 1192 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1193 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1194 if (!*f) PetscFunctionReturn(0); 1195 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1197 if (size == 1) PetscFunctionReturn(0); 1198 1199 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
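       Each process extracts the block of Amat formed by its owned rows against all columns it
       does not own ("Me" rows by "Notme" columns) and the block of Bmat formed by the rows it
       does not own against its owned columns ("Notme" rows by "Me" columns); Amat can only be
       the transpose of Bmat if every such pair of blocks is itself a transpose pair.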
*/ 1200 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1201 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1202 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1203 for (i=0; i<first; i++) notme[i] = i; 1204 for (i=last; i<M; i++) notme[i-last+first] = i; 1205 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1206 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1207 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1208 Aoff = Aoffs[0]; 1209 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1210 Boff = Boffs[0]; 1211 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1212 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1213 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1214 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1215 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1216 ierr = PetscFree(notme);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 EXTERN_C_END 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1223 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1224 { 1225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 /* do nondiagonal part */ 1230 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1231 /* send it on its way */ 1232 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 /* do local part */ 1234 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1235 /* receive remote parts */ 1236 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 /* 1241 This only works correctly for square matrices where the subblock A->A is the 1242 diagonal block 1243 */ 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1246 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1247 { 1248 PetscErrorCode ierr; 1249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1250 1251 PetscFunctionBegin; 1252 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1253 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1254 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatScale_MPIAIJ" 1260 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1261 { 1262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1267 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatDestroy_MPIAIJ" 1273 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 #if defined(PETSC_USE_LOG) 1280 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1281 #endif 1282 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1283 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1284 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1285 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1286 #if defined (PETSC_USE_CTABLE) 1287 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
1288 #else 1289 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1290 #endif 1291 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1292 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1293 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1294 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1295 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1312 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1316 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1319 int fd; 1320 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1321 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1322 PetscScalar *column_values; 1323 PetscInt message_count,flowcontrolcount; 1324 FILE *file; 1325 1326 PetscFunctionBegin; 1327 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1328 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1329 nz = A->nz + B->nz; 1330 if (!rank) { 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = mat->rmap->N; 1333 header[2] = mat->cmap->N; 1334 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1335 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1336 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1337 /* get largest number of rows any processor has */ 1338 rlen = mat->rmap->n; 1339 range = mat->rmap->range; 1340 for (i=1; i<size; i++) { 1341 rlen = PetscMax(rlen,range[i+1] - range[i]); 1342 } 1343 } else { 1344 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 rlen = mat->rmap->n; 1346 } 1347 1348 /* load up the local row counts */ 1349 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1350 for (i=0; i<mat->rmap->n; i++) { 1351 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1352 } 1353 1354 /* store the row lengths to the file */ 1355 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1356 if (!rank) { 1357 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 for (i=1; i<size; i++) { 1359 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1360 rlen = range[i+1] - range[i]; 1361 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1362 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1363 } 1364 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1367 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1368 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1369 } 1370 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1371 1372 /* load up the local column indices */ 1373 nzmax = nz; /* )th processor needs space a largest processor needs */ 1374 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1375 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1376 cnt = 0; 1377 for (i=0; i<mat->rmap->n; i++) { 1378 for (j=B->i[i]; j<B->i[i+1]; j++) { 1379 if ( (col = garray[B->j[j]]) > cstart) break; 1380 column_indices[cnt++] = col; 1381 } 1382 for (k=A->i[i]; k<A->i[i+1]; k++) { 1383 column_indices[cnt++] = A->j[k] + cstart; 1384 } 1385 for (; j<B->i[i+1]; j++) { 1386 column_indices[cnt++] = garray[B->j[j]]; 1387 } 1388 } 1389 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1390 1391 /* store the column indices to the file */ 1392 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1393 if (!rank) { 1394 MPI_Status status; 1395 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1396 for (i=1; i<size; i++) { 1397 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1398 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1399 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1400 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1401 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1402 } 1403 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1406 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1407 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1408 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1409 } 1410 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1411 1412 /* load up the local column values */ 1413 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1414 cnt = 0; 1415 for (i=0; i<mat->rmap->n; i++) { 1416 for (j=B->i[i]; j<B->i[i+1]; j++) { 1417 if ( garray[B->j[j]] > cstart) break; 1418 column_values[cnt++] = B->a[j]; 1419 } 1420 for (k=A->i[i]; k<A->i[i+1]; k++) { 1421 column_values[cnt++] = A->a[k]; 1422 } 1423 for (; j<B->i[i+1]; j++) { 1424 column_values[cnt++] = B->a[j]; 1425 } 1426 } 1427 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1428 1429 /* store the column values to the file */ 1430 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1431 if (!rank) { 1432 MPI_Status status; 1433 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1434 for (i=1; i<size; i++) { 1435 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1436 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1437 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1438 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1439 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1440 } 1441 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1442 } else { 1443 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1444 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1445 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1446 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1447 } 1448 ierr = PetscFree(column_values);CHKERRQ(ierr); 1449 1450 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1451 if (file) { 1452 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1453 } 1454 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1460 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1461 { 1462 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1463 PetscErrorCode ierr; 1464 PetscMPIInt rank = aij->rank,size = aij->size; 1465 PetscBool isdraw,iascii,isbinary; 1466 PetscViewer sviewer; 1467 PetscViewerFormat format; 1468 1469 PetscFunctionBegin; 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1473 if (iascii) { 1474 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1475 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1476 MatInfo info; 1477 PetscBool inodes; 1478 1479 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1480 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1481 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1482 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1483 if (!inodes) { 1484 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1485 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1488 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1489 } 1490 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1492 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1493 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D 
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1743 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1744 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 1746 /* Invert column permutation to find out where my columns should go */ 1747 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1748 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1749 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1750 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1751 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1752 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1753 1754 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1755 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1756 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1757 1758 /* Find out where my gcols should go */ 1759 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1760 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1761 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1762 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1763 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1764 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1766 1767 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1768 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1769 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1771 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1772 for (i=0; i<m; i++) { 1773 PetscInt row = rdest[i],rowner; 1774 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1775 for (j=ai[i]; j<ai[i+1]; j++) { 1776 PetscInt cowner,col = cdest[aj[j]]; 1777 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1778 if (rowner == cowner) dnnz[i]++; 1779 else onnz[i]++; 1780 } 1781 for (j=bi[i]; j<bi[i+1]; j++) { 1782 PetscInt cowner,col = gcdest[bj[j]]; 1783 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1784 if (rowner == cowner) dnnz[i]++; 1785 else onnz[i]++; 1786 } 1787 } 1788 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1789 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1791 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1792 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1793 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1794 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1795 1796 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->cmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1797 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1799 for (i=0; i<m; i++) { 1800 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1801 PetscInt rowlen; 1802 rowlen = ai[i+1] - ai[i]; 1803 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1804 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1805 rowlen = bi[i+1] - bi[i]; 1806 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1811 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1812 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1813 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1814 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1815 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1816 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1817 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1818 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1819 *B = Aperm; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1826 { 1827 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1828 Mat A = mat->A,B = mat->B; 1829 PetscErrorCode ierr; 1830 PetscReal 
isend[5],irecv[5]; 1831 1832 PetscFunctionBegin; 1833 info->block_size = 1.0; 1834 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1835 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1836 isend[3] = info->memory; isend[4] = info->mallocs; 1837 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1839 isend[3] += info->memory; isend[4] += info->mallocs; 1840 if (flag == MAT_LOCAL) { 1841 info->nz_used = isend[0]; 1842 info->nz_allocated = isend[1]; 1843 info->nz_unneeded = isend[2]; 1844 info->memory = isend[3]; 1845 info->mallocs = isend[4]; 1846 } else if (flag == MAT_GLOBAL_MAX) { 1847 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1848 info->nz_used = irecv[0]; 1849 info->nz_allocated = irecv[1]; 1850 info->nz_unneeded = irecv[2]; 1851 info->memory = irecv[3]; 1852 info->mallocs = irecv[4]; 1853 } else if (flag == MAT_GLOBAL_SUM) { 1854 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1855 info->nz_used = irecv[0]; 1856 info->nz_allocated = irecv[1]; 1857 info->nz_unneeded = irecv[2]; 1858 info->memory = irecv[3]; 1859 info->mallocs = irecv[4]; 1860 } 1861 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1862 info->fill_ratio_needed = 0; 1863 info->factor_mallocs = 0; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetOption_MPIAIJ" 1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1870 { 1871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1872 PetscErrorCode ierr; 1873 1874 PetscFunctionBegin; 1875 switch (op) { 1876 case MAT_NEW_NONZERO_LOCATIONS: 1877 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1878 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1879 case MAT_KEEP_NONZERO_PATTERN: 1880 case MAT_NEW_NONZERO_LOCATION_ERR: 1881 case MAT_USE_INODES: 1882 case MAT_IGNORE_ZERO_ENTRIES: 1883 MatCheckPreallocated(A,1); 1884 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1885 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1886 break; 1887 case MAT_ROW_ORIENTED: 1888 a->roworiented = flg; 1889 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1890 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1891 break; 1892 case MAT_NEW_DIAGONALS: 1893 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1894 break; 1895 case MAT_IGNORE_OFF_PROC_ENTRIES: 1896 a->donotstash = flg; 1897 break; 1898 case MAT_SPD: 1899 A->spd_set = PETSC_TRUE; 1900 A->spd = flg; 1901 if (flg) { 1902 A->symmetric = PETSC_TRUE; 1903 A->structurally_symmetric = PETSC_TRUE; 1904 A->symmetric_set = PETSC_TRUE; 1905 A->structurally_symmetric_set = PETSC_TRUE; 1906 } 1907 break; 1908 case MAT_SYMMETRIC: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 case MAT_STRUCTURALLY_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_HERMITIAN: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_SYMMETRY_ETERNAL: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 default: 1921 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatGetRow_MPIAIJ" 1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1929 { 1930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1931 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1932 PetscErrorCode ierr; 1933 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1934 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1935 PetscInt *cmap,*idx_p; 1936 1937 PetscFunctionBegin; 1938 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1939 mat->getrowactive = PETSC_TRUE; 1940 1941 if (!mat->rowvalues && (idx || v)) { 1942 /* 1943 allocate enough space to hold information from the longest row. 1944 */ 1945 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1946 PetscInt max = 1,tmp; 1947 for (i=0; i<matin->rmap->n; i++) { 1948 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1949 if (max < tmp) { max = tmp; } 1950 } 1951 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1952 } 1953 1954 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1955 lrow = row - rstart; 1956 1957 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1958 if (!v) {pvA = 0; pvB = 0;} 1959 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1960 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1961 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1962 nztot = nzA + nzB; 1963 1964 cmap = mat->garray; 1965 if (v || idx) { 1966 if (nztot) { 1967 /* Sort by increasing column numbers, assuming A and B already sorted */ 1968 PetscInt imark = -1; 1969 if (v) { 1970 *v = v_p = mat->rowvalues; 1971 for (i=0; i<nzB; i++) { 1972 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1973 else break; 1974 } 1975 imark = i; 1976 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1977 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1978 } 1979 if (idx) { 1980 *idx = idx_p = mat->rowindices; 1981 if (imark > -1) { 1982 for (i=0; i<imark; i++) { 1983 idx_p[i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 for (i=0; i<nzB; i++) { 1987 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1988 else break; 1989 } 1990 imark = i; 1991 } 1992 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1993 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1994 } 1995 } else { 1996 if (idx) *idx = 0; 1997 if (v) *v = 0; 1998 } 1999 } 2000 *nz = nztot; 2001 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2002 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 2012 PetscFunctionBegin; 2013 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2014 aij->getrowactive = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatNorm_MPIAIJ" 2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,cstart = mat->cmap->rstart; 2026 PetscReal sum = 0.0; 2027 MatScalar *v; 2028 2029 PetscFunctionBegin; 2030 if (aij->size == 1) { 2031 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2032 } else { 2033 if (type == NORM_FROBENIUS) { 2034 
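      /* Frobenius norm: each process accumulates the sum of |a_ij|^2 over its diagonal (A) and
         off-diagonal (B) blocks, the partial sums are combined with MPI_Allreduce, and the
         square root of the global sum is taken. */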
v = amat->a; 2035 for (i=0; i<amat->nz; i++) { 2036 #if defined(PETSC_USE_COMPLEX) 2037 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2038 #else 2039 sum += (*v)*(*v); v++; 2040 #endif 2041 } 2042 v = bmat->a; 2043 for (i=0; i<bmat->nz; i++) { 2044 #if defined(PETSC_USE_COMPLEX) 2045 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2046 #else 2047 sum += (*v)*(*v); v++; 2048 #endif 2049 } 2050 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2051 *norm = PetscSqrtReal(*norm); 2052 } else if (type == NORM_1) { /* max column norm */ 2053 PetscReal *tmp,*tmp2; 2054 PetscInt *jj,*garray = aij->garray; 2055 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2056 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2057 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2058 *norm = 0.0; 2059 v = amat->a; jj = amat->j; 2060 for (j=0; j<amat->nz; j++) { 2061 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a; jj = bmat->j; 2064 for (j=0; j<bmat->nz; j++) { 2065 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2066 } 2067 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 for (j=0; j<mat->cmap->N; j++) { 2069 if (tmp2[j] > *norm) *norm = tmp2[j]; 2070 } 2071 ierr = PetscFree(tmp);CHKERRQ(ierr); 2072 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2073 } else if (type == NORM_INFINITY) { /* max row norm */ 2074 PetscReal ntemp = 0.0; 2075 for (j=0; j<aij->A->rmap->n; j++) { 2076 v = amat->a + amat->i[j]; 2077 sum = 0.0; 2078 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2079 sum += PetscAbsScalar(*v); v++; 2080 } 2081 v = bmat->a + bmat->i[j]; 2082 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2083 sum += PetscAbsScalar(*v); v++; 2084 } 2085 if (sum > ntemp) ntemp = sum; 2086 } 2087 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatTranspose_MPIAIJ" 2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2098 { 2099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2100 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2101 PetscErrorCode ierr; 2102 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2103 PetscInt cstart=A->cmap->rstart,ncol; 2104 Mat B; 2105 MatScalar *array; 2106 2107 PetscFunctionBegin; 2108 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2109 2110 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2111 ai = Aloc->i; aj = Aloc->j; 2112 bi = Bloc->i; bj = Bloc->j; 2113 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2114 PetscInt *d_nnz,*g_nnz,*o_nnz; 2115 PetscSFNode *oloc; 2116 PETSC_UNUSED PetscSF sf; 2117 2118 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2119 /* compute d_nnz for preallocation */ 2120 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2121 for (i=0; i<ai[ma]; i++) { 2122 d_nnz[aj[i]] ++; 2123 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. 
https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */ 2126 ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2127 #else 2128 /* compute local off-diagonal contributions */ 2129 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2131 /* map those to global */ 2132 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2134 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2135 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2136 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2137 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2138 #endif 2139 2140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2141 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2142 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2143 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2146 #if defined(OMPI_MAJOR_VERSION) 2147 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2148 #endif 2149 } else { 2150 B = *matout; 2151 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2152 for (i=0; i<ai[ma]; i++){ 2153 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2154 } 2155 } 2156 2157 /* copy over the A part */ 2158 array = Aloc->a; 2159 row = A->rmap->rstart; 2160 for (i=0; i<ma; i++) { 2161 ncol = ai[i+1]-ai[i]; 2162 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2163 row++; array += ncol; aj += ncol; 2164 } 2165 aj = Aloc->j; 2166 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2167 2168 /* copy over the B part */ 2169 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2170 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2171 array = Bloc->a; 2172 row = A->rmap->rstart; 2173 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2174 cols_tmp = cols; 2175 for (i=0; i<mb; i++) { 2176 ncol = bi[i+1]-bi[i]; 2177 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2178 row++; array += ncol; cols_tmp += ncol; 2179 } 2180 ierr = PetscFree(cols);CHKERRQ(ierr); 2181 2182 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2185 *matout = B; 2186 } else { 2187 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2195 { 2196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2197 Mat a = aij->A,b = aij->B; 2198 PetscErrorCode ierr; 2199 PetscInt s1,s2,s3; 2200 2201 PetscFunctionBegin; 2202 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2203 if (rr) { 2204 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2205 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2206 /* Overlap communication with computation. 
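The scatter of rr into aij->lvec is started here; while it is in flight the left scaling of the off-diagonal block and the scaling of the diagonal block proceed, and only after VecScatterEnd() below is the off-diagonal block right-scaled with the received values.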
*/ 2207 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2208 } 2209 if (ll) { 2210 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2211 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2212 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2213 } 2214 /* scale the diagonal block */ 2215 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2216 2217 if (rr) { 2218 /* Do a scatter end and then right scale the off-diagonal block */ 2219 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2220 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2221 } 2222 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2229 { 2230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatEqual_MPIAIJ" 2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2241 { 2242 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2243 Mat a,b,c,d; 2244 PetscBool flg; 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 a = matA->A; b = matA->B; 2249 c = matB->A; d = matB->B; 2250 2251 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2252 if (flg) { 2253 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2254 } 2255 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatCopy_MPIAIJ" 2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2262 { 2263 PetscErrorCode ierr; 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2265 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2266 2267 PetscFunctionBegin; 2268 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2269 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2270 /* because of the column compression in the off-processor part of the matrix a->B, 2271 the number of columns in a->B and b->B may be different, hence we cannot call 2272 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2273 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2274 then copying the submatrices */ 2275 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2276 } else { 2277 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2278 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2279 } 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatSetUp_MPIAIJ" 2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2286 { 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2298 { 2299 PetscInt i,m=Y->rmap->N; 2300 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2301 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2302 const PetscInt *xi = x->i,*yi = y->i; 2303 2304 PetscFunctionBegin; 2305 /* Set the number of nonzeros in the new matrix */ 2306 for (i=0; i<m; i++) { 2307 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2308 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2309 nnz[i] = 0; 2310 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2311 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2312 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2313 nnz[i]++; 2314 } 2315 for (; k<nzy; k++) nnz[i]++; 2316 } 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatAXPY_MPIAIJ" 2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2323 { 2324 PetscErrorCode ierr; 2325 PetscInt i; 2326 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2327 PetscBLASInt bnz,one=1; 2328 Mat_SeqAIJ *x,*y; 2329 2330 PetscFunctionBegin; 2331 if (str == SAME_NONZERO_PATTERN) { 2332 PetscScalar alpha = a; 2333 x = (Mat_SeqAIJ *)xx->A->data; 2334 y = (Mat_SeqAIJ *)yy->A->data; 2335 bnz = PetscBLASIntCast(x->nz); 2336 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2337 x = (Mat_SeqAIJ *)xx->B->data; 2338 y = (Mat_SeqAIJ *)yy->B->data; 2339 bnz = PetscBLASIntCast(x->nz); 2340 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2341 } else if (str == SUBSET_NONZERO_PATTERN) { 2342 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2343 2344 x = (Mat_SeqAIJ *)xx->B->data; 2345 y = (Mat_SeqAIJ *)yy->B->data; 2346 if (y->xtoy && y->XtoY != xx->B) { 2347 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2348 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2349 } 2350 if (!y->xtoy) { /* get xtoy */ 2351 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2352 y->XtoY = xx->B; 2353 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2354 } 2355 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2356 } else { 2357 Mat B; 2358 PetscInt *nnz_d,*nnz_o; 2359 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2360 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2361 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2362 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2363 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2364 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2365 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2366 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2367 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2368 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2369 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2370 ierr = MatHeaderReplace(Y,B); 2371 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2372 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2373 } 2374 PetscFunctionReturn(0); 2375 } 2376 2377 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatConjugate_MPIAIJ" 2381 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2382 { 2383 #if defined(PETSC_USE_COMPLEX) 2384 PetscErrorCode ierr; 2385 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2386 2387 PetscFunctionBegin; 2388 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2389 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2390 #else 2391 PetscFunctionBegin; 2392 #endif 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatRealPart_MPIAIJ" 2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2399 { 2400 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2405 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2412 { 2413 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2418 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #ifdef PETSC_HAVE_PBGL 2423 2424 #include <boost/parallel/mpi/bsp_process_group.hpp> 2425 #include <boost/graph/distributed/ilu_default_graph.hpp> 2426 #include <boost/graph/distributed/ilu_0_block.hpp> 2427 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2428 #include <boost/graph/distributed/petsc/interface.hpp> 2429 #include <boost/multi_array.hpp> 2430 #include <boost/parallel/distributed_property_map->hpp> 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2434 /* 2435 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2436 */ 2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2438 { 2439 namespace petsc = boost::distributed::petsc; 2440 2441 namespace graph_dist = boost::graph::distributed; 2442 using boost::graph::distributed::ilu_default::process_group_type; 2443 using boost::graph::ilu_permuted; 2444 2445 PetscBool row_identity, col_identity; 2446 PetscContainer c; 2447 PetscInt m, n, M, N; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2452 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2453 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2454 if (!row_identity || !col_identity) { 2455 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2456 } 2457 2458 process_group_type pg; 2459 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2460 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2461 lgraph_type& level_graph = *lgraph_p; 2462 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2463 2464 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2465 ilu_permuted(level_graph); 2466 2467 /* put together the new matrix */ 2468 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2469 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2470 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2471 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2472 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2473 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2474 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2475 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 2477 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2478 ierr = PetscContainerSetPointer(c, lgraph_p); 2479 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2480 ierr = PetscContainerDestroy(&c); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2487 { 2488 PetscFunctionBegin; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatSolve_MPIAIJ" 2494 /* 2495 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2496 */ 2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2498 { 2499 namespace graph_dist = boost::graph::distributed; 2500 2501 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2502 lgraph_type* lgraph_p; 2503 PetscContainer c; 2504 PetscErrorCode ierr; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2508 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2509 ierr = VecCopy(b, x);CHKERRQ(ierr); 2510 2511 PetscScalar* array_x; 2512 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2513 PetscInt sx; 2514 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2515 2516 PetscScalar* array_b; 2517 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2518 PetscInt sb; 2519 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2520 2521 lgraph_type& level_graph = *lgraph_p; 2522 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2523 2524 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2525 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2526 ref_x(array_x, boost::extents[num_vertices(graph)]); 2527 2528 typedef boost::iterator_property_map<array_ref_type::iterator, 2529 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2530 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2531 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2532 2533 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2534 2535 PetscFunctionReturn(0); 2536 } 2537 #endif 2538 2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2540 PetscInt nzlocal,nsends,nrecvs; 2541 PetscMPIInt *send_rank,*recv_rank; 2542 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2543 PetscScalar *sbuf_a,**rbuf_a; 2544 PetscErrorCode (*Destroy)(Mat); 2545 } Mat_Redundant; 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2550 { 2551 PetscErrorCode ierr; 2552 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2553 PetscInt i; 2554 2555 PetscFunctionBegin; 2556 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2557 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2558 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2559 for (i=0; i<redund->nrecvs; i++){ 2560 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2561 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2562 } 2563 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2564 ierr = PetscFree(redund);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatDestroy_MatRedundant" 2570 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2571 { 2572 PetscErrorCode ierr; 2573 PetscContainer container; 2574 Mat_Redundant *redund=PETSC_NULL; 2575 2576 PetscFunctionBegin; 2577 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2578 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2579 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2580 A->ops->destroy = redund->Destroy; 2581 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2582 if (A->ops->destroy) { 2583 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2584 } 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2591 { 2592 PetscMPIInt rank,size; 2593 MPI_Comm comm=((PetscObject)mat)->comm; 2594 PetscErrorCode ierr; 2595 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2596 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2597 PetscInt *rowrange=mat->rmap->range; 2598 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2599 Mat A=aij->A,B=aij->B,C=*matredundant; 2600 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2601 PetscScalar *sbuf_a; 2602 PetscInt nzlocal=a->nz+b->nz; 2603 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2604 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2605 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2606 MatScalar *aworkA,*aworkB; 2607 PetscScalar *vals; 2608 PetscMPIInt tag1,tag2,tag3,imdex; 2609 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2610 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2611 MPI_Status recv_status,*send_status; 2612 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2613 PetscInt **rbuf_j=PETSC_NULL; 2614 PetscScalar **rbuf_a=PETSC_NULL; 2615 Mat_Redundant *redund=PETSC_NULL; 2616 PetscContainer container; 2617 2618 PetscFunctionBegin; 2619 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2621 2622 if (reuse == MAT_REUSE_MATRIX) { 2623 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2624 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2625 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2626 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2627 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2628 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2629 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2630 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2631 2632 nsends = redund->nsends; 2633 nrecvs = redund->nrecvs; 2634 send_rank = redund->send_rank; 2635 recv_rank = redund->recv_rank; 2636 sbuf_nz = redund->sbuf_nz; 2637 rbuf_nz = redund->rbuf_nz; 2638 sbuf_j = redund->sbuf_j; 2639 sbuf_a = redund->sbuf_a; 2640 rbuf_j = redund->rbuf_j; 2641 rbuf_a = redund->rbuf_a; 2642 } 2643 2644 if (reuse == MAT_INITIAL_MATRIX){ 2645 PetscMPIInt subrank,subsize; 2646 PetscInt nleftover,np_subcomm; 2647 /* get the destination processors' id send_rank, nsends and nrecvs */ 2648 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2649 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2650 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2651 np_subcomm = size/nsubcomm; 2652 nleftover = size - nsubcomm*np_subcomm; 2653 nsends = 0; nrecvs = 0; 2654 for (i=0; i<size; i++){ /* i=rank*/ 2655 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2656 send_rank[nsends] = i; nsends++; 2657 recv_rank[nrecvs++] = i; 2658 } 2659 } 2660 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2661 i = size-nleftover-1; 2662 j = 0; 2663 while (j < nsubcomm - nleftover){ 2664 send_rank[nsends++] = i; 2665 i--; j++; 2666 } 2667 } 2668 2669 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2670 for (i=0; i<nleftover; i++){ 2671 recv_rank[nrecvs++] = size-nleftover+i; 2672 } 2673 } 2674 2675 /* allocate sbuf_j, sbuf_a */ 2676 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2677 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2678 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2679 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2680 2681 /* copy mat's local entries into the buffers */ 2682 if (reuse == MAT_INITIAL_MATRIX){ 2683 rownz_max = 0; 2684 rptr = sbuf_j; 2685 cols = sbuf_j + rend-rstart + 1; 2686 vals = sbuf_a; 2687 rptr[0] = 0; 2688 for (i=0; i<rend-rstart; i++){ 2689 row = i + rstart; 2690 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2691 ncols = nzA + nzB; 2692 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2693 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2694 /* load the column indices for this row into cols */ 2695 lwrite = 0; 2696 for (l=0; l<nzB; l++) { 2697 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2698 vals[lwrite] = aworkB[l]; 2699 cols[lwrite++] = ctmp; 2700 } 2701 } 2702 for (l=0; l<nzA; l++){ 2703 vals[lwrite] = aworkA[l]; 2704 cols[lwrite++] = cstart + cworkA[l]; 2705 } 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2708 vals[lwrite] = aworkB[l]; 2709 cols[lwrite++] = ctmp; 2710 } 2711 } 2712 vals += ncols; 2713 cols += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 if (rownz_max < ncols) rownz_max = ncols; 2716 } 2717 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2718 } else { /* only copy matrix values into sbuf_a */ 2719 rptr = sbuf_j; 2720 vals = sbuf_a; 2721 rptr[0] = 0; 2722 for (i=0; i<rend-rstart; i++){ 2723 row = i + 
rstart; 2724 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2725 ncols = nzA + nzB; 2726 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2727 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2728 lwrite = 0; 2729 for (l=0; l<nzB; l++) { 2730 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2731 } 2732 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2733 for (l=0; l<nzB; l++) { 2734 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2735 } 2736 vals += ncols; 2737 rptr[i+1] = rptr[i] + ncols; 2738 } 2739 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2740 2741 /* send nzlocal to others, and recv other's nzlocal */ 2742 /*--------------------------------------------------*/ 2743 if (reuse == MAT_INITIAL_MATRIX){ 2744 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2745 s_waits2 = s_waits3 + nsends; 2746 s_waits1 = s_waits2 + nsends; 2747 r_waits1 = s_waits1 + nsends; 2748 r_waits2 = r_waits1 + nrecvs; 2749 r_waits3 = r_waits2 + nrecvs; 2750 } else { 2751 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2752 r_waits3 = s_waits3 + nsends; 2753 } 2754 2755 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2756 if (reuse == MAT_INITIAL_MATRIX){ 2757 /* get new tags to keep the communication clean */ 2758 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2759 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2760 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2761 2762 /* post receives of other's nzlocal */ 2763 for (i=0; i<nrecvs; i++){ 2764 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2765 } 2766 /* send nzlocal to others */ 2767 for (i=0; i<nsends; i++){ 2768 sbuf_nz[i] = nzlocal; 2769 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2770 } 2771 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2772 count = nrecvs; 2773 while (count) { 2774 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2775 recv_rank[imdex] = recv_status.MPI_SOURCE; 2776 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2777 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2778 2779 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2780 rbuf_nz[imdex] += i + 2; 2781 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2782 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2783 count--; 2784 } 2785 /* wait on sends of nzlocal */ 2786 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2787 /* send mat->i,j to others, and recv from other's */ 2788 /*------------------------------------------------*/ 2789 for (i=0; i<nsends; i++){ 2790 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2791 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2792 } 2793 /* wait on receives of mat->i,j */ 2794 /*------------------------------*/ 2795 count = nrecvs; 2796 while (count) { 2797 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2798 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2799 count--; 2800 } 2801 /* wait on sends of mat->i,j */ 2802 /*---------------------------*/ 2803 if (nsends) { 2804 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2805 } 2806 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2807 2808 /* post receives, send and receive mat->a */ 2809 /*----------------------------------------*/ 2810 for (imdex=0; imdex<nrecvs; imdex++) { 2811 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2812 } 2813 for (i=0; i<nsends; i++){ 2814 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2815 } 2816 count = nrecvs; 2817 while (count) { 2818 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2819 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2820 count--; 2821 } 2822 if (nsends) { 2823 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2824 } 2825 2826 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2827 2828 /* create redundant matrix */ 2829 /*-------------------------*/ 2830 if (reuse == MAT_INITIAL_MATRIX){ 2831 /* compute rownz_max for preallocation */ 2832 for (imdex=0; imdex<nrecvs; imdex++){ 2833 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2834 rptr = rbuf_j[imdex]; 2835 for (i=0; i<j; i++){ 2836 ncols = rptr[i+1] - rptr[i]; 2837 if (rownz_max < ncols) rownz_max = ncols; 2838 } 2839 } 2840 2841 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2842 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2843 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2844 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2845 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2846 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2847 } else { 2848 C = *matredundant; 2849 } 2850 2851 /* insert local matrix entries */ 2852 rptr = sbuf_j; 2853 cols = sbuf_j + rend-rstart + 1; 2854 vals = sbuf_a; 2855 for (i=0; i<rend-rstart; i++){ 2856 row = i + rstart; 2857 ncols = rptr[i+1] - rptr[i]; 2858 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2859 vals += ncols; 2860 cols += ncols; 2861 } 2862 /* insert received matrix entries */ 2863 for (imdex=0; imdex<nrecvs; imdex++){ 2864 rstart = rowrange[recv_rank[imdex]]; 2865 rend = rowrange[recv_rank[imdex]+1]; 2866 rptr = rbuf_j[imdex]; 2867 cols = rbuf_j[imdex] + rend-rstart + 1; 2868 vals = rbuf_a[imdex]; 2869 for (i=0; i<rend-rstart; i++){ 2870 row = i + rstart; 2871 ncols = rptr[i+1] - rptr[i]; 2872 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2873 vals += ncols; 2874 cols += ncols; 2875 } 2876 } 2877 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2878 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2880 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2881 if (reuse == MAT_INITIAL_MATRIX) { 2882 PetscContainer container; 2883 *matredundant = C; 2884 /* create a supporting struct and attach it to C for reuse */ 2885 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2886 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
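    /* Stash the communication plan and buffers in a Mat_Redundant struct, compose it on C under
       the name "Mat_Redundant", and install MatDestroy_MatRedundant() as C's destroy routine so
       that a later MAT_REUSE_MATRIX call can retrieve the plan and the buffers are released when
       C is destroyed. */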
2887 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2888 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2889 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2890 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2891 2892 redund->nzlocal = nzlocal; 2893 redund->nsends = nsends; 2894 redund->nrecvs = nrecvs; 2895 redund->send_rank = send_rank; 2896 redund->recv_rank = recv_rank; 2897 redund->sbuf_nz = sbuf_nz; 2898 redund->rbuf_nz = rbuf_nz; 2899 redund->sbuf_j = sbuf_j; 2900 redund->sbuf_a = sbuf_a; 2901 redund->rbuf_j = rbuf_j; 2902 redund->rbuf_a = rbuf_a; 2903 2904 redund->Destroy = C->ops->destroy; 2905 C->ops->destroy = MatDestroy_MatRedundant; 2906 } 2907 PetscFunctionReturn(0); 2908 } 2909 2910 #undef __FUNCT__ 2911 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2912 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2913 { 2914 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2915 PetscErrorCode ierr; 2916 PetscInt i,*idxb = 0; 2917 PetscScalar *va,*vb; 2918 Vec vtmp; 2919 2920 PetscFunctionBegin; 2921 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2922 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2923 if (idx) { 2924 for (i=0; i<A->rmap->n; i++) { 2925 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2926 } 2927 } 2928 2929 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2930 if (idx) { 2931 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2932 } 2933 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2934 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2935 2936 for (i=0; i<A->rmap->n; i++){ 2937 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2938 va[i] = vb[i]; 2939 if (idx) idx[i] = a->garray[idxb[i]]; 2940 } 2941 } 2942 2943 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2944 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2945 ierr = PetscFree(idxb);CHKERRQ(ierr); 2946 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #undef __FUNCT__ 2951 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2952 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2953 { 2954 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2955 PetscErrorCode ierr; 2956 PetscInt i,*idxb = 0; 2957 PetscScalar *va,*vb; 2958 Vec vtmp; 2959 2960 PetscFunctionBegin; 2961 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2962 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2963 if (idx) { 2964 for (i=0; i<A->cmap->n; i++) { 2965 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2966 } 2967 } 2968 2969 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2970 if (idx) { 2971 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2972 } 2973 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2974 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2975 2976 for (i=0; i<A->rmap->n; i++){ 2977 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2978 va[i] = vb[i]; 2979 if (idx) idx[i] = a->garray[idxb[i]]; 2980 } 2981 } 2982 2983 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2984 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2985 ierr = PetscFree(idxb);CHKERRQ(ierr); 2986 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2992 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2993 { 2994 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2995 PetscInt n = A->rmap->n; 2996 PetscInt cstart = 
A->cmap->rstart; 2997 PetscInt *cmap = mat->garray; 2998 PetscInt *diagIdx, *offdiagIdx; 2999 Vec diagV, offdiagV; 3000 PetscScalar *a, *diagA, *offdiagA; 3001 PetscInt r; 3002 PetscErrorCode ierr; 3003 3004 PetscFunctionBegin; 3005 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3006 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3007 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3008 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3009 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3010 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3011 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3012 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3013 for (r = 0; r < n; ++r) { 3014 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3015 a[r] = diagA[r]; 3016 idx[r] = cstart + diagIdx[r]; 3017 } else { 3018 a[r] = offdiagA[r]; 3019 idx[r] = cmap[offdiagIdx[r]]; 3020 } 3021 } 3022 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3023 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3024 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3025 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3026 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3027 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3033 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3034 { 3035 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3036 PetscInt n = A->rmap->n; 3037 PetscInt cstart = A->cmap->rstart; 3038 PetscInt *cmap = mat->garray; 3039 PetscInt *diagIdx, *offdiagIdx; 3040 Vec diagV, offdiagV; 3041 PetscScalar *a, *diagA, *offdiagA; 3042 PetscInt r; 3043 PetscErrorCode ierr; 3044 3045 PetscFunctionBegin; 3046 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3047 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 3048 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 3049 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3050 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3051 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3052 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3053 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3054 for (r = 0; r < n; ++r) { 3055 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3056 a[r] = diagA[r]; 3057 idx[r] = cstart + diagIdx[r]; 3058 } else { 3059 a[r] = offdiagA[r]; 3060 idx[r] = cmap[offdiagIdx[r]]; 3061 } 3062 } 3063 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3064 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3065 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3066 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3067 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3068 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3074 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3075 { 3076 PetscErrorCode ierr; 3077 Mat *dummy; 3078 3079 PetscFunctionBegin; 3080 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3081 *newmat = *dummy; 3082 ierr = PetscFree(dummy);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3087 3088 #undef __FUNCT__ 
3089 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3090 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3091 { 3092 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3093 PetscErrorCode ierr; 3094 3095 PetscFunctionBegin; 3096 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 #undef __FUNCT__ 3101 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3102 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3103 { 3104 PetscErrorCode ierr; 3105 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3106 3107 PetscFunctionBegin; 3108 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3109 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3110 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 /* -------------------------------------------------------------------*/ 3116 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3117 MatGetRow_MPIAIJ, 3118 MatRestoreRow_MPIAIJ, 3119 MatMult_MPIAIJ, 3120 /* 4*/ MatMultAdd_MPIAIJ, 3121 MatMultTranspose_MPIAIJ, 3122 MatMultTransposeAdd_MPIAIJ, 3123 #ifdef PETSC_HAVE_PBGL 3124 MatSolve_MPIAIJ, 3125 #else 3126 0, 3127 #endif 3128 0, 3129 0, 3130 /*10*/ 0, 3131 0, 3132 0, 3133 MatSOR_MPIAIJ, 3134 MatTranspose_MPIAIJ, 3135 /*15*/ MatGetInfo_MPIAIJ, 3136 MatEqual_MPIAIJ, 3137 MatGetDiagonal_MPIAIJ, 3138 MatDiagonalScale_MPIAIJ, 3139 MatNorm_MPIAIJ, 3140 /*20*/ MatAssemblyBegin_MPIAIJ, 3141 MatAssemblyEnd_MPIAIJ, 3142 MatSetOption_MPIAIJ, 3143 MatZeroEntries_MPIAIJ, 3144 /*24*/ MatZeroRows_MPIAIJ, 3145 0, 3146 #ifdef PETSC_HAVE_PBGL 3147 0, 3148 #else 3149 0, 3150 #endif 3151 0, 3152 0, 3153 /*29*/ MatSetUp_MPIAIJ, 3154 #ifdef PETSC_HAVE_PBGL 3155 0, 3156 #else 3157 0, 3158 #endif 3159 0, 3160 0, 3161 0, 3162 /*34*/ MatDuplicate_MPIAIJ, 3163 0, 3164 0, 3165 0, 3166 0, 3167 /*39*/ MatAXPY_MPIAIJ, 3168 MatGetSubMatrices_MPIAIJ, 3169 MatIncreaseOverlap_MPIAIJ, 3170 MatGetValues_MPIAIJ, 3171 MatCopy_MPIAIJ, 3172 /*44*/ MatGetRowMax_MPIAIJ, 3173 MatScale_MPIAIJ, 3174 0, 3175 0, 3176 MatZeroRowsColumns_MPIAIJ, 3177 /*49*/ MatSetRandom_MPIAIJ, 3178 0, 3179 0, 3180 0, 3181 0, 3182 /*54*/ MatFDColoringCreate_MPIAIJ, 3183 0, 3184 MatSetUnfactored_MPIAIJ, 3185 MatPermute_MPIAIJ, 3186 0, 3187 /*59*/ MatGetSubMatrix_MPIAIJ, 3188 MatDestroy_MPIAIJ, 3189 MatView_MPIAIJ, 3190 0, 3191 0, 3192 /*64*/ 0, 3193 0, 3194 0, 3195 0, 3196 0, 3197 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3198 MatGetRowMinAbs_MPIAIJ, 3199 0, 3200 MatSetColoring_MPIAIJ, 3201 #if defined(PETSC_HAVE_ADIC) 3202 MatSetValuesAdic_MPIAIJ, 3203 #else 3204 0, 3205 #endif 3206 MatSetValuesAdifor_MPIAIJ, 3207 /*75*/ MatFDColoringApply_AIJ, 3208 0, 3209 0, 3210 0, 3211 MatFindZeroDiagonals_MPIAIJ, 3212 /*80*/ 0, 3213 0, 3214 0, 3215 /*83*/ MatLoad_MPIAIJ, 3216 0, 3217 0, 3218 0, 3219 0, 3220 0, 3221 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3222 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3223 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3224 MatPtAP_MPIAIJ_MPIAIJ, 3225 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3226 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3227 0, 3228 0, 3229 0, 3230 0, 3231 /*99*/ 0, 3232 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3233 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3234 MatConjugate_MPIAIJ, 3235 0, 3236 /*104*/MatSetValuesRow_MPIAIJ, 3237 MatRealPart_MPIAIJ, 3238 MatImaginaryPart_MPIAIJ, 3239 0, 3240 0, 3241 /*109*/0, 3242 MatGetRedundantMatrix_MPIAIJ, 3243 MatGetRowMin_MPIAIJ, 3244 0, 3245 0, 3246 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3247 0, 3248 0, 3249 0, 3250 0, 3251 
/*119*/0, 3252 0, 3253 0, 3254 0, 3255 MatGetMultiProcBlock_MPIAIJ, 3256 /*124*/MatFindNonzeroRows_MPIAIJ, 3257 MatGetColumnNorms_MPIAIJ, 3258 MatInvertBlockDiagonal_MPIAIJ, 3259 0, 3260 MatGetSubMatricesParallel_MPIAIJ, 3261 /*129*/0, 3262 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3263 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3264 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3265 0, 3266 /*134*/0, 3267 0, 3268 0, 3269 0, 3270 0 3271 }; 3272 3273 /* ----------------------------------------------------------------------------------------*/ 3274 3275 EXTERN_C_BEGIN 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3278 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3279 { 3280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3281 PetscErrorCode ierr; 3282 3283 PetscFunctionBegin; 3284 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3285 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3286 PetscFunctionReturn(0); 3287 } 3288 EXTERN_C_END 3289 3290 EXTERN_C_BEGIN 3291 #undef __FUNCT__ 3292 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3293 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3294 { 3295 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3296 PetscErrorCode ierr; 3297 3298 PetscFunctionBegin; 3299 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3300 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 EXTERN_C_END 3304 3305 EXTERN_C_BEGIN 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3308 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3309 { 3310 Mat_MPIAIJ *b; 3311 PetscErrorCode ierr; 3312 PetscInt i; 3313 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3314 3315 PetscFunctionBegin; 3316 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3317 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3318 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3319 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3320 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3321 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3322 3323 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3324 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3325 if (d_nnz) { 3326 for (i=0; i<B->rmap->n; i++) { 3327 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3328 } 3329 } 3330 if (o_nnz) { 3331 for (i=0; i<B->rmap->n; i++) { 3332 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3333 } 3334 } 3335 b = (Mat_MPIAIJ*)B->data; 3336 3337 if (!B->preallocated) { 3338 /* Explicitly create 2 MATSEQAIJ matrices. 
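       (b->A will hold the block of columns owned by this process and b->B the remaining, off-process columns; the MatSeqAIJSetPreallocation() calls below size them with d_nz/d_nnz and o_nz/o_nnz respectively)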
*/ 3339 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3340 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3341 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3342 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3343 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3344 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3345 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3346 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3347 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3348 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3349 } 3350 3351 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3352 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3353 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3354 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3355 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3356 B->preallocated = PETSC_TRUE; 3357 PetscFunctionReturn(0); 3358 } 3359 EXTERN_C_END 3360 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3363 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3364 { 3365 Mat mat; 3366 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3367 PetscErrorCode ierr; 3368 3369 PetscFunctionBegin; 3370 *newmat = 0; 3371 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3372 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3373 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3374 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3375 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3376 a = (Mat_MPIAIJ*)mat->data; 3377 3378 mat->factortype = matin->factortype; 3379 mat->rmap->bs = matin->rmap->bs; 3380 mat->cmap->bs = matin->cmap->bs; 3381 mat->assembled = PETSC_TRUE; 3382 mat->insertmode = NOT_SET_VALUES; 3383 mat->preallocated = PETSC_TRUE; 3384 3385 a->size = oldmat->size; 3386 a->rank = oldmat->rank; 3387 a->donotstash = oldmat->donotstash; 3388 a->roworiented = oldmat->roworiented; 3389 a->rowindices = 0; 3390 a->rowvalues = 0; 3391 a->getrowactive = PETSC_FALSE; 3392 3393 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3394 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3395 3396 if (oldmat->colmap) { 3397 #if defined (PETSC_USE_CTABLE) 3398 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3399 #else 3400 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3401 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3402 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3403 #endif 3404 } else a->colmap = 0; 3405 if (oldmat->garray) { 3406 PetscInt len; 3407 len = oldmat->B->cmap->n; 3408 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3409 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3410 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3411 } else a->garray = 0; 3412 3413 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3414 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3415 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3416 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3417 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3418 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3419 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3420 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3421 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3422 *newmat = mat; 3423 PetscFunctionReturn(0); 3424 } 3425 3426 3427 3428 #undef __FUNCT__ 3429 #define __FUNCT__ "MatLoad_MPIAIJ" 3430 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3431 { 3432 PetscScalar *vals,*svals; 3433 MPI_Comm comm = ((PetscObject)viewer)->comm; 3434 PetscErrorCode ierr; 3435 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3436 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3437 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3438 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3439 PetscInt cend,cstart,n,*rowners,sizesset=1; 3440 int fd; 3441 PetscInt bs = 1; 3442 3443 PetscFunctionBegin; 3444 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3446 if (!rank) { 3447 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3448 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3449 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3450 } 3451 3452 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3453 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3454 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3455 3456 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3457 3458 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3459 M = header[1]; N = header[2]; 3460 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3461 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3462 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3463 3464 /* If global sizes are set, check if they are consistent with that given in the file */ 3465 if (sizesset) { 3466 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3467 } 3468 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3469 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3470 3471 /* determine ownership of all (block) rows */ 3472 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3473 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3474 else m = newMat->rmap->n; /* Set by user */ 3475 3476 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3477 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3478 3479 /* First process needs enough room for process with most rows */ 3480 if (!rank) { 3481 mmax = rowners[1]; 3482 for (i=2; i<=size; i++) { 3483 
mmax = PetscMax(mmax, rowners[i]); 3484 } 3485 } else mmax = m; 3486 3487 rowners[0] = 0; 3488 for (i=2; i<=size; i++) { 3489 rowners[i] += rowners[i-1]; 3490 } 3491 rstart = rowners[rank]; 3492 rend = rowners[rank+1]; 3493 3494 /* distribute row lengths to all processors */ 3495 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3496 if (!rank) { 3497 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3498 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3499 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3500 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3501 for (j=0; j<m; j++) { 3502 procsnz[0] += ourlens[j]; 3503 } 3504 for (i=1; i<size; i++) { 3505 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3506 /* calculate the number of nonzeros on each processor */ 3507 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3508 procsnz[i] += rowlengths[j]; 3509 } 3510 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3513 } else { 3514 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3515 } 3516 3517 if (!rank) { 3518 /* determine max buffer needed and allocate it */ 3519 maxnz = 0; 3520 for (i=0; i<size; i++) { 3521 maxnz = PetscMax(maxnz,procsnz[i]); 3522 } 3523 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3524 3525 /* read in my part of the matrix column indices */ 3526 nz = procsnz[0]; 3527 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3528 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3529 3530 /* read in every one elses and ship off */ 3531 for (i=1; i<size; i++) { 3532 nz = procsnz[i]; 3533 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3534 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3535 } 3536 ierr = PetscFree(cols);CHKERRQ(ierr); 3537 } else { 3538 /* determine buffer space needed for message */ 3539 nz = 0; 3540 for (i=0; i<m; i++) { 3541 nz += ourlens[i]; 3542 } 3543 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3544 3545 /* receive message of column indices*/ 3546 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3547 } 3548 3549 /* determine column ownership if matrix is not square */ 3550 if (N != M) { 3551 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3552 else n = newMat->cmap->n; 3553 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3554 cstart = cend - n; 3555 } else { 3556 cstart = rstart; 3557 cend = rend; 3558 n = cend - cstart; 3559 } 3560 3561 /* loop over local rows, determining number of off diagonal entries */ 3562 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3563 jj = 0; 3564 for (i=0; i<m; i++) { 3565 for (j=0; j<ourlens[i]; j++) { 3566 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3567 jj++; 3568 } 3569 } 3570 3571 for (i=0; i<m; i++) { 3572 ourlens[i] -= offlens[i]; 3573 } 3574 if (!sizesset) { 3575 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3576 } 3577 3578 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3579 3580 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3581 3582 for (i=0; i<m; i++) { 3583 ourlens[i] += offlens[i]; 3584 } 3585 3586 if (!rank) { 3587 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3588 3589 /* read in my part of the matrix numerical values */ 
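  /* (as with the row lengths and column indices above, rank 0 reads each remote process's block of values and ships it with MPIULong_Send(); the other ranks receive theirs with MPIULong_Recv() in the else branch below) */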
3590 nz = procsnz[0]; 3591 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3592 3593 /* insert into matrix */ 3594 jj = rstart; 3595 smycols = mycols; 3596 svals = vals; 3597 for (i=0; i<m; i++) { 3598 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3599 smycols += ourlens[i]; 3600 svals += ourlens[i]; 3601 jj++; 3602 } 3603 3604 /* read in other processors and ship out */ 3605 for (i=1; i<size; i++) { 3606 nz = procsnz[i]; 3607 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3608 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3609 } 3610 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3611 } else { 3612 /* receive numeric values */ 3613 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3614 3615 /* receive message of values*/ 3616 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3617 3618 /* insert into matrix */ 3619 jj = rstart; 3620 smycols = mycols; 3621 svals = vals; 3622 for (i=0; i<m; i++) { 3623 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3624 smycols += ourlens[i]; 3625 svals += ourlens[i]; 3626 jj++; 3627 } 3628 } 3629 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3630 ierr = PetscFree(vals);CHKERRQ(ierr); 3631 ierr = PetscFree(mycols);CHKERRQ(ierr); 3632 ierr = PetscFree(rowners);CHKERRQ(ierr); 3633 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3634 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3635 3636 PetscFunctionReturn(0); 3637 } 3638 3639 #undef __FUNCT__ 3640 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3641 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3642 { 3643 PetscErrorCode ierr; 3644 IS iscol_local; 3645 PetscInt csize; 3646 3647 PetscFunctionBegin; 3648 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3649 if (call == MAT_REUSE_MATRIX) { 3650 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3651 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3652 } else { 3653 PetscInt cbs; 3654 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3655 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3656 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3657 } 3658 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3659 if (call == MAT_INITIAL_MATRIX) { 3660 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3661 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3662 } 3663 PetscFunctionReturn(0); 3664 } 3665 3666 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3667 #undef __FUNCT__ 3668 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3669 /* 3670 Not great since it makes two copies of the submatrix, first an SeqAIJ 3671 in local and then by concatenating the local matrices the end result. 3672 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3673 3674 Note: This requires a sequential iscol with all indices. 
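    The gathered sequential matrix is composed on the output matrix under the key "SubMatrix" so that a subsequent MAT_REUSE_MATRIX call can retrieve and refill it instead of rebuilding the structure.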
3675 */ 3676 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3677 { 3678 PetscErrorCode ierr; 3679 PetscMPIInt rank,size; 3680 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3681 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3682 PetscBool allcolumns, colflag; 3683 Mat M,Mreuse; 3684 MatScalar *vwork,*aa; 3685 MPI_Comm comm = ((PetscObject)mat)->comm; 3686 Mat_SeqAIJ *aij; 3687 3688 3689 PetscFunctionBegin; 3690 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3692 3693 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3694 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3695 if (colflag && ncol == mat->cmap->N){ 3696 allcolumns = PETSC_TRUE; 3697 } else { 3698 allcolumns = PETSC_FALSE; 3699 } 3700 if (call == MAT_REUSE_MATRIX) { 3701 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3702 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3703 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3704 } else { 3705 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3706 } 3707 3708 /* 3709 m - number of local rows 3710 n - number of columns (same on all processors) 3711 rstart - first row in new global matrix generated 3712 */ 3713 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3714 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3715 if (call == MAT_INITIAL_MATRIX) { 3716 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3717 ii = aij->i; 3718 jj = aij->j; 3719 3720 /* 3721 Determine the number of non-zeros in the diagonal and off-diagonal 3722 portions of the matrix in order to do correct preallocation 3723 */ 3724 3725 /* first get start and end of "diagonal" columns */ 3726 if (csize == PETSC_DECIDE) { 3727 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3728 if (mglobal == n) { /* square matrix */ 3729 nlocal = m; 3730 } else { 3731 nlocal = n/size + ((n % size) > rank); 3732 } 3733 } else { 3734 nlocal = csize; 3735 } 3736 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3737 rstart = rend - nlocal; 3738 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3739 3740 /* next, compute all the lengths */ 3741 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3742 olens = dlens + m; 3743 for (i=0; i<m; i++) { 3744 jend = ii[i+1] - ii[i]; 3745 olen = 0; 3746 dlen = 0; 3747 for (j=0; j<jend; j++) { 3748 if (*jj < rstart || *jj >= rend) olen++; 3749 else dlen++; 3750 jj++; 3751 } 3752 olens[i] = olen; 3753 dlens[i] = dlen; 3754 } 3755 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3756 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3757 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3758 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3759 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3760 ierr = PetscFree(dlens);CHKERRQ(ierr); 3761 } else { 3762 PetscInt ml,nl; 3763 3764 M = *newmat; 3765 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3766 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3767 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3768 /* 3769 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3770 rather than the slower MatSetValues(). 3771 */ 3772 M->was_assembled = PETSC_TRUE; 3773 M->assembled = PETSC_FALSE; 3774 } 3775 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3776 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3777 ii = aij->i; 3778 jj = aij->j; 3779 aa = aij->a; 3780 for (i=0; i<m; i++) { 3781 row = rstart + i; 3782 nz = ii[i+1] - ii[i]; 3783 cwork = jj; jj += nz; 3784 vwork = aa; aa += nz; 3785 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3786 } 3787 3788 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3789 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 *newmat = M; 3791 3792 /* save submatrix used in processor for next request */ 3793 if (call == MAT_INITIAL_MATRIX) { 3794 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3795 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3796 } 3797 3798 PetscFunctionReturn(0); 3799 } 3800 3801 EXTERN_C_BEGIN 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3804 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3805 { 3806 PetscInt m,cstart, cend,j,nnz,i,d; 3807 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3808 const PetscInt *JJ; 3809 PetscScalar *values; 3810 PetscErrorCode ierr; 3811 3812 PetscFunctionBegin; 3813 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3814 3815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3817 m = B->rmap->n; 3818 cstart = B->cmap->rstart; 3819 cend = B->cmap->rend; 3820 rstart = B->rmap->rstart; 3821 3822 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3823 3824 #if defined(PETSC_USE_DEBUG) 3825 for (i=0; i<m; i++) { 3826 nnz = Ii[i+1]- Ii[i]; 3827 JJ = J + Ii[i]; 3828 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3829 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3830 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3831 } 3832 #endif 3833 3834 for (i=0; i<m; i++) { 3835 nnz = Ii[i+1]- Ii[i]; 3836 JJ = J + Ii[i]; 3837 nnz_max = PetscMax(nnz_max,nnz); 3838 d = 0; 3839 for (j=0; j<nnz; j++) { 3840 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3841 } 3842 d_nnz[i] = d; 3843 o_nnz[i] = nnz - d; 3844 } 3845 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3846 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3847 3848 if (v) values = (PetscScalar*)v; 3849 else { 3850 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3851 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3852 } 3853 3854 for (i=0; i<m; i++) { 3855 ii = i + rstart; 3856 nnz = Ii[i+1]- Ii[i]; 3857 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3858 } 3859 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3860 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3861 3862 if (!v) { 3863 ierr = PetscFree(values);CHKERRQ(ierr); 3864 } 3865 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3866 PetscFunctionReturn(0); 3867 } 3868 EXTERN_C_END 3869 3870 #undef __FUNCT__ 3871 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3872 /*@ 3873 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3874 (the default parallel PETSc format). 3875 3876 Collective on MPI_Comm 3877 3878 Input Parameters: 3879 + B - the matrix 3880 . i - the indices into j for the start of each local row (starts with zero) 3881 . j - the column indices for each local row (starts with zero) 3882 - v - optional values in the matrix 3883 3884 Level: developer 3885 3886 Notes: 3887 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3888 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3889 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3890 3891 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3892 3893 The format which is used for the sparse matrix input, is equivalent to a 3894 row-major ordering.. i.e for the following matrix, the input data expected is 3895 as shown: 3896 3897 1 0 0 3898 2 0 3 P0 3899 ------- 3900 4 5 6 P1 3901 3902 Process0 [P0]: rows_owned=[0,1] 3903 i = {0,1,3} [size = nrow+1 = 2+1] 3904 j = {0,0,2} [size = nz = 6] 3905 v = {1,2,3} [size = nz = 6] 3906 3907 Process1 [P1]: rows_owned=[2] 3908 i = {0,3} [size = nrow+1 = 1+1] 3909 j = {0,1,2} [size = nz = 6] 3910 v = {4,5,6} [size = nz = 6] 3911 3912 .keywords: matrix, aij, compressed row, sparse, parallel 3913 3914 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3915 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3916 @*/ 3917 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3918 { 3919 PetscErrorCode ierr; 3920 3921 PetscFunctionBegin; 3922 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 #undef __FUNCT__ 3927 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3928 /*@C 3929 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3930 (the default parallel PETSc format). For good matrix assembly performance 3931 the user should preallocate the matrix storage by setting the parameters 3932 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3933 performance can be increased by more than a factor of 50. 3934 3935 Collective on MPI_Comm 3936 3937 Input Parameters: 3938 + A - the matrix 3939 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3940 (same value is used for all local rows) 3941 . d_nnz - array containing the number of nonzeros in the various rows of the 3942 DIAGONAL portion of the local submatrix (possibly different for each row) 3943 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3944 The size of this array is equal to the number of local rows, i.e 'm'. 
3945 For matrices that will be factored, you must leave room for (and set) 3946 the diagonal entry even if it is zero. 3947 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3948 submatrix (same value is used for all local rows). 3949 - o_nnz - array containing the number of nonzeros in the various rows of the 3950 OFF-DIAGONAL portion of the local submatrix (possibly different for 3951 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3952 structure. The size of this array is equal to the number 3953 of local rows, i.e 'm'. 3954 3955 If the *_nnz parameter is given then the *_nz parameter is ignored 3956 3957 The AIJ format (also called the Yale sparse matrix format or 3958 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3959 storage. The stored row and column indices begin with zero. 3960 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3961 3962 The parallel matrix is partitioned such that the first m0 rows belong to 3963 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3964 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3965 3966 The DIAGONAL portion of the local submatrix of a processor can be defined 3967 as the submatrix which is obtained by extraction the part corresponding to 3968 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3969 first row that belongs to the processor, r2 is the last row belonging to 3970 the this processor, and c1-c2 is range of indices of the local part of a 3971 vector suitable for applying the matrix to. This is an mxn matrix. In the 3972 common case of a square matrix, the row and column ranges are the same and 3973 the DIAGONAL part is also square. The remaining portion of the local 3974 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3975 3976 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3977 3978 You can call MatGetInfo() to get information on how effective the preallocation was; 3979 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3980 You can also run with the option -info and look for messages with the string 3981 malloc in them to see if additional memory allocation was needed. 3982 3983 Example usage: 3984 3985 Consider the following 8x8 matrix with 34 non-zero values, that is 3986 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3987 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3988 as follows: 3989 3990 .vb 3991 1 2 0 | 0 3 0 | 0 4 3992 Proc0 0 5 6 | 7 0 0 | 8 0 3993 9 0 10 | 11 0 0 | 12 0 3994 ------------------------------------- 3995 13 0 14 | 15 16 17 | 0 0 3996 Proc1 0 18 0 | 19 20 21 | 0 0 3997 0 0 0 | 22 23 0 | 24 0 3998 ------------------------------------- 3999 Proc2 25 26 27 | 0 0 28 | 29 0 4000 30 0 0 | 31 32 33 | 0 34 4001 .ve 4002 4003 This can be represented as a collection of submatrices as: 4004 4005 .vb 4006 A B C 4007 D E F 4008 G H I 4009 .ve 4010 4011 Where the submatrices A,B,C are owned by proc0, D,E,F are 4012 owned by proc1, G,H,I are owned by proc2. 4013 4014 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4015 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4016 The 'M','N' parameters are 8,8, and have the same values on all procs. 4017 4018 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4019 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 4020 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4021 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4022 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4023 matrix, ans [DF] as another SeqAIJ matrix. 4024 4025 When d_nz, o_nz parameters are specified, d_nz storage elements are 4026 allocated for every row of the local diagonal submatrix, and o_nz 4027 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4028 One way to choose d_nz and o_nz is to use the max nonzerors per local 4029 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4030 In this case, the values of d_nz,o_nz are: 4031 .vb 4032 proc0 : dnz = 2, o_nz = 2 4033 proc1 : dnz = 3, o_nz = 2 4034 proc2 : dnz = 1, o_nz = 4 4035 .ve 4036 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4037 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4038 for proc3. i.e we are using 12+15+10=37 storage locations to store 4039 34 values. 4040 4041 When d_nnz, o_nnz parameters are specified, the storage is specified 4042 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4043 In the above case the values for d_nnz,o_nnz are: 4044 .vb 4045 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4046 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4047 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4048 .ve 4049 Here the space allocated is sum of all the above values i.e 34, and 4050 hence pre-allocation is perfect. 4051 4052 Level: intermediate 4053 4054 .keywords: matrix, aij, compressed row, sparse, parallel 4055 4056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4057 MPIAIJ, MatGetInfo() 4058 @*/ 4059 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4060 { 4061 PetscErrorCode ierr; 4062 4063 PetscFunctionBegin; 4064 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4065 PetscValidType(B,1); 4066 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4067 PetscFunctionReturn(0); 4068 } 4069 4070 #undef __FUNCT__ 4071 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4072 /*@ 4073 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4074 CSR format the local rows. 4075 4076 Collective on MPI_Comm 4077 4078 Input Parameters: 4079 + comm - MPI communicator 4080 . m - number of local rows (Cannot be PETSC_DECIDE) 4081 . n - This value should be the same as the local size used in creating the 4082 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4083 calculated if N is given) For square matrices n is almost always m. 4084 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4085 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4086 . i - row indices 4087 . j - column indices 4088 - a - matrix values 4089 4090 Output Parameter: 4091 . mat - the matrix 4092 4093 Level: intermediate 4094 4095 Notes: 4096 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4097 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4098 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
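       As a minimal sketch, the 3x3 example below could be created on two processes as follows
       (comm and rank are assumed to be the communicator and this process's rank, and ierr a PetscErrorCode):
.vb
       PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1,2,3};   /* rows 0-1 on rank 0 */
       PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4,5,6};   /* row  2  on rank 1 */
       Mat         A;
       if (!rank) {ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);CHKERRQ(ierr);}
       else       {ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);CHKERRQ(ierr);}
.ve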
4099 4100 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4101 4102 The format which is used for the sparse matrix input, is equivalent to a 4103 row-major ordering.. i.e for the following matrix, the input data expected is 4104 as shown: 4105 4106 1 0 0 4107 2 0 3 P0 4108 ------- 4109 4 5 6 P1 4110 4111 Process0 [P0]: rows_owned=[0,1] 4112 i = {0,1,3} [size = nrow+1 = 2+1] 4113 j = {0,0,2} [size = nz = 6] 4114 v = {1,2,3} [size = nz = 6] 4115 4116 Process1 [P1]: rows_owned=[2] 4117 i = {0,3} [size = nrow+1 = 1+1] 4118 j = {0,1,2} [size = nz = 6] 4119 v = {4,5,6} [size = nz = 6] 4120 4121 .keywords: matrix, aij, compressed row, sparse, parallel 4122 4123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4124 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4125 @*/ 4126 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4127 { 4128 PetscErrorCode ierr; 4129 4130 PetscFunctionBegin; 4131 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4132 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4133 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4134 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4135 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4136 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4137 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4138 PetscFunctionReturn(0); 4139 } 4140 4141 #undef __FUNCT__ 4142 #define __FUNCT__ "MatCreateAIJ" 4143 /*@C 4144 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4145 (the default parallel PETSc format). For good matrix assembly performance 4146 the user should preallocate the matrix storage by setting the parameters 4147 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4148 performance can be increased by more than a factor of 50. 4149 4150 Collective on MPI_Comm 4151 4152 Input Parameters: 4153 + comm - MPI communicator 4154 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4155 This value should be the same as the local size used in creating the 4156 y vector for the matrix-vector product y = Ax. 4157 . n - This value should be the same as the local size used in creating the 4158 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4159 calculated if N is given) For square matrices n is almost always m. 4160 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4161 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4162 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4163 (same value is used for all local rows) 4164 . d_nnz - array containing the number of nonzeros in the various rows of the 4165 DIAGONAL portion of the local submatrix (possibly different for each row) 4166 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4167 The size of this array is equal to the number of local rows, i.e 'm'. 4168 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4169 submatrix (same value is used for all local rows). 
4170 - o_nnz - array containing the number of nonzeros in the various rows of the 4171 OFF-DIAGONAL portion of the local submatrix (possibly different for 4172 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4173 structure. The size of this array is equal to the number 4174 of local rows, i.e 'm'. 4175 4176 Output Parameter: 4177 . A - the matrix 4178 4179 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4180 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4181 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4182 4183 Notes: 4184 If the *_nnz parameter is given then the *_nz parameter is ignored 4185 4186 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4187 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4188 storage requirements for this matrix. 4189 4190 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4191 processor than it must be used on all processors that share the object for 4192 that argument. 4193 4194 The user MUST specify either the local or global matrix dimensions 4195 (possibly both). 4196 4197 The parallel matrix is partitioned across processors such that the 4198 first m0 rows belong to process 0, the next m1 rows belong to 4199 process 1, the next m2 rows belong to process 2 etc.. where 4200 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4201 values corresponding to [m x N] submatrix. 4202 4203 The columns are logically partitioned with the n0 columns belonging 4204 to 0th partition, the next n1 columns belonging to the next 4205 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4206 4207 The DIAGONAL portion of the local submatrix on any given processor 4208 is the submatrix corresponding to the rows and columns m,n 4209 corresponding to the given processor. i.e diagonal matrix on 4210 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4211 etc. The remaining portion of the local submatrix [m x (N-n)] 4212 constitute the OFF-DIAGONAL portion. The example below better 4213 illustrates this concept. 4214 4215 For a square global matrix we define each processor's diagonal portion 4216 to be its local rows and the corresponding columns (a square submatrix); 4217 each processor's off-diagonal portion encompasses the remainder of the 4218 local matrix (a rectangular submatrix). 4219 4220 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4221 4222 When calling this routine with a single process communicator, a matrix of 4223 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4224 type of communicator, use the construction mechanism: 4225 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4226 4227 By default, this format uses inodes (identical nodes) when possible. 4228 We search for consecutive rows with the same nonzero structure, thereby 4229 reusing matrix information to achieve increased efficiency. 4230 4231 Options Database Keys: 4232 + -mat_no_inode - Do not use inodes 4233 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4234 - -mat_aij_oneindex - Internally use indexing starting at 1 4235 rather than 0. Note that when calling MatSetValues(), 4236 the user still MUST index entries starting at 0! 4237 4238 4239 Example usage: 4240 4241 Consider the following 8x8 matrix with 34 non-zero values, that is 4242 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4243 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4244 as follows: 4245 4246 .vb 4247 1 2 0 | 0 3 0 | 0 4 4248 Proc0 0 5 6 | 7 0 0 | 8 0 4249 9 0 10 | 11 0 0 | 12 0 4250 ------------------------------------- 4251 13 0 14 | 15 16 17 | 0 0 4252 Proc1 0 18 0 | 19 20 21 | 0 0 4253 0 0 0 | 22 23 0 | 24 0 4254 ------------------------------------- 4255 Proc2 25 26 27 | 0 0 28 | 29 0 4256 30 0 0 | 31 32 33 | 0 34 4257 .ve 4258 4259 This can be represented as a collection of submatrices as: 4260 4261 .vb 4262 A B C 4263 D E F 4264 G H I 4265 .ve 4266 4267 Where the submatrices A,B,C are owned by proc0, D,E,F are 4268 owned by proc1, G,H,I are owned by proc2. 4269 4270 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4271 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4272 The 'M','N' parameters are 8,8, and have the same values on all procs. 4273 4274 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4275 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4276 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4277 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4278 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4279 matrix, ans [DF] as another SeqAIJ matrix. 4280 4281 When d_nz, o_nz parameters are specified, d_nz storage elements are 4282 allocated for every row of the local diagonal submatrix, and o_nz 4283 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4284 One way to choose d_nz and o_nz is to use the max nonzerors per local 4285 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4286 In this case, the values of d_nz,o_nz are: 4287 .vb 4288 proc0 : dnz = 2, o_nz = 2 4289 proc1 : dnz = 3, o_nz = 2 4290 proc2 : dnz = 1, o_nz = 4 4291 .ve 4292 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4293 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4294 for proc3. i.e we are using 12+15+10=37 storage locations to store 4295 34 values. 4296 4297 When d_nnz, o_nnz parameters are specified, the storage is specified 4298 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4299 In the above case the values for d_nnz,o_nnz are: 4300 .vb 4301 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4302 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4303 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4304 .ve 4305 Here the space allocated is sum of all the above values i.e 34, and 4306 hence pre-allocation is perfect. 
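       A minimal creation sketch for this 8x8 example, using the per-process d_nz/o_nz upper bounds
       from the table above rather than the exact d_nnz/o_nnz arrays (rank is assumed to have been
       obtained with MPI_Comm_rank() and ierr to be a PetscErrorCode):
.vb
       Mat      A;
       PetscInt mloc[3] = {3,3,2}, dnz[3] = {2,3,1}, onz[3] = {2,2,4};
       ierr = MatCreateAIJ(PETSC_COMM_WORLD,mloc[rank],mloc[rank],8,8,dnz[rank],PETSC_NULL,onz[rank],PETSC_NULL,&A);CHKERRQ(ierr);
.ve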
4307 4308 Level: intermediate 4309 4310 .keywords: matrix, aij, compressed row, sparse, parallel 4311 4312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4313 MPIAIJ, MatCreateMPIAIJWithArrays() 4314 @*/ 4315 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4316 { 4317 PetscErrorCode ierr; 4318 PetscMPIInt size; 4319 4320 PetscFunctionBegin; 4321 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4322 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4323 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4324 if (size > 1) { 4325 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4326 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4327 } else { 4328 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4329 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4330 } 4331 PetscFunctionReturn(0); 4332 } 4333 4334 #undef __FUNCT__ 4335 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4336 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4337 { 4338 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4339 4340 PetscFunctionBegin; 4341 *Ad = a->A; 4342 *Ao = a->B; 4343 *colmap = a->garray; 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4349 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4350 { 4351 PetscErrorCode ierr; 4352 PetscInt i; 4353 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4354 4355 PetscFunctionBegin; 4356 if (coloring->ctype == IS_COLORING_GLOBAL) { 4357 ISColoringValue *allcolors,*colors; 4358 ISColoring ocoloring; 4359 4360 /* set coloring for diagonal portion */ 4361 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4362 4363 /* set coloring for off-diagonal portion */ 4364 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4365 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4366 for (i=0; i<a->B->cmap->n; i++) { 4367 colors[i] = allcolors[a->garray[i]]; 4368 } 4369 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4370 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4371 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4372 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4373 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4374 ISColoringValue *colors; 4375 PetscInt *larray; 4376 ISColoring ocoloring; 4377 4378 /* set coloring for diagonal portion */ 4379 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4380 for (i=0; i<a->A->cmap->n; i++) { 4381 larray[i] = i + A->cmap->rstart; 4382 } 4383 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4384 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4385 for (i=0; i<a->A->cmap->n; i++) { 4386 colors[i] = coloring->colors[larray[i]]; 4387 } 4388 ierr = PetscFree(larray);CHKERRQ(ierr); 4389 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4390 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4391 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4392 4393 /* set coloring for off-diagonal portion */ 4394 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4395 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4396 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4397 for (i=0; i<a->B->cmap->n; i++) { 4398 colors[i] = coloring->colors[larray[i]]; 4399 } 4400 ierr = PetscFree(larray);CHKERRQ(ierr); 4401 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4402 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4403 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4404 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4405 4406 PetscFunctionReturn(0); 4407 } 4408 4409 #if defined(PETSC_HAVE_ADIC) 4410 #undef __FUNCT__ 4411 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4412 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4413 { 4414 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4415 PetscErrorCode ierr; 4416 4417 PetscFunctionBegin; 4418 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4419 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 #endif 4423 4424 #undef __FUNCT__ 4425 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4426 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4427 { 4428 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4429 PetscErrorCode ierr; 4430 4431 PetscFunctionBegin; 4432 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4433 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4439 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4440 { 4441 PetscErrorCode ierr; 4442 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4443 PetscInt *indx; 4444 4445 PetscFunctionBegin; 4446 /* This routine will ONLY return MPIAIJ type matrix */ 4447 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4448 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4449 if (n == PETSC_DECIDE){ 4450 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4451 } 4452 /* Check sum(n) = N */ 4453 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4454 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4455 4456 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4457 rstart -= m; 4458 4459 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4460 for (i=0;i<m;i++) { 4461 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4462 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4463 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4464 } 4465 4466 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4467 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4468 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4469 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4470 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4471 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 #undef __FUNCT__ 4476 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4477 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4478 { 4479 PetscErrorCode ierr; 4480 PetscInt 
m,N,i,rstart,nnz,Ii; 4481 PetscInt *indx; 4482 PetscScalar *values; 4483 4484 PetscFunctionBegin; 4485 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4486 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4487 for (i=0;i<m;i++) { 4488 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4489 Ii = i + rstart; 4490 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4491 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4492 } 4493 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4494 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4495 PetscFunctionReturn(0); 4496 } 4497 4498 #undef __FUNCT__ 4499 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4500 /*@ 4501 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4502 matrices from each processor 4503 4504 Collective on MPI_Comm 4505 4506 Input Parameters: 4507 + comm - the communicator the parallel matrix will live on 4508 . inmat - the input sequential matrix on each process 4509 . n - number of local columns (or PETSC_DECIDE) 4510 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4511 4512 Output Parameter: 4513 . outmat - the parallel matrix generated 4514 4515 Level: advanced 4516 4517 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4518 4519 @*/ 4520 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4521 { 4522 PetscErrorCode ierr; 4523 4524 PetscFunctionBegin; 4525 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4526 if (scall == MAT_INITIAL_MATRIX){ 4527 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4528 } 4529 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4530 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 #undef __FUNCT__ 4535 #define __FUNCT__ "MatFileSplit" 4536 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4537 { 4538 PetscErrorCode ierr; 4539 PetscMPIInt rank; 4540 PetscInt m,N,i,rstart,nnz; 4541 size_t len; 4542 const PetscInt *indx; 4543 PetscViewer out; 4544 char *name; 4545 Mat B; 4546 const PetscScalar *values; 4547 4548 PetscFunctionBegin; 4549 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4550 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4551 /* Should this be the type of the diagonal block of A?
*/ 4552 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4553 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4554 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4555 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4556 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4557 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4558 for (i=0;i<m;i++) { 4559 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4560 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4561 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4562 } 4563 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4564 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4565 4566 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4567 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4568 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4569 sprintf(name,"%s.%d",outfile,rank); 4570 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4571 ierr = PetscFree(name); 4572 ierr = MatView(B,out);CHKERRQ(ierr); 4573 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4574 ierr = MatDestroy(&B);CHKERRQ(ierr); 4575 PetscFunctionReturn(0); 4576 } 4577 4578 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4579 #undef __FUNCT__ 4580 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4581 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4582 { 4583 PetscErrorCode ierr; 4584 Mat_Merge_SeqsToMPI *merge; 4585 PetscContainer container; 4586 4587 PetscFunctionBegin; 4588 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4589 if (container) { 4590 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4591 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4592 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4593 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4594 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4595 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4596 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4597 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4598 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4599 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4600 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4601 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4602 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4603 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4604 ierr = PetscFree(merge);CHKERRQ(ierr); 4605 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4606 } 4607 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 #include <../src/mat/utils/freespace.h> 4612 #include <petscbt.h> 4613 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4616 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4617 { 4618 PetscErrorCode ierr; 4619 MPI_Comm comm=((PetscObject)mpimat)->comm; 4620 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4621 PetscMPIInt size,rank,taga,*len_s; 4622 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4623 PetscInt proc,m; 4624 PetscInt **buf_ri,**buf_rj; 4625 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4626 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4627 MPI_Request *s_waits,*r_waits; 4628 MPI_Status *status; 4629 MatScalar *aa=a->a; 4630 MatScalar **abuf_r,*ba_i; 4631 Mat_Merge_SeqsToMPI *merge; 4632 PetscContainer container; 4633 4634 PetscFunctionBegin; 4635 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4636 4637 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4638 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4639 4640 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4641 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4642 4643 bi = merge->bi; 4644 bj = merge->bj; 4645 buf_ri = merge->buf_ri; 4646 buf_rj = merge->buf_rj; 4647 4648 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4649 owners = merge->rowmap->range; 4650 len_s = merge->len_s; 4651 4652 /* send and recv matrix values */ 4653 /*-----------------------------*/ 4654 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4655 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4656 4657 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4658 for (proc=0,k=0; proc<size; proc++){ 4659 if (!len_s[proc]) continue; 4660 i = owners[proc]; 4661 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4662 k++; 4663 } 4664 4665 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4666 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4667 ierr = PetscFree(status);CHKERRQ(ierr); 4668 4669 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4670 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4671 4672 /* insert mat values of mpimat */ 4673 /*----------------------------*/ 4674 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4675 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4676 4677 for (k=0; k<merge->nrecv; k++){ 4678 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4679 nrows = *(buf_ri_k[k]); 4680 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4681 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4682 } 4683 4684 /* set values of ba */ 4685 m = merge->rowmap->n; 4686 for (i=0; i<m; i++) { 4687 arow = owners[rank] + i; 4688 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4689 bnzi = bi[i+1] - bi[i]; 4690 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4691 4692 /* add local non-zero vals of this proc's seqmat into ba */ 4693 anzi = ai[arow+1] - ai[arow]; 4694 aj = a->j + ai[arow]; 4695 aa = a->a + ai[arow]; 4696 nextaj = 0; 4697 for (j=0; nextaj<anzi; j++){ 4698 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4699 ba_i[j] += aa[nextaj++]; 4700 } 4701 } 4702 4703 /* add received vals into ba */ 4704 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4705 /* i-th row */ 4706 if (i == *nextrow[k]) { 4707 anzi = *(nextai[k]+1) - *nextai[k]; 4708 aj = buf_rj[k] + *(nextai[k]); 4709 aa = abuf_r[k] + *(nextai[k]); 4710 nextaj = 0; 4711 for (j=0; nextaj<anzi; j++){ 4712 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4713 ba_i[j] += aa[nextaj++]; 4714 } 4715 } 4716 nextrow[k]++; nextai[k]++; 4717 } 4718 } 4719 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4720 } 4721 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4722 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4723 4724 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4725 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4726 ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4727 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4728 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4729 PetscFunctionReturn(0); 4730 } 4731 4732 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4733 4734 #undef __FUNCT__ 4735 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4736 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4737 { 4738 PetscErrorCode ierr; 4739 Mat B_mpi; 4740 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4741 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4742 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4743 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4744 PetscInt len,proc,*dnz,*onz,bs,cbs; 4745 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4746 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4747 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4748 MPI_Status *status; 4749 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4750 PetscBT lnkbt; 4751 Mat_Merge_SeqsToMPI *merge; 4752 PetscContainer container; 4753 4754 PetscFunctionBegin; 4755 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4756 4757 /* make sure it is a PETSc comm */ 4758 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4759 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4760 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4761 4762 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4763 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4764 4765 /* determine row ownership */ 4766 /*---------------------------------------------------------*/ 4767 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4768 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4769 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4770 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4771 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4772 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4773 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4774 4775 m = merge->rowmap->n; 4776 M = merge->rowmap->N; 4777 owners = merge->rowmap->range; 4778 4779 /* determine the number of messages to send, their lengths */ 4780 /*---------------------------------------------------------*/ 4781 len_s = merge->len_s; 4782 4783 len = 0; /* length of buf_si[] */ 4784 merge->nsend = 0; 4785 for (proc=0; proc<size; proc++){ 4786 len_si[proc] = 0; 4787 if (proc == rank){ 4788 len_s[proc] = 0; 4789 } else { 4790 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4791 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4792 } 4793 if (len_s[proc]) { 4794 merge->nsend++; 4795 nrows = 0; 4796 for (i=owners[proc]; i<owners[proc+1]; i++){ 4797 if (ai[i+1] > ai[i]) nrows++; 4798 } 4799 len_si[proc] = 2*(nrows+1); 4800 len += len_si[proc]; 4801 } 4802 } 4803 4804 /* determine the number and length of messages to receive for ij-structure */ 4805 /*-------------------------------------------------------------------------*/ 4806 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4807 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4808 4809 /* post the Irecv of j-structure */ 4810 /*-------------------------------*/ 4811 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4812 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4813 4814 /* post the Isend of j-structure */ 4815 /*--------------------------------*/ 4816 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4817 4818 for (proc=0, k=0; proc<size; proc++){ 4819 if (!len_s[proc]) continue; 4820 i = owners[proc]; 4821 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4822 k++; 4823 } 4824 4825 /* receives and sends of j-structure are complete */ 4826 /*------------------------------------------------*/ 4827 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4828 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4829 4830 /* send and recv i-structure */ 4831 /*---------------------------*/ 4832 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4833 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4834 4835 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4836 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4837 for (proc=0,k=0; proc<size; proc++){ 4838 if (!len_s[proc]) continue; 4839 /* form outgoing message for i-structure: 4840 buf_si[0]: nrows to be sent 4841 [1:nrows]: row index (global) 4842 [nrows+1:2*nrows+1]: i-structure index 4843 */ 4844 /*-------------------------------------------*/ 4845 nrows = len_si[proc]/2 - 1; 4846 buf_si_i = buf_si + nrows+1; 4847 buf_si[0] = nrows; 4848 buf_si_i[0] = 0; 4849 nrows = 0; 4850 for (i=owners[proc]; i<owners[proc+1]; i++){ 4851 anzi = ai[i+1] - ai[i]; 4852 if (anzi) { 4853 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4854 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4855 nrows++; 4856 } 4857 } 4858 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4859 k++; 4860 buf_si += len_si[proc]; 4861 } 4862 4863 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4864 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4865 4866 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4867 for (i=0; i<merge->nrecv; i++){ 4868 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4869 } 4870 4871 ierr = PetscFree(len_si);CHKERRQ(ierr); 4872 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4873 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4874 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4875 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4876 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4877 ierr = PetscFree(status);CHKERRQ(ierr); 4878 4879 /* compute a local seq matrix in each processor */ 4880 /*----------------------------------------------*/ 4881 /* allocate bi array and free space for accumulating nonzero column info */ 4882 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4883 bi[0] = 0; 4884 4885 /* create and initialize a linked list */ 4886 nlnk = N+1; 4887 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4888 4889 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4890 len = 0; 4891 len = ai[owners[rank+1]] - ai[owners[rank]]; 4892 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4893 current_space = free_space; 4894 4895 /* determine 
symbolic info for each local row */ 4896 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4897 4898 for (k=0; k<merge->nrecv; k++){ 4899 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4900 nrows = *buf_ri_k[k]; 4901 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4902 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 4903 } 4904 4905 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4906 len = 0; 4907 for (i=0;i<m;i++) { 4908 bnzi = 0; 4909 /* add local non-zero cols of this proc's seqmat into lnk */ 4910 arow = owners[rank] + i; 4911 anzi = ai[arow+1] - ai[arow]; 4912 aj = a->j + ai[arow]; 4913 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4914 bnzi += nlnk; 4915 /* add received col data into lnk */ 4916 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4917 if (i == *nextrow[k]) { /* i-th row */ 4918 anzi = *(nextai[k]+1) - *nextai[k]; 4919 aj = buf_rj[k] + *nextai[k]; 4920 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4921 bnzi += nlnk; 4922 nextrow[k]++; nextai[k]++; 4923 } 4924 } 4925 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4926 4927 /* if free space is not available, make more free space */ 4928 if (current_space->local_remaining<bnzi) { 4929 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4930 nspacedouble++; 4931 } 4932 /* copy data into free space, then initialize lnk */ 4933 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4934 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4935 4936 current_space->array += bnzi; 4937 current_space->local_used += bnzi; 4938 current_space->local_remaining -= bnzi; 4939 4940 bi[i+1] = bi[i] + bnzi; 4941 } 4942 4943 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4944 4945 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4946 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4947 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4948 4949 /* create symbolic parallel matrix B_mpi */ 4950 /*---------------------------------------*/ 4951 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4952 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4953 if (n==PETSC_DECIDE) { 4954 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4955 } else { 4956 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4957 } 4958 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4959 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4960 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4961 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4962 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4963 4964 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4965 B_mpi->assembled = PETSC_FALSE; 4966 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4967 merge->bi = bi; 4968 merge->bj = bj; 4969 merge->buf_ri = buf_ri; 4970 merge->buf_rj = buf_rj; 4971 merge->coi = PETSC_NULL; 4972 merge->coj = PETSC_NULL; 4973 merge->owners_co = PETSC_NULL; 4974 4975 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4976 4977 /* attach the supporting struct to B_mpi for reuse */ 4978 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4979 ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4980 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4981 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4982 *mpimat = B_mpi; 4983 4984 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4985 PetscFunctionReturn(0); 4986 } 4987 4988 #undef __FUNCT__ 4989 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4990 /*@C 4991 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4992 matrices from each processor 4993 4994 Collective on MPI_Comm 4995 4996 Input Parameters: 4997 + comm - the communicator the parallel matrix will live on 4998 . seqmat - the input sequential matrix on each process 4999 . m - number of local rows (or PETSC_DECIDE) 5000 . n - number of local columns (or PETSC_DECIDE) 5001 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5002 5003 Output Parameter: 5004 . mpimat - the parallel matrix generated 5005 5006 Level: advanced 5007 5008 Notes: 5009 The dimensions of the sequential matrix in each processor MUST be the same. 5010 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5011 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5012 @*/ 5013 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5014 { 5015 PetscErrorCode ierr; 5016 PetscMPIInt size; 5017 5018 PetscFunctionBegin; 5019 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5020 if (size == 1){ 5021 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5022 if (scall == MAT_INITIAL_MATRIX){ 5023 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5024 } else { 5025 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5026 } 5027 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5028 PetscFunctionReturn(0); 5029 } 5030 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5031 if (scall == MAT_INITIAL_MATRIX){ 5032 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5033 } 5034 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5035 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 #undef __FUNCT__ 5040 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5041 /*@ 5042 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with 5043 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5044 with MatGetSize() 5045 5046 Not Collective 5047 5048 Input Parameters: 5049 + A - the matrix 5050 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5051 5052 Output Parameter: 5053 .
A_loc - the local sequential matrix generated 5054 5055 Level: developer 5056 5057 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5058 5059 @*/ 5060 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5061 { 5062 PetscErrorCode ierr; 5063 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5064 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5065 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5066 MatScalar *aa=a->a,*ba=b->a,*cam; 5067 PetscScalar *ca; 5068 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5069 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5070 PetscBool match; 5071 5072 PetscFunctionBegin; 5073 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5074 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5075 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5076 if (scall == MAT_INITIAL_MATRIX){ 5077 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5078 ci[0] = 0; 5079 for (i=0; i<am; i++){ 5080 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5081 } 5082 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5083 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5084 k = 0; 5085 for (i=0; i<am; i++) { 5086 ncols_o = bi[i+1] - bi[i]; 5087 ncols_d = ai[i+1] - ai[i]; 5088 /* off-diagonal portion of A */ 5089 for (jo=0; jo<ncols_o; jo++) { 5090 col = cmap[*bj]; 5091 if (col >= cstart) break; 5092 cj[k] = col; bj++; 5093 ca[k++] = *ba++; 5094 } 5095 /* diagonal portion of A */ 5096 for (j=0; j<ncols_d; j++) { 5097 cj[k] = cstart + *aj++; 5098 ca[k++] = *aa++; 5099 } 5100 /* off-diagonal portion of A */ 5101 for (j=jo; j<ncols_o; j++) { 5102 cj[k] = cmap[*bj++]; 5103 ca[k++] = *ba++; 5104 } 5105 } 5106 /* put together the new matrix */ 5107 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5108 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5109 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5110 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5111 mat->free_a = PETSC_TRUE; 5112 mat->free_ij = PETSC_TRUE; 5113 mat->nonew = 0; 5114 } else if (scall == MAT_REUSE_MATRIX){ 5115 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5116 ci = mat->i; cj = mat->j; cam = mat->a; 5117 for (i=0; i<am; i++) { 5118 /* off-diagonal portion of A */ 5119 ncols_o = bi[i+1] - bi[i]; 5120 for (jo=0; jo<ncols_o; jo++) { 5121 col = cmap[*bj]; 5122 if (col >= cstart) break; 5123 *cam++ = *ba++; bj++; 5124 } 5125 /* diagonal portion of A */ 5126 ncols_d = ai[i+1] - ai[i]; 5127 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5128 /* off-diagonal portion of A */ 5129 for (j=jo; j<ncols_o; j++) { 5130 *cam++ = *ba++; bj++; 5131 } 5132 } 5133 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5134 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5135 PetscFunctionReturn(0); 5136 } 5137 5138 #undef __FUNCT__ 5139 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5140 /*@C 5141 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5142 5143 Not Collective 5144 5145 Input Parameters: 5146 + A - the matrix 5147 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5148 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5149 5150 Output Parameter: 5151 . 
A_loc - the local sequential matrix generated 5152 5153 Level: developer 5154 5155 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5156 5157 @*/ 5158 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5159 { 5160 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5161 PetscErrorCode ierr; 5162 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5163 IS isrowa,iscola; 5164 Mat *aloc; 5165 PetscBool match; 5166 5167 PetscFunctionBegin; 5168 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5169 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5170 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5171 if (!row){ 5172 start = A->rmap->rstart; end = A->rmap->rend; 5173 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5174 } else { 5175 isrowa = *row; 5176 } 5177 if (!col){ 5178 start = A->cmap->rstart; 5179 cmap = a->garray; 5180 nzA = a->A->cmap->n; 5181 nzB = a->B->cmap->n; 5182 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5183 ncols = 0; 5184 for (i=0; i<nzB; i++) { 5185 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5186 else break; 5187 } 5188 imark = i; 5189 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5190 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5191 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5192 } else { 5193 iscola = *col; 5194 } 5195 if (scall != MAT_INITIAL_MATRIX){ 5196 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5197 aloc[0] = *A_loc; 5198 } 5199 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5200 *A_loc = aloc[0]; 5201 ierr = PetscFree(aloc);CHKERRQ(ierr); 5202 if (!row){ 5203 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5204 } 5205 if (!col){ 5206 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5207 } 5208 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5209 PetscFunctionReturn(0); 5210 } 5211 5212 #undef __FUNCT__ 5213 #define __FUNCT__ "MatGetBrowsOfAcols" 5214 /*@C 5215 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5216 5217 Collective on Mat 5218 5219 Input Parameters: 5220 + A,B - the matrices in mpiaij format 5221 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5222 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5223 5224 Output Parameter: 5225 + rowb, colb - index sets of rows and columns of B to extract 5226 - B_seq - the sequential matrix generated 5227 5228 Level: developer 5229 5230 @*/ 5231 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5232 { 5233 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5234 PetscErrorCode ierr; 5235 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5236 IS isrowb,iscolb; 5237 Mat *bseq=PETSC_NULL; 5238 5239 PetscFunctionBegin; 5240 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5241 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5242 } 5243 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5244 5245 if (scall == MAT_INITIAL_MATRIX){ 5246 start = A->cmap->rstart; 5247 cmap = a->garray; 5248 nzA = a->A->cmap->n; 5249 nzB = a->B->cmap->n; 5250 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5251 ncols = 0; 5252 for (i=0; i<nzB; i++) { /* row < local row index */ 5253 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5254 else break; 5255 } 5256 imark = i; 5257 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5258 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5259 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5260 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5261 } else { 5262 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5263 isrowb = *rowb; iscolb = *colb; 5264 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5265 bseq[0] = *B_seq; 5266 } 5267 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5268 *B_seq = bseq[0]; 5269 ierr = PetscFree(bseq);CHKERRQ(ierr); 5270 if (!rowb){ 5271 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5272 } else { 5273 *rowb = isrowb; 5274 } 5275 if (!colb){ 5276 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5277 } else { 5278 *colb = iscolb; 5279 } 5280 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5281 PetscFunctionReturn(0); 5282 } 5283 5284 #undef __FUNCT__ 5285 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5286 /* 5287 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5288 of the OFF-DIAGONAL portion of local A 5289 5290 Collective on Mat 5291 5292 Input Parameters: 5293 + A,B - the matrices in mpiaij format 5294 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5295 5296 Output Parameter: 5297 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5298 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5299 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5300 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5301 5302 Level: developer 5303 5304 */ 5305 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5306 { 5307 VecScatter_MPI_General *gen_to,*gen_from; 5308 PetscErrorCode ierr; 5309 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5310 Mat_SeqAIJ *b_oth; 5311 VecScatter ctx=a->Mvctx; 5312 MPI_Comm comm=((PetscObject)ctx)->comm; 5313 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5314 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5315 PetscScalar *rvalues,*svalues; 5316 MatScalar *b_otha,*bufa,*bufA; 5317 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5318 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5319 MPI_Status *sstatus,rstatus; 5320 PetscMPIInt jj; 5321 PetscInt *cols,sbs,rbs; 5322 PetscScalar *vals; 5323 5324 PetscFunctionBegin; 5325 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5326 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5327 } 5328 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5329 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5330 5331 gen_to = (VecScatter_MPI_General*)ctx->todata; 5332 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5333 rvalues = gen_from->values; /* holds the length of receiving row */ 5334 svalues = gen_to->values; /* holds the length of sending row */ 5335 nrecvs = gen_from->n; 5336 nsends = gen_to->n; 5337 5338 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5339 srow = gen_to->indices; /* local row index to be sent */ 5340 sstarts = gen_to->starts; 5341 sprocs = gen_to->procs; 5342 sstatus = gen_to->sstatus; 5343 sbs = gen_to->bs; 5344 rstarts = gen_from->starts; 5345 rprocs = gen_from->procs; 5346 rbs = gen_from->bs; 5347 5348 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5349 if (scall == MAT_INITIAL_MATRIX){ 5350 /* i-array */ 5351 /*---------*/ 5352 /* post receives */ 5353 for (i=0; i<nrecvs; i++){ 5354 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5355 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5356 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5357 } 5358 5359 /* pack the outgoing message */ 5360 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5361 sstartsj[0] = 0; rstartsj[0] = 0; 5362 len = 0; /* total length of j or a array to be sent */ 5363 k = 0; 5364 for (i=0; i<nsends; i++){ 5365 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5366 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5367 for (j=0; j<nrows; j++) { 5368 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5369 for (l=0; l<sbs; l++){ 5370 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5371 rowlen[j*sbs+l] = ncols; 5372 len += ncols; 5373 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5374 } 5375 k++; 5376 } 5377 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5378 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5379 } 5380 /* 
recvs and sends of i-array are completed */ 5381 i = nrecvs; 5382 while (i--) { 5383 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5384 } 5385 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5386 5387 /* allocate buffers for sending j and a arrays */ 5388 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5389 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5390 5391 /* create i-array of B_oth */ 5392 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5393 b_othi[0] = 0; 5394 len = 0; /* total length of j or a array to be received */ 5395 k = 0; 5396 for (i=0; i<nrecvs; i++){ 5397 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5398 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5399 for (j=0; j<nrows; j++) { 5400 b_othi[k+1] = b_othi[k] + rowlen[j]; 5401 len += rowlen[j]; k++; 5402 } 5403 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5404 } 5405 5406 /* allocate space for j and a arrays of B_oth */ 5407 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5408 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5409 5410 /* j-array */ 5411 /*---------*/ 5412 /* post receives of j-array */ 5413 for (i=0; i<nrecvs; i++){ 5414 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5415 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5416 } 5417 5418 /* pack the outgoing message j-array */ 5419 k = 0; 5420 for (i=0; i<nsends; i++){ 5421 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5422 bufJ = bufj+sstartsj[i]; 5423 for (j=0; j<nrows; j++) { 5424 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5425 for (ll=0; ll<sbs; ll++){ 5426 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5427 for (l=0; l<ncols; l++){ 5428 *bufJ++ = cols[l]; 5429 } 5430 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5431 } 5432 } 5433 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5434 } 5435 5436 /* recvs and sends of j-array are completed */ 5437 i = nrecvs; 5438 while (i--) { 5439 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5440 } 5441 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5442 } else if (scall == MAT_REUSE_MATRIX){ 5443 sstartsj = *startsj_s; 5444 rstartsj = *startsj_r; 5445 bufa = *bufa_ptr; 5446 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5447 b_otha = b_oth->a; 5448 } else { 5449 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5450 } 5451 5452 /* a-array */ 5453 /*---------*/ 5454 /* post receives of a-array */ 5455 for (i=0; i<nrecvs; i++){ 5456 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5457 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5458 } 5459 5460 /* pack the outgoing message a-array */ 5461 k = 0; 5462 for (i=0; i<nsends; i++){ 5463 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5464 bufA = bufa+sstartsj[i]; 5465 for (j=0; j<nrows; j++) { 5466 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5467 for (ll=0; ll<sbs; ll++){ 5468 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5469 for (l=0; l<ncols; l++){ 5470 *bufA++ = vals[l]; 5471 } 5472 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5473 } 5474 } 5475 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5476 } 5477 /* recvs and sends of a-array are completed */ 5478 i = nrecvs; 5479 while (i--) { 5480 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5481 } 5482 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5483 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5484 5485 if (scall == MAT_INITIAL_MATRIX){ 5486 /* put together the new matrix */ 5487 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5488 5489 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5490 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5491 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5492 b_oth->free_a = PETSC_TRUE; 5493 b_oth->free_ij = PETSC_TRUE; 5494 b_oth->nonew = 0; 5495 5496 ierr = PetscFree(bufj);CHKERRQ(ierr); 5497 if (!startsj_s || !bufa_ptr){ 5498 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5499 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5500 } else { 5501 *startsj_s = sstartsj; 5502 *startsj_r = rstartsj; 5503 *bufa_ptr = bufa; 5504 } 5505 } 5506 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5507 PetscFunctionReturn(0); 5508 } 5509 5510 #undef __FUNCT__ 5511 #define __FUNCT__ "MatGetCommunicationStructs" 5512 /*@C 5513 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5514 5515 Not Collective 5516 5517 Input Parameters: 5518 . A - The matrix in mpiaij format 5519 5520 Output Parameter: 5521 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5522 . 
colmap - A map from global column index to local index into lvec 5523 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5524 5525 Level: developer 5526 5527 @*/ 5528 #if defined (PETSC_USE_CTABLE) 5529 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5530 #else 5531 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5532 #endif 5533 { 5534 Mat_MPIAIJ *a; 5535 5536 PetscFunctionBegin; 5537 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5538 PetscValidPointer(lvec, 2); 5539 PetscValidPointer(colmap, 3); 5540 PetscValidPointer(multScatter, 4); 5541 a = (Mat_MPIAIJ *) A->data; 5542 if (lvec) *lvec = a->lvec; 5543 if (colmap) *colmap = a->colmap; 5544 if (multScatter) *multScatter = a->Mvctx; 5545 PetscFunctionReturn(0); 5546 } 5547 5548 EXTERN_C_BEGIN 5549 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5550 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5551 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5552 EXTERN_C_END 5553 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5556 /* 5557 Computes (B'*A')' since computing B*A directly is untenable 5558 5559 n p p 5560 ( ) ( ) ( ) 5561 m ( A ) * n ( B ) = m ( C ) 5562 ( ) ( ) ( ) 5563 5564 */ 5565 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5566 { 5567 PetscErrorCode ierr; 5568 Mat At,Bt,Ct; 5569 5570 PetscFunctionBegin; 5571 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5572 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5573 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5574 ierr = MatDestroy(&At);CHKERRQ(ierr); 5575 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5576 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5577 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5578 PetscFunctionReturn(0); 5579 } 5580 5581 #undef __FUNCT__ 5582 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5583 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5584 { 5585 PetscErrorCode ierr; 5586 PetscInt m=A->rmap->n,n=B->cmap->n; 5587 Mat Cmat; 5588 5589 PetscFunctionBegin; 5590 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5591 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5592 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5593 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5594 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5595 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5596 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5597 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5598 *C = Cmat; 5599 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5600 PetscFunctionReturn(0); 5601 } 5602 5603 /* ----------------------------------------------------------------*/ 5604 #undef __FUNCT__ 5605 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5606 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5607 { 5608 PetscErrorCode ierr; 5609 5610 PetscFunctionBegin; 5611 if (scall == MAT_INITIAL_MATRIX){ 5612 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5613 } 5614 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5615 
PetscFunctionReturn(0); 5616 } 5617 5618 EXTERN_C_BEGIN 5619 #if defined(PETSC_HAVE_MUMPS) 5620 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5621 #endif 5622 #if defined(PETSC_HAVE_PASTIX) 5623 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5624 #endif 5625 #if defined(PETSC_HAVE_SUPERLU_DIST) 5626 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5627 #endif 5628 #if defined(PETSC_HAVE_CLIQUE) 5629 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5630 #endif 5631 EXTERN_C_END 5632 5633 /*MC 5634 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5635 5636 Options Database Keys: 5637 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5638 5639 Level: beginner 5640 5641 .seealso: MatCreateAIJ() 5642 M*/ 5643 5644 EXTERN_C_BEGIN 5645 #undef __FUNCT__ 5646 #define __FUNCT__ "MatCreate_MPIAIJ" 5647 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5648 { 5649 Mat_MPIAIJ *b; 5650 PetscErrorCode ierr; 5651 PetscMPIInt size; 5652 5653 PetscFunctionBegin; 5654 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5655 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5656 B->data = (void*)b; 5657 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5658 B->assembled = PETSC_FALSE; 5659 B->insertmode = NOT_SET_VALUES; 5660 b->size = size; 5661 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5662 5663 /* build cache for off array entries formed */ 5664 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5665 b->donotstash = PETSC_FALSE; 5666 b->colmap = 0; 5667 b->garray = 0; 5668 b->roworiented = PETSC_TRUE; 5669 5670 /* stuff used for matrix vector multiply */ 5671 b->lvec = PETSC_NULL; 5672 b->Mvctx = PETSC_NULL; 5673 5674 /* stuff for MatGetRow() */ 5675 b->rowindices = 0; 5676 b->rowvalues = 0; 5677 b->getrowactive = PETSC_FALSE; 5678 5679 /* flexible pointer used in CUSP/CUSPARSE classes */ 5680 b->spptr = PETSC_NULL; 5681 5682 #if defined(PETSC_HAVE_MUMPS) 5683 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5684 "MatGetFactor_aij_mumps", 5685 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5686 #endif 5687 #if defined(PETSC_HAVE_PASTIX) 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5689 "MatGetFactor_mpiaij_pastix", 5690 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5691 #endif 5692 #if defined(PETSC_HAVE_SUPERLU_DIST) 5693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5694 "MatGetFactor_mpiaij_superlu_dist", 5695 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5696 #endif 5697 #if defined(PETSC_HAVE_CLIQUE) 5698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5699 "MatGetFactor_aij_clique", 5700 MatGetFactor_aij_clique);CHKERRQ(ierr); 5701 #endif 5702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5703 "MatStoreValues_MPIAIJ", 5704 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5705 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5706 "MatRetrieveValues_MPIAIJ", 5707 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5708 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5709 "MatGetDiagonalBlock_MPIAIJ", 5710 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5711 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5712 "MatIsTranspose_MPIAIJ", 5713 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5714 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5715 "MatMPIAIJSetPreallocation_MPIAIJ", 5716 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5717 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5718 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5719 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5720 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5721 "MatDiagonalScaleLocal_MPIAIJ", 5722 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5724 "MatConvert_MPIAIJ_MPIAIJPERM", 5725 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5727 "MatConvert_MPIAIJ_MPIAIJCRL", 5728 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5730 "MatConvert_MPIAIJ_MPISBAIJ", 5731 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5732 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5733 "MatMatMult_MPIDense_MPIAIJ", 5734 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5735 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5736 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5737 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5738 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5739 "MatMatMultNumeric_MPIDense_MPIAIJ", 5740 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5741 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5742 PetscFunctionReturn(0); 5743 } 5744 EXTERN_C_END 5745 5746 #undef __FUNCT__ 5747 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5748 /*@ 5749 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5750 and "off-diagonal" part of the matrix in CSR format. 5751 5752 Collective on MPI_Comm 5753 5754 Input Parameters: 5755 + comm - MPI communicator 5756 . m - number of local rows (Cannot be PETSC_DECIDE) 5757 . n - This value should be the same as the local size used in creating the 5758 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5759 calculated if N is given) For square matrices n is almost always m. 5760 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5761 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5762 . i - row indices for "diagonal" portion of matrix 5763 . j - column indices 5764 . a - matrix values 5765 . oi - row indices for "off-diagonal" portion of matrix 5766 . oj - column indices 5767 - oa - matrix values 5768 5769 Output Parameter: 5770 . mat - the matrix 5771 5772 Level: advanced 5773 5774 Notes: 5775 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5776 must free the arrays once the matrix has been destroyed and not before. 5777 5778 The i and j indices are 0 based 5779 5780 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5781 5782 This sets local rows and cannot be used to set off-processor values. 
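   Example usage (an illustrative sketch only, not taken from a PETSc example; the sizes and values below are hypothetical, and ierr and mat are assumed to be declared by the caller): with two processes, each owning 2 rows and 2 columns of a 4x4 matrix, rank 0 could pass

     PetscInt    i[]  = {0,2,3};       /* CSR row pointers of the "diagonal" block (m+1 entries)     */
     PetscInt    j[]  = {0,1,1};       /* column indices local to the diagonal block, in [0,n)       */
     PetscScalar a[]  = {1.0,2.0,3.0};
     PetscInt    oi[] = {0,1,1};       /* CSR row pointers of the "off-diagonal" block (m+1 entries) */
     PetscInt    oj[] = {2};           /* global column indices, in [0,N)                            */
     PetscScalar oa[] = {5.0};
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   and rank 1 provides its own six arrays in the same form. The local versus global column index convention follows from the two MatCreateSeqAIJWithArrays() calls in the routine below.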
5783 5784 You cannot later use MatSetValues() to change values in this matrix. 5785 5786 .keywords: matrix, aij, compressed row, sparse, parallel 5787 5788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5789 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5790 @*/ 5791 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5792 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5793 { 5794 PetscErrorCode ierr; 5795 Mat_MPIAIJ *maij; 5796 5797 PetscFunctionBegin; 5798 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5799 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5800 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5801 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5802 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5803 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5804 maij = (Mat_MPIAIJ*) (*mat)->data; 5805 maij->donotstash = PETSC_TRUE; 5806 (*mat)->preallocated = PETSC_TRUE; 5807 5808 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5809 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5810 5811 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5812 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5813 5814 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5815 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5816 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5817 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5818 5819 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5820 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5821 PetscFunctionReturn(0); 5822 } 5823 5824 /* 5825 Special version for direct calls from Fortran 5826 */ 5827 #include <petsc-private/fortranimpl.h> 5828 5829 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5830 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5831 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5832 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5833 #endif 5834 5835 /* Change these macros so can be used in void function */ 5836 #undef CHKERRQ 5837 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5838 #undef SETERRQ2 5839 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5840 #undef SETERRQ3 5841 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5842 #undef SETERRQ 5843 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5844 5845 EXTERN_C_BEGIN 5846 #undef __FUNCT__ 5847 #define __FUNCT__ "matsetvaluesmpiaij_" 5848 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5849 { 5850 Mat mat = *mmat; 5851 PetscInt m = *mm, n = *mn; 5852 InsertMode addv = *maddv; 5853 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5854 PetscScalar value; 5855 PetscErrorCode ierr; 5856 5857 MatCheckPreallocated(mat,1); 5858 if (mat->insertmode == NOT_SET_VALUES) { 5859 mat->insertmode = addv; 5860 } 5861 #if defined(PETSC_USE_DEBUG) 5862 else if (mat->insertmode != addv) { 5863 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5864 } 5865 #endif 5866 { 5867 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5868 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5869 PetscBool roworiented = aij->roworiented; 5870 5871 /* Some Variables required in the macro */ 5872 Mat A = aij->A; 5873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5874 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5875 MatScalar *aa = a->a; 5876 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5877 Mat B = aij->B; 5878 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5879 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5880 MatScalar *ba = b->a; 5881 5882 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5883 PetscInt nonew = a->nonew; 5884 MatScalar *ap1,*ap2; 5885 5886 PetscFunctionBegin; 5887 for (i=0; i<m; i++) { 5888 if (im[i] < 0) continue; 5889 #if defined(PETSC_USE_DEBUG) 5890 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5891 #endif 5892 if (im[i] >= rstart && im[i] < rend) { 5893 row = im[i] - rstart; 5894 lastcol1 = -1; 5895 rp1 = aj + ai[row]; 5896 ap1 = aa + ai[row]; 5897 rmax1 = aimax[row]; 5898 nrow1 = ailen[row]; 5899 low1 = 0; 5900 high1 = nrow1; 5901 lastcol2 = -1; 5902 rp2 = bj + bi[row]; 5903 ap2 = ba + bi[row]; 5904 rmax2 = bimax[row]; 5905 nrow2 = bilen[row]; 5906 low2 = 0; 5907 high2 = nrow2; 5908 5909 for (j=0; j<n; j++) { 5910 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5911 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5912 if (in[j] >= cstart && in[j] < cend){ 5913 col = in[j] - cstart; 5914 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5915 } else if (in[j] < 0) continue; 5916 #if defined(PETSC_USE_DEBUG) 5917 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5918 #endif 5919 else { 5920 if (mat->was_assembled) { 5921 if (!aij->colmap) { 5922 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5923 } 5924 #if defined (PETSC_USE_CTABLE) 5925 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5926 col--; 5927 #else 5928 col = aij->colmap[in[j]] - 1; 5929 #endif 5930 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5931 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5932 col = in[j]; 5933 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5934 B = aij->B; 5935 b = (Mat_SeqAIJ*)B->data; 5936 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5937 rp2 = bj + bi[row]; 5938 ap2 = ba + bi[row]; 5939 rmax2 = bimax[row]; 5940 nrow2 = bilen[row]; 5941 low2 = 0; 5942 high2 = nrow2; 5943 bm = aij->B->rmap->n; 5944 ba = b->a; 5945 } 5946 } else col = in[j]; 5947 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5948 } 5949 } 5950 } else { 5951 if (!aij->donotstash) { 5952 if (roworiented) { 5953 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5954 } else { 5955 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5956 } 5957 } 5958 } 5959 }} 5960 PetscFunctionReturnVoid(); 5961 } 5962 EXTERN_C_END 5963 5964
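/*
   Usage sketch for MatCreateAIJ() (for illustration only, not compiled as part of this file; the
   preallocation numbers assume a hypothetical 1D tridiagonal operator with n local rows per process):

     Mat            A;
     PetscInt       n = 10;     local rows and local columns on this process
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,n,n,PETSC_DETERMINE,PETSC_DETERMINE,
                         3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
     ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   Because MatCreateAIJ() selects MATSEQAIJ on one process and MATMPIAIJ otherwise, the single call
   above handles both the sequential (d_nz/d_nnz only) and the parallel (d_nz/d_nnz plus o_nz/o_nnz)
   preallocation paths shown in its implementation earlier in this file.
*/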