#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL;
   the type also automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count the locally owned rows that contain only zeros (in both the diagonal and off-diagonal blocks) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  /* second pass: record the global indices of the rows that are kept */
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt
i,rstart,nrows,*rows; 112 113 PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 214 rowners[0] = 0; 215 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 216 rstart = rowners[rank]; 217 rend = rowners[rank+1]; 218 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 219 if (!rank) { 220 gmata = (Mat_SeqAIJ*) gmat->data; 221 /* send row lengths to all processors */ 222 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 223 for (i=1; i<size; i++) { 224 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 225 } 226 /* determine number diagonal and off-diagonal counts */ 227 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 228 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 229 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 230 jj = 0; 231 for (i=0; i<m; i++) { 232 for (j=0; j<dlens[i]; j++) { 233 if (gmata->j[jj] < rstart) ld[i]++; 234 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 235 jj++; 236 } 237 } 238 /* send column indices to other processes */ 239 for (i=1; i<size; i++) { 240 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 241 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 242 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 } 244 245 /* send numerical values to other processes */ 246 for (i=1; i<size; i++) { 247 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 248 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 249 } 250 gmataa = gmata->a; 251 gmataj = gmata->j; 252 253 } else { 254 /* receive row lengths */ 255 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 /* receive column indices */ 257 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 259 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 260 /* determine number diagonal and off-diagonal counts */ 261 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 262 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 263 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 264 jj = 0; 265 for (i=0; i<m; i++) { 266 for (j=0; j<dlens[i]; j++) { 267 if (gmataj[jj] < rstart) ld[i]++; 268 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 269 jj++; 270 } 271 } 272 /* receive numerical values */ 273 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 275 } 276 /* set preallocation */ 277 for (i=0; i<m; i++) { 278 dlens[i] -= olens[i]; 279 } 280 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 281 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 282 283 for (i=0; i<m; i++) { 284 dlens[i] += olens[i]; 285 } 286 cnt = 0; 287 for (i=0; i<m; i++) { 288 row = rstart + i; 289 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 290 cnt += dlens[i]; 291 } 292 if (rank) { 293 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 294 } 295 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 296 ierr = PetscFree(rowners);CHKERRQ(ierr); 297 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 300 *inmat = mat; 301 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 304 mat = *inmat; 305 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 306 if (!rank) { 307 /* send numerical values to other processes */ 308 gmata = (Mat_SeqAIJ*) gmat->data; 309 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 310 gmataa = gmata->a; 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 316 } else { 317 /* receive numerical values from process 0*/ 318 nz = Ad->nz + Ao->nz; 319 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 320 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 321 } 322 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 323 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 324 ad = Ad->a; 325 ao = Ao->a; 326 if (mat->rmap->n) { 327 i = 0; 328 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 329 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 330 } 331 for (i=1; i<mat->rmap->n; i++) { 332 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 333 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 334 } 335 i--; 336 if (mat->rmap->n) { 337 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 } 339 if (rank) { 340 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 CHKMEMQ; 346 PetscFunctionReturn(0); 347 } 348 349 /* 350 Local utility routine that creates a mapping from the global column 351 number to the local number in the off-diagonal part of the local 
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert a single (row,col,value) entry into the diagonal (A) SeqAIJ block of an MPIAIJ matrix;
   the column indices of each row are kept sorted (see the illustrative sketch after these macros) */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

/* Same as above, but for the off-diagonal (B) SeqAIJ block */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

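/*
   Illustrative sketch (excluded from compilation): the MatSetValues_SeqAIJ_[AB]_Private
   macros above insert one (col,value) pair into a row whose column indices are kept
   sorted.  They first narrow the search window using the column inserted previously,
   binary-search while the window holds more than 5 entries, then scan linearly and
   either update the existing entry or shift the tail of the row to make room.  The
   helper below restates that logic as a plain function with hypothetical names; it is
   not part of PETSc and exists only to make the macros easier to read.
*/
#if 0
static PetscInt MatRowInsertSketch(PetscInt *rp,MatScalar *ap,PetscInt nrow,PetscInt col,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 5) {                    /* coarse binary search on the sorted column indices */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;                 /* found the insertion point */
    if (rp[i] == col) {                     /* entry already present: add to it or overwrite it */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return nrow;
    }
  }
  /* new nonzero: shift the tail of the row up one slot (the macros reallocate the row if needed) */
  for (t=nrow-1; t>=i; t--) {rp[t+1] = rp[t]; ap[t+1] = ap[t];}
  rp[i] = col; ap[i] = value;
  return nrow+1;
}
#endif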
#undef __FUNCT__ 450 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 451 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 452 { 453 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 455 PetscErrorCode ierr; 456 PetscInt l,*garray = mat->garray,diag; 457 458 PetscFunctionBegin; 459 /* code only works for square matrices A */ 460 461 /* find size of row to the left of the diagonal part */ 462 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 463 row = row - diag; 464 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 465 if (garray[b->j[b->i[row]+l]] > diag) break; 466 } 467 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* diagonal part */ 470 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 471 472 /* right of diagonal part */ 473 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatSetValues_MPIAIJ" 479 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 480 { 481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 482 PetscScalar value; 483 PetscErrorCode ierr; 484 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 485 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 486 PetscBool roworiented = aij->roworiented; 487 488 /* Some Variables required in the macro */ 489 Mat A = aij->A; 490 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 491 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 492 MatScalar *aa = a->a; 493 PetscBool ignorezeroentries = a->ignorezeroentries; 494 Mat B = aij->B; 495 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 496 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 497 MatScalar *ba = b->a; 498 499 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 500 PetscInt nonew; 501 MatScalar *ap1,*ap2; 502 503 PetscFunctionBegin; 504 if (v) PetscValidScalarPointer(v,6); 505 for (i=0; i<m; i++) { 506 if (im[i] < 0) continue; 507 #if defined(PETSC_USE_DEBUG) 508 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 509 #endif 510 if (im[i] >= rstart && im[i] < rend) { 511 row = im[i] - rstart; 512 lastcol1 = -1; 513 rp1 = aj + ai[row]; 514 ap1 = aa + ai[row]; 515 rmax1 = aimax[row]; 516 nrow1 = ailen[row]; 517 low1 = 0; 518 high1 = nrow1; 519 lastcol2 = -1; 520 rp2 = bj + bi[row]; 521 ap2 = ba + bi[row]; 522 rmax2 = bimax[row]; 523 nrow2 = bilen[row]; 524 low2 = 0; 525 high2 = nrow2; 526 527 for (j=0; j<n; j++) { 528 if (v) { 529 if (roworiented) value = v[i*n+j]; 530 else value = v[i+j*m]; 531 } else value = 0.0; 532 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 533 if (in[j] >= cstart && in[j] < cend) { 534 col = in[j] - cstart; 535 nonew = a->nonew; 536 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 537 } else if (in[j] < 0) continue; 538 #if defined(PETSC_USE_DEBUG) 539 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 540 #endif 541 else { 542 if (mat->was_assembled) { 543 if (!aij->colmap) { 544 ierr = 
MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 545 } 546 #if defined(PETSC_USE_CTABLE) 547 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 548 col--; 549 #else 550 col = aij->colmap[in[j]] - 1; 551 #endif 552 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 553 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 554 col = in[j]; 555 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 556 B = aij->B; 557 b = (Mat_SeqAIJ*)B->data; 558 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 559 rp2 = bj + bi[row]; 560 ap2 = ba + bi[row]; 561 rmax2 = bimax[row]; 562 nrow2 = bilen[row]; 563 low2 = 0; 564 high2 = nrow2; 565 bm = aij->B->rmap->n; 566 ba = b->a; 567 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 568 } else col = in[j]; 569 nonew = b->nonew; 570 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 571 } 572 } 573 } else { 574 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 575 if (!aij->donotstash) { 576 mat->assembled = PETSC_FALSE; 577 if (roworiented) { 578 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 579 } else { 580 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } 582 } 583 } 584 } 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatGetValues_MPIAIJ" 590 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 591 { 592 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 593 PetscErrorCode ierr; 594 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 595 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 596 597 PetscFunctionBegin; 598 for (i=0; i<m; i++) { 599 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 600 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 601 if (idxm[i] >= rstart && idxm[i] < rend) { 602 row = idxm[i] - rstart; 603 for (j=0; j<n; j++) { 604 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 605 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 606 if (idxn[j] >= cstart && idxn[j] < cend) { 607 col = idxn[j] - cstart; 608 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 609 } else { 610 if (!aij->colmap) { 611 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 612 } 613 #if defined(PETSC_USE_CTABLE) 614 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 615 col--; 616 #else 617 col = aij->colmap[idxn[j]] - 1; 618 #endif 619 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 620 else { 621 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 622 } 623 } 624 } 625 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 626 } 627 PetscFunctionReturn(0); 628 } 629 630 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 631 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 634 
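/*
   The two assembly routines below complete the MatSetValues() protocol for MPIAIJ:
   entries destined for rows owned by another process are cached in mat->stash by
   MatSetValues_MPIAIJ() (unless stashing has been disabled), MatAssemblyBegin_MPIAIJ()
   checks that every process used a compatible InsertMode and starts scattering the
   stash, and MatAssemblyEnd_MPIAIJ() receives the stashed entries and applies them
   locally before assembling the A (diagonal) and B (off-diagonal) blocks.

   Caller-side sketch of the protocol (standard PETSc usage; row, col and val are
   placeholders, and the row may be owned by another process):

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/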
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 635 { 636 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 637 PetscErrorCode ierr; 638 PetscInt nstash,reallocs; 639 InsertMode addv; 640 641 PetscFunctionBegin; 642 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 643 644 /* make sure all processors are either in INSERTMODE or ADDMODE */ 645 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 646 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 647 mat->insertmode = addv; /* in case this processor had no cache */ 648 649 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 650 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 651 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNCT__ 656 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 657 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 658 { 659 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 661 PetscErrorCode ierr; 662 PetscMPIInt n; 663 PetscInt i,j,rstart,ncols,flg; 664 PetscInt *row,*col; 665 PetscBool other_disassembled; 666 PetscScalar *val; 667 InsertMode addv = mat->insertmode; 668 669 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 670 671 PetscFunctionBegin; 672 if (!aij->donotstash && !mat->nooffprocentries) { 673 while (1) { 674 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 675 if (!flg) break; 676 677 for (i=0; i<n; ) { 678 /* Now identify the consecutive vals belonging to the same row */ 679 for (j=i,rstart=row[j]; j<n; j++) { 680 if (row[j] != rstart) break; 681 } 682 if (j < n) ncols = j-i; 683 else ncols = n-i; 684 /* Now assemble all these values with a single function call */ 685 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 686 687 i = j; 688 } 689 } 690 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 691 } 692 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 693 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 694 695 /* determine if any processor has disassembled, if so we must 696 also disassemble ourselfs, in order that we may reassemble. 
*/ 697 /* 698 if nonzero structure of submatrix B cannot change then we know that 699 no processor disassembled thus we can skip this stuff 700 */ 701 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 702 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 703 if (mat->was_assembled && !other_disassembled) { 704 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 705 } 706 } 707 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 708 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 709 } 710 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 711 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 712 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 713 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 714 715 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 716 717 aij->rowvalues = 0; 718 719 /* used by MatAXPY() */ 720 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 721 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 722 723 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 724 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 730 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 731 { 732 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 737 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatZeroRows_MPIAIJ" 743 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 744 { 745 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 746 PetscErrorCode ierr; 747 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 748 PetscInt i,*owners = A->rmap->range; 749 PetscInt *nprocs,j,idx,nsends,row; 750 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 751 PetscInt *rvalues,count,base,slen,*source; 752 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 753 MPI_Comm comm = ((PetscObject)A)->comm; 754 MPI_Request *send_waits,*recv_waits; 755 MPI_Status recv_status,*send_status; 756 const PetscScalar *xx; 757 PetscScalar *bb; 758 #if defined(PETSC_DEBUG) 759 PetscBool found = PETSC_FALSE; 760 #endif 761 762 PetscFunctionBegin; 763 /* first count number of contributors to each processor */ 764 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 765 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 766 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 767 j = 0; 768 for (i=0; i<N; i++) { 769 if (lastidx > (idx = rows[i])) j = 0; 770 lastidx = idx; 771 for (; j<size; j++) { 772 if (idx >= owners[j] && idx < owners[j+1]) { 773 nprocs[2*j]++; 774 nprocs[2*j+1] = 1; 775 owner[i] = j; 776 #if defined(PETSC_DEBUG) 777 found = PETSC_TRUE; 778 #endif 779 break; 780 } 781 } 782 #if defined(PETSC_DEBUG) 783 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 784 found = PETSC_FALSE; 785 #endif 786 } 787 nsends = 0; 788 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 789 790 if (A->nooffproczerorows) { 791 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 792 nrecvs = nsends; 793 nmax = N; 
794 } else { 795 /* inform other processors of number of messages and max length*/ 796 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 797 } 798 799 /* post receives: */ 800 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 801 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 802 for (i=0; i<nrecvs; i++) { 803 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 804 } 805 806 /* do sends: 807 1) starts[i] gives the starting index in svalues for stuff going to 808 the ith processor 809 */ 810 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 811 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 812 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 813 814 starts[0] = 0; 815 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 816 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 817 818 starts[0] = 0; 819 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 820 count = 0; 821 for (i=0; i<size; i++) { 822 if (nprocs[2*i+1]) { 823 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 824 } 825 } 826 ierr = PetscFree(starts);CHKERRQ(ierr); 827 828 base = owners[rank]; 829 830 /* wait on receives */ 831 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 832 count = nrecvs; slen = 0; 833 while (count) { 834 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 835 /* unpack receives into our local space */ 836 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 837 838 source[imdex] = recv_status.MPI_SOURCE; 839 lens[imdex] = n; 840 slen += n; 841 count--; 842 } 843 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 844 845 /* move the data into the send scatter */ 846 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 847 count = 0; 848 for (i=0; i<nrecvs; i++) { 849 values = rvalues + i*nmax; 850 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 851 } 852 ierr = PetscFree(rvalues);CHKERRQ(ierr); 853 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 854 ierr = PetscFree(owner);CHKERRQ(ierr); 855 ierr = PetscFree(nprocs);CHKERRQ(ierr); 856 857 /* fix right hand side if needed */ 858 if (x && b) { 859 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 860 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 861 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 862 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 864 } 865 /* 866 Zero the required rows. If the "diagonal block" of the matrix 867 is square and the user wishes to set the diagonal we use separate 868 code so that MatSetValues() is not called for each diagonal allocating 869 new memory, thus calling lots of mallocs and slowing things down. 
870 871 */ 872 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 873 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 875 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 876 } else if (diag != 0.0) { 877 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 879 for (i = 0; i < slen; i++) { 880 row = lrows[i] + rstart; 881 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 882 } 883 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 885 } else { 886 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 887 } 888 ierr = PetscFree(lrows);CHKERRQ(ierr); 889 890 /* wait on sends */ 891 if (nsends) { 892 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 893 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 894 ierr = PetscFree(send_status);CHKERRQ(ierr); 895 } 896 ierr = PetscFree(send_waits);CHKERRQ(ierr); 897 ierr = PetscFree(svalues);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 903 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 904 { 905 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 908 PetscInt i,*owners = A->rmap->range; 909 PetscInt *nprocs,j,idx,nsends; 910 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 911 PetscInt *rvalues,count,base,slen,*source; 912 PetscInt *lens,*lrows,*values,m; 913 MPI_Comm comm = ((PetscObject)A)->comm; 914 MPI_Request *send_waits,*recv_waits; 915 MPI_Status recv_status,*send_status; 916 const PetscScalar *xx; 917 PetscScalar *bb,*mask; 918 Vec xmask,lmask; 919 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 920 const PetscInt *aj, *ii,*ridx; 921 PetscScalar *aa; 922 #if defined(PETSC_DEBUG) 923 PetscBool found = PETSC_FALSE; 924 #endif 925 926 PetscFunctionBegin; 927 /* first count number of contributors to each processor */ 928 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 929 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 930 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 931 j = 0; 932 for (i=0; i<N; i++) { 933 if (lastidx > (idx = rows[i])) j = 0; 934 lastidx = idx; 935 for (; j<size; j++) { 936 if (idx >= owners[j] && idx < owners[j+1]) { 937 nprocs[2*j]++; 938 nprocs[2*j+1] = 1; 939 owner[i] = j; 940 #if defined(PETSC_DEBUG) 941 found = PETSC_TRUE; 942 #endif 943 break; 944 } 945 } 946 #if defined(PETSC_DEBUG) 947 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 948 found = PETSC_FALSE; 949 #endif 950 } 951 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 952 953 /* inform other processors of number of messages and max length*/ 954 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 955 956 /* post receives: */ 957 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 958 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 959 for (i=0; 
i<nrecvs; i++) { 960 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 961 } 962 963 /* do sends: 964 1) starts[i] gives the starting index in svalues for stuff going to 965 the ith processor 966 */ 967 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 968 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 969 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 970 971 starts[0] = 0; 972 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 973 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 974 975 starts[0] = 0; 976 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 count = 0; 978 for (i=0; i<size; i++) { 979 if (nprocs[2*i+1]) { 980 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 981 } 982 } 983 ierr = PetscFree(starts);CHKERRQ(ierr); 984 985 base = owners[rank]; 986 987 /* wait on receives */ 988 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 989 count = nrecvs; slen = 0; 990 while (count) { 991 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 992 /* unpack receives into our local space */ 993 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 994 995 source[imdex] = recv_status.MPI_SOURCE; 996 lens[imdex] = n; 997 slen += n; 998 count--; 999 } 1000 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1001 1002 /* move the data into the send scatter */ 1003 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1004 count = 0; 1005 for (i=0; i<nrecvs; i++) { 1006 values = rvalues + i*nmax; 1007 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1008 } 1009 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1010 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1011 ierr = PetscFree(owner);CHKERRQ(ierr); 1012 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1013 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1014 1015 /* zero diagonal part of matrix */ 1016 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1017 1018 /* handle off diagonal part of matrix */ 1019 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1020 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1021 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1022 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1023 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1027 if (x) { 1028 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1031 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1032 } 1033 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1034 1035 /* remove zeroed rows of off diagonal matrix */ 1036 ii = aij->i; 1037 for (i=0; i<slen; i++) { 1038 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1039 } 1040 1041 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1042 if (aij->compressedrow.use) { 1043 m = aij->compressedrow.nrows; 1044 ii = aij->compressedrow.i; 1045 ridx = aij->compressedrow.rindex; 1046 for (i=0; i<m; i++) { 1047 n = 
ii[i+1] - ii[i]; 1048 aj = aij->j + ii[i]; 1049 aa = aij->a + ii[i]; 1050 1051 for (j=0; j<n; j++) { 1052 if (PetscAbsScalar(mask[*aj])) { 1053 if (b) bb[*ridx] -= *aa*xx[*aj]; 1054 *aa = 0.0; 1055 } 1056 aa++; 1057 aj++; 1058 } 1059 ridx++; 1060 } 1061 } else { /* do not use compressed row format */ 1062 m = l->B->rmap->n; 1063 for (i=0; i<m; i++) { 1064 n = ii[i+1] - ii[i]; 1065 aj = aij->j + ii[i]; 1066 aa = aij->a + ii[i]; 1067 for (j=0; j<n; j++) { 1068 if (PetscAbsScalar(mask[*aj])) { 1069 if (b) bb[i] -= *aa*xx[*aj]; 1070 *aa = 0.0; 1071 } 1072 aa++; 1073 aj++; 1074 } 1075 } 1076 } 1077 if (x) { 1078 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1079 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1080 } 1081 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1082 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1083 ierr = PetscFree(lrows);CHKERRQ(ierr); 1084 1085 /* wait on sends */ 1086 if (nsends) { 1087 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1088 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1089 ierr = PetscFree(send_status);CHKERRQ(ierr); 1090 } 1091 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1092 ierr = PetscFree(svalues);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMult_MPIAIJ" 1098 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 PetscInt nt; 1103 1104 PetscFunctionBegin; 1105 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1106 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1107 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1109 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1128 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1129 { 1130 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1136 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1143 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1144 { 1145 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1146 PetscErrorCode ierr; 1147 PetscBool merged; 1148 1149 PetscFunctionBegin; 1150 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 if (!merged) { 1154 /* send it on its way */ 1155 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 /* do local part */ 1157 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1158 /* receive remote parts: note this assumes the values are not actually */ 1159 /* added in yy until the next line, */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 } else { 1162 /* do local part */ 1163 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1164 /* send it on its way */ 1165 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1166 /* values actually were received in the Begin() but we need to call this nop */ 1167 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 EXTERN_C_BEGIN 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1175 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1176 { 1177 MPI_Comm comm; 1178 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1179 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1180 IS Me,Notme; 1181 PetscErrorCode ierr; 1182 PetscInt M,N,first,last,*notme,i; 1183 PetscMPIInt size; 1184 1185 PetscFunctionBegin; 1186 /* Easy test: symmetric diagonal block */ 1187 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1188 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1189 if (!*f) PetscFunctionReturn(0); 1190 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1191 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1192 if (size == 1) PetscFunctionReturn(0); 1193 1194 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1195 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1196 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1197 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1198 for (i=0; i<first; i++) notme[i] = i; 1199 for (i=last; i<M; i++) notme[i-last+first] = i; 1200 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1201 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1202 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1203 Aoff = Aoffs[0]; 1204 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1205 Boff = Boffs[0]; 1206 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1207 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1208 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1209 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1210 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1211 ierr = PetscFree(notme);CHKERRQ(ierr); 1212 PetscFunctionReturn(0); 1213 } 1214 EXTERN_C_END 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1219 { 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 /* do nondiagonal part */ 1225 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1226 /* send it on its way */ 1227 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1228 /* do local part */ 1229 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1230 /* receive remote parts */ 1231 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 /* 1236 This only works 
correctly for square matrices where the subblock A->A is the 1237 diagonal block 1238 */ 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1241 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1242 { 1243 PetscErrorCode ierr; 1244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1245 1246 PetscFunctionBegin; 1247 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1248 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1249 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatScale_MPIAIJ" 1255 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1256 { 1257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1262 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #undef __FUNCT__ 1267 #define __FUNCT__ "MatDestroy_MPIAIJ" 1268 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1269 { 1270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 #if defined(PETSC_USE_LOG) 1275 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1276 #endif 1277 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1278 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1279 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1280 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1281 #if defined(PETSC_USE_CTABLE) 1282 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1283 #else 1284 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1285 #endif 1286 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1287 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1288 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1289 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1290 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1291 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1292 1293 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1311 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1312 PetscErrorCode ierr; 1313 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1314 int fd; 1315 PetscInt 
nz,header[4],*row_lengths,*range=0,rlen,i; 1316 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1317 PetscScalar *column_values; 1318 PetscInt message_count,flowcontrolcount; 1319 FILE *file; 1320 1321 PetscFunctionBegin; 1322 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1323 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1324 nz = A->nz + B->nz; 1325 if (!rank) { 1326 header[0] = MAT_FILE_CLASSID; 1327 header[1] = mat->rmap->N; 1328 header[2] = mat->cmap->N; 1329 1330 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1331 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1332 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1333 /* get largest number of rows any processor has */ 1334 rlen = mat->rmap->n; 1335 range = mat->rmap->range; 1336 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1337 } else { 1338 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 rlen = mat->rmap->n; 1340 } 1341 1342 /* load up the local row counts */ 1343 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1344 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1345 1346 /* store the row lengths to the file */ 1347 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1348 if (!rank) { 1349 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1350 for (i=1; i<size; i++) { 1351 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1352 rlen = range[i+1] - range[i]; 1353 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1360 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1361 } 1362 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1363 1364 /* load up the local column indices */ 1365 nzmax = nz; /* th processor needs space a largest processor needs */ 1366 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1367 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1368 cnt = 0; 1369 for (i=0; i<mat->rmap->n; i++) { 1370 for (j=B->i[i]; j<B->i[i+1]; j++) { 1371 if ((col = garray[B->j[j]]) > cstart) break; 1372 column_indices[cnt++] = col; 1373 } 1374 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1375 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1376 } 1377 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1378 1379 /* store the column indices to the file */ 1380 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1381 if (!rank) { 1382 MPI_Status status; 1383 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1384 for (i=1; i<size; i++) { 1385 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1386 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1387 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1388 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1389 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1390 } 1391 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1392 } else { 1393 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1394 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1395 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1396 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1397 } 1398 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1399 1400 /* load up the local column values */ 1401 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1402 cnt = 0; 1403 for (i=0; i<mat->rmap->n; i++) { 1404 for (j=B->i[i]; j<B->i[i+1]; j++) { 1405 if (garray[B->j[j]] > cstart) break; 1406 column_values[cnt++] = B->a[j]; 1407 } 1408 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1409 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1410 } 1411 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1412 1413 /* store the column values to the file */ 1414 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1415 if (!rank) { 1416 MPI_Status status; 1417 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 for (i=1; i<size; i++) { 1419 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1420 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1421 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1422 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1424 } 1425 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1426 } else { 1427 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1428 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1429 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1430 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree(column_values);CHKERRQ(ierr); 1433 1434 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1435 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1441 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1442 { 1443 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1444 PetscErrorCode ierr; 1445 PetscMPIInt rank = aij->rank,size = aij->size; 1446 PetscBool isdraw,iascii,isbinary; 1447 PetscViewer sviewer; 1448 PetscViewerFormat format; 1449 1450 PetscFunctionBegin; 1451 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1454 if (iascii) { 1455 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1456 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1457 MatInfo info; 1458 PetscBool inodes; 1459 1460 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1461 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1464 if (!inodes) { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } else { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } 1471 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1473 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1478 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1481 PetscInt inodecount,inodelimit,*inodes; 1482 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1483 if (inodes) { 1484 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1485 } else { 1486 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1490 PetscFunctionReturn(0); 1491 } 1492 } else if (isbinary) { 1493 if (size == 1) { 1494 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1495 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1496 } else { 1497 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } else if (isdraw) { 1501 PetscDraw draw; 1502 PetscBool isnull; 1503 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1504 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1505 } 1506 1507 if (size == 1) { 1508 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1509 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1510 } else { 1511 /* assemble the entire matrix onto first processor. 
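This gather is used by the ASCII, draw, and socket paths; just below, matrices with more than 1024 rows are refused unless -mat_ascii_output_large is set, since collecting the whole matrix on one process does not scale.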
*/ 1512 Mat A; 1513 Mat_SeqAIJ *Aloc; 1514 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1515 MatScalar *a; 1516 1517 if (mat->rmap->N > 1024) { 1518 PetscBool flg = PETSC_FALSE; 1519 1520 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1521 if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1522 } 1523 1524 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1525 if (!rank) { 1526 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1527 } else { 1528 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1529 } 1530 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1531 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1532 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1533 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1534 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1535 1536 /* copy over the A part */ 1537 Aloc = (Mat_SeqAIJ*)aij->A->data; 1538 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1539 row = mat->rmap->rstart; 1540 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1541 for (i=0; i<m; i++) { 1542 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1543 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1544 } 1545 aj = Aloc->j; 1546 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1547 1548 /* copy over the B part */ 1549 Aloc = (Mat_SeqAIJ*)aij->B->data; 1550 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1551 row = mat->rmap->rstart; 1552 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1553 ct = cols; 1554 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1555 for (i=0; i<m; i++) { 1556 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1557 row++; 1558 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1559 } 1560 ierr = PetscFree(ct);CHKERRQ(ierr); 1561 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1562 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1563 /* 1564 Everyone has to call to draw the matrix since the graphics waits are 1565 synchronized across all processors that share the PetscDraw object 1566 */ 1567 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1568 if (!rank) { 1569 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1570 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1571 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1572 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1573 } 1574 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1575 ierr = MatDestroy(&A);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatView_MPIAIJ" 1582 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1583 { 1584 PetscErrorCode ierr; 1585 PetscBool iascii,isdraw,issocket,isbinary; 1586 1587 PetscFunctionBegin; 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1589 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1590 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1592 if (iascii || isdraw || isbinary || issocket) { 1593 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (flag == SOR_APPLY_UPPER) { 1609 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1614 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 
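/* Eisenstat trick: the backward sweep above put the first half-sweep result in xx; now scatter xx to the ghost vector lvec, form the modified right-hand side bb1 = bb + ((omega-2)/omega)*D*xx + B*x_ghost, run a forward sweep with zero initial guess into xx1, and accumulate it into xx. */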
1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 Mat aA,aB,Aperm; 1703 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1704 PetscScalar *aa,*ba; 1705 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1706 PetscSF rowsf,sf; 1707 IS parcolp = PETSC_NULL; 1708 PetscBool done; 1709 PetscErrorCode ierr; 1710 1711 PetscFunctionBegin; 1712 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1713 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1714 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1715 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1716 1717 /* Invert row permutation to find out where my rows should go */ 1718 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1719 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1720 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1721 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1722 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1723 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1724 1725 /* Invert column permutation to find out where my columns should go */ 1726 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1727 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1728 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1729 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1730 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1731 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1732 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1733 1734 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1735 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1736 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1737 1738 /* Find out where my gcols should go */ 1739 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1740 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1741 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1742 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1743 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1744 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1747 1748 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1749 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1750 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1751 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1752 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1753 for (i=0; i<m; i++) { 1754 PetscInt row = rdest[i],rowner; 1755 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1756 for (j=ai[i]; j<ai[i+1]; j++) { 1757 PetscInt cowner,col = cdest[aj[j]]; 1758 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1759 if (rowner == cowner) dnnz[i]++; 1760 else onnz[i]++; 1761 } 1762 for (j=bi[i]; j<bi[i+1]; j++) { 1763 PetscInt cowner,col = gcdest[bj[j]]; 1764 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1765 if (rowner == cowner) dnnz[i]++; 1766 else onnz[i]++; 1767 } 1768 } 1769 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1771 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1772 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1773 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1774 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1775 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1776 1777 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1778 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1779 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1780 for (i=0; i<m; i++) { 1781 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1782 PetscInt rowlen; 1783 rowlen = ai[i+1] - ai[i]; 1784 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1785 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1786 rowlen = bi[i+1] - bi[i]; 1787 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1788 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1789 } 1790 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1791 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1792 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1793 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1794 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1795 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1796 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1797 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1798 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1799 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1800 *B = Aperm; 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1806 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1807 { 1808 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1809 Mat A = mat->A,B = mat->B; 1810 PetscErrorCode ierr; 1811 PetscReal isend[5],irecv[5]; 1812 1813 PetscFunctionBegin; 1814 
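/* Sum the local statistics of the diagonal (A) and off-diagonal (B) blocks, then either report the local sums (MAT_LOCAL) or reduce them over the communicator with max or sum (MAT_GLOBAL_MAX / MAT_GLOBAL_SUM). */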
info->block_size = 1.0; 1815 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1816 1817 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1818 isend[3] = info->memory; isend[4] = info->mallocs; 1819 1820 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1821 1822 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1823 isend[3] += info->memory; isend[4] += info->mallocs; 1824 if (flag == MAT_LOCAL) { 1825 info->nz_used = isend[0]; 1826 info->nz_allocated = isend[1]; 1827 info->nz_unneeded = isend[2]; 1828 info->memory = isend[3]; 1829 info->mallocs = isend[4]; 1830 } else if (flag == MAT_GLOBAL_MAX) { 1831 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1832 1833 info->nz_used = irecv[0]; 1834 info->nz_allocated = irecv[1]; 1835 info->nz_unneeded = irecv[2]; 1836 info->memory = irecv[3]; 1837 info->mallocs = irecv[4]; 1838 } else if (flag == MAT_GLOBAL_SUM) { 1839 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1840 1841 info->nz_used = irecv[0]; 1842 info->nz_allocated = irecv[1]; 1843 info->nz_unneeded = irecv[2]; 1844 info->memory = irecv[3]; 1845 info->mallocs = irecv[4]; 1846 } 1847 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1848 info->fill_ratio_needed = 0; 1849 info->factor_mallocs = 0; 1850 PetscFunctionReturn(0); 1851 } 1852 1853 #undef __FUNCT__ 1854 #define __FUNCT__ "MatSetOption_MPIAIJ" 1855 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1856 { 1857 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 switch (op) { 1862 case MAT_NEW_NONZERO_LOCATIONS: 1863 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1864 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1865 case MAT_KEEP_NONZERO_PATTERN: 1866 case MAT_NEW_NONZERO_LOCATION_ERR: 1867 case MAT_USE_INODES: 1868 case MAT_IGNORE_ZERO_ENTRIES: 1869 MatCheckPreallocated(A,1); 1870 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1871 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1872 break; 1873 case MAT_ROW_ORIENTED: 1874 a->roworiented = flg; 1875 1876 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1877 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1878 break; 1879 case MAT_NEW_DIAGONALS: 1880 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1881 break; 1882 case MAT_IGNORE_OFF_PROC_ENTRIES: 1883 a->donotstash = flg; 1884 break; 1885 case MAT_SPD: 1886 A->spd_set = PETSC_TRUE; 1887 A->spd = flg; 1888 if (flg) { 1889 A->symmetric = PETSC_TRUE; 1890 A->structurally_symmetric = PETSC_TRUE; 1891 A->symmetric_set = PETSC_TRUE; 1892 A->structurally_symmetric_set = PETSC_TRUE; 1893 } 1894 break; 1895 case MAT_SYMMETRIC: 1896 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1897 break; 1898 case MAT_STRUCTURALLY_SYMMETRIC: 1899 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1900 break; 1901 case MAT_HERMITIAN: 1902 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1903 break; 1904 case MAT_SYMMETRY_ETERNAL: 1905 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1906 break; 1907 default: 1908 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1909 } 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatGetRow_MPIAIJ" 1915 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1916 { 1917 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1918 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1919 
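/* cworkA/vworkA and cworkB/vworkB receive the column indices and values of the requested row from the diagonal block A and the off-diagonal block B; they are merged below into mat->rowindices/mat->rowvalues in increasing global column order, with A's local columns shifted by cstart and B's columns mapped through garray. */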
PetscErrorCode ierr; 1920 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1921 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1922 PetscInt *cmap,*idx_p; 1923 1924 PetscFunctionBegin; 1925 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1926 mat->getrowactive = PETSC_TRUE; 1927 1928 if (!mat->rowvalues && (idx || v)) { 1929 /* 1930 allocate enough space to hold information from the longest row. 1931 */ 1932 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1933 PetscInt max = 1,tmp; 1934 for (i=0; i<matin->rmap->n; i++) { 1935 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1936 if (max < tmp) max = tmp; 1937 } 1938 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1939 } 1940 1941 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1942 lrow = row - rstart; 1943 1944 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1945 if (!v) {pvA = 0; pvB = 0;} 1946 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1947 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1948 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1949 nztot = nzA + nzB; 1950 1951 cmap = mat->garray; 1952 if (v || idx) { 1953 if (nztot) { 1954 /* Sort by increasing column numbers, assuming A and B already sorted */ 1955 PetscInt imark = -1; 1956 if (v) { 1957 *v = v_p = mat->rowvalues; 1958 for (i=0; i<nzB; i++) { 1959 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1960 else break; 1961 } 1962 imark = i; 1963 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1964 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1965 } 1966 if (idx) { 1967 *idx = idx_p = mat->rowindices; 1968 if (imark > -1) { 1969 for (i=0; i<imark; i++) { 1970 idx_p[i] = cmap[cworkB[i]]; 1971 } 1972 } else { 1973 for (i=0; i<nzB; i++) { 1974 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1975 else break; 1976 } 1977 imark = i; 1978 } 1979 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1980 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1981 } 1982 } else { 1983 if (idx) *idx = 0; 1984 if (v) *v = 0; 1985 } 1986 } 1987 *nz = nztot; 1988 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1989 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1995 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1996 { 1997 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1998 1999 PetscFunctionBegin; 2000 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2001 aij->getrowactive = PETSC_FALSE; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatNorm_MPIAIJ" 2007 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2008 { 2009 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2010 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2011 PetscErrorCode ierr; 2012 PetscInt i,j,cstart = mat->cmap->rstart; 2013 PetscReal sum = 0.0; 2014 MatScalar *v; 2015 2016 PetscFunctionBegin; 2017 if (aij->size == 1) { 2018 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2019 } else { 2020 if (type == NORM_FROBENIUS) { 2021 v = amat->a; 2022 for (i=0; i<amat->nz; 
i++) { 2023 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2024 } 2025 v = bmat->a; 2026 for (i=0; i<bmat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2030 *norm = PetscSqrtReal(*norm); 2031 } else if (type == NORM_1) { /* max column norm */ 2032 PetscReal *tmp,*tmp2; 2033 PetscInt *jj,*garray = aij->garray; 2034 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2035 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2036 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2037 *norm = 0.0; 2038 v = amat->a; jj = amat->j; 2039 for (j=0; j<amat->nz; j++) { 2040 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2041 } 2042 v = bmat->a; jj = bmat->j; 2043 for (j=0; j<bmat->nz; j++) { 2044 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2045 } 2046 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2047 for (j=0; j<mat->cmap->N; j++) { 2048 if (tmp2[j] > *norm) *norm = tmp2[j]; 2049 } 2050 ierr = PetscFree(tmp);CHKERRQ(ierr); 2051 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2052 } else if (type == NORM_INFINITY) { /* max row norm */ 2053 PetscReal ntemp = 0.0; 2054 for (j=0; j<aij->A->rmap->n; j++) { 2055 v = amat->a + amat->i[j]; 2056 sum = 0.0; 2057 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2058 sum += PetscAbsScalar(*v); v++; 2059 } 2060 v = bmat->a + bmat->i[j]; 2061 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 if (sum > ntemp) ntemp = sum; 2065 } 2066 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2067 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2068 } 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatTranspose_MPIAIJ" 2074 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2075 { 2076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2077 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2078 PetscErrorCode ierr; 2079 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2080 PetscInt cstart = A->cmap->rstart,ncol; 2081 Mat B; 2082 MatScalar *array; 2083 2084 PetscFunctionBegin; 2085 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2086 2087 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2088 ai = Aloc->i; aj = Aloc->j; 2089 bi = Bloc->i; bj = Bloc->j; 2090 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2091 PetscInt *d_nnz,*g_nnz,*o_nnz; 2092 PetscSFNode *oloc; 2093 PETSC_UNUSED PetscSF sf; 2094 2095 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2096 /* compute d_nnz for preallocation */ 2097 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2098 for (i=0; i<ai[ma]; i++) { 2099 d_nnz[aj[i]]++; 2100 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2101 } 2102 /* compute local off-diagonal contributions */ 2103 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2104 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2105 /* map those to global */ 2106 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2107 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2108 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2109 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2110 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2111 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2112 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2113 2114 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2115 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2116 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2117 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2118 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2119 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2120 } else { 2121 B = *matout; 2122 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2123 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 2126 /* copy over the A part */ 2127 array = Aloc->a; 2128 row = A->rmap->rstart; 2129 for (i=0; i<ma; i++) { 2130 ncol = ai[i+1]-ai[i]; 2131 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2132 row++; 2133 array += ncol; aj += ncol; 2134 } 2135 aj = Aloc->j; 2136 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2137 2138 /* copy over the B part */ 2139 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2140 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2141 array = Bloc->a; 2142 row = A->rmap->rstart; 2143 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2144 cols_tmp = cols; 2145 for (i=0; i<mb; i++) { 2146 ncol = bi[i+1]-bi[i]; 2147 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2148 row++; 2149 array += ncol; cols_tmp += ncol; 2150 } 2151 ierr = PetscFree(cols);CHKERRQ(ierr); 2152 2153 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2156 *matout = B; 2157 } else { 2158 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2165 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2166 { 2167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2168 Mat a = aij->A,b = aij->B; 2169 PetscErrorCode ierr; 2170 PetscInt s1,s2,s3; 2171 2172 PetscFunctionBegin; 2173 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2174 if (rr) { 2175 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2176 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2177 /* Overlap communication with computation. 
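The scatter of rr into the ghost vector lvec is started here; the left scaling of the off-diagonal block and the scaling of the diagonal block are done while it is in flight, and the scatter is completed below before the off-diagonal block is right-scaled with the ghosted values.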
*/ 2178 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2179 } 2180 if (ll) { 2181 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2182 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2183 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2184 } 2185 /* scale the diagonal block */ 2186 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2187 2188 if (rr) { 2189 /* Do a scatter end and then right scale the off-diagonal block */ 2190 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2191 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2198 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2199 { 2200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatEqual_MPIAIJ" 2210 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2211 { 2212 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2213 Mat a,b,c,d; 2214 PetscBool flg; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 a = matA->A; b = matA->B; 2219 c = matB->A; d = matB->B; 2220 2221 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2222 if (flg) { 2223 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2224 } 2225 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "MatCopy_MPIAIJ" 2231 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2232 { 2233 PetscErrorCode ierr; 2234 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2235 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2236 2237 PetscFunctionBegin; 2238 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2239 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2240 /* because of the column compression in the off-processor part of the matrix a->B, 2241 the number of columns in a->B and b->B may be different, hence we cannot call 2242 the MatCopy() directly on the two parts. 
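Instead we fall back to the generic MatCopy_Basic(), which copies the values row by row with MatGetRow()/MatSetValues().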
If need be, we can provide a more 2243 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2244 then copying the submatrices */ 2245 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2246 } else { 2247 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2248 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2249 } 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatSetUp_MPIAIJ" 2255 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2256 { 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2266 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2267 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2268 { 2269 PetscInt i,m=Y->rmap->N; 2270 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2271 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2272 const PetscInt *xi = x->i,*yi = y->i; 2273 2274 PetscFunctionBegin; 2275 /* Set the number of nonzeros in the new matrix */ 2276 for (i=0; i<m; i++) { 2277 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2278 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2279 nnz[i] = 0; 2280 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2281 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2282 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2283 nnz[i]++; 2284 } 2285 for (; k<nzy; k++) nnz[i]++; 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatAXPY_MPIAIJ" 2292 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2293 { 2294 PetscErrorCode ierr; 2295 PetscInt i; 2296 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2297 PetscBLASInt bnz,one=1; 2298 Mat_SeqAIJ *x,*y; 2299 2300 PetscFunctionBegin; 2301 if (str == SAME_NONZERO_PATTERN) { 2302 PetscScalar alpha = a; 2303 x = (Mat_SeqAIJ*)xx->A->data; 2304 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2305 y = (Mat_SeqAIJ*)yy->A->data; 2306 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2307 x = (Mat_SeqAIJ*)xx->B->data; 2308 y = (Mat_SeqAIJ*)yy->B->data; 2309 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2310 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2311 } else if (str == SUBSET_NONZERO_PATTERN) { 2312 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2313 2314 x = (Mat_SeqAIJ*)xx->B->data; 2315 y = (Mat_SeqAIJ*)yy->B->data; 2316 if (y->xtoy && y->XtoY != xx->B) { 2317 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2318 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2319 } 2320 if (!y->xtoy) { /* get xtoy */ 2321 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2322 y->XtoY = xx->B; 2323 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2324 } 2325 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2326 } else { 2327 Mat B; 2328 PetscInt *nnz_d,*nnz_o; 2329 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2330 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2331 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2332 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2333 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2334 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2335 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2336 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2337 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2338 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2339 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2340 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2341 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2342 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2343 } 2344 PetscFunctionReturn(0); 2345 } 2346 2347 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatConjugate_MPIAIJ" 2351 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2352 { 2353 #if defined(PETSC_USE_COMPLEX) 2354 PetscErrorCode ierr; 2355 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2356 2357 PetscFunctionBegin; 2358 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2359 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2360 #else 2361 PetscFunctionBegin; 2362 #endif 2363 PetscFunctionReturn(0); 2364 } 2365 2366 #undef __FUNCT__ 2367 #define __FUNCT__ "MatRealPart_MPIAIJ" 2368 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2369 { 2370 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2375 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2381 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2382 { 2383 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2388 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 #if defined(PETSC_HAVE_PBGL) 2393 2394 #include <boost/parallel/mpi/bsp_process_group.hpp> 2395 #include <boost/graph/distributed/ilu_default_graph.hpp> 2396 #include <boost/graph/distributed/ilu_0_block.hpp> 2397 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2398 #include <boost/graph/distributed/petsc/interface.hpp> 2399 #include <boost/multi_array.hpp> 2400 #include <boost/parallel/distributed_property_map->hpp> 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2404 /* 2405 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2406 */ 2407 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2408 { 2409 namespace petsc = boost::distributed::petsc; 2410 2411 namespace graph_dist = boost::graph::distributed; 2412 using boost::graph::distributed::ilu_default::process_group_type; 2413 using boost::graph::ilu_permuted; 2414 2415 PetscBool row_identity, col_identity; 2416 PetscContainer c; 2417 PetscInt m, n, M, N; 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2422 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2423 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2424 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2425 2426 process_group_type pg; 2427 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2428 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2429 lgraph_type& level_graph = *lgraph_p; 2430 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2431 2432 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2433 ilu_permuted(level_graph); 2434 2435 /* put together the new matrix */ 2436 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2437 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2438 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2439 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2440 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2441 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2442 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2444 2445 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2446 ierr = PetscContainerSetPointer(c, lgraph_p); 2447 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2448 ierr = PetscContainerDestroy(&c); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2454 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2455 { 2456 PetscFunctionBegin; 2457 PetscFunctionReturn(0); 2458 } 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "MatSolve_MPIAIJ" 2462 /* 2463 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2464 */ 2465 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2466 { 2467 namespace graph_dist = boost::graph::distributed; 2468 2469 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2470 lgraph_type *lgraph_p; 2471 PetscContainer c; 2472 PetscErrorCode ierr; 2473 2474 PetscFunctionBegin; 2475 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2476 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2477 ierr = VecCopy(b, x);CHKERRQ(ierr); 2478 2479 PetscScalar *array_x; 2480 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2481 PetscInt sx; 2482 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2483 2484 PetscScalar *array_b; 2485 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2486 PetscInt sb; 2487 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2488 2489 lgraph_type& level_graph = *lgraph_p; 2490 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2491 2492 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2493 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2494 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2495 2496 typedef boost::iterator_property_map<array_ref_type::iterator, 2497 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2498 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2499 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2500 2501 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2502 PetscFunctionReturn(0); 2503 } 2504 #endif 2505 2506 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2507 PetscInt nzlocal,nsends,nrecvs; 2508 PetscMPIInt *send_rank,*recv_rank; 2509 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2510 PetscScalar *sbuf_a,**rbuf_a; 2511 PetscErrorCode (*Destroy)(Mat); 2512 } Mat_Redundant; 2513 2514 #undef __FUNCT__ 2515 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2516 PetscErrorCode 
PetscContainerDestroy_MatRedundant(void *ptr) 2517 { 2518 PetscErrorCode ierr; 2519 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2520 PetscInt i; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2524 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2525 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2526 for (i=0; i<redund->nrecvs; i++) { 2527 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2528 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2529 } 2530 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2531 ierr = PetscFree(redund);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatDestroy_MatRedundant" 2537 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2538 { 2539 PetscErrorCode ierr; 2540 PetscContainer container; 2541 Mat_Redundant *redund=PETSC_NULL; 2542 2543 PetscFunctionBegin; 2544 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2545 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2546 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2547 2548 A->ops->destroy = redund->Destroy; 2549 2550 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2551 if (A->ops->destroy) { 2552 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2553 } 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2559 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2560 { 2561 PetscMPIInt rank,size; 2562 MPI_Comm comm=((PetscObject)mat)->comm; 2563 PetscErrorCode ierr; 2564 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2565 PetscMPIInt *send_rank= PETSC_NULL,*recv_rank=PETSC_NULL; 2566 PetscInt *rowrange = mat->rmap->range; 2567 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2568 Mat A = aij->A,B=aij->B,C=*matredundant; 2569 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2570 PetscScalar *sbuf_a; 2571 PetscInt nzlocal=a->nz+b->nz; 2572 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2573 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2574 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2575 MatScalar *aworkA,*aworkB; 2576 PetscScalar *vals; 2577 PetscMPIInt tag1,tag2,tag3,imdex; 2578 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL; 2579 MPI_Request *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2580 MPI_Status recv_status,*send_status; 2581 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2582 PetscInt **rbuf_j=PETSC_NULL; 2583 PetscScalar **rbuf_a=PETSC_NULL; 2584 Mat_Redundant *redund =PETSC_NULL; 2585 PetscContainer container; 2586 2587 PetscFunctionBegin; 2588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2590 2591 if (reuse == MAT_REUSE_MATRIX) { 2592 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2593 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2594 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2595 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2596 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2597 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2598 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2599 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2600 2601 nsends = redund->nsends; 2602 nrecvs = redund->nrecvs; 2603 send_rank = redund->send_rank; 2604 recv_rank = redund->recv_rank; 2605 sbuf_nz = redund->sbuf_nz; 2606 rbuf_nz = redund->rbuf_nz; 2607 sbuf_j = redund->sbuf_j; 2608 sbuf_a = redund->sbuf_a; 2609 rbuf_j = redund->rbuf_j; 2610 rbuf_a = redund->rbuf_a; 2611 } 2612 2613 if (reuse == MAT_INITIAL_MATRIX) { 2614 PetscMPIInt subrank,subsize; 2615 PetscInt nleftover,np_subcomm; 2616 /* get the destination processors' id send_rank, nsends and nrecvs */ 2617 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2618 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2619 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2620 2621 np_subcomm = size/nsubcomm; 2622 nleftover = size - nsubcomm*np_subcomm; 2623 2624 nsends = 0; nrecvs = 0; 2625 for (i=0; i<size; i++) { /* i=rank*/ 2626 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2627 send_rank[nsends] = i; nsends++; 2628 recv_rank[nrecvs++] = i; 2629 } 2630 } 2631 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2632 i = size-nleftover-1; 2633 j = 0; 2634 while (j < nsubcomm - nleftover) { 2635 send_rank[nsends++] = i; 2636 i--; j++; 2637 } 2638 } 2639 2640 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2641 for (i=0; i<nleftover; i++) { 2642 recv_rank[nrecvs++] = size-nleftover+i; 2643 } 2644 } 2645 2646 /* allocate sbuf_j, sbuf_a */ 2647 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2648 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2649 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2650 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2651 2652 /* copy mat's local entries into the buffers */ 2653 if (reuse == MAT_INITIAL_MATRIX) { 2654 rownz_max = 0; 2655 rptr = sbuf_j; 2656 cols = sbuf_j + rend-rstart + 1; 2657 vals = sbuf_a; 2658 rptr[0] = 0; 2659 for (i=0; i<rend-rstart; i++) { 2660 row = i + rstart; 2661 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2662 ncols = nzA + nzB; 2663 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2664 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2665 /* load the column indices for this row into cols */ 2666 lwrite = 0; 2667 for (l=0; l<nzB; l++) { 2668 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2669 vals[lwrite] = aworkB[l]; 2670 cols[lwrite++] = ctmp; 2671 } 2672 } 2673 for (l=0; l<nzA; l++) { 2674 vals[lwrite] = aworkA[l]; 2675 cols[lwrite++] = cstart + cworkA[l]; 2676 } 2677 for (l=0; l<nzB; l++) { 2678 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2679 vals[lwrite] = aworkB[l]; 2680 cols[lwrite++] = ctmp; 2681 } 2682 } 2683 vals += ncols; 2684 cols += ncols; 2685 rptr[i+1] = rptr[i] + ncols; 2686 if (rownz_max < ncols) rownz_max = ncols; 2687 } 2688 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2689 } else { /* only copy matrix values into sbuf_a */ 2690 rptr = sbuf_j; 2691 vals = sbuf_a; 2692 rptr[0] = 0; 2693 for (i=0; 
i<rend-rstart; i++) { 2694 row = i + rstart; 2695 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2696 ncols = nzA + nzB; 2697 cworkB = b->j + b->i[i]; 2698 aworkA = a->a + a->i[i]; 2699 aworkB = b->a + b->i[i]; 2700 lwrite = 0; 2701 for (l=0; l<nzB; l++) { 2702 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2703 } 2704 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2705 for (l=0; l<nzB; l++) { 2706 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2707 } 2708 vals += ncols; 2709 rptr[i+1] = rptr[i] + ncols; 2710 } 2711 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2712 2713 /* send nzlocal to others, and recv other's nzlocal */ 2714 /*--------------------------------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX) { 2716 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2717 2718 s_waits2 = s_waits3 + nsends; 2719 s_waits1 = s_waits2 + nsends; 2720 r_waits1 = s_waits1 + nsends; 2721 r_waits2 = r_waits1 + nrecvs; 2722 r_waits3 = r_waits2 + nrecvs; 2723 } else { 2724 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2725 2726 r_waits3 = s_waits3 + nsends; 2727 } 2728 2729 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2730 if (reuse == MAT_INITIAL_MATRIX) { 2731 /* get new tags to keep the communication clean */ 2732 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2734 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2735 2736 /* post receives of other's nzlocal */ 2737 for (i=0; i<nrecvs; i++) { 2738 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2739 } 2740 /* send nzlocal to others */ 2741 for (i=0; i<nsends; i++) { 2742 sbuf_nz[i] = nzlocal; 2743 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2744 } 2745 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2746 count = nrecvs; 2747 while (count) { 2748 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2749 recv_rank[imdex] = recv_status.MPI_SOURCE; 2750 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2751 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2752 2753 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2754 2755 rbuf_nz[imdex] += i + 2; 2756 2757 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2758 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2759 count--; 2760 } 2761 /* wait on sends of nzlocal */ 2762 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2763 /* send mat->i,j to others, and recv from other's */ 2764 /*------------------------------------------------*/ 2765 for (i=0; i<nsends; i++) { 2766 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2767 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2768 } 2769 /* wait on receives of mat->i,j */ 2770 /*------------------------------*/ 2771 count = nrecvs; 2772 while (count) { 2773 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2774 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2775 count--; 2776 } 2777 /* wait on sends of mat->i,j */ 2778 /*---------------------------*/ 2779 if (nsends) { 2780 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2781 } 2782 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2783 2784 /* post receives, send and receive mat->a */ 2785 /*----------------------------------------*/ 2786 for (imdex=0; imdex<nrecvs; imdex++) { 2787 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2788 } 2789 for (i=0; i<nsends; i++) { 2790 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2791 } 2792 count = nrecvs; 2793 while (count) { 2794 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2795 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2796 count--; 2797 } 2798 if (nsends) { 2799 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2800 } 2801 2802 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2803 2804 /* create redundant matrix */ 2805 /*-------------------------*/ 2806 if (reuse == MAT_INITIAL_MATRIX) { 2807 /* compute rownz_max for preallocation */ 2808 for (imdex=0; imdex<nrecvs; imdex++) { 2809 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2810 rptr = rbuf_j[imdex]; 2811 for (i=0; i<j; i++) { 2812 ncols = rptr[i+1] - rptr[i]; 2813 if (rownz_max < ncols) rownz_max = ncols; 2814 } 2815 } 2816 2817 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2818 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2819 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2820 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2821 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2822 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2823 } else { 2824 C = *matredundant; 2825 } 2826 2827 /* insert local matrix entries */ 2828 rptr = sbuf_j; 2829 cols = sbuf_j + rend-rstart + 1; 2830 vals = sbuf_a; 2831 for (i=0; i<rend-rstart; i++) { 2832 row = i + rstart; 2833 ncols = rptr[i+1] - rptr[i]; 2834 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2835 vals += ncols; 2836 cols += ncols; 2837 } 2838 /* insert received matrix entries */ 2839 for (imdex=0; imdex<nrecvs; imdex++) { 2840 rstart = rowrange[recv_rank[imdex]]; 2841 rend = rowrange[recv_rank[imdex]+1]; 2842 rptr = rbuf_j[imdex]; 2843 cols = rbuf_j[imdex] + rend-rstart + 1; 2844 vals = rbuf_a[imdex]; 2845 for (i=0; i<rend-rstart; i++) { 2846 row = i + rstart; 2847 ncols = rptr[i+1] - rptr[i]; 2848 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2849 vals += ncols; 2850 cols += ncols; 2851 } 2852 } 2853 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2856 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2857 if (reuse == MAT_INITIAL_MATRIX) { 2858 PetscContainer container; 2859 *matredundant = C; 2860 /* create a supporting struct and attach it to C for reuse */ 2861 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2862 ierr 
= PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2863 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2864 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2865 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2866 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2867 2868 redund->nzlocal = nzlocal; 2869 redund->nsends = nsends; 2870 redund->nrecvs = nrecvs; 2871 redund->send_rank = send_rank; 2872 redund->recv_rank = recv_rank; 2873 redund->sbuf_nz = sbuf_nz; 2874 redund->rbuf_nz = rbuf_nz; 2875 redund->sbuf_j = sbuf_j; 2876 redund->sbuf_a = sbuf_a; 2877 redund->rbuf_j = rbuf_j; 2878 redund->rbuf_a = rbuf_a; 2879 2880 redund->Destroy = C->ops->destroy; 2881 C->ops->destroy = MatDestroy_MatRedundant; 2882 } 2883 PetscFunctionReturn(0); 2884 } 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2888 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2889 { 2890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2891 PetscErrorCode ierr; 2892 PetscInt i,*idxb = 0; 2893 PetscScalar *va,*vb; 2894 Vec vtmp; 2895 2896 PetscFunctionBegin; 2897 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2898 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2899 if (idx) { 2900 for (i=0; i<A->rmap->n; i++) { 2901 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2902 } 2903 } 2904 2905 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2906 if (idx) { 2907 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2908 } 2909 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2910 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2911 2912 for (i=0; i<A->rmap->n; i++) { 2913 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2914 va[i] = vb[i]; 2915 if (idx) idx[i] = a->garray[idxb[i]]; 2916 } 2917 } 2918 2919 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2920 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2921 ierr = PetscFree(idxb);CHKERRQ(ierr); 2922 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2928 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2929 { 2930 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2931 PetscErrorCode ierr; 2932 PetscInt i,*idxb = 0; 2933 PetscScalar *va,*vb; 2934 Vec vtmp; 2935 2936 PetscFunctionBegin; 2937 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2938 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2939 if (idx) { 2940 for (i=0; i<A->cmap->n; i++) { 2941 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2942 } 2943 } 2944 2945 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2946 if (idx) { 2947 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2948 } 2949 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2950 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2951 2952 for (i=0; i<A->rmap->n; i++) { 2953 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2954 va[i] = vb[i]; 2955 if (idx) idx[i] = a->garray[idxb[i]]; 2956 } 2957 } 2958 2959 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2961 ierr = PetscFree(idxb);CHKERRQ(ierr); 2962 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2963 PetscFunctionReturn(0); 2964 } 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2968 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2969 { 2970 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3063 3064 #undef __FUNCT__ 3065 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3066 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3067 { 3068 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3069 PetscErrorCode ierr; 3070 3071 PetscFunctionBegin; 3072 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3073 PetscFunctionReturn(0); 3074 } 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3078 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3079 { 3080 PetscErrorCode ierr; 3081 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3082 3083 PetscFunctionBegin; 3084 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3085 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3086 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3087 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3088 PetscFunctionReturn(0); 3089 } 3090 3091 /* -------------------------------------------------------------------*/ 3092 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3093 MatGetRow_MPIAIJ, 3094 MatRestoreRow_MPIAIJ, 3095 MatMult_MPIAIJ, 3096 /* 4*/ MatMultAdd_MPIAIJ, 3097 MatMultTranspose_MPIAIJ, 3098 MatMultTransposeAdd_MPIAIJ, 3099 #if defined(PETSC_HAVE_PBGL) 3100 MatSolve_MPIAIJ, 3101 #else 3102 0, 3103 #endif 3104 0, 3105 0, 3106 /*10*/ 0, 3107 0, 3108 0, 3109 MatSOR_MPIAIJ, 3110 MatTranspose_MPIAIJ, 3111 /*15*/ MatGetInfo_MPIAIJ, 3112 MatEqual_MPIAIJ, 3113 MatGetDiagonal_MPIAIJ, 3114 MatDiagonalScale_MPIAIJ, 3115 MatNorm_MPIAIJ, 3116 /*20*/ MatAssemblyBegin_MPIAIJ, 3117 MatAssemblyEnd_MPIAIJ, 3118 MatSetOption_MPIAIJ, 3119 MatZeroEntries_MPIAIJ, 3120 /*24*/ MatZeroRows_MPIAIJ, 3121 0, 3122 #if defined(PETSC_HAVE_PBGL) 3123 0, 3124 #else 3125 0, 3126 #endif 3127 0, 3128 0, 3129 /*29*/ MatSetUp_MPIAIJ, 3130 #if defined(PETSC_HAVE_PBGL) 3131 0, 3132 #else 3133 0, 3134 #endif 3135 0, 3136 0, 3137 0, 3138 /*34*/ MatDuplicate_MPIAIJ, 3139 0, 3140 0, 3141 0, 3142 0, 3143 /*39*/ MatAXPY_MPIAIJ, 3144 MatGetSubMatrices_MPIAIJ, 3145 MatIncreaseOverlap_MPIAIJ, 3146 MatGetValues_MPIAIJ, 3147 MatCopy_MPIAIJ, 3148 /*44*/ MatGetRowMax_MPIAIJ, 3149 MatScale_MPIAIJ, 3150 0, 3151 0, 3152 MatZeroRowsColumns_MPIAIJ, 3153 /*49*/ MatSetRandom_MPIAIJ, 3154 0, 3155 0, 3156 0, 3157 0, 3158 /*54*/ MatFDColoringCreate_MPIAIJ, 3159 0, 3160 MatSetUnfactored_MPIAIJ, 3161 MatPermute_MPIAIJ, 3162 0, 3163 /*59*/ MatGetSubMatrix_MPIAIJ, 3164 MatDestroy_MPIAIJ, 3165 MatView_MPIAIJ, 3166 0, 3167 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3168 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3169 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3170 0, 3171 0, 3172 0, 3173 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3174 MatGetRowMinAbs_MPIAIJ, 3175 0, 3176 MatSetColoring_MPIAIJ, 3177 0, 3178 MatSetValuesAdifor_MPIAIJ, 3179 /*75*/ MatFDColoringApply_AIJ, 3180 0, 3181 0, 3182 0, 3183 MatFindZeroDiagonals_MPIAIJ, 3184 /*80*/ 0, 3185 0, 3186 0, 3187 /*83*/ MatLoad_MPIAIJ, 3188 0, 3189 0, 3190 0, 3191 0, 3192 0, 3193 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3194 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3195 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3196 MatPtAP_MPIAIJ_MPIAIJ, 3197 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3198 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3199 0, 3200 0, 3201 0, 3202 0, 3203 /*99*/ 0, 3204 0, 3205 0, 3206 MatConjugate_MPIAIJ, 3207 0, 3208 /*104*/MatSetValuesRow_MPIAIJ, 3209 MatRealPart_MPIAIJ, 3210 MatImaginaryPart_MPIAIJ, 3211 0, 3212 0, 3213 /*109*/0, 3214 MatGetRedundantMatrix_MPIAIJ, 3215 MatGetRowMin_MPIAIJ, 3216 0, 3217 0, 
3218 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3219 0, 3220 0, 3221 0, 3222 0, 3223 /*119*/0, 3224 0, 3225 0, 3226 0, 3227 MatGetMultiProcBlock_MPIAIJ, 3228 /*124*/MatFindNonzeroRows_MPIAIJ, 3229 MatGetColumnNorms_MPIAIJ, 3230 MatInvertBlockDiagonal_MPIAIJ, 3231 0, 3232 MatGetSubMatricesParallel_MPIAIJ, 3233 /*129*/0, 3234 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3235 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3236 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3237 0, 3238 /*134*/0, 3239 0, 3240 0, 3241 0, 3242 0 3243 }; 3244 3245 /* ----------------------------------------------------------------------------------------*/ 3246 3247 EXTERN_C_BEGIN 3248 #undef __FUNCT__ 3249 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3250 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3251 { 3252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3253 PetscErrorCode ierr; 3254 3255 PetscFunctionBegin; 3256 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3257 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3258 PetscFunctionReturn(0); 3259 } 3260 EXTERN_C_END 3261 3262 EXTERN_C_BEGIN 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3265 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3266 { 3267 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3268 PetscErrorCode ierr; 3269 3270 PetscFunctionBegin; 3271 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3272 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3273 PetscFunctionReturn(0); 3274 } 3275 EXTERN_C_END 3276 3277 EXTERN_C_BEGIN 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3280 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3281 { 3282 Mat_MPIAIJ *b; 3283 PetscErrorCode ierr; 3284 PetscInt i; 3285 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3286 3287 PetscFunctionBegin; 3288 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3289 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3290 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3291 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3292 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3293 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3294 3295 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3296 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3297 if (d_nnz) { 3298 for (i=0; i<B->rmap->n; i++) { 3299 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3300 } 3301 } 3302 if (o_nnz) { 3303 for (i=0; i<B->rmap->n; i++) { 3304 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3305 } 3306 } 3307 b = (Mat_MPIAIJ*)B->data; 3308 3309 if (!B->preallocated) { 3310 /* Explicitly create 2 MATSEQAIJ matrices. 
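       b->A will hold the "diagonal" block (the locally owned rows against the locally owned
       columns) and b->B the "off-diagonal" block; both are sequential matrices created on
       PETSC_COMM_SELF below.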
*/ 3311 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3312 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3313 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3314 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3316 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3317 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3318 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3319 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3321 } 3322 3323 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3324 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3325 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3326 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3327 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3328 B->preallocated = PETSC_TRUE; 3329 PetscFunctionReturn(0); 3330 } 3331 EXTERN_C_END 3332 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3335 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3336 { 3337 Mat mat; 3338 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3339 PetscErrorCode ierr; 3340 3341 PetscFunctionBegin; 3342 *newmat = 0; 3343 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3344 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3345 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3346 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3347 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3348 a = (Mat_MPIAIJ*)mat->data; 3349 3350 mat->factortype = matin->factortype; 3351 mat->rmap->bs = matin->rmap->bs; 3352 mat->cmap->bs = matin->cmap->bs; 3353 mat->assembled = PETSC_TRUE; 3354 mat->insertmode = NOT_SET_VALUES; 3355 mat->preallocated = PETSC_TRUE; 3356 3357 a->size = oldmat->size; 3358 a->rank = oldmat->rank; 3359 a->donotstash = oldmat->donotstash; 3360 a->roworiented = oldmat->roworiented; 3361 a->rowindices = 0; 3362 a->rowvalues = 0; 3363 a->getrowactive = PETSC_FALSE; 3364 3365 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3366 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3367 3368 if (oldmat->colmap) { 3369 #if defined(PETSC_USE_CTABLE) 3370 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3371 #else 3372 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3373 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3374 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3375 #endif 3376 } else a->colmap = 0; 3377 if (oldmat->garray) { 3378 PetscInt len; 3379 len = oldmat->B->cmap->n; 3380 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3381 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3382 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3383 } else a->garray = 0; 3384 3385 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3386 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3387 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3388 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3389 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3390 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3391 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3392 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3393 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3394 *newmat = mat; 3395 PetscFunctionReturn(0); 3396 } 3397 3398 3399 3400 #undef __FUNCT__ 3401 #define __FUNCT__ "MatLoad_MPIAIJ" 3402 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3403 { 3404 PetscScalar *vals,*svals; 3405 MPI_Comm comm = ((PetscObject)viewer)->comm; 3406 PetscErrorCode ierr; 3407 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3408 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3409 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3410 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3411 PetscInt cend,cstart,n,*rowners,sizesset=1; 3412 int fd; 3413 PetscInt bs = 1; 3414 3415 PetscFunctionBegin; 3416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3417 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3418 if (!rank) { 3419 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3420 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3421 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3422 } 3423 3424 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3425 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3426 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3427 3428 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3429 3430 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3431 M = header[1]; N = header[2]; 3432 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3433 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3434 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3435 3436 /* If global sizes are set, check if they are consistent with that given in the file */ 3437 if (sizesset) { 3438 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3439 } 3440 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3441 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3442 3443 /* determine ownership of all (block) rows */ 3444 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3445 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3446 else m = newMat->rmap->n; /* Set by user */ 3447 3448 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3449 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3450 3451 /* First process needs enough room for process with most rows */ 3452 if (!rank) { 3453 mmax = rowners[1]; 3454 for (i=2; i<=size; i++) { 
3455 mmax = PetscMax(mmax, rowners[i]); 3456 } 3457 } else mmax = m; 3458 3459 rowners[0] = 0; 3460 for (i=2; i<=size; i++) { 3461 rowners[i] += rowners[i-1]; 3462 } 3463 rstart = rowners[rank]; 3464 rend = rowners[rank+1]; 3465 3466 /* distribute row lengths to all processors */ 3467 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3468 if (!rank) { 3469 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3470 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3471 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3472 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3473 for (j=0; j<m; j++) { 3474 procsnz[0] += ourlens[j]; 3475 } 3476 for (i=1; i<size; i++) { 3477 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3478 /* calculate the number of nonzeros on each processor */ 3479 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3480 procsnz[i] += rowlengths[j]; 3481 } 3482 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3483 } 3484 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3485 } else { 3486 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3487 } 3488 3489 if (!rank) { 3490 /* determine max buffer needed and allocate it */ 3491 maxnz = 0; 3492 for (i=0; i<size; i++) { 3493 maxnz = PetscMax(maxnz,procsnz[i]); 3494 } 3495 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3496 3497 /* read in my part of the matrix column indices */ 3498 nz = procsnz[0]; 3499 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3500 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3501 3502 /* read in every one elses and ship off */ 3503 for (i=1; i<size; i++) { 3504 nz = procsnz[i]; 3505 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3506 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3507 } 3508 ierr = PetscFree(cols);CHKERRQ(ierr); 3509 } else { 3510 /* determine buffer space needed for message */ 3511 nz = 0; 3512 for (i=0; i<m; i++) { 3513 nz += ourlens[i]; 3514 } 3515 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3516 3517 /* receive message of column indices*/ 3518 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3519 } 3520 3521 /* determine column ownership if matrix is not square */ 3522 if (N != M) { 3523 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3524 else n = newMat->cmap->n; 3525 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3526 cstart = cend - n; 3527 } else { 3528 cstart = rstart; 3529 cend = rend; 3530 n = cend - cstart; 3531 } 3532 3533 /* loop over local rows, determining number of off diagonal entries */ 3534 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3535 jj = 0; 3536 for (i=0; i<m; i++) { 3537 for (j=0; j<ourlens[i]; j++) { 3538 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3539 jj++; 3540 } 3541 } 3542 3543 for (i=0; i<m; i++) { 3544 ourlens[i] -= offlens[i]; 3545 } 3546 if (!sizesset) { 3547 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3548 } 3549 3550 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3551 3552 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3553 3554 for (i=0; i<m; i++) { 3555 ourlens[i] += offlens[i]; 3556 } 3557 3558 if (!rank) { 3559 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3560 3561 /* read in my part of the matrix numerical values 
*/ 3562 nz = procsnz[0]; 3563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3564 3565 /* insert into matrix */ 3566 jj = rstart; 3567 smycols = mycols; 3568 svals = vals; 3569 for (i=0; i<m; i++) { 3570 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3571 smycols += ourlens[i]; 3572 svals += ourlens[i]; 3573 jj++; 3574 } 3575 3576 /* read in other processors and ship out */ 3577 for (i=1; i<size; i++) { 3578 nz = procsnz[i]; 3579 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3580 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3581 } 3582 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3583 } else { 3584 /* receive numeric values */ 3585 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3586 3587 /* receive message of values*/ 3588 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3589 3590 /* insert into matrix */ 3591 jj = rstart; 3592 smycols = mycols; 3593 svals = vals; 3594 for (i=0; i<m; i++) { 3595 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3596 smycols += ourlens[i]; 3597 svals += ourlens[i]; 3598 jj++; 3599 } 3600 } 3601 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3602 ierr = PetscFree(vals);CHKERRQ(ierr); 3603 ierr = PetscFree(mycols);CHKERRQ(ierr); 3604 ierr = PetscFree(rowners);CHKERRQ(ierr); 3605 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3606 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 PetscFunctionReturn(0); 3608 } 3609 3610 #undef __FUNCT__ 3611 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3612 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3613 { 3614 PetscErrorCode ierr; 3615 IS iscol_local; 3616 PetscInt csize; 3617 3618 PetscFunctionBegin; 3619 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3620 if (call == MAT_REUSE_MATRIX) { 3621 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3622 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3623 } else { 3624 PetscInt cbs; 3625 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3626 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3627 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3628 } 3629 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3630 if (call == MAT_INITIAL_MATRIX) { 3631 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3632 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3633 } 3634 PetscFunctionReturn(0); 3635 } 3636 3637 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3638 #undef __FUNCT__ 3639 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3640 /* 3641 Not great since it makes two copies of the submatrix, first an SeqAIJ 3642 in local and then by concatenating the local matrices the end result. 3643 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3644 3645 Note: This requires a sequential iscol with all indices. 
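    The csize argument gives the number of columns of the result to be stored on this
    process (or PETSC_DECIDE to have this routine choose a layout).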
3646 */ 3647 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3648 { 3649 PetscErrorCode ierr; 3650 PetscMPIInt rank,size; 3651 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3652 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3653 PetscBool allcolumns, colflag; 3654 Mat M,Mreuse; 3655 MatScalar *vwork,*aa; 3656 MPI_Comm comm = ((PetscObject)mat)->comm; 3657 Mat_SeqAIJ *aij; 3658 3659 3660 PetscFunctionBegin; 3661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3663 3664 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3665 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3666 if (colflag && ncol == mat->cmap->N) { 3667 allcolumns = PETSC_TRUE; 3668 } else { 3669 allcolumns = PETSC_FALSE; 3670 } 3671 if (call == MAT_REUSE_MATRIX) { 3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3673 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3674 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3675 } else { 3676 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3677 } 3678 3679 /* 3680 m - number of local rows 3681 n - number of columns (same on all processors) 3682 rstart - first row in new global matrix generated 3683 */ 3684 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3685 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3686 if (call == MAT_INITIAL_MATRIX) { 3687 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3688 ii = aij->i; 3689 jj = aij->j; 3690 3691 /* 3692 Determine the number of non-zeros in the diagonal and off-diagonal 3693 portions of the matrix in order to do correct preallocation 3694 */ 3695 3696 /* first get start and end of "diagonal" columns */ 3697 if (csize == PETSC_DECIDE) { 3698 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3699 if (mglobal == n) { /* square matrix */ 3700 nlocal = m; 3701 } else { 3702 nlocal = n/size + ((n % size) > rank); 3703 } 3704 } else { 3705 nlocal = csize; 3706 } 3707 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3708 rstart = rend - nlocal; 3709 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3710 3711 /* next, compute all the lengths */ 3712 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3713 olens = dlens + m; 3714 for (i=0; i<m; i++) { 3715 jend = ii[i+1] - ii[i]; 3716 olen = 0; 3717 dlen = 0; 3718 for (j=0; j<jend; j++) { 3719 if (*jj < rstart || *jj >= rend) olen++; 3720 else dlen++; 3721 jj++; 3722 } 3723 olens[i] = olen; 3724 dlens[i] = dlen; 3725 } 3726 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3727 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3728 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3729 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3730 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3731 ierr = PetscFree(dlens);CHKERRQ(ierr); 3732 } else { 3733 PetscInt ml,nl; 3734 3735 M = *newmat; 3736 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3737 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3738 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3739 /* 3740 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
     rather than the slower MatSetValues().
  */
  M->was_assembled = PETSC_TRUE;
  M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in this processor for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
3915 For matrices that will be factored, you must leave room for (and set) 3916 the diagonal entry even if it is zero. 3917 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3918 submatrix (same value is used for all local rows). 3919 - o_nnz - array containing the number of nonzeros in the various rows of the 3920 OFF-DIAGONAL portion of the local submatrix (possibly different for 3921 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3922 structure. The size of this array is equal to the number 3923 of local rows, i.e 'm'. 3924 3925 If the *_nnz parameter is given then the *_nz parameter is ignored 3926 3927 The AIJ format (also called the Yale sparse matrix format or 3928 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3929 storage. The stored row and column indices begin with zero. 3930 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3931 3932 The parallel matrix is partitioned such that the first m0 rows belong to 3933 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3934 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3935 3936 The DIAGONAL portion of the local submatrix of a processor can be defined 3937 as the submatrix which is obtained by extraction the part corresponding to 3938 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3939 first row that belongs to the processor, r2 is the last row belonging to 3940 the this processor, and c1-c2 is range of indices of the local part of a 3941 vector suitable for applying the matrix to. This is an mxn matrix. In the 3942 common case of a square matrix, the row and column ranges are the same and 3943 the DIAGONAL part is also square. The remaining portion of the local 3944 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3945 3946 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3947 3948 You can call MatGetInfo() to get information on how effective the preallocation was; 3949 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3950 You can also run with the option -info and look for messages with the string 3951 malloc in them to see if additional memory allocation was needed. 3952 3953 Example usage: 3954 3955 Consider the following 8x8 matrix with 34 non-zero values, that is 3956 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3957 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3958 as follows: 3959 3960 .vb 3961 1 2 0 | 0 3 0 | 0 4 3962 Proc0 0 5 6 | 7 0 0 | 8 0 3963 9 0 10 | 11 0 0 | 12 0 3964 ------------------------------------- 3965 13 0 14 | 15 16 17 | 0 0 3966 Proc1 0 18 0 | 19 20 21 | 0 0 3967 0 0 0 | 22 23 0 | 24 0 3968 ------------------------------------- 3969 Proc2 25 26 27 | 0 0 28 | 29 0 3970 30 0 0 | 31 32 33 | 0 34 3971 .ve 3972 3973 This can be represented as a collection of submatrices as: 3974 3975 .vb 3976 A B C 3977 D E F 3978 G H I 3979 .ve 3980 3981 Where the submatrices A,B,C are owned by proc0, D,E,F are 3982 owned by proc1, G,H,I are owned by proc2. 3983 3984 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3985 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3986 The 'M','N' parameters are 8,8, and have the same values on all procs. 3987 3988 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3989 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3990 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3991 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3992 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3993 matrix, ans [DF] as another SeqAIJ matrix. 3994 3995 When d_nz, o_nz parameters are specified, d_nz storage elements are 3996 allocated for every row of the local diagonal submatrix, and o_nz 3997 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3998 One way to choose d_nz and o_nz is to use the max nonzerors per local 3999 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4000 In this case, the values of d_nz,o_nz are: 4001 .vb 4002 proc0 : dnz = 2, o_nz = 2 4003 proc1 : dnz = 3, o_nz = 2 4004 proc2 : dnz = 1, o_nz = 4 4005 .ve 4006 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4007 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4008 for proc3. i.e we are using 12+15+10=37 storage locations to store 4009 34 values. 4010 4011 When d_nnz, o_nnz parameters are specified, the storage is specified 4012 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4013 In the above case the values for d_nnz,o_nnz are: 4014 .vb 4015 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4016 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4017 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4018 .ve 4019 Here the space allocated is sum of all the above values i.e 34, and 4020 hence pre-allocation is perfect. 4021 4022 Level: intermediate 4023 4024 .keywords: matrix, aij, compressed row, sparse, parallel 4025 4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4027 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4028 @*/ 4029 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4030 { 4031 PetscErrorCode ierr; 4032 4033 PetscFunctionBegin; 4034 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4035 PetscValidType(B,1); 4036 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4037 PetscFunctionReturn(0); 4038 } 4039 4040 #undef __FUNCT__ 4041 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4042 /*@ 4043 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4044 CSR format the local rows. 4045 4046 Collective on MPI_Comm 4047 4048 Input Parameters: 4049 + comm - MPI communicator 4050 . m - number of local rows (Cannot be PETSC_DECIDE) 4051 . n - This value should be the same as the local size used in creating the 4052 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4053 calculated if N is given) For square matrices n is almost always m. 4054 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4055 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4056 . i - row indices 4057 . j - column indices 4058 - a - matrix values 4059 4060 Output Parameter: 4061 . mat - the matrix 4062 4063 Level: intermediate 4064 4065 Notes: 4066 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4067 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4068 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
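       As an illustration (a hypothetical sketch, not a complete program; comm, ierr and the
     array names are assumed), process 0 of the layout shown below could create the matrix with

.vb
     PetscInt    ai[] = {0,1,3};
     PetscInt    aj[] = {0,0,2};
     PetscScalar av[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,ai,aj,av,&A);CHKERRQ(ierr);
.ve

     while process 1 makes the matching collective call with m = 1 and its own arrays.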
       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
4140 - o_nnz - array containing the number of nonzeros in the various rows of the 4141 OFF-DIAGONAL portion of the local submatrix (possibly different for 4142 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4143 structure. The size of this array is equal to the number 4144 of local rows, i.e 'm'. 4145 4146 Output Parameter: 4147 . A - the matrix 4148 4149 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4150 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4151 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4152 4153 Notes: 4154 If the *_nnz parameter is given then the *_nz parameter is ignored 4155 4156 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4157 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4158 storage requirements for this matrix. 4159 4160 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4161 processor than it must be used on all processors that share the object for 4162 that argument. 4163 4164 The user MUST specify either the local or global matrix dimensions 4165 (possibly both). 4166 4167 The parallel matrix is partitioned across processors such that the 4168 first m0 rows belong to process 0, the next m1 rows belong to 4169 process 1, the next m2 rows belong to process 2 etc.. where 4170 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4171 values corresponding to [m x N] submatrix. 4172 4173 The columns are logically partitioned with the n0 columns belonging 4174 to 0th partition, the next n1 columns belonging to the next 4175 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4176 4177 The DIAGONAL portion of the local submatrix on any given processor 4178 is the submatrix corresponding to the rows and columns m,n 4179 corresponding to the given processor. i.e diagonal matrix on 4180 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4181 etc. The remaining portion of the local submatrix [m x (N-n)] 4182 constitute the OFF-DIAGONAL portion. The example below better 4183 illustrates this concept. 4184 4185 For a square global matrix we define each processor's diagonal portion 4186 to be its local rows and the corresponding columns (a square submatrix); 4187 each processor's off-diagonal portion encompasses the remainder of the 4188 local matrix (a rectangular submatrix). 4189 4190 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4191 4192 When calling this routine with a single process communicator, a matrix of 4193 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4194 type of communicator, use the construction mechanism: 4195 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4196 4197 By default, this format uses inodes (identical nodes) when possible. 4198 We search for consecutive rows with the same nonzero structure, thereby 4199 reusing matrix information to achieve increased efficiency. 4200 4201 Options Database Keys: 4202 + -mat_no_inode - Do not use inodes 4203 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4204 - -mat_aij_oneindex - Internally use indexing starting at 1 4205 rather than 0. Note that when calling MatSetValues(), 4206 the user still MUST index entries starting at 0! 4207 4208 4209 Example usage: 4210 4211 Consider the following 8x8 matrix with 34 non-zero values, that is 4212 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
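   As a sketch (hypothetical variable names; not a complete program), proc0 of the example
   above could therefore call

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   with proc1 and proc2 making the matching collective call using their own local sizes and
   arrays; the d_nz and o_nz arguments are ignored here because the array forms are supplied.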
4277 4278 Level: intermediate 4279 4280 .keywords: matrix, aij, compressed row, sparse, parallel 4281 4282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4283 MPIAIJ, MatCreateMPIAIJWithArrays() 4284 @*/ 4285 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4286 { 4287 PetscErrorCode ierr; 4288 PetscMPIInt size; 4289 4290 PetscFunctionBegin; 4291 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4292 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4294 if (size > 1) { 4295 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4296 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4299 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4300 } 4301 PetscFunctionReturn(0); 4302 } 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4306 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4307 { 4308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4309 4310 PetscFunctionBegin; 4311 *Ad = a->A; 4312 *Ao = a->B; 4313 *colmap = a->garray; 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4319 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4320 { 4321 PetscErrorCode ierr; 4322 PetscInt i; 4323 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4324 4325 PetscFunctionBegin; 4326 if (coloring->ctype == IS_COLORING_GLOBAL) { 4327 ISColoringValue *allcolors,*colors; 4328 ISColoring ocoloring; 4329 4330 /* set coloring for diagonal portion */ 4331 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4332 4333 /* set coloring for off-diagonal portion */ 4334 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4335 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4336 for (i=0; i<a->B->cmap->n; i++) { 4337 colors[i] = allcolors[a->garray[i]]; 4338 } 4339 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4340 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4341 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4342 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4343 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4344 ISColoringValue *colors; 4345 PetscInt *larray; 4346 ISColoring ocoloring; 4347 4348 /* set coloring for diagonal portion */ 4349 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4350 for (i=0; i<a->A->cmap->n; i++) { 4351 larray[i] = i + A->cmap->rstart; 4352 } 4353 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4354 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4355 for (i=0; i<a->A->cmap->n; i++) { 4356 colors[i] = coloring->colors[larray[i]]; 4357 } 4358 ierr = PetscFree(larray);CHKERRQ(ierr); 4359 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4360 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4361 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4362 4363 /* set coloring for off-diagonal portion */ 4364 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4365 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4366 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4367 for (i=0; i<a->B->cmap->n; i++) { 4368 colors[i] = coloring->colors[larray[i]]; 4369 } 4370 ierr = PetscFree(larray);CHKERRQ(ierr); 4371 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4372 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4373 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4374 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4380 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4381 { 4382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4383 PetscErrorCode ierr; 4384 4385 PetscFunctionBegin; 4386 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4387 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4388 PetscFunctionReturn(0); 4389 } 4390 4391 #undef __FUNCT__ 4392 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4393 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4397 PetscInt *indx; 4398 4399 PetscFunctionBegin; 4400 /* This routine will ONLY return MPIAIJ type matrix */ 4401 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4402 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4403 if (n == PETSC_DECIDE) { 4404 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4405 } 4406 /* Check sum(n) = N */ 4407 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4408 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4409 4410 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 rstart -= m; 4412 4413 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4414 for (i=0; i<m; i++) { 4415 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4416 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4417 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4418 } 4419 4420 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4421 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4422 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4423 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4424 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4425 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 #undef __FUNCT__ 4430 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4431 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4432 { 4433 PetscErrorCode ierr; 4434 PetscInt m,N,i,rstart,nnz,Ii; 4435 PetscInt *indx; 4436 PetscScalar *values; 4437 4438 PetscFunctionBegin; 4439 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4440 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4441 for (i=0; i<m; i++) { 4442 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4443 Ii = i + rstart; 4444 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4445 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4446   }
4447   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449   PetscFunctionReturn(0);
4450 }
4451
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4454 /*@
4455       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4456                  matrices from each processor
4457
4458    Collective on MPI_Comm
4459
4460    Input Parameters:
4461 +  comm - the communicator the parallel matrix will live on
4462 .  inmat - the input sequential matrix (one per process)
4463 .  n - number of local columns (or PETSC_DECIDE)
4464 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4465
4466    Output Parameter:
4467 .  outmat - the parallel matrix generated
4468
4469    Level: advanced
4470
4471    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4472
4473 @*/
4474 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4475 {
4476   PetscErrorCode ierr;
4477
4478   PetscFunctionBegin;
4479   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4480   if (scall == MAT_INITIAL_MATRIX) {
4481     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4482   }
4483   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4484   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4485   PetscFunctionReturn(0);
4486 }
4487
4488 #undef __FUNCT__
4489 #define __FUNCT__ "MatFileSplit"
4490 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4491 {
4492   PetscErrorCode    ierr;
4493   PetscMPIInt       rank;
4494   PetscInt          m,N,i,rstart,nnz;
4495   size_t            len;
4496   const PetscInt    *indx;
4497   PetscViewer       out;
4498   char              *name;
4499   Mat               B;
4500   const PetscScalar *values;
4501
4502   PetscFunctionBegin;
4503   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4504   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4505   /* Should this be the type of the diagonal block of A?
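     The code below currently hard-codes MATSEQAIJ. One possible alternative, sketched here only as a
     suggestion and not what the routine does, would be to query the type of the diagonal block and pass
     it to the MatSetType() call below:
         Mat            Ad,Ao;
         const PetscInt *garray;
         MatType        type;
         ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
         ierr = MatGetType(Ad,&type);CHKERRQ(ierr);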
*/ 4506 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4507 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4508 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4509 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4510 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4511 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4512 for (i=0; i<m; i++) { 4513 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4514 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4515 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 } 4517 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 4520 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4521 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4522 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4523 sprintf(name,"%s.%d",outfile,rank); 4524 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4525 ierr = PetscFree(name);CHKERRQ(ierr); 4526 ierr = MatView(B,out);CHKERRQ(ierr); 4527 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4528 ierr = MatDestroy(&B);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4535 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4536 { 4537 PetscErrorCode ierr; 4538 Mat_Merge_SeqsToMPI *merge; 4539 PetscContainer container; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4543 if (container) { 4544 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4557 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4558 ierr = PetscFree(merge);CHKERRQ(ierr); 4559 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4560 } 4561 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4562 PetscFunctionReturn(0); 4563 } 4564 4565 #include <../src/mat/utils/freespace.h> 4566 #include <petscbt.h> 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4570 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4571 { 4572 PetscErrorCode ierr; 4573 MPI_Comm comm=((PetscObject)mpimat)->comm; 4574 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4575 PetscMPIInt size,rank,taga,*len_s; 4576 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4577 PetscInt proc,m; 4578 PetscInt **buf_ri,**buf_rj; 4579 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4580 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4581 MPI_Request *s_waits,*r_waits; 4582 MPI_Status *status; 4583 MatScalar *aa=a->a; 4584 MatScalar **abuf_r,*ba_i; 4585 Mat_Merge_SeqsToMPI *merge; 4586 PetscContainer container; 4587 4588 PetscFunctionBegin; 4589 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4590 4591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4593 4594 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4595 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4596 4597 bi = merge->bi; 4598 bj = merge->bj; 4599 buf_ri = merge->buf_ri; 4600 buf_rj = merge->buf_rj; 4601 4602 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4603 owners = merge->rowmap->range; 4604 len_s = merge->len_s; 4605 4606 /* send and recv matrix values */ 4607 /*-----------------------------*/ 4608 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4609 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4610 4611 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4612 for (proc=0,k=0; proc<size; proc++) { 4613 if (!len_s[proc]) continue; 4614 i = owners[proc]; 4615 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4616 k++; 4617 } 4618 4619 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4620 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4621 ierr = PetscFree(status);CHKERRQ(ierr); 4622 4623 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4624 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4625 4626 /* insert mat values of mpimat */ 4627 /*----------------------------*/ 4628 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4629 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4630 4631 for (k=0; k<merge->nrecv; k++) { 4632 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4633 nrows = *(buf_ri_k[k]); 4634 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4635 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4636 } 4637 4638 /* set values of ba */ 4639 m = merge->rowmap->n; 4640 for (i=0; i<m; i++) { 4641 arow = owners[rank] + i; 4642 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4643 bnzi = bi[i+1] - bi[i]; 4644 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4645 4646 /* add local non-zero vals of this proc's seqmat into ba */ 4647 anzi = ai[arow+1] - ai[arow]; 4648 aj = a->j + ai[arow]; 4649 aa = a->a + ai[arow]; 4650 nextaj = 0; 4651 for (j=0; nextaj<anzi; j++) { 4652 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4653 ba_i[j] += aa[nextaj++]; 4654 } 4655 } 4656 4657 /* add received vals into ba */ 4658 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4659 /* i-th row */ 4660 if (i == *nextrow[k]) { 4661 anzi = *(nextai[k]+1) - *nextai[k]; 4662 aj = buf_rj[k] + *(nextai[k]); 4663 aa = abuf_r[k] + *(nextai[k]); 4664 nextaj = 0; 4665 for (j=0; nextaj<anzi; j++) { 4666 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4667 ba_i[j] += aa[nextaj++]; 4668 } 4669 } 4670 nextrow[k]++; nextai[k]++; 4671 } 4672 } 4673 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4674 } 4675 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4676 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4677 4678 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4679 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4680 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4681 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4682 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4683 PetscFunctionReturn(0); 4684 } 4685 4686 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4687 4688 #undef __FUNCT__ 4689 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4690 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4691 { 4692 PetscErrorCode ierr; 4693 Mat B_mpi; 4694 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4695 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4696 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4697 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4698 PetscInt len,proc,*dnz,*onz,bs,cbs; 4699 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4700 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4701 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4702 MPI_Status *status; 4703 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4704 PetscBT lnkbt; 4705 Mat_Merge_SeqsToMPI *merge; 4706 PetscContainer container; 4707 4708 PetscFunctionBegin; 4709 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4710 4711 /* make sure it is a PETSc comm */ 4712 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4713 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4714 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4715 4716 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4717 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4718 4719 /* determine row ownership */ 4720 /*---------------------------------------------------------*/ 4721 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4722 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4723 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4724 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4726 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4727 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4728 4729 m = merge->rowmap->n; 4730 owners = merge->rowmap->range; 4731 4732 /* determine the number of messages to send, their lengths */ 4733 /*---------------------------------------------------------*/ 4734 len_s = merge->len_s; 4735 4736 len = 0; /* length of buf_si[] */ 4737 merge->nsend = 0; 4738 for (proc=0; proc<size; proc++) { 4739 len_si[proc] = 0; 4740 if (proc == rank) { 4741 len_s[proc] = 0; 4742 } else { 4743 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4744 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4745 } 4746 if (len_s[proc]) { 4747 merge->nsend++; 4748 nrows = 0; 4749 for (i=owners[proc]; i<owners[proc+1]; i++) { 4750 if (ai[i+1] > ai[i]) nrows++; 4751 } 4752 len_si[proc] = 2*(nrows+1); 4753 len += len_si[proc]; 4754 } 4755 } 4756 4757 /* determine the number and length of messages to receive for ij-structure */ 4758 /*-------------------------------------------------------------------------*/ 4759 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4760 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4761 4762 /* post the Irecv of j-structure */ 4763 /*-------------------------------*/ 4764 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4765 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4766 4767 /* post the Isend of j-structure */ 4768 /*--------------------------------*/ 4769 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4770 4771 for (proc=0, k=0; proc<size; proc++) { 4772 if (!len_s[proc]) continue; 4773 i = owners[proc]; 4774 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4775 k++; 4776 } 4777 4778 /* receives and sends of j-structure are complete */ 4779 /*------------------------------------------------*/ 4780 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4781 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4782 4783 /* send and recv i-structure */ 4784 /*---------------------------*/ 4785 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4786 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4787 4788 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4789 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4790 for (proc=0,k=0; proc<size; proc++) { 4791 if (!len_s[proc]) continue; 4792 /* form outgoing message for i-structure: 4793 buf_si[0]: nrows to be sent 4794 [1:nrows]: row index (global) 4795 [nrows+1:2*nrows+1]: i-structure index 4796 */ 4797 /*-------------------------------------------*/ 4798 nrows = len_si[proc]/2 - 1; 4799 buf_si_i = buf_si + nrows+1; 4800 buf_si[0] = nrows; 4801 buf_si_i[0] = 0; 4802 nrows = 0; 4803 for (i=owners[proc]; i<owners[proc+1]; i++) { 4804 anzi = ai[i+1] - ai[i]; 4805 if (anzi) { 4806 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4807 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4808 nrows++; 4809 } 4810 } 4811 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4812 k++; 4813 buf_si += len_si[proc]; 4814 } 4815 4816 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4817 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4818 4819 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4820 for (i=0; i<merge->nrecv; i++) { 4821 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4822 } 4823 4824 ierr = PetscFree(len_si);CHKERRQ(ierr); 4825 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4826 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4827 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4828 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4829 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4830 ierr = PetscFree(status);CHKERRQ(ierr); 4831 4832 /* compute a local seq matrix in each processor */ 4833 /*----------------------------------------------*/ 4834 /* allocate bi array and free space for accumulating nonzero column info */ 4835 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4836 bi[0] = 0; 4837 4838 /* create and initialize a linked list */ 4839 nlnk = N+1; 4840 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4841 4842 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4843 len = ai[owners[rank+1]] - ai[owners[rank]]; 4844 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4845 4846 current_space = free_space; 4847 4848 /* determine symbolic 
info for each local row */ 4849 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4850 4851 for (k=0; k<merge->nrecv; k++) { 4852 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4853 nrows = *buf_ri_k[k]; 4854 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4855 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4856 } 4857 4858 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4859 len = 0; 4860 for (i=0; i<m; i++) { 4861 bnzi = 0; 4862 /* add local non-zero cols of this proc's seqmat into lnk */ 4863 arow = owners[rank] + i; 4864 anzi = ai[arow+1] - ai[arow]; 4865 aj = a->j + ai[arow]; 4866 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4867 bnzi += nlnk; 4868 /* add received col data into lnk */ 4869 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4870 if (i == *nextrow[k]) { /* i-th row */ 4871 anzi = *(nextai[k]+1) - *nextai[k]; 4872 aj = buf_rj[k] + *nextai[k]; 4873 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4874 bnzi += nlnk; 4875 nextrow[k]++; nextai[k]++; 4876 } 4877 } 4878 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4879 4880 /* if free space is not available, make more free space */ 4881 if (current_space->local_remaining<bnzi) { 4882 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4883 nspacedouble++; 4884 } 4885 /* copy data into free space, then initialize lnk */ 4886 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4887 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4888 4889 current_space->array += bnzi; 4890 current_space->local_used += bnzi; 4891 current_space->local_remaining -= bnzi; 4892 4893 bi[i+1] = bi[i] + bnzi; 4894 } 4895 4896 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4897 4898 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4899 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4900 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4901 4902 /* create symbolic parallel matrix B_mpi */ 4903 /*---------------------------------------*/ 4904 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4905 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4906 if (n==PETSC_DECIDE) { 4907 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4908 } else { 4909 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4910 } 4911 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4912 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4913 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4914 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4915 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4916 4917 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4918 B_mpi->assembled = PETSC_FALSE; 4919 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4920 merge->bi = bi; 4921 merge->bj = bj; 4922 merge->buf_ri = buf_ri; 4923 merge->buf_rj = buf_rj; 4924 merge->coi = PETSC_NULL; 4925 merge->coj = PETSC_NULL; 4926 merge->owners_co = PETSC_NULL; 4927 4928 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4929 4930 /* attach the supporting struct to B_mpi for reuse */ 4931 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4932 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4933   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4934   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4935   *mpimat = B_mpi;
4936
4937   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4943 /*@C
4944       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4945                  matrices from each processor
4946
4947    Collective on MPI_Comm
4948
4949    Input Parameters:
4950 +  comm - the communicator the parallel matrix will live on
4951 .  seqmat - the input sequential matrix (one per process)
4952 .  m - number of local rows (or PETSC_DECIDE)
4953 .  n - number of local columns (or PETSC_DECIDE)
4954 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4955
4956    Output Parameter:
4957 .  mpimat - the parallel matrix generated
4958
4959    Level: advanced
4960
4961    Notes:
4962      The dimensions of the sequential matrix in each processor MUST be the same.
4963      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4964      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4965 @*/
4966 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4967 {
4968   PetscErrorCode ierr;
4969   PetscMPIInt    size;
4970
4971   PetscFunctionBegin;
4972   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4973   if (size == 1) {
4974     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4975     if (scall == MAT_INITIAL_MATRIX) {
4976       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4977     } else {
4978       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4979     }
4980     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4981     PetscFunctionReturn(0);
4982   }
4983   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4984   if (scall == MAT_INITIAL_MATRIX) {
4985     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4986   }
4987   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4988   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4989   PetscFunctionReturn(0);
4990 }
4991
4992 #undef __FUNCT__
4993 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4994 /*@
4995      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4996           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4997           with MatGetSize()
4998
4999    Not Collective
5000
5001    Input Parameters:
5002 +  A - the matrix
5003 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5004
5005    Output Parameter:
5006 .  
A_loc - the local sequential matrix generated 5007 5008 Level: developer 5009 5010 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5011 5012 @*/ 5013 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5014 { 5015 PetscErrorCode ierr; 5016 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5017 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5018 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5019 MatScalar *aa=a->a,*ba=b->a,*cam; 5020 PetscScalar *ca; 5021 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5022 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5023 PetscBool match; 5024 5025 PetscFunctionBegin; 5026 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5027 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5028 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5029 if (scall == MAT_INITIAL_MATRIX) { 5030 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5031 ci[0] = 0; 5032 for (i=0; i<am; i++) { 5033 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5034 } 5035 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5036 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5037 k = 0; 5038 for (i=0; i<am; i++) { 5039 ncols_o = bi[i+1] - bi[i]; 5040 ncols_d = ai[i+1] - ai[i]; 5041 /* off-diagonal portion of A */ 5042 for (jo=0; jo<ncols_o; jo++) { 5043 col = cmap[*bj]; 5044 if (col >= cstart) break; 5045 cj[k] = col; bj++; 5046 ca[k++] = *ba++; 5047 } 5048 /* diagonal portion of A */ 5049 for (j=0; j<ncols_d; j++) { 5050 cj[k] = cstart + *aj++; 5051 ca[k++] = *aa++; 5052 } 5053 /* off-diagonal portion of A */ 5054 for (j=jo; j<ncols_o; j++) { 5055 cj[k] = cmap[*bj++]; 5056 ca[k++] = *ba++; 5057 } 5058 } 5059 /* put together the new matrix */ 5060 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5061 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5062 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5063 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5064 mat->free_a = PETSC_TRUE; 5065 mat->free_ij = PETSC_TRUE; 5066 mat->nonew = 0; 5067 } else if (scall == MAT_REUSE_MATRIX) { 5068 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5069 ci = mat->i; cj = mat->j; cam = mat->a; 5070 for (i=0; i<am; i++) { 5071 /* off-diagonal portion of A */ 5072 ncols_o = bi[i+1] - bi[i]; 5073 for (jo=0; jo<ncols_o; jo++) { 5074 col = cmap[*bj]; 5075 if (col >= cstart) break; 5076 *cam++ = *ba++; bj++; 5077 } 5078 /* diagonal portion of A */ 5079 ncols_d = ai[i+1] - ai[i]; 5080 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5081 /* off-diagonal portion of A */ 5082 for (j=jo; j<ncols_o; j++) { 5083 *cam++ = *ba++; bj++; 5084 } 5085 } 5086 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5087 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5088 PetscFunctionReturn(0); 5089 } 5090 5091 #undef __FUNCT__ 5092 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5093 /*@C 5094 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5095 5096 Not Collective 5097 5098 Input Parameters: 5099 + A - the matrix 5100 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5101 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5102 5103 Output Parameter: 5104 . 
A_loc - the local sequential matrix generated 5105 5106 Level: developer 5107 5108 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5109 5110 @*/ 5111 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5112 { 5113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5114 PetscErrorCode ierr; 5115 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5116 IS isrowa,iscola; 5117 Mat *aloc; 5118 PetscBool match; 5119 5120 PetscFunctionBegin; 5121 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5122 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5123 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5124 if (!row) { 5125 start = A->rmap->rstart; end = A->rmap->rend; 5126 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5127 } else { 5128 isrowa = *row; 5129 } 5130 if (!col) { 5131 start = A->cmap->rstart; 5132 cmap = a->garray; 5133 nzA = a->A->cmap->n; 5134 nzB = a->B->cmap->n; 5135 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5136 ncols = 0; 5137 for (i=0; i<nzB; i++) { 5138 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5139 else break; 5140 } 5141 imark = i; 5142 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5143 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5144 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5145 } else { 5146 iscola = *col; 5147 } 5148 if (scall != MAT_INITIAL_MATRIX) { 5149 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5150 aloc[0] = *A_loc; 5151 } 5152 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5153 *A_loc = aloc[0]; 5154 ierr = PetscFree(aloc);CHKERRQ(ierr); 5155 if (!row) { 5156 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5157 } 5158 if (!col) { 5159 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5160 } 5161 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5162 PetscFunctionReturn(0); 5163 } 5164 5165 #undef __FUNCT__ 5166 #define __FUNCT__ "MatGetBrowsOfAcols" 5167 /*@C 5168 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5169 5170 Collective on Mat 5171 5172 Input Parameters: 5173 + A,B - the matrices in mpiaij format 5174 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5175 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5176 5177 Output Parameter: 5178 + rowb, colb - index sets of rows and columns of B to extract 5179 - B_seq - the sequential matrix generated 5180 5181 Level: developer 5182 5183 @*/ 5184 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5185 { 5186 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5187 PetscErrorCode ierr; 5188 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5189 IS isrowb,iscolb; 5190 Mat *bseq=PETSC_NULL; 5191 5192 PetscFunctionBegin; 5193 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5194 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5195 } 5196 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5197 5198 if (scall == MAT_INITIAL_MATRIX) { 5199 start = A->cmap->rstart; 5200 cmap = a->garray; 5201 nzA = a->A->cmap->n; 5202 nzB = a->B->cmap->n; 5203 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5204 ncols = 0; 5205 for (i=0; i<nzB; i++) { /* row < local row index */ 5206 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5207 else break; 5208 } 5209 imark = i; 5210 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5211 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5212 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5213 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5214 } else { 5215 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5216 isrowb = *rowb; iscolb = *colb; 5217 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5218 bseq[0] = *B_seq; 5219 } 5220 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5221 *B_seq = bseq[0]; 5222 ierr = PetscFree(bseq);CHKERRQ(ierr); 5223 if (!rowb) { 5224 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5225 } else { 5226 *rowb = isrowb; 5227 } 5228 if (!colb) { 5229 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5230 } else { 5231 *colb = iscolb; 5232 } 5233 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5234 PetscFunctionReturn(0); 5235 } 5236 5237 #undef __FUNCT__ 5238 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5239 /* 5240 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5241 of the OFF-DIAGONAL portion of local A 5242 5243 Collective on Mat 5244 5245 Input Parameters: 5246 + A,B - the matrices in mpiaij format 5247 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5248 5249 Output Parameter: 5250 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5251 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5252 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5253 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5254 5255 Level: developer 5256 5257 */ 5258 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5259 { 5260 VecScatter_MPI_General *gen_to,*gen_from; 5261 PetscErrorCode ierr; 5262 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5263 Mat_SeqAIJ *b_oth; 5264 VecScatter ctx =a->Mvctx; 5265 MPI_Comm comm=((PetscObject)ctx)->comm; 5266 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5267 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5268 PetscScalar *rvalues,*svalues; 5269 MatScalar *b_otha,*bufa,*bufA; 5270 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5271 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5272 MPI_Status *sstatus,rstatus; 5273 PetscMPIInt jj; 5274 PetscInt *cols,sbs,rbs; 5275 PetscScalar *vals; 5276 5277 PetscFunctionBegin; 5278 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5279 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5280 } 5281 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5283 5284 gen_to = (VecScatter_MPI_General*)ctx->todata; 5285 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5286 rvalues = gen_from->values; /* holds the length of receiving row */ 5287 svalues = gen_to->values; /* holds the length of sending row */ 5288 nrecvs = gen_from->n; 5289 nsends = gen_to->n; 5290 5291 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5292 srow = gen_to->indices; /* local row index to be sent */ 5293 sstarts = gen_to->starts; 5294 sprocs = gen_to->procs; 5295 sstatus = gen_to->sstatus; 5296 sbs = gen_to->bs; 5297 rstarts = gen_from->starts; 5298 rprocs = gen_from->procs; 5299 rbs = gen_from->bs; 5300 5301 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5302 if (scall == MAT_INITIAL_MATRIX) { 5303 /* i-array */ 5304 /*---------*/ 5305 /* post receives */ 5306 for (i=0; i<nrecvs; i++) { 5307 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5308 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5309 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5310 } 5311 5312 /* pack the outgoing message */ 5313 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5314 5315 sstartsj[0] = 0; 5316 rstartsj[0] = 0; 5317 len = 0; /* total length of j or a array to be sent */ 5318 k = 0; 5319 for (i=0; i<nsends; i++) { 5320 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5321 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5322 for (j=0; j<nrows; j++) { 5323 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5324 for (l=0; l<sbs; l++) { 5325 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5326 5327 rowlen[j*sbs+l] = ncols; 5328 5329 len += ncols; 5330 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5331 } 5332 k++; 5333 } 5334 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5335 5336 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj 
and bufa */ 5337 } 5338 /* recvs and sends of i-array are completed */ 5339 i = nrecvs; 5340 while (i--) { 5341 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5342 } 5343 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5344 5345 /* allocate buffers for sending j and a arrays */ 5346 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5347 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5348 5349 /* create i-array of B_oth */ 5350 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5351 5352 b_othi[0] = 0; 5353 len = 0; /* total length of j or a array to be received */ 5354 k = 0; 5355 for (i=0; i<nrecvs; i++) { 5356 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5357 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5358 for (j=0; j<nrows; j++) { 5359 b_othi[k+1] = b_othi[k] + rowlen[j]; 5360 len += rowlen[j]; k++; 5361 } 5362 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5363 } 5364 5365 /* allocate space for j and a arrrays of B_oth */ 5366 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5367 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5368 5369 /* j-array */ 5370 /*---------*/ 5371 /* post receives of j-array */ 5372 for (i=0; i<nrecvs; i++) { 5373 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5374 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5375 } 5376 5377 /* pack the outgoing message j-array */ 5378 k = 0; 5379 for (i=0; i<nsends; i++) { 5380 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5381 bufJ = bufj+sstartsj[i]; 5382 for (j=0; j<nrows; j++) { 5383 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5384 for (ll=0; ll<sbs; ll++) { 5385 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5386 for (l=0; l<ncols; l++) { 5387 *bufJ++ = cols[l]; 5388 } 5389 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5390 } 5391 } 5392 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* recvs and sends of j-array are completed */ 5396 i = nrecvs; 5397 while (i--) { 5398 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5399 } 5400 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5401 } else if (scall == MAT_REUSE_MATRIX) { 5402 sstartsj = *startsj_s; 5403 rstartsj = *startsj_r; 5404 bufa = *bufa_ptr; 5405 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5406 b_otha = b_oth->a; 5407 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5408 5409 /* a-array */ 5410 /*---------*/ 5411 /* post receives of a-array */ 5412 for (i=0; i<nrecvs; i++) { 5413 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5414 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5415 } 5416 5417 /* pack the outgoing message a-array */ 5418 k = 0; 5419 for (i=0; i<nsends; i++) { 5420 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5421 bufA = bufa+sstartsj[i]; 5422 for (j=0; j<nrows; j++) { 5423 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5424 for (ll=0; ll<sbs; ll++) { 5425 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5426 for (l=0; l<ncols; l++) { 5427 *bufA++ = vals[l]; 5428 } 5429 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5430 } 5431 } 5432 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5433 } 5434 /* recvs and sends of a-array are completed */ 5435 i = nrecvs; 5436 while (i--) { 5437 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5438 } 5439 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5440 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5441 5442 if (scall == MAT_INITIAL_MATRIX) { 5443 /* put together the new matrix */ 5444 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5445 5446 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5447 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5448 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5449 b_oth->free_a = PETSC_TRUE; 5450 b_oth->free_ij = PETSC_TRUE; 5451 b_oth->nonew = 0; 5452 5453 ierr = PetscFree(bufj);CHKERRQ(ierr); 5454 if (!startsj_s || !bufa_ptr) { 5455 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5456 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5457 } else { 5458 *startsj_s = sstartsj; 5459 *startsj_r = rstartsj; 5460 *bufa_ptr = bufa; 5461 } 5462 } 5463 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5464 PetscFunctionReturn(0); 5465 } 5466 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatGetCommunicationStructs" 5469 /*@C 5470 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5471 5472 Not Collective 5473 5474 Input Parameters: 5475 . A - The matrix in mpiaij format 5476 5477 Output Parameter: 5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5479 . 
colmap - A map from global column index to local index into lvec 5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5481 5482 Level: developer 5483 5484 @*/ 5485 #if defined(PETSC_USE_CTABLE) 5486 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5487 #else 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5489 #endif 5490 { 5491 Mat_MPIAIJ *a; 5492 5493 PetscFunctionBegin; 5494 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5495 PetscValidPointer(lvec, 2); 5496 PetscValidPointer(colmap, 3); 5497 PetscValidPointer(multScatter, 4); 5498 a = (Mat_MPIAIJ*) A->data; 5499 if (lvec) *lvec = a->lvec; 5500 if (colmap) *colmap = a->colmap; 5501 if (multScatter) *multScatter = a->Mvctx; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 EXTERN_C_BEGIN 5506 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5507 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5508 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5509 EXTERN_C_END 5510 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5513 /* 5514 Computes (B'*A')' since computing B*A directly is untenable 5515 5516 n p p 5517 ( ) ( ) ( ) 5518 m ( A ) * n ( B ) = m ( C ) 5519 ( ) ( ) ( ) 5520 5521 */ 5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5523 { 5524 PetscErrorCode ierr; 5525 Mat At,Bt,Ct; 5526 5527 PetscFunctionBegin; 5528 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5529 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5530 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5531 ierr = MatDestroy(&At);CHKERRQ(ierr); 5532 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5533 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5535 PetscFunctionReturn(0); 5536 } 5537 5538 #undef __FUNCT__ 5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5541 { 5542 PetscErrorCode ierr; 5543 PetscInt m=A->rmap->n,n=B->cmap->n; 5544 Mat Cmat; 5545 5546 PetscFunctionBegin; 5547 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5548 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5549 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5550 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5551 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5552 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5553 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5554 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 5556 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5557 5558 *C = Cmat; 5559 PetscFunctionReturn(0); 5560 } 5561 5562 /* ----------------------------------------------------------------*/ 5563 #undef __FUNCT__ 5564 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5565 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5566 { 5567 PetscErrorCode ierr; 5568 5569 PetscFunctionBegin; 5570 if (scall == MAT_INITIAL_MATRIX) { 5571 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5572 } 5573 ierr = 
MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5574 PetscFunctionReturn(0); 5575 } 5576 5577 EXTERN_C_BEGIN 5578 #if defined(PETSC_HAVE_MUMPS) 5579 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5580 #endif 5581 #if defined(PETSC_HAVE_PASTIX) 5582 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5583 #endif 5584 #if defined(PETSC_HAVE_SUPERLU_DIST) 5585 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5586 #endif 5587 #if defined(PETSC_HAVE_CLIQUE) 5588 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5589 #endif 5590 EXTERN_C_END 5591 5592 /*MC 5593 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5594 5595 Options Database Keys: 5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5597 5598 Level: beginner 5599 5600 .seealso: MatCreateAIJ() 5601 M*/ 5602 5603 EXTERN_C_BEGIN 5604 #undef __FUNCT__ 5605 #define __FUNCT__ "MatCreate_MPIAIJ" 5606 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5607 { 5608 Mat_MPIAIJ *b; 5609 PetscErrorCode ierr; 5610 PetscMPIInt size; 5611 5612 PetscFunctionBegin; 5613 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5614 5615 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5616 B->data = (void*)b; 5617 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5618 B->assembled = PETSC_FALSE; 5619 B->insertmode = NOT_SET_VALUES; 5620 b->size = size; 5621 5622 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5623 5624 /* build cache for off array entries formed */ 5625 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5626 5627 b->donotstash = PETSC_FALSE; 5628 b->colmap = 0; 5629 b->garray = 0; 5630 b->roworiented = PETSC_TRUE; 5631 5632 /* stuff used for matrix vector multiply */ 5633 b->lvec = PETSC_NULL; 5634 b->Mvctx = PETSC_NULL; 5635 5636 /* stuff for MatGetRow() */ 5637 b->rowindices = 0; 5638 b->rowvalues = 0; 5639 b->getrowactive = PETSC_FALSE; 5640 5641 /* flexible pointer used in CUSP/CUSPARSE classes */ 5642 b->spptr = PETSC_NULL; 5643 5644 #if defined(PETSC_HAVE_MUMPS) 5645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5646 "MatGetFactor_aij_mumps", 5647 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5648 #endif 5649 #if defined(PETSC_HAVE_PASTIX) 5650 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5651 "MatGetFactor_mpiaij_pastix", 5652 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5653 #endif 5654 #if defined(PETSC_HAVE_SUPERLU_DIST) 5655 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5656 "MatGetFactor_mpiaij_superlu_dist", 5657 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5658 #endif 5659 #if defined(PETSC_HAVE_CLIQUE) 5660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5661 "MatGetFactor_aij_clique", 5662 MatGetFactor_aij_clique);CHKERRQ(ierr); 5663 #endif 5664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5665 "MatStoreValues_MPIAIJ", 5666 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5668 "MatRetrieveValues_MPIAIJ", 5669 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5671 "MatGetDiagonalBlock_MPIAIJ", 5672 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 
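  /* The PetscObjectComposeFunctionDynamic() calls above and below publish the type-specific
     implementations on the object under string names; the generic interfaces (e.g.
     MatMPIAIJSetPreallocation()) look these names up (PetscObjectQueryFunction()/PetscTryMethod())
     and dispatch to the _MPIAIJ versions registered here. */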
5673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5674 "MatIsTranspose_MPIAIJ", 5675 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5677 "MatMPIAIJSetPreallocation_MPIAIJ", 5678 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5680 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5681 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5683 "MatDiagonalScaleLocal_MPIAIJ", 5684 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5686 "MatConvert_MPIAIJ_MPIAIJPERM", 5687 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5689 "MatConvert_MPIAIJ_MPIAIJCRL", 5690 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5692 "MatConvert_MPIAIJ_MPISBAIJ", 5693 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5695 "MatMatMult_MPIDense_MPIAIJ", 5696 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5698 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5699 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5701 "MatMatMultNumeric_MPIDense_MPIAIJ", 5702 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5703 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5704 PetscFunctionReturn(0); 5705 } 5706 EXTERN_C_END 5707 5708 #undef __FUNCT__ 5709 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5710 /*@ 5711 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5712 and "off-diagonal" part of the matrix in CSR format. 5713 5714 Collective on MPI_Comm 5715 5716 Input Parameters: 5717 + comm - MPI communicator 5718 . m - number of local rows (Cannot be PETSC_DECIDE) 5719 . n - This value should be the same as the local size used in creating the 5720 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5721 calculated if N is given) For square matrices n is almost always m. 5722 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5723 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5724 . i - row indices for "diagonal" portion of matrix 5725 . j - column indices 5726 . a - matrix values 5727 . oi - row indices for "off-diagonal" portion of matrix 5728 . oj - column indices 5729 - oa - matrix values 5730 5731 Output Parameter: 5732 . mat - the matrix 5733 5734 Level: advanced 5735 5736 Notes: 5737 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5738 must free the arrays once the matrix has been destroyed and not before. 5739 5740 The i and j indices are 0 based 5741 5742 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5743 5744 This sets local rows and cannot be used to set off-processor values. 
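   Example - a minimal sketch only (the sizes and values are illustrative, not from a PETSc example):
   shown for the first of two processes that each own 2 rows and 2 columns of a 4x4 matrix; the
   "diagonal" j indices are local column indices, while the "off-diagonal" oj indices are global.
.vb
   PetscInt    i[]  = {0,1,2}, j[]  = {0,1}, oi[] = {0,1,2}, oj[] = {3,2};
   PetscScalar a[]  = {1.0,2.0},             oa[] = {3.0,4.0};
   Mat         A;
   MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);
.ve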
5745 5746 You cannot later use MatSetValues() to change values in this matrix. 5747 5748 .keywords: matrix, aij, compressed row, sparse, parallel 5749 5750 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5751 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5752 @*/ 5753 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5754 { 5755 PetscErrorCode ierr; 5756 Mat_MPIAIJ *maij; 5757 5758 PetscFunctionBegin; 5759 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5760 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5761 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5762 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5763 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5764 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5765 maij = (Mat_MPIAIJ*) (*mat)->data; 5766 5767 maij->donotstash = PETSC_TRUE; 5768 (*mat)->preallocated = PETSC_TRUE; 5769 5770 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5771 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5772 5773 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5774 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5775 5776 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 5781 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5783 PetscFunctionReturn(0); 5784 } 5785 5786 /* 5787 Special version for direct calls from Fortran 5788 */ 5789 #include <petsc-private/fortranimpl.h> 5790 5791 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5792 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5793 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5794 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5795 #endif 5796 5797 /* Change these macros so can be used in void function */ 5798 #undef CHKERRQ 5799 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5800 #undef SETERRQ2 5801 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5802 #undef SETERRQ3 5803 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5804 #undef SETERRQ 5805 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5806 5807 EXTERN_C_BEGIN 5808 #undef __FUNCT__ 5809 #define __FUNCT__ "matsetvaluesmpiaij_" 5810 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5811 { 5812 Mat mat = *mmat; 5813 PetscInt m = *mm, n = *mn; 5814 InsertMode addv = *maddv; 5815 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5816 PetscScalar value; 5817 PetscErrorCode ierr; 5818 5819 MatCheckPreallocated(mat,1); 5820 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5821 5822 #if defined(PETSC_USE_DEBUG) 5823 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5824 #endif 5825 { 5826 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5827 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5828 PetscBool roworiented = aij->roworiented; 5829 5830 /* Some Variables required in the macro */ 5831 Mat A = aij->A; 5832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5833 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5834 MatScalar *aa = a->a; 5835 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5836 Mat B = aij->B; 5837 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5838 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5839 MatScalar *ba = b->a; 5840 5841 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5842 PetscInt nonew = a->nonew; 5843 MatScalar *ap1,*ap2; 5844 5845 PetscFunctionBegin; 5846 for (i=0; i<m; i++) { 5847 if (im[i] < 0) continue; 5848 #if defined(PETSC_USE_DEBUG) 5849 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5850 #endif 5851 if (im[i] >= rstart && im[i] < rend) { 5852 row = im[i] - rstart; 5853 lastcol1 = -1; 5854 rp1 = aj + ai[row]; 5855 ap1 = aa + ai[row]; 5856 rmax1 = aimax[row]; 5857 nrow1 = ailen[row]; 5858 low1 = 0; 5859 high1 = nrow1; 5860 lastcol2 = -1; 5861 rp2 = bj + bi[row]; 5862 ap2 = ba + bi[row]; 5863 rmax2 = bimax[row]; 5864 nrow2 = bilen[row]; 5865 low2 = 0; 5866 high2 = nrow2; 5867 5868 for (j=0; j<n; j++) { 5869 if (roworiented) value = v[i*n+j]; 5870 else value = v[i+j*m]; 5871 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5872 if (in[j] >= cstart && in[j] < cend) { 5873 col = in[j] - cstart; 5874 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5875 } else if (in[j] < 0) continue; 5876 #if defined(PETSC_USE_DEBUG) 5877 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5878 #endif 5879 else { 5880 if (mat->was_assembled) { 5881 if (!aij->colmap) { 5882 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5883 } 5884 #if defined(PETSC_USE_CTABLE) 5885 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5886 col--; 5887 #else 5888 col = aij->colmap[in[j]] - 1; 5889 #endif 5890 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5891 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5892 col = in[j]; 5893 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5894 B = aij->B; 5895 b = (Mat_SeqAIJ*)B->data; 5896 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5897 rp2 = bj + bi[row]; 5898 ap2 = ba + bi[row]; 5899 rmax2 = bimax[row]; 5900 nrow2 = bilen[row]; 5901 low2 = 0; 5902 high2 = nrow2; 5903 bm = aij->B->rmap->n; 5904 ba = b->a; 5905 } 5906 } else col = in[j]; 5907 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5908 } 5909 } 5910 } else if (!aij->donotstash) { 5911 if (roworiented) { 5912 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5913 } else { 5914 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5915 } 5916 } 5917 } 5918 } 5919 PetscFunctionReturnVoid(); 5920 } 5921 EXTERN_C_END 5922 5923
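/*
   Example usage of the AIJ interface implemented in this file - a minimal sketch, not part of the
   library source: assemble a 100x100 tridiagonal MPIAIJ matrix with MatCreateAIJ() and MatSetValues().
   Error checking (ierr/CHKERRQ) is omitted for brevity; the stencil and preallocation counts are
   illustrative only.

     Mat         A;
     PetscInt    Istart,Iend,row,col[3];
     PetscScalar v[3] = {-1.0,2.0,-1.0};

     MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,2,PETSC_NULL,&A);
     MatGetOwnershipRange(A,&Istart,&Iend);
     for (row=Istart; row<Iend; row++) {
       if (row == 0 || row == 99) {
         MatSetValue(A,row,row,2.0,INSERT_VALUES);
       } else {
         col[0] = row-1; col[1] = row; col[2] = row+1;
         MatSetValues(A,1,&row,3,col,v,INSERT_VALUES);
       }
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/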