#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
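/*
   Usage sketch for the manual pages above (illustrative, not part of this file's API):
   calling both preallocation routines lets the same code run with one process or many,
   since whichever routine does not match the communicator size is ignored. The nonzero
   estimates (5 per diagonal row, 2 per off-diagonal row) are placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);                used when size == 1
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);   used when size > 1
*/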
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
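/*
   Usage sketch for the routine above (illustrative): MatGetColumnNorms() fills a
   caller-provided array with one value per *global* column (the routine above
   Allreduces into it), so every process must pass an array of global length N.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/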
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
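/*
   Usage sketch for MatDistribute_MPIAIJ() above (illustrative): a square SeqAIJ
   matrix assembled on process 0 is spread across the communicator. gmat is only
   dereferenced on rank 0, so the other processes may pass PETSC_NULL, and m is
   the number of local rows each process should own.

     Mat gmat = PETSC_NULL,mat;
     if (!rank) { build the sequential matrix gmat here }
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);CHKERRQ(ierr);
*/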
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
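/*
   The two macros above hand-inline the same search-and-insert used for SeqAIJ
   matrices. In plain C the logic is roughly (a sketch; the reallocation and
   error branches are elided):

     if (col <= lastcol) low = 0; else high = nrow;   reuse the previous search window
     lastcol = col;
     while (high - low > 5) {                         binary search while the window is large
       t = (low + high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (i=low; i<high; i++) {                       finish with a short linear scan
       if (rp[i] > col) break;                        passed the spot: col must be inserted here
       if (rp[i] == col) {                            existing entry: update in place
         if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
         goto done;
       }
     }
     shift rp[i..nrow-1] and ap[i..nrow-1] up one slot,
     then rp[i] = col; ap[i] = value; and bump the stored row length
*/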
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
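/*
   A note on the routine above (illustrative): v[] must contain the entire local
   row ordered by global column number. With diagonal ownership range
   [cstart,cend) that layout is

      v = [ B entries with global column < cstart | all A (diagonal block) entries | B entries with global column >= cend ]

   The code measures the leading segment (l values), copies it into B, copies the
   diagonal-block run into A, and copies the tail back into B.
*/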
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
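/*
   Usage note for MatGetValues_MPIAIJ() above (illustrative): unlike MatSetValues(),
   retrieval is purely local, so callers may only ask for rows they own; columns may
   be anywhere in [0,N).

     PetscInt    row = rstart;    any row with rstart <= row < rend on this process
     PetscInt    col = 0;
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/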
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
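/*
   Usage sketch for the row-zeroing kernel below (illustrative): through the public
   interface one zeroes (possibly off-process) global rows, optionally placing diag
   on the diagonal and fixing up a right-hand side so that b[row] = diag*x[row].

     PetscInt rows[] = {0,7};                              global row numbers
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);   x and b may be PETSC_NULL
*/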
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix is square and
     the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called for each diagonal entry, which would allocate
     new memory and trigger many mallocs, slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
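/*
   The multiply kernels above and below all follow the same overlap pattern:
   start the ghost-value scatter, do the purely local product with the diagonal
   block while messages are in flight, finish the scatter, then apply the
   off-diagonal block to the ghost values. Schematically, for y = A x:

     VecScatterBegin(Mvctx,x,lvec,...);   start communication
     y  = A_diag * x_local;               overlap with local work
     VecScatterEnd(Mvctx,x,lvec,...);     finish communication
     y += B_off * lvec;                   consume the ghost values
*/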
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
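/*
   Usage sketch for MatIsTranspose_MPIAIJ() above (illustrative): it is reached
   through the generic interface, which compares A against the transpose of B
   entrywise to the given tolerance:

     PetscBool f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);
     if (f) { A equals the transpose of B to the tolerance }
*/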
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
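/*
   Usage sketch for the binary viewer path implemented below (illustrative):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/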
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the root process needs as much space as the largest process requires */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
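/*
   Usage sketch for the ASCII paths handled below (illustrative): the detailed
   per-process summary printed for PETSC_VIEWER_ASCII_INFO_DETAIL can be requested
   with, e.g.,

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/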
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1454 if (iascii) { 1455 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1456 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1457 MatInfo info; 1458 PetscInt *inodes; 1459 1460 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1461 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr); 1463 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1464 if (!inodes) { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } else { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } 1471 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1473 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1475 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1478 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1481 PetscInt inodecount,inodelimit,*inodes; 1482 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1483 if (inodes) { 1484 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1485 } else { 1486 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1487 } 1488 PetscFunctionReturn(0); 1489 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1490 PetscFunctionReturn(0); 1491 } 1492 } else if (isbinary) { 1493 if (size == 1) { 1494 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1495 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1496 } else { 1497 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } else if (isdraw) { 1501 PetscDraw draw; 1502 PetscBool isnull; 1503 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1504 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1505 } 1506 1507 if (size == 1) { 1508 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1509 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1510 } else { 1511 /* assemble the entire matrix onto first processor.
*/ 1512 Mat A; 1513 Mat_SeqAIJ *Aloc; 1514 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1515 MatScalar *a; 1516 1517 if (mat->rmap->N > 1024) { 1518 PetscBool flg = PETSC_FALSE; 1519 1520 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1521 if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1522 } 1523 1524 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1525 if (!rank) { 1526 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1527 } else { 1528 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1529 } 1530 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1531 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1532 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1533 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1534 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1535 1536 /* copy over the A part */ 1537 Aloc = (Mat_SeqAIJ*)aij->A->data; 1538 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1539 row = mat->rmap->rstart; 1540 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1541 for (i=0; i<m; i++) { 1542 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1543 row++; 1544 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1545 } 1546 aj = Aloc->j; 1547 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1548 1549 /* copy over the B part */ 1550 Aloc = (Mat_SeqAIJ*)aij->B->data; 1551 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1552 row = mat->rmap->rstart; 1553 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1554 ct = cols; 1555 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1556 for (i=0; i<m; i++) { 1557 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1558 row++; 1559 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1560 } 1561 ierr = PetscFree(ct);CHKERRQ(ierr); 1562 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1563 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1564 /* 1565 Everyone has to call to draw the matrix since the graphics waits are 1566 synchronized across all processors that share the PetscDraw object 1567 */ 1568 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1569 if (!rank) { 1570 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1571 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1572 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1573 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1574 } 1575 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1576 ierr = MatDestroy(&A);CHKERRQ(ierr); 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatView_MPIAIJ" 1583 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1584 { 1585 PetscErrorCode ierr; 1586 PetscBool iascii,isdraw,issocket,isbinary; 1587 1588 PetscFunctionBegin; 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1590 ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1591 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1593 if (iascii || isdraw || isbinary || issocket) { 1594 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1595 } 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatSOR_MPIAIJ" 1601 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1602 { 1603 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1604 PetscErrorCode ierr; 1605 Vec bb1 = 0; 1606 PetscBool hasop; 1607 1608 PetscFunctionBegin; 1609 if (flag == SOR_APPLY_UPPER) { 1610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1615 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1616 } 1617 1618 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1619 if (flag & SOR_ZERO_INITIAL_GUESS) { 1620 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1621 its--; 1622 } 1623 1624 while (its--) { 1625 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1627 1628 /* update rhs: bb1 = bb - B*x */ 1629 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1630 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1631 1632 /* local sweep */ 1633 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1634 } 1635 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1636 if (flag & SOR_ZERO_INITIAL_GUESS) { 1637 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1638 its--; 1639 } 1640 while (its--) { 1641 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* update rhs: bb1 = bb - B*x */ 1645 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1646 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1647 1648 /* local sweep */ 1649 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1650 } 1651 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1652 if (flag & SOR_ZERO_INITIAL_GUESS) { 1653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1654 its--; 1655 } 1656 while (its--) { 1657 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 1660 /* update rhs: bb1 = bb - B*x */ 1661 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1662 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1663 1664 /* local sweep */ 1665 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1666 } 1667 } else if (flag & SOR_EISENSTAT) { 1668 Vec xx1; 1669 1670 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1671 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1672 
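  /*
     Eisenstat's trick, a sketch of what the following lines do: the local
     backward sweep above produced xx; the scatter below brings the
     off-process values of xx into mat->lvec, then bb1 is assembled as
     bb + ((omega-2.0)/omega)*D*xx + B*lvec (using MatMultDiagonalBlock()
     when available, otherwise the cached diagonal mat->diag), and finally
     a local forward sweep into xx1 is added onto xx.
  */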
1673 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 if (!mat->diag) { 1676 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1677 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1678 } 1679 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1680 if (hasop) { 1681 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1682 } else { 1683 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1684 } 1685 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1686 1687 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1688 1689 /* local sweep */ 1690 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1691 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1692 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1693 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1694 1695 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatPermute_MPIAIJ" 1701 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1702 { 1703 Mat aA,aB,Aperm; 1704 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1705 PetscScalar *aa,*ba; 1706 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1707 PetscSF rowsf,sf; 1708 IS parcolp = PETSC_NULL; 1709 PetscBool done; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1714 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1715 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1716 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1717 1718 /* Invert row permutation to find out where my rows should go */ 1719 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1720 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1721 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1722 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1723 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1724 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1725 1726 /* Invert column permutation to find out where my columns should go */ 1727 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1728 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1729 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1730 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1731 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1732 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1733 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1734 1735 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1738 1739 /* Find out where my gcols should go */ 1740 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1741 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1742 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1743 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1744 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1745 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1746 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1747 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1748 1749 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1750 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1751 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1752 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1753 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1754 for (i=0; i<m; i++) { 1755 PetscInt row = rdest[i],rowner; 1756 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1757 for (j=ai[i]; j<ai[i+1]; j++) { 1758 PetscInt cowner,col = cdest[aj[j]]; 1759 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1760 if (rowner == cowner) dnnz[i]++; 1761 else onnz[i]++; 1762 } 1763 for (j=bi[i]; j<bi[i+1]; j++) { 1764 PetscInt cowner,col = gcdest[bj[j]]; 1765 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1766 if (rowner == cowner) dnnz[i]++; 1767 else onnz[i]++; 1768 } 1769 } 1770 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1771 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1772 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1773 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1774 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1775 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1776 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1777 1778 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1779 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1780 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1781 for (i=0; i<m; i++) { 1782 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1783 PetscInt rowlen; 1784 rowlen = ai[i+1] - ai[i]; 1785 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1786 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1787 rowlen = bi[i+1] - bi[i]; 1788 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1790 } 1791 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1792 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1794 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1795 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1796 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1797 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1798 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1799 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1800 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1801 *B = Aperm; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1807 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1808 { 1809 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1810 Mat A = mat->A,B = mat->B; 1811 PetscErrorCode ierr; 1812 PetscReal isend[5],irecv[5]; 1813 1814 PetscFunctionBegin; 1815 
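  /*
     The statistics below are accumulated over the diagonal (A) and
     off-diagonal (B) blocks into isend[], then either reported as local
     values (MAT_LOCAL) or reduced over the communicator with a max or a
     sum (MAT_GLOBAL_MAX / MAT_GLOBAL_SUM).
  */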
info->block_size = 1.0; 1816 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1817 1818 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1819 isend[3] = info->memory; isend[4] = info->mallocs; 1820 1821 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1822 1823 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1824 isend[3] += info->memory; isend[4] += info->mallocs; 1825 if (flag == MAT_LOCAL) { 1826 info->nz_used = isend[0]; 1827 info->nz_allocated = isend[1]; 1828 info->nz_unneeded = isend[2]; 1829 info->memory = isend[3]; 1830 info->mallocs = isend[4]; 1831 } else if (flag == MAT_GLOBAL_MAX) { 1832 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1833 1834 info->nz_used = irecv[0]; 1835 info->nz_allocated = irecv[1]; 1836 info->nz_unneeded = irecv[2]; 1837 info->memory = irecv[3]; 1838 info->mallocs = irecv[4]; 1839 } else if (flag == MAT_GLOBAL_SUM) { 1840 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1841 1842 info->nz_used = irecv[0]; 1843 info->nz_allocated = irecv[1]; 1844 info->nz_unneeded = irecv[2]; 1845 info->memory = irecv[3]; 1846 info->mallocs = irecv[4]; 1847 } 1848 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1849 info->fill_ratio_needed = 0; 1850 info->factor_mallocs = 0; 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatSetOption_MPIAIJ" 1856 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1857 { 1858 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 switch (op) { 1863 case MAT_NEW_NONZERO_LOCATIONS: 1864 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1865 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1866 case MAT_KEEP_NONZERO_PATTERN: 1867 case MAT_NEW_NONZERO_LOCATION_ERR: 1868 case MAT_USE_INODES: 1869 case MAT_IGNORE_ZERO_ENTRIES: 1870 MatCheckPreallocated(A,1); 1871 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1872 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1873 break; 1874 case MAT_ROW_ORIENTED: 1875 a->roworiented = flg; 1876 1877 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1878 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1879 break; 1880 case MAT_NEW_DIAGONALS: 1881 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1882 break; 1883 case MAT_IGNORE_OFF_PROC_ENTRIES: 1884 a->donotstash = flg; 1885 break; 1886 case MAT_SPD: 1887 A->spd_set = PETSC_TRUE; 1888 A->spd = flg; 1889 if (flg) { 1890 A->symmetric = PETSC_TRUE; 1891 A->structurally_symmetric = PETSC_TRUE; 1892 A->symmetric_set = PETSC_TRUE; 1893 A->structurally_symmetric_set = PETSC_TRUE; 1894 } 1895 break; 1896 case MAT_SYMMETRIC: 1897 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1898 break; 1899 case MAT_STRUCTURALLY_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_HERMITIAN: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_SYMMETRY_ETERNAL: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 default: 1909 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatGetRow_MPIAIJ" 1916 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1919 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1920 
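  /* vworkA/vworkB and cworkA/cworkB receive the values and column indices of
     the diagonal (A) and off-diagonal (B) pieces of the requested row; pvA,
     pvB, pcA and pcB are zeroed below when the caller does not request
     values or indices */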
PetscErrorCode ierr; 1921 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1922 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1923 PetscInt *cmap,*idx_p; 1924 1925 PetscFunctionBegin; 1926 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1927 mat->getrowactive = PETSC_TRUE; 1928 1929 if (!mat->rowvalues && (idx || v)) { 1930 /* 1931 allocate enough space to hold information from the longest row. 1932 */ 1933 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1934 PetscInt max = 1,tmp; 1935 for (i=0; i<matin->rmap->n; i++) { 1936 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1937 if (max < tmp) max = tmp; 1938 } 1939 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1940 } 1941 1942 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1943 lrow = row - rstart; 1944 1945 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1946 if (!v) {pvA = 0; pvB = 0;} 1947 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1948 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1949 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1950 nztot = nzA + nzB; 1951 1952 cmap = mat->garray; 1953 if (v || idx) { 1954 if (nztot) { 1955 /* Sort by increasing column numbers, assuming A and B already sorted */ 1956 PetscInt imark = -1; 1957 if (v) { 1958 *v = v_p = mat->rowvalues; 1959 for (i=0; i<nzB; i++) { 1960 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1961 else break; 1962 } 1963 imark = i; 1964 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1965 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1966 } 1967 if (idx) { 1968 *idx = idx_p = mat->rowindices; 1969 if (imark > -1) { 1970 for (i=0; i<imark; i++) { 1971 idx_p[i] = cmap[cworkB[i]]; 1972 } 1973 } else { 1974 for (i=0; i<nzB; i++) { 1975 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1976 else break; 1977 } 1978 imark = i; 1979 } 1980 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1981 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1982 } 1983 } else { 1984 if (idx) *idx = 0; 1985 if (v) *v = 0; 1986 } 1987 } 1988 *nz = nztot; 1989 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1990 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1996 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1997 { 1998 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1999 2000 PetscFunctionBegin; 2001 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2002 aij->getrowactive = PETSC_FALSE; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatNorm_MPIAIJ" 2008 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2012 PetscErrorCode ierr; 2013 PetscInt i,j,cstart = mat->cmap->rstart; 2014 PetscReal sum = 0.0; 2015 MatScalar *v; 2016 2017 PetscFunctionBegin; 2018 if (aij->size == 1) { 2019 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2020 } else { 2021 if (type == NORM_FROBENIUS) { 2022 v = amat->a; 2023 for (i=0; i<amat->nz; 
i++) { 2024 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2025 } 2026 v = bmat->a; 2027 for (i=0; i<bmat->nz; i++) { 2028 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2029 } 2030 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2031 *norm = PetscSqrtReal(*norm); 2032 } else if (type == NORM_1) { /* max column norm */ 2033 PetscReal *tmp,*tmp2; 2034 PetscInt *jj,*garray = aij->garray; 2035 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2036 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2037 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2038 *norm = 0.0; 2039 v = amat->a; jj = amat->j; 2040 for (j=0; j<amat->nz; j++) { 2041 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2042 } 2043 v = bmat->a; jj = bmat->j; 2044 for (j=0; j<bmat->nz; j++) { 2045 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2046 } 2047 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2048 for (j=0; j<mat->cmap->N; j++) { 2049 if (tmp2[j] > *norm) *norm = tmp2[j]; 2050 } 2051 ierr = PetscFree(tmp);CHKERRQ(ierr); 2052 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2053 } else if (type == NORM_INFINITY) { /* max row norm */ 2054 PetscReal ntemp = 0.0; 2055 for (j=0; j<aij->A->rmap->n; j++) { 2056 v = amat->a + amat->i[j]; 2057 sum = 0.0; 2058 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2059 sum += PetscAbsScalar(*v); v++; 2060 } 2061 v = bmat->a + bmat->i[j]; 2062 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2063 sum += PetscAbsScalar(*v); v++; 2064 } 2065 if (sum > ntemp) ntemp = sum; 2066 } 2067 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatTranspose_MPIAIJ" 2075 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2076 { 2077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2078 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2079 PetscErrorCode ierr; 2080 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2081 PetscInt cstart = A->cmap->rstart,ncol; 2082 Mat B; 2083 MatScalar *array; 2084 2085 PetscFunctionBegin; 2086 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2087 2088 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2089 ai = Aloc->i; aj = Aloc->j; 2090 bi = Bloc->i; bj = Bloc->j; 2091 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2092 PetscInt *d_nnz,*g_nnz,*o_nnz; 2093 PetscSFNode *oloc; 2094 PETSC_UNUSED PetscSF sf; 2095 2096 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2097 /* compute d_nnz for preallocation */ 2098 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2099 for (i=0; i<ai[ma]; i++) { 2100 d_nnz[aj[i]]++; 2101 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2102 } 2103 /* compute local off-diagonal contributions */ 2104 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2105 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2106 /* map those to global */ 2107 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2108 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2109 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2110 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2111 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2112 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2113 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2114 2115 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2116 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2117 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2118 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2119 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2120 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2121 } else { 2122 B = *matout; 2123 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2124 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2125 } 2126 2127 /* copy over the A part */ 2128 array = Aloc->a; 2129 row = A->rmap->rstart; 2130 for (i=0; i<ma; i++) { 2131 ncol = ai[i+1]-ai[i]; 2132 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2133 row++; 2134 array += ncol; aj += ncol; 2135 } 2136 aj = Aloc->j; 2137 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2138 2139 /* copy over the B part */ 2140 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2141 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2142 array = Bloc->a; 2143 row = A->rmap->rstart; 2144 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2145 cols_tmp = cols; 2146 for (i=0; i<mb; i++) { 2147 ncol = bi[i+1]-bi[i]; 2148 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2149 row++; 2150 array += ncol; cols_tmp += ncol; 2151 } 2152 ierr = PetscFree(cols);CHKERRQ(ierr); 2153 2154 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2157 *matout = B; 2158 } else { 2159 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2160 } 2161 PetscFunctionReturn(0); 2162 } 2163 2164 #undef __FUNCT__ 2165 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2166 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2167 { 2168 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2169 Mat a = aij->A,b = aij->B; 2170 PetscErrorCode ierr; 2171 PetscInt s1,s2,s3; 2172 2173 PetscFunctionBegin; 2174 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2175 if (rr) { 2176 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2177 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2178 /* Overlap communication with computation. 
*/ 2179 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 } 2181 if (ll) { 2182 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2183 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2184 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2185 } 2186 /* scale the diagonal block */ 2187 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2188 2189 if (rr) { 2190 /* Do a scatter end and then right scale the off-diagonal block */ 2191 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2192 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2193 } 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2199 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2200 { 2201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatEqual_MPIAIJ" 2211 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2212 { 2213 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2214 Mat a,b,c,d; 2215 PetscBool flg; 2216 PetscErrorCode ierr; 2217 2218 PetscFunctionBegin; 2219 a = matA->A; b = matA->B; 2220 c = matB->A; d = matB->B; 2221 2222 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2223 if (flg) { 2224 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2225 } 2226 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #undef __FUNCT__ 2231 #define __FUNCT__ "MatCopy_MPIAIJ" 2232 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2233 { 2234 PetscErrorCode ierr; 2235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2236 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2237 2238 PetscFunctionBegin; 2239 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2240 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2241 /* because of the column compression in the off-processor part of the matrix a->B, 2242 the number of columns in a->B and b->B may be different, hence we cannot call 2243 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2244 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2245 then copying the submatrices */ 2246 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2247 } else { 2248 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2249 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2250 } 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatSetUp_MPIAIJ" 2256 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2257 { 2258 PetscErrorCode ierr; 2259 2260 PetscFunctionBegin; 2261 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2267 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2268 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2269 { 2270 PetscInt i,m=Y->rmap->N; 2271 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2272 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2273 const PetscInt *xi = x->i,*yi = y->i; 2274 2275 PetscFunctionBegin; 2276 /* Set the number of nonzeros in the new matrix */ 2277 for (i=0; i<m; i++) { 2278 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2279 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2280 nnz[i] = 0; 2281 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2282 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2283 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2284 nnz[i]++; 2285 } 2286 for (; k<nzy; k++) nnz[i]++; 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatAXPY_MPIAIJ" 2293 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2294 { 2295 PetscErrorCode ierr; 2296 PetscInt i; 2297 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2298 PetscBLASInt bnz,one=1; 2299 Mat_SeqAIJ *x,*y; 2300 2301 PetscFunctionBegin; 2302 if (str == SAME_NONZERO_PATTERN) { 2303 PetscScalar alpha = a; 2304 x = (Mat_SeqAIJ*)xx->A->data; 2305 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2306 y = (Mat_SeqAIJ*)yy->A->data; 2307 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2308 x = (Mat_SeqAIJ*)xx->B->data; 2309 y = (Mat_SeqAIJ*)yy->B->data; 2310 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2311 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2312 } else if (str == SUBSET_NONZERO_PATTERN) { 2313 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2314 2315 x = (Mat_SeqAIJ*)xx->B->data; 2316 y = (Mat_SeqAIJ*)yy->B->data; 2317 if (y->xtoy && y->XtoY != xx->B) { 2318 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2319 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2320 } 2321 if (!y->xtoy) { /* get xtoy */ 2322 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2323 y->XtoY = xx->B; 2324 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2325 } 2326 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2327 } else { 2328 Mat B; 2329 PetscInt *nnz_d,*nnz_o; 2330 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2331 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2332 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2333 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2334 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2335 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2336 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2337 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2338 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2339 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2340 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2341 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2342 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2343 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2344 } 2345 PetscFunctionReturn(0); 2346 } 2347 2348 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatConjugate_MPIAIJ" 2352 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2353 { 2354 #if defined(PETSC_USE_COMPLEX) 2355 PetscErrorCode ierr; 2356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2357 2358 PetscFunctionBegin; 2359 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2360 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2361 #else 2362 PetscFunctionBegin; 2363 #endif 2364 PetscFunctionReturn(0); 2365 } 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatRealPart_MPIAIJ" 2369 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2370 { 2371 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2372 PetscErrorCode ierr; 2373 2374 PetscFunctionBegin; 2375 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2376 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2382 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2383 { 2384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2389 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #if defined(PETSC_HAVE_PBGL) 2394 2395 #include <boost/parallel/mpi/bsp_process_group.hpp> 2396 #include <boost/graph/distributed/ilu_default_graph.hpp> 2397 #include <boost/graph/distributed/ilu_0_block.hpp> 2398 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2399 #include <boost/graph/distributed/petsc/interface.hpp> 2400 #include <boost/multi_array.hpp> 2401 #include <boost/parallel/distributed_property_map.hpp> 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2405 /* 2406 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2407 */ 2408 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2409 { 2410 namespace petsc = boost::distributed::petsc; 2411 2412 namespace graph_dist = boost::graph::distributed; 2413 using boost::graph::distributed::ilu_default::process_group_type; 2414 using boost::graph::ilu_permuted; 2415 2416 PetscBool row_identity, col_identity; 2417 PetscContainer c; 2418 PetscInt m, n, M, N; 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2423 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2424 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2425 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2426 2427 process_group_type pg; 2428 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2429 lgraph_type
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2430 lgraph_type& level_graph = *lgraph_p; 2431 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2432 2433 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2434 ilu_permuted(level_graph); 2435 2436 /* put together the new matrix */ 2437 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2438 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2439 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2440 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2441 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2442 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2443 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2444 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2445 2446 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2447 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2448 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2449 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2455 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2456 { 2457 PetscFunctionBegin; 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatSolve_MPIAIJ" 2463 /* 2464 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2465 */ 2466 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2467 { 2468 namespace graph_dist = boost::graph::distributed; 2469 2470 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2471 lgraph_type *lgraph_p; 2472 PetscContainer c; 2473 PetscErrorCode ierr; 2474 2475 PetscFunctionBegin; 2476 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2477 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2478 ierr = VecCopy(b, x);CHKERRQ(ierr); 2479 2480 PetscScalar *array_x; 2481 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2482 PetscInt sx; 2483 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2484 2485 PetscScalar *array_b; 2486 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2487 PetscInt sb; 2488 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2489 2490 lgraph_type& level_graph = *lgraph_p; 2491 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2492 2493 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2494 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2495 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2496 2497 typedef boost::iterator_property_map<array_ref_type::iterator, 2498 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2499 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2500 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2501 2502 ilu_set_solve(*lgraph_p, vector_b, vector_x); /* restore the arrays obtained with VecGetArray() above */ ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 #endif 2506 2507 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2508 PetscInt nzlocal,nsends,nrecvs; 2509 PetscMPIInt *send_rank,*recv_rank; 2510 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2511 PetscScalar *sbuf_a,**rbuf_a; 2512 PetscErrorCode (*Destroy)(Mat); 2513 } Mat_Redundant; 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2517 PetscErrorCode
PetscContainerDestroy_MatRedundant(void *ptr) 2518 { 2519 PetscErrorCode ierr; 2520 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2521 PetscInt i; 2522 2523 PetscFunctionBegin; 2524 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2525 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2526 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2527 for (i=0; i<redund->nrecvs; i++) { 2528 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2529 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2530 } 2531 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2532 ierr = PetscFree(redund);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatDestroy_MatRedundant" 2538 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2539 { 2540 PetscErrorCode ierr; 2541 PetscContainer container; 2542 Mat_Redundant *redund=PETSC_NULL; 2543 2544 PetscFunctionBegin; 2545 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2546 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2547 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2548 2549 A->ops->destroy = redund->Destroy; 2550 2551 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2552 if (A->ops->destroy) { 2553 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2554 } 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #undef __FUNCT__ 2559 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2560 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2561 { 2562 PetscMPIInt rank,size; 2563 MPI_Comm comm=((PetscObject)mat)->comm; 2564 PetscErrorCode ierr; 2565 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0; 2566 PetscMPIInt *send_rank= PETSC_NULL,*recv_rank=PETSC_NULL; 2567 PetscInt *rowrange = mat->rmap->range; 2568 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2569 Mat A = aij->A,B=aij->B,C=*matredundant; 2570 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2571 PetscScalar *sbuf_a; 2572 PetscInt nzlocal=a->nz+b->nz; 2573 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2574 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2575 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2576 MatScalar *aworkA,*aworkB; 2577 PetscScalar *vals; 2578 PetscMPIInt tag1,tag2,tag3,imdex; 2579 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL; 2580 MPI_Request *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2581 MPI_Status recv_status,*send_status; 2582 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2583 PetscInt **rbuf_j=PETSC_NULL; 2584 PetscScalar **rbuf_a=PETSC_NULL; 2585 Mat_Redundant *redund =PETSC_NULL; 2586 PetscContainer container; 2587 2588 PetscFunctionBegin; 2589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2591 2592 if (reuse == MAT_REUSE_MATRIX) { 2593 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2594 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2595 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2596 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2597 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2598 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2599 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2600 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2601 2602 nsends = redund->nsends; 2603 nrecvs = redund->nrecvs; 2604 send_rank = redund->send_rank; 2605 recv_rank = redund->recv_rank; 2606 sbuf_nz = redund->sbuf_nz; 2607 rbuf_nz = redund->rbuf_nz; 2608 sbuf_j = redund->sbuf_j; 2609 sbuf_a = redund->sbuf_a; 2610 rbuf_j = redund->rbuf_j; 2611 rbuf_a = redund->rbuf_a; 2612 } 2613 2614 if (reuse == MAT_INITIAL_MATRIX) { 2615 PetscMPIInt subrank,subsize; 2616 PetscInt nleftover,np_subcomm; 2617 /* get the destination processors' id send_rank, nsends and nrecvs */ 2618 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2619 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2620 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2621 2622 np_subcomm = size/nsubcomm; 2623 nleftover = size - nsubcomm*np_subcomm; 2624 2625 nsends = 0; nrecvs = 0; 2626 for (i=0; i<size; i++) { /* i=rank*/ 2627 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2628 send_rank[nsends] = i; nsends++; 2629 recv_rank[nrecvs++] = i; 2630 } 2631 } 2632 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2633 i = size-nleftover-1; 2634 j = 0; 2635 while (j < nsubcomm - nleftover) { 2636 send_rank[nsends++] = i; 2637 i--; j++; 2638 } 2639 } 2640 2641 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2642 for (i=0; i<nleftover; i++) { 2643 recv_rank[nrecvs++] = size-nleftover+i; 2644 } 2645 } 2646 2647 /* allocate sbuf_j, sbuf_a */ 2648 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2649 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2650 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2651 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2652 2653 /* copy mat's local entries into the buffers */ 2654 if (reuse == MAT_INITIAL_MATRIX) { 2655 rownz_max = 0; 2656 rptr = sbuf_j; 2657 cols = sbuf_j + rend-rstart + 1; 2658 vals = sbuf_a; 2659 rptr[0] = 0; 2660 for (i=0; i<rend-rstart; i++) { 2661 row = i + rstart; 2662 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2663 ncols = nzA + nzB; 2664 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2665 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2666 /* load the column indices for this row into cols */ 2667 lwrite = 0; 2668 for (l=0; l<nzB; l++) { 2669 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2670 vals[lwrite] = aworkB[l]; 2671 cols[lwrite++] = ctmp; 2672 } 2673 } 2674 for (l=0; l<nzA; l++) { 2675 vals[lwrite] = aworkA[l]; 2676 cols[lwrite++] = cstart + cworkA[l]; 2677 } 2678 for (l=0; l<nzB; l++) { 2679 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2680 vals[lwrite] = aworkB[l]; 2681 cols[lwrite++] = ctmp; 2682 } 2683 } 2684 vals += ncols; 2685 cols += ncols; 2686 rptr[i+1] = rptr[i] + ncols; 2687 if (rownz_max < ncols) rownz_max = ncols; 2688 } 2689 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2690 } else { /* only copy matrix values into sbuf_a */ 2691 rptr = sbuf_j; 2692 vals = sbuf_a; 2693 rptr[0] = 0; 2694 for (i=0; 
i<rend-rstart; i++) { 2695 row = i + rstart; 2696 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2697 ncols = nzA + nzB; 2698 cworkB = b->j + b->i[i]; 2699 aworkA = a->a + a->i[i]; 2700 aworkB = b->a + b->i[i]; 2701 lwrite = 0; 2702 for (l=0; l<nzB; l++) { 2703 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2704 } 2705 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2708 } 2709 vals += ncols; 2710 rptr[i+1] = rptr[i] + ncols; 2711 } 2712 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2713 2714 /* send nzlocal to others, and recv others' nzlocal */ 2715 /*--------------------------------------------------*/ 2716 if (reuse == MAT_INITIAL_MATRIX) { 2717 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2718 2719 s_waits2 = s_waits3 + nsends; 2720 s_waits1 = s_waits2 + nsends; 2721 r_waits1 = s_waits1 + nsends; 2722 r_waits2 = r_waits1 + nrecvs; 2723 r_waits3 = r_waits2 + nrecvs; 2724 } else { 2725 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2726 2727 r_waits3 = s_waits3 + nsends; 2728 } 2729 2730 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2731 if (reuse == MAT_INITIAL_MATRIX) { 2732 /* get new tags to keep the communication clean */ 2733 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2734 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2735 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2736 2737 /* post receives of others' nzlocal */ 2738 for (i=0; i<nrecvs; i++) { 2739 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2740 } 2741 /* send nzlocal to others */ 2742 for (i=0; i<nsends; i++) { 2743 sbuf_nz[i] = nzlocal; 2744 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2745 } 2746 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2747 count = nrecvs; 2748 while (count) { 2749 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2750 2751 recv_rank[imdex] = recv_status.MPI_SOURCE; 2752 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2753 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2754 2755 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2756 2757 rbuf_nz[imdex] += i + 2; 2758 2759 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2760 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2761 count--; 2762 } 2763 /* wait on sends of nzlocal */ 2764 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2765 /* send mat->i,j to others, and recv from others */ 2766 /*------------------------------------------------*/ 2767 for (i=0; i<nsends; i++) { 2768 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2769 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2770 } 2771 /* wait on receives of mat->i,j */ 2772 /*------------------------------*/ 2773 count = nrecvs; 2774 while (count) { 2775 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2776 if (recv_rank[imdex] !=
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2777 count--; 2778 } 2779 /* wait on sends of mat->i,j */ 2780 /*---------------------------*/ 2781 if (nsends) { 2782 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2783 } 2784 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2785 2786 /* post receives, send and receive mat->a */ 2787 /*----------------------------------------*/ 2788 for (imdex=0; imdex<nrecvs; imdex++) { 2789 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2790 } 2791 for (i=0; i<nsends; i++) { 2792 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2793 } 2794 count = nrecvs; 2795 while (count) { 2796 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2797 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2798 count--; 2799 } 2800 if (nsends) { 2801 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2802 } 2803 2804 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2805 2806 /* create redundant matrix */ 2807 /*-------------------------*/ 2808 if (reuse == MAT_INITIAL_MATRIX) { 2809 /* compute rownz_max for preallocation */ 2810 for (imdex=0; imdex<nrecvs; imdex++) { 2811 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2812 rptr = rbuf_j[imdex]; 2813 for (i=0; i<j; i++) { 2814 ncols = rptr[i+1] - rptr[i]; 2815 if (rownz_max < ncols) rownz_max = ncols; 2816 } 2817 } 2818 2819 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2820 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2821 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2822 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2823 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2824 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2825 } else { 2826 C = *matredundant; 2827 } 2828 2829 /* insert local matrix entries */ 2830 rptr = sbuf_j; 2831 cols = sbuf_j + rend-rstart + 1; 2832 vals = sbuf_a; 2833 for (i=0; i<rend-rstart; i++) { 2834 row = i + rstart; 2835 ncols = rptr[i+1] - rptr[i]; 2836 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2837 vals += ncols; 2838 cols += ncols; 2839 } 2840 /* insert received matrix entries */ 2841 for (imdex=0; imdex<nrecvs; imdex++) { 2842 rstart = rowrange[recv_rank[imdex]]; 2843 rend = rowrange[recv_rank[imdex]+1]; 2844 rptr = rbuf_j[imdex]; 2845 cols = rbuf_j[imdex] + rend-rstart + 1; 2846 vals = rbuf_a[imdex]; 2847 for (i=0; i<rend-rstart; i++) { 2848 row = i + rstart; 2849 ncols = rptr[i+1] - rptr[i]; 2850 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2851 vals += ncols; 2852 cols += ncols; 2853 } 2854 } 2855 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2856 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2857 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2858 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap->N); 2859 if (reuse == MAT_INITIAL_MATRIX) { 2860 PetscContainer container; 2861 *matredundant = C; 2862 /* create a supporting struct and attach it to C for reuse */ 2863 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2864 ierr
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->cmap->bs     = matin->cmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset = 1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first each process builds a
  sequential AIJ copy of its piece, and then the final parallel matrix is formed by
  concatenating those local matrices. Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
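/*
    Usage sketch for MatMPIAIJSetPreallocationCSR() (added illustration, not part of the
  original source): assembling the 3x3 example from the documentation above on exactly two
  MPI processes. P0 owns rows 0-1 and P1 owns row 2; each process passes only the CSR arrays
  for its own rows. Error checking with CHKERRQ() is elided to keep the sketch short.

    Mat         B;
    PetscMPIInt rank;
    MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
    MatCreate(PETSC_COMM_WORLD,&B);
    MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);
    MatSetType(B,MATMPIAIJ);
    if (!rank) {   // P0: rows 0 and 1
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
    } else {       // P1: row 2
      PetscInt    i[] = {0,3},j[] = {0,1,2};
      PetscScalar v[] = {4.0,5.0,6.0};
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
    }
    // B is assembled on return, since the routine above inserts the values and
    // calls MatAssemblyBegin()/MatAssemblyEnd() itself.
*/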
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
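/*
    Usage sketch for MatMPIAIJSetPreallocation() (added illustration, not part of the
  original source): the constant per-row counts 5 and 2 and the global sizes M,N are
  placeholders; a real application derives these from its mesh or stencil.

    Mat B;
    MatCreate(PETSC_COMM_WORLD,&B);
    MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);   // M,N assumed defined by the caller
    MatSetType(B,MATMPIAIJ);
    MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);
    // or, with exact per-row counts (arrays of length = number of local rows):
    //   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
*/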
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i = {0,1,3}  [size = nrow+1 = 2+1]
        j = {0,0,2}  [size = nz = 3]
        v = {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i = {0,3}    [size = nrow+1 = 1+1]
        j = {0,1,2}  [size = nz = 3]
        v = {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
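/*
    Usage sketch for MatCreateMPIAIJWithArrays() (added illustration, not part of the
  original source): building the 3x3 example from the documentation above in a single
  call, run on exactly two MPI processes. The arrays are copied by the routine, so they
  may be stack-local as here; error checking is elided for brevity.

    Mat         A;
    PetscMPIInt rank;
    MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
    if (!rank) {   // P0: rows 0 and 1
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
    } else {       // P1: row 2
      PetscInt    i[] = {0,3},j[] = {0,1,2};
      PetscScalar v[] = {4.0,5.0,6.0};
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i,j,v,&A);
    }
*/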
#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
                       rather than 0.  Note that when calling MatSetValues(),
                       the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
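/*
    Usage sketch (added illustration, not part of the original source) of the
  MatCreate()/MatSetFromOptions()/MatXXXXSetPreallocation() paradigm recommended in the
  notes above; it works for any communicator size because each preallocation routine is
  ignored unless the matrix has the corresponding type. M and N are assumed defined by
  the caller.

    Mat A;
    MatCreate(PETSC_COMM_WORLD,&A);
    MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
    MatSetFromOptions(A);                                    // honors -mat_type etc.
    MatSeqAIJSetPreallocation(A,5,PETSC_NULL);               // used only if type is SEQAIJ
    MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);  // used only if type is MPIAIJ
*/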
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4368 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4369 for (i=0; i<a->B->cmap->n; i++) { 4370 colors[i] = coloring->colors[larray[i]]; 4371 } 4372 ierr = PetscFree(larray);CHKERRQ(ierr); 4373 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4374 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4375 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4376 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 #undef __FUNCT__ 4381 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4382 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4383 { 4384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4385 PetscErrorCode ierr; 4386 4387 PetscFunctionBegin; 4388 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4389 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4390 PetscFunctionReturn(0); 4391 } 4392 4393 #undef __FUNCT__ 4394 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4395 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4396 { 4397 PetscErrorCode ierr; 4398 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4399 PetscInt *indx; 4400 4401 PetscFunctionBegin; 4402 /* This routine will ONLY return MPIAIJ type matrix */ 4403 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4404 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4405 if (n == PETSC_DECIDE) { 4406 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4407 } 4408 /* Check sum(n) = N */ 4409 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4410 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4411 4412 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4413 rstart -= m; 4414 4415 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4416 for (i=0; i<m; i++) { 4417 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4418 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4419 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4420 } 4421 4422 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4423 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4424 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4425 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4426 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4427 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4433 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4434 { 4435 PetscErrorCode ierr; 4436 PetscInt m,N,i,rstart,nnz,Ii; 4437 PetscInt *indx; 4438 PetscScalar *values; 4439 4440 PetscFunctionBegin; 4441 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4442 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4443 for (i=0; i<m; i++) { 4444 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4445 Ii = i + rstart; 4446 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4447 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4448 } 4449 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4450 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4456 /*@ 4457 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4458 matrices from each processor 4459 4460 Collective on MPI_Comm 4461 4462 Input Parameters: 4463 + comm - the communicator the parallel matrix will live on 4464 . inmat - the input sequential matrix on each process 4465 . n - number of local columns (or PETSC_DECIDE) 4466 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4467 4468 Output Parameter: 4469 . outmat - the parallel matrix generated 4470 4471 Level: advanced 4472 4473 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4474 4475 @*/ 4476 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4477 { 4478 PetscErrorCode ierr; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4482 if (scall == MAT_INITIAL_MATRIX) { 4483 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4484 } 4485 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4486 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4487 PetscFunctionReturn(0); 4488 } 4489 4490 #undef __FUNCT__ 4491 #define __FUNCT__ "MatFileSplit" 4492 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4493 { 4494 PetscErrorCode ierr; 4495 PetscMPIInt rank; 4496 PetscInt m,N,i,rstart,nnz; 4497 size_t len; 4498 const PetscInt *indx; 4499 PetscViewer out; 4500 char *name; 4501 Mat B; 4502 const PetscScalar *values; 4503 4504 PetscFunctionBegin; 4505 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4506 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4507 /* Should this be the type of the diagonal block of A?
*/ 4508 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4509 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4510 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4511 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4512 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4513 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4514 for (i=0; i<m; i++) { 4515 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4517 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4518 } 4519 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 4522 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4523 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4524 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4525 sprintf(name,"%s.%d",outfile,rank); 4526 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4527 ierr = PetscFree(name);CHKERRQ(ierr); 4528 ierr = MatView(B,out);CHKERRQ(ierr); 4529 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4530 ierr = MatDestroy(&B);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4535 #undef __FUNCT__ 4536 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4537 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4538 { 4539 PetscErrorCode ierr; 4540 Mat_Merge_SeqsToMPI *merge; 4541 PetscContainer container; 4542 4543 PetscFunctionBegin; 4544 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4545 if (container) { 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4559 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4560 ierr = PetscFree(merge);CHKERRQ(ierr); 4561 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4562 } 4563 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4564 PetscFunctionReturn(0); 4565 } 4566 4567 #include <../src/mat/utils/freespace.h> 4568 #include <petscbt.h> 4569 4570 #undef __FUNCT__ 4571 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4572 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4573 { 4574 PetscErrorCode ierr; 4575 MPI_Comm comm=((PetscObject)mpimat)->comm; 4576 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4577 PetscMPIInt size,rank,taga,*len_s; 4578 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4579 PetscInt proc,m; 4580 PetscInt **buf_ri,**buf_rj; 4581 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4582 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4583 MPI_Request *s_waits,*r_waits; 4584 MPI_Status *status; 4585 MatScalar *aa=a->a; 4586 MatScalar **abuf_r,*ba_i; 4587 Mat_Merge_SeqsToMPI *merge; 4588 PetscContainer container; 4589 4590 PetscFunctionBegin; 4591 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4592 4593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4594 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4595 4596 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4597 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4598 4599 bi = merge->bi; 4600 bj = merge->bj; 4601 buf_ri = merge->buf_ri; 4602 buf_rj = merge->buf_rj; 4603 4604 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4605 owners = merge->rowmap->range; 4606 len_s = merge->len_s; 4607 4608 /* send and recv matrix values */ 4609 /*-----------------------------*/ 4610 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4611 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4612 4613 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4614 for (proc=0,k=0; proc<size; proc++) { 4615 if (!len_s[proc]) continue; 4616 i = owners[proc]; 4617 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4618 k++; 4619 } 4620 4621 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4622 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4623 ierr = PetscFree(status);CHKERRQ(ierr); 4624 4625 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4626 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4627 4628 /* insert mat values of mpimat */ 4629 /*----------------------------*/ 4630 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4631 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4632 4633 for (k=0; k<merge->nrecv; k++) { 4634 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4635 nrows = *(buf_ri_k[k]); 4636 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4637 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4638 } 4639 4640 /* set values of ba */ 4641 m = merge->rowmap->n; 4642 for (i=0; i<m; i++) { 4643 arow = owners[rank] + i; 4644 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4645 bnzi = bi[i+1] - bi[i]; 4646 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4647 4648 /* add local non-zero vals of this proc's seqmat into ba */ 4649 anzi = ai[arow+1] - ai[arow]; 4650 aj = a->j + ai[arow]; 4651 aa = a->a + ai[arow]; 4652 nextaj = 0; 4653 for (j=0; nextaj<anzi; j++) { 4654 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4655 ba_i[j] += aa[nextaj++]; 4656 } 4657 } 4658 4659 /* add received vals into ba */ 4660 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4661 /* i-th row */ 4662 if (i == *nextrow[k]) { 4663 anzi = *(nextai[k]+1) - *nextai[k]; 4664 aj = buf_rj[k] + *(nextai[k]); 4665 aa = abuf_r[k] + *(nextai[k]); 4666 nextaj = 0; 4667 for (j=0; nextaj<anzi; j++) { 4668 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4669 ba_i[j] += aa[nextaj++]; 4670 } 4671 } 4672 nextrow[k]++; nextai[k]++; 4673 } 4674 } 4675 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4676 } 4677 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4678 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4679 4680 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4681 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4682 ierr =
PetscFree(ba_i);CHKERRQ(ierr); 4683 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4684 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4685 PetscFunctionReturn(0); 4686 } 4687 4688 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4689 4690 #undef __FUNCT__ 4691 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4692 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4693 { 4694 PetscErrorCode ierr; 4695 Mat B_mpi; 4696 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4697 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4698 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4699 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4700 PetscInt len,proc,*dnz,*onz,bs,cbs; 4701 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4702 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4703 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4704 MPI_Status *status; 4705 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4706 PetscBT lnkbt; 4707 Mat_Merge_SeqsToMPI *merge; 4708 PetscContainer container; 4709 4710 PetscFunctionBegin; 4711 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4712 4713 /* make sure it is a PETSc comm */ 4714 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4715 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4716 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4717 4718 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4719 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4720 4721 /* determine row ownership */ 4722 /*---------------------------------------------------------*/ 4723 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4724 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4725 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4726 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4727 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4728 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4729 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4730 4731 m = merge->rowmap->n; 4732 owners = merge->rowmap->range; 4733 4734 /* determine the number of messages to send, their lengths */ 4735 /*---------------------------------------------------------*/ 4736 len_s = merge->len_s; 4737 4738 len = 0; /* length of buf_si[] */ 4739 merge->nsend = 0; 4740 for (proc=0; proc<size; proc++) { 4741 len_si[proc] = 0; 4742 if (proc == rank) { 4743 len_s[proc] = 0; 4744 } else { 4745 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4746 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4747 } 4748 if (len_s[proc]) { 4749 merge->nsend++; 4750 nrows = 0; 4751 for (i=owners[proc]; i<owners[proc+1]; i++) { 4752 if (ai[i+1] > ai[i]) nrows++; 4753 } 4754 len_si[proc] = 2*(nrows+1); 4755 len += len_si[proc]; 4756 } 4757 } 4758 4759 /* determine the number and length of messages to receive for ij-structure */ 4760 /*-------------------------------------------------------------------------*/ 4761 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4762 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4763 4764 /* post the Irecv of j-structure */ 4765 /*-------------------------------*/ 4766 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4767 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4768 4769 /* post the Isend of j-structure */ 4770 /*--------------------------------*/ 4771 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4772 4773 for (proc=0, k=0; proc<size; proc++) { 4774 if (!len_s[proc]) continue; 4775 i = owners[proc]; 4776 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4777 k++; 4778 } 4779 4780 /* receives and sends of j-structure are complete */ 4781 /*------------------------------------------------*/ 4782 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4783 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4784 4785 /* send and recv i-structure */ 4786 /*---------------------------*/ 4787 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4788 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4789 4790 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4791 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4792 for (proc=0,k=0; proc<size; proc++) { 4793 if (!len_s[proc]) continue; 4794 /* form outgoing message for i-structure: 4795 buf_si[0]: nrows to be sent 4796 [1:nrows]: row index (global) 4797 [nrows+1:2*nrows+1]: i-structure index 4798 */ 4799 /*-------------------------------------------*/ 4800 nrows = len_si[proc]/2 - 1; 4801 buf_si_i = buf_si + nrows+1; 4802 buf_si[0] = nrows; 4803 buf_si_i[0] = 0; 4804 nrows = 0; 4805 for (i=owners[proc]; i<owners[proc+1]; i++) { 4806 anzi = ai[i+1] - ai[i]; 4807 if (anzi) { 4808 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4809 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4810 nrows++; 4811 } 4812 } 4813 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4814 k++; 4815 buf_si += len_si[proc]; 4816 } 4817 4818 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4819 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4820 4821 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4822 for (i=0; i<merge->nrecv; i++) { 4823 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4824 } 4825 4826 ierr = PetscFree(len_si);CHKERRQ(ierr); 4827 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4828 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4829 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4830 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4831 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4832 ierr = PetscFree(status);CHKERRQ(ierr); 4833 4834 /* compute a local seq matrix in each processor */ 4835 /*----------------------------------------------*/ 4836 /* allocate bi array and free space for accumulating nonzero column info */ 4837 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4838 bi[0] = 0; 4839 4840 /* create and initialize a linked list */ 4841 nlnk = N+1; 4842 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4843 4844 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4845 len = ai[owners[rank+1]] - ai[owners[rank]]; 4846 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4847 4848 current_space = free_space; 4849 4850 /* determine symbolic 
info for each local row */ 4851 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4852 4853 for (k=0; k<merge->nrecv; k++) { 4854 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4855 nrows = *buf_ri_k[k]; 4856 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4857 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4858 } 4859 4860 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4861 len = 0; 4862 for (i=0; i<m; i++) { 4863 bnzi = 0; 4864 /* add local non-zero cols of this proc's seqmat into lnk */ 4865 arow = owners[rank] + i; 4866 anzi = ai[arow+1] - ai[arow]; 4867 aj = a->j + ai[arow]; 4868 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4869 bnzi += nlnk; 4870 /* add received col data into lnk */ 4871 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4872 if (i == *nextrow[k]) { /* i-th row */ 4873 anzi = *(nextai[k]+1) - *nextai[k]; 4874 aj = buf_rj[k] + *nextai[k]; 4875 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4876 bnzi += nlnk; 4877 nextrow[k]++; nextai[k]++; 4878 } 4879 } 4880 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4881 4882 /* if free space is not available, make more free space */ 4883 if (current_space->local_remaining<bnzi) { 4884 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4885 nspacedouble++; 4886 } 4887 /* copy data into free space, then initialize lnk */ 4888 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4889 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4890 4891 current_space->array += bnzi; 4892 current_space->local_used += bnzi; 4893 current_space->local_remaining -= bnzi; 4894 4895 bi[i+1] = bi[i] + bnzi; 4896 } 4897 4898 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4899 4900 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4901 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4902 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4903 4904 /* create symbolic parallel matrix B_mpi */ 4905 /*---------------------------------------*/ 4906 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4907 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4908 if (n==PETSC_DECIDE) { 4909 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4910 } else { 4911 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4912 } 4913 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4914 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4915 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4916 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4917 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4918 4919 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4920 B_mpi->assembled = PETSC_FALSE; 4921 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4922 merge->bi = bi; 4923 merge->bj = bj; 4924 merge->buf_ri = buf_ri; 4925 merge->buf_rj = buf_rj; 4926 merge->coi = PETSC_NULL; 4927 merge->coj = PETSC_NULL; 4928 merge->owners_co = PETSC_NULL; 4929 4930 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4931 4932 /* attach the supporting struct to B_mpi for reuse */ 4933 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4934 ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4935 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4936 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4937 *mpimat = B_mpi; 4938 4939 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4940 PetscFunctionReturn(0); 4941 } 4942 4943 #undef __FUNCT__ 4944 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4945 /*@C 4946 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4947 matrices from each processor 4948 4949 Collective on MPI_Comm 4950 4951 Input Parameters: 4952 + comm - the communicator the parallel matrix will live on 4953 . seqmat - the input sequential matrix on each process 4954 . m - number of local rows (or PETSC_DECIDE) 4955 . n - number of local columns (or PETSC_DECIDE) 4956 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4957 4958 Output Parameter: 4959 . mpimat - the parallel matrix generated 4960 4961 Level: advanced 4962 4963 Notes: 4964 The dimensions of the sequential matrix in each processor MUST be the same. 4965 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4966 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4967 @*/ 4968 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4969 { 4970 PetscErrorCode ierr; 4971 PetscMPIInt size; 4972 4973 PetscFunctionBegin; 4974 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4975 if (size == 1) { 4976 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4977 if (scall == MAT_INITIAL_MATRIX) { 4978 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4979 } else { 4980 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4981 } 4982 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4983 PetscFunctionReturn(0); 4984 } 4985 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4986 if (scall == MAT_INITIAL_MATRIX) { 4987 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4988 } 4989 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4990 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4991 PetscFunctionReturn(0); 4992 } 4993 4994 #undef __FUNCT__ 4995 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4996 /*@ 4997 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4998 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4999 with MatGetSize() 5000 5001 Not Collective 5002 5003 Input Parameters: 5004 + A - the matrix 5005 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5006 5007 Output Parameter: 5008 .
A_loc - the local sequential matrix generated 5009 5010 Level: developer 5011 5012 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5013 5014 @*/ 5015 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5016 { 5017 PetscErrorCode ierr; 5018 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5019 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5020 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5021 MatScalar *aa=a->a,*ba=b->a,*cam; 5022 PetscScalar *ca; 5023 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5024 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5025 PetscBool match; 5026 5027 PetscFunctionBegin; 5028 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5029 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5030 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5031 if (scall == MAT_INITIAL_MATRIX) { 5032 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5033 ci[0] = 0; 5034 for (i=0; i<am; i++) { 5035 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5036 } 5037 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5038 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5039 k = 0; 5040 for (i=0; i<am; i++) { 5041 ncols_o = bi[i+1] - bi[i]; 5042 ncols_d = ai[i+1] - ai[i]; 5043 /* off-diagonal portion of A */ 5044 for (jo=0; jo<ncols_o; jo++) { 5045 col = cmap[*bj]; 5046 if (col >= cstart) break; 5047 cj[k] = col; bj++; 5048 ca[k++] = *ba++; 5049 } 5050 /* diagonal portion of A */ 5051 for (j=0; j<ncols_d; j++) { 5052 cj[k] = cstart + *aj++; 5053 ca[k++] = *aa++; 5054 } 5055 /* off-diagonal portion of A */ 5056 for (j=jo; j<ncols_o; j++) { 5057 cj[k] = cmap[*bj++]; 5058 ca[k++] = *ba++; 5059 } 5060 } 5061 /* put together the new matrix */ 5062 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5063 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5064 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5065 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5066 mat->free_a = PETSC_TRUE; 5067 mat->free_ij = PETSC_TRUE; 5068 mat->nonew = 0; 5069 } else if (scall == MAT_REUSE_MATRIX) { 5070 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5071 ci = mat->i; cj = mat->j; cam = mat->a; 5072 for (i=0; i<am; i++) { 5073 /* off-diagonal portion of A */ 5074 ncols_o = bi[i+1] - bi[i]; 5075 for (jo=0; jo<ncols_o; jo++) { 5076 col = cmap[*bj]; 5077 if (col >= cstart) break; 5078 *cam++ = *ba++; bj++; 5079 } 5080 /* diagonal portion of A */ 5081 ncols_d = ai[i+1] - ai[i]; 5082 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5083 /* off-diagonal portion of A */ 5084 for (j=jo; j<ncols_o; j++) { 5085 *cam++ = *ba++; bj++; 5086 } 5087 } 5088 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5089 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 #undef __FUNCT__ 5094 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5095 /*@C 5096 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5097 5098 Not Collective 5099 5100 Input Parameters: 5101 + A - the matrix 5102 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5103 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5104 5105 Output Parameter: 5106 .
A_loc - the local sequential matrix generated 5107 5108 Level: developer 5109 5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5111 5112 @*/ 5113 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5114 { 5115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5116 PetscErrorCode ierr; 5117 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5118 IS isrowa,iscola; 5119 Mat *aloc; 5120 PetscBool match; 5121 5122 PetscFunctionBegin; 5123 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5124 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5125 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5126 if (!row) { 5127 start = A->rmap->rstart; end = A->rmap->rend; 5128 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5129 } else { 5130 isrowa = *row; 5131 } 5132 if (!col) { 5133 start = A->cmap->rstart; 5134 cmap = a->garray; 5135 nzA = a->A->cmap->n; 5136 nzB = a->B->cmap->n; 5137 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5138 ncols = 0; 5139 for (i=0; i<nzB; i++) { 5140 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5141 else break; 5142 } 5143 imark = i; 5144 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5145 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5146 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5147 } else { 5148 iscola = *col; 5149 } 5150 if (scall != MAT_INITIAL_MATRIX) { 5151 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5152 aloc[0] = *A_loc; 5153 } 5154 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5155 *A_loc = aloc[0]; 5156 ierr = PetscFree(aloc);CHKERRQ(ierr); 5157 if (!row) { 5158 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5159 } 5160 if (!col) { 5161 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5162 } 5163 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 #undef __FUNCT__ 5168 #define __FUNCT__ "MatGetBrowsOfAcols" 5169 /*@C 5170 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns of local A 5171 5172 Collective on Mat 5173 5174 Input Parameters: 5175 + A,B - the matrices in mpiaij format 5176 .
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5177 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5178 5179 Output Parameters: 5180 + rowb, colb - index sets of rows and columns of B to extract 5181 - B_seq - the sequential matrix generated 5182 5183 Level: developer 5184 5185 @*/ 5186 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5187 { 5188 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5189 PetscErrorCode ierr; 5190 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5191 IS isrowb,iscolb; 5192 Mat *bseq=PETSC_NULL; 5193 5194 PetscFunctionBegin; 5195 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5196 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5197 } 5198 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5199 5200 if (scall == MAT_INITIAL_MATRIX) { 5201 start = A->cmap->rstart; 5202 cmap = a->garray; 5203 nzA = a->A->cmap->n; 5204 nzB = a->B->cmap->n; 5205 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5206 ncols = 0; 5207 for (i=0; i<nzB; i++) { /* row < local row index */ 5208 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5209 else break; 5210 } 5211 imark = i; 5212 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5213 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5214 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5215 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5216 } else { 5217 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5218 isrowb = *rowb; iscolb = *colb; 5219 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5220 bseq[0] = *B_seq; 5221 } 5222 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5223 *B_seq = bseq[0]; 5224 ierr = PetscFree(bseq);CHKERRQ(ierr); 5225 if (!rowb) { 5226 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5227 } else { 5228 *rowb = isrowb; 5229 } 5230 if (!colb) { 5231 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5232 } else { 5233 *colb = iscolb; 5234 } 5235 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5236 PetscFunctionReturn(0); 5237 } 5238 5239 #undef __FUNCT__ 5240 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5241 /* 5242 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to the nonzero columns 5243 of the OFF-DIAGONAL portion of local A 5244 5245 Collective on Mat 5246 5247 Input Parameters: 5248 + A,B - the matrices in mpiaij format 5249 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5250 5251 Output Parameters: 5252 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5253 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5254 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5255 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5256 5257 Level: developer 5258 5259 */ 5260 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5261 { 5262 VecScatter_MPI_General *gen_to,*gen_from; 5263 PetscErrorCode ierr; 5264 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5265 Mat_SeqAIJ *b_oth; 5266 VecScatter ctx =a->Mvctx; 5267 MPI_Comm comm=((PetscObject)ctx)->comm; 5268 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5269 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5270 PetscScalar *rvalues,*svalues; 5271 MatScalar *b_otha,*bufa,*bufA; 5272 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5273 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5274 MPI_Status *sstatus,rstatus; 5275 PetscMPIInt jj; 5276 PetscInt *cols,sbs,rbs; 5277 PetscScalar *vals; 5278 5279 PetscFunctionBegin; 5280 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5281 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5282 } 5283 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5285 5286 gen_to = (VecScatter_MPI_General*)ctx->todata; 5287 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5288 rvalues = gen_from->values; /* holds the length of receiving row */ 5289 svalues = gen_to->values; /* holds the length of sending row */ 5290 nrecvs = gen_from->n; 5291 nsends = gen_to->n; 5292 5293 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5294 srow = gen_to->indices; /* local row index to be sent */ 5295 sstarts = gen_to->starts; 5296 sprocs = gen_to->procs; 5297 sstatus = gen_to->sstatus; 5298 sbs = gen_to->bs; 5299 rstarts = gen_from->starts; 5300 rprocs = gen_from->procs; 5301 rbs = gen_from->bs; 5302 5303 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5304 if (scall == MAT_INITIAL_MATRIX) { 5305 /* i-array */ 5306 /*---------*/ 5307 /* post receives */ 5308 for (i=0; i<nrecvs; i++) { 5309 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5310 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5311 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5312 } 5313 5314 /* pack the outgoing message */ 5315 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5316 5317 sstartsj[0] = 0; 5318 rstartsj[0] = 0; 5319 len = 0; /* total length of j or a array to be sent */ 5320 k = 0; 5321 for (i=0; i<nsends; i++) { 5322 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5323 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5324 for (j=0; j<nrows; j++) { 5325 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5326 for (l=0; l<sbs; l++) { 5327 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5328 5329 rowlen[j*sbs+l] = ncols; 5330 5331 len += ncols; 5332 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5333 } 5334 k++; 5335 } 5336 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5337 5338 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj 
and bufa */ 5339 } 5340 /* recvs and sends of i-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 5347 /* allocate buffers for sending j and a arrays */ 5348 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5349 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5350 5351 /* create i-array of B_oth */ 5352 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5353 5354 b_othi[0] = 0; 5355 len = 0; /* total length of j or a array to be received */ 5356 k = 0; 5357 for (i=0; i<nrecvs; i++) { 5358 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5359 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5360 for (j=0; j<nrows; j++) { 5361 b_othi[k+1] = b_othi[k] + rowlen[j]; 5362 len += rowlen[j]; k++; 5363 } 5364 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5365 } 5366 5367 /* allocate space for j and a arrays of B_oth */ 5368 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5369 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5370 5371 /* j-array */ 5372 /*---------*/ 5373 /* post receives of j-array */ 5374 for (i=0; i<nrecvs; i++) { 5375 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5376 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5377 } 5378 5379 /* pack the outgoing message j-array */ 5380 k = 0; 5381 for (i=0; i<nsends; i++) { 5382 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5383 bufJ = bufj+sstartsj[i]; 5384 for (j=0; j<nrows; j++) { 5385 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5386 for (ll=0; ll<sbs; ll++) { 5387 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5388 for (l=0; l<ncols; l++) { 5389 *bufJ++ = cols[l]; 5390 } 5391 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5392 } 5393 } 5394 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5395 } 5396 5397 /* recvs and sends of j-array are completed */ 5398 i = nrecvs; 5399 while (i--) { 5400 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5401 } 5402 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5403 } else if (scall == MAT_REUSE_MATRIX) { 5404 sstartsj = *startsj_s; 5405 rstartsj = *startsj_r; 5406 bufa = *bufa_ptr; 5407 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5408 b_otha = b_oth->a; 5409 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5410 5411 /* a-array */ 5412 /*---------*/ 5413 /* post receives of a-array */ 5414 for (i=0; i<nrecvs; i++) { 5415 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5416 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5417 } 5418 5419 /* pack the outgoing message a-array */ 5420 k = 0; 5421 for (i=0; i<nsends; i++) { 5422 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5423 bufA = bufa+sstartsj[i]; 5424 for (j=0; j<nrows; j++) { 5425 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5426 for (ll=0; ll<sbs; ll++) { 5427 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5428 for (l=0; l<ncols; l++) { 5429 *bufA++ = vals[l]; 5430 } 5431 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5432 } 5433 } 5434 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5435 } 5436 /* recvs and sends of a-array are completed */ 5437 i = nrecvs; 5438 while (i--) { 5439 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5440 } 5441 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5442 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5443 5444 if (scall == MAT_INITIAL_MATRIX) { 5445 /* put together the new matrix */ 5446 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5447 5448 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5449 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5450 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5451 b_oth->free_a = PETSC_TRUE; 5452 b_oth->free_ij = PETSC_TRUE; 5453 b_oth->nonew = 0; 5454 5455 ierr = PetscFree(bufj);CHKERRQ(ierr); 5456 if (!startsj_s || !bufa_ptr) { 5457 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5458 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5459 } else { 5460 *startsj_s = sstartsj; 5461 *startsj_r = rstartsj; 5462 *bufa_ptr = bufa; 5463 } 5464 } 5465 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 #undef __FUNCT__ 5470 #define __FUNCT__ "MatGetCommunicationStructs" 5471 /*@C 5472 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5473 5474 Not Collective 5475 5476 Input Parameters: 5477 . A - The matrix in mpiaij format 5478 5479 Output Parameter: 5480 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5481 . 
colmap - A map from global column index to local index into lvec 5482 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5483 5484 Level: developer 5485 5486 @*/ 5487 #if defined(PETSC_USE_CTABLE) 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5489 #else 5490 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5491 #endif 5492 { 5493 Mat_MPIAIJ *a; 5494 5495 PetscFunctionBegin; 5496 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5497 PetscValidPointer(lvec, 2); 5498 PetscValidPointer(colmap, 3); 5499 PetscValidPointer(multScatter, 4); 5500 a = (Mat_MPIAIJ*) A->data; 5501 if (lvec) *lvec = a->lvec; 5502 if (colmap) *colmap = a->colmap; 5503 if (multScatter) *multScatter = a->Mvctx; 5504 PetscFunctionReturn(0); 5505 } 5506 5507 EXTERN_C_BEGIN 5508 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5509 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5510 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5511 EXTERN_C_END 5512 5513 #undef __FUNCT__ 5514 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5515 /* 5516 Computes (B'*A')' since computing B*A directly is untenable 5517 5518 n p p 5519 ( ) ( ) ( ) 5520 m ( A ) * n ( B ) = m ( C ) 5521 ( ) ( ) ( ) 5522 5523 */ 5524 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5525 { 5526 PetscErrorCode ierr; 5527 Mat At,Bt,Ct; 5528 5529 PetscFunctionBegin; 5530 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5531 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5532 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5533 ierr = MatDestroy(&At);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5535 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5536 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5537 PetscFunctionReturn(0); 5538 } 5539 5540 #undef __FUNCT__ 5541 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5542 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5543 { 5544 PetscErrorCode ierr; 5545 PetscInt m=A->rmap->n,n=B->cmap->n; 5546 Mat Cmat; 5547 5548 PetscFunctionBegin; 5549 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5550 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5551 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5552 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5553 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5554 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5555 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5556 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5557 5558 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5559 5560 *C = Cmat; 5561 PetscFunctionReturn(0); 5562 } 5563 5564 /* ----------------------------------------------------------------*/ 5565 #undef __FUNCT__ 5566 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5567 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5568 { 5569 PetscErrorCode ierr; 5570 5571 PetscFunctionBegin; 5572 if (scall == MAT_INITIAL_MATRIX) { 5573 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5574 } 5575 ierr = 
MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5576 PetscFunctionReturn(0); 5577 } 5578 5579 EXTERN_C_BEGIN 5580 #if defined(PETSC_HAVE_MUMPS) 5581 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5582 #endif 5583 #if defined(PETSC_HAVE_PASTIX) 5584 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5585 #endif 5586 #if defined(PETSC_HAVE_SUPERLU_DIST) 5587 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5588 #endif 5589 #if defined(PETSC_HAVE_CLIQUE) 5590 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5591 #endif 5592 EXTERN_C_END 5593 5594 /*MC 5595 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5596 5597 Options Database Keys: 5598 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5599 5600 Level: beginner 5601 5602 .seealso: MatCreateAIJ() 5603 M*/ 5604 5605 EXTERN_C_BEGIN 5606 #undef __FUNCT__ 5607 #define __FUNCT__ "MatCreate_MPIAIJ" 5608 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5609 { 5610 Mat_MPIAIJ *b; 5611 PetscErrorCode ierr; 5612 PetscMPIInt size; 5613 5614 PetscFunctionBegin; 5615 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5616 5617 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5618 B->data = (void*)b; 5619 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5620 B->assembled = PETSC_FALSE; 5621 B->insertmode = NOT_SET_VALUES; 5622 b->size = size; 5623 5624 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5625 5626 /* build cache for off array entries formed */ 5627 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5628 5629 b->donotstash = PETSC_FALSE; 5630 b->colmap = 0; 5631 b->garray = 0; 5632 b->roworiented = PETSC_TRUE; 5633 5634 /* stuff used for matrix vector multiply */ 5635 b->lvec = PETSC_NULL; 5636 b->Mvctx = PETSC_NULL; 5637 5638 /* stuff for MatGetRow() */ 5639 b->rowindices = 0; 5640 b->rowvalues = 0; 5641 b->getrowactive = PETSC_FALSE; 5642 5643 /* flexible pointer used in CUSP/CUSPARSE classes */ 5644 b->spptr = PETSC_NULL; 5645 5646 #if defined(PETSC_HAVE_MUMPS) 5647 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5648 "MatGetFactor_aij_mumps", 5649 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5650 #endif 5651 #if defined(PETSC_HAVE_PASTIX) 5652 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5653 "MatGetFactor_mpiaij_pastix", 5654 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5655 #endif 5656 #if defined(PETSC_HAVE_SUPERLU_DIST) 5657 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5658 "MatGetFactor_mpiaij_superlu_dist", 5659 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5660 #endif 5661 #if defined(PETSC_HAVE_CLIQUE) 5662 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5663 "MatGetFactor_aij_clique", 5664 MatGetFactor_aij_clique);CHKERRQ(ierr); 5665 #endif 5666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5667 "MatStoreValues_MPIAIJ", 5668 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5670 "MatRetrieveValues_MPIAIJ", 5671 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5673 "MatGetDiagonalBlock_MPIAIJ", 5674 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 
5675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5676 "MatIsTranspose_MPIAIJ", 5677 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5679 "MatMPIAIJSetPreallocation_MPIAIJ", 5680 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5682 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5683 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5685 "MatDiagonalScaleLocal_MPIAIJ", 5686 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5688 "MatConvert_MPIAIJ_MPIAIJPERM", 5689 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5691 "MatConvert_MPIAIJ_MPIAIJCRL", 5692 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5694 "MatConvert_MPIAIJ_MPISBAIJ", 5695 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5697 "MatMatMult_MPIDense_MPIAIJ", 5698 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5699 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5700 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5701 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5703 "MatMatMultNumeric_MPIDense_MPIAIJ", 5704 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5705 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5706 PetscFunctionReturn(0); 5707 } 5708 EXTERN_C_END 5709 5710 #undef __FUNCT__ 5711 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5712 /*@ 5713 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5714 and "off-diagonal" part of the matrix in CSR format. 5715 5716 Collective on MPI_Comm 5717 5718 Input Parameters: 5719 + comm - MPI communicator 5720 . m - number of local rows (Cannot be PETSC_DECIDE) 5721 . n - This value should be the same as the local size used in creating the 5722 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5723 it calculated if N is given). For square matrices n is almost always m. 5724 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5725 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5726 . i - row indices for "diagonal" portion of matrix 5727 . j - column indices (local indices into the "diagonal" block) 5728 . a - matrix values 5729 . oi - row indices for "off-diagonal" portion of matrix 5730 . oj - column indices (global) 5731 - oa - matrix values 5732 5733 Output Parameter: 5734 . mat - the matrix 5735 5736 Level: advanced 5737 5738 Notes: 5739 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5740 must free the arrays once the matrix has been destroyed and not before. 5741 5742 The i and j indices are 0-based 5743 5744 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5745 5746 This sets local rows and cannot be used to set off-processor values. 
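   For illustration only, a minimal sketch (the 2-process layout, sizes, and values here are hypothetical and
   each process passes its own arrays): rank 0 of a run assembling a 4x4 matrix with two local rows and two
   local columns might call
.vb
     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {2};
     PetscScalar oa[] = {5.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   where j holds local column indices of the "diagonal" block, oj holds global column indices, and all six
   arrays must remain valid until the matrix is destroyed.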
5747 5748 You cannot later use MatSetValues() to change values in this matrix. 5749 5750 .keywords: matrix, aij, compressed row, sparse, parallel 5751 5752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5753 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5754 @*/ 5755 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5756 { 5757 PetscErrorCode ierr; 5758 Mat_MPIAIJ *maij; 5759 5760 PetscFunctionBegin; 5761 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5762 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5763 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5764 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5765 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5766 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5767 maij = (Mat_MPIAIJ*) (*mat)->data; 5768 5769 maij->donotstash = PETSC_TRUE; 5770 (*mat)->preallocated = PETSC_TRUE; 5771 5772 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5773 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5774 5775 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5776 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5777 5778 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5780 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5781 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 5783 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5784 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5785 PetscFunctionReturn(0); 5786 } 5787 5788 /* 5789 Special version for direct calls from Fortran 5790 */ 5791 #include <petsc-private/fortranimpl.h> 5792 5793 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5794 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5795 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5796 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5797 #endif 5798 5799 /* Change these macros so can be used in void function */ 5800 #undef CHKERRQ 5801 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5802 #undef SETERRQ2 5803 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5804 #undef SETERRQ3 5805 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5806 #undef SETERRQ 5807 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5808 5809 EXTERN_C_BEGIN 5810 #undef __FUNCT__ 5811 #define __FUNCT__ "matsetvaluesmpiaij_" 5812 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5813 { 5814 Mat mat = *mmat; 5815 PetscInt m = *mm, n = *mn; 5816 InsertMode addv = *maddv; 5817 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5818 PetscScalar value; 5819 PetscErrorCode ierr; 5820 5821 MatCheckPreallocated(mat,1); 5822 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5823 5824 #if defined(PETSC_USE_DEBUG) 5825 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5826 #endif 5827 { 5828 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5829 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5830 PetscBool roworiented = aij->roworiented; 5831 5832 /* Some Variables required in the macro */ 5833 Mat A = aij->A; 5834 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5835 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5836 MatScalar *aa = a->a; 5837 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5838 Mat B = aij->B; 5839 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5840 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5841 MatScalar *ba = b->a; 5842 5843 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5844 PetscInt nonew = a->nonew; 5845 MatScalar *ap1,*ap2; 5846 5847 PetscFunctionBegin; 5848 for (i=0; i<m; i++) { 5849 if (im[i] < 0) continue; 5850 #if defined(PETSC_USE_DEBUG) 5851 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5852 #endif 5853 if (im[i] >= rstart && im[i] < rend) { 5854 row = im[i] - rstart; 5855 lastcol1 = -1; 5856 rp1 = aj + ai[row]; 5857 ap1 = aa + ai[row]; 5858 rmax1 = aimax[row]; 5859 nrow1 = ailen[row]; 5860 low1 = 0; 5861 high1 = nrow1; 5862 lastcol2 = -1; 5863 rp2 = bj + bi[row]; 5864 ap2 = ba + bi[row]; 5865 rmax2 = bimax[row]; 5866 nrow2 = bilen[row]; 5867 low2 = 0; 5868 high2 = nrow2; 5869 5870 for (j=0; j<n; j++) { 5871 if (roworiented) value = v[i*n+j]; 5872 else value = v[i+j*m]; 5873 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5874 if (in[j] >= cstart && in[j] < cend) { 5875 col = in[j] - cstart; 5876 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5877 } else if (in[j] < 0) continue; 5878 #if defined(PETSC_USE_DEBUG) 5879 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5880 #endif 5881 else { 5882 if (mat->was_assembled) { 5883 if (!aij->colmap) { 5884 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5885 } 5886 #if defined(PETSC_USE_CTABLE) 5887 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5888 col--; 5889 #else 5890 col = aij->colmap[in[j]] - 1; 5891 #endif 5892 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5893 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5894 col = in[j]; 5895 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5896 B = aij->B; 5897 b = (Mat_SeqAIJ*)B->data; 5898 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5899 rp2 = bj + bi[row]; 5900 ap2 = ba + bi[row]; 5901 rmax2 = bimax[row]; 5902 nrow2 = bilen[row]; 5903 low2 = 0; 5904 high2 = nrow2; 5905 bm = aij->B->rmap->n; 5906 ba = b->a; 5907 } 5908 } else col = in[j]; 5909 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5910 } 5911 } 5912 } else if (!aij->donotstash) { 5913 if (roworiented) { 5914 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5915 } else { 5916 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5917 } 5918 } 5919 } 5920 } 5921 PetscFunctionReturnVoid(); 5922 } 5923 EXTERN_C_END 5924 5925
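/*
   Usage sketch (illustrative only; this trailing comment is not part of the PETSc API and the sizes and
   values below are made up): the merge routines above are typically driven by building a SeqAIJ matrix of
   the same global dimensions on every process, summing the contributions into one parallel MPIAIJ matrix,
   and then reusing the symbolic data cached in the "MatMergeSeqsToMPI" container on later calls with
   MAT_REUSE_MATRIX:

      Mat         seqmat,mpimat;
      PetscInt    row = 0,col = 0;
      PetscScalar v   = 1.0;

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,PETSC_NULL,&seqmat);CHKERRQ(ierr);
      ierr = MatSetValues(seqmat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

      (change the numerical values in seqmat, keeping its nonzero pattern)

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/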