#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL.
   The type also automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij =
(Mat_MPIAIJ*)M->data; 111 PetscErrorCode ierr; 112 PetscInt i,rstart,nrows,*rows; 113 114 PetscFunctionBegin; 115 *zrows = NULL; 116 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 117 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 118 for (i=0; i<nrows; i++) rows[i] += rstart; 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 125 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 129 PetscInt i,n,*garray = aij->garray; 130 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 131 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 132 PetscReal *work; 133 134 PetscFunctionBegin; 135 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 136 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 137 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
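    A typical call, sketched here only for orientation (it assumes gmat is a MATSEQAIJ
    matrix fully populated on rank 0 and m is this rank's share of the rows):

       Mat dmat;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);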
178 179 Only for square matrices 180 */ 181 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 182 { 183 PetscMPIInt rank,size; 184 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 185 PetscErrorCode ierr; 186 Mat mat; 187 Mat_SeqAIJ *gmata; 188 PetscMPIInt tag; 189 MPI_Status status; 190 PetscBool aij; 191 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 192 193 PetscFunctionBegin; 194 CHKMEMQ; 195 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 196 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 197 if (!rank) { 198 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 199 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 200 } 201 if (reuse == MAT_INITIAL_MATRIX) { 202 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 203 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 204 if (!rank) { 205 bses[0] = gmat->rmap->bs; 206 bses[1] = gmat->cmap->bs; 207 } 208 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 209 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 210 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 211 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 212 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 213 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 214 215 rowners[0] = 0; 216 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 299 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 300 301 *inmat = mat; 302 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 303 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 304 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 305 mat = *inmat; 306 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 307 if (!rank) { 308 /* send numerical values to other processes */ 309 gmata = (Mat_SeqAIJ*) gmat->data; 310 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 311 gmataa = gmata->a; 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 315 } 316 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 317 } else { 318 /* receive numerical values from process 0*/ 319 nz = Ad->nz + Ao->nz; 320 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 321 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 322 } 323 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 324 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 325 ad = Ad->a; 326 ao = Ao->a; 327 if (mat->rmap->n) { 328 i = 0; 329 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 for (i=1; i<mat->rmap->n; i++) { 333 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 334 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 335 } 336 i--; 337 if (mat->rmap->n) { 338 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 339 } 340 if (rank) { 341 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 342 } 343 } 344 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 CHKMEMQ; 347 PetscFunctionReturn(0); 348 } 349 350 /* 351 Local utility routine that creates a mapping from the global column 352 number to the local number in the off-diagonal part of the local 
353 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 354 a slightly higher hash table cost; without it it is not scalable (each processor 355 has an order N integer array but is fast to acess. 356 */ 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 359 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 360 { 361 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt n = aij->B->cmap->n,i; 364 365 PetscFunctionBegin; 366 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 367 #if defined(PETSC_USE_CTABLE) 368 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 369 for (i=0; i<n; i++) { 370 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 371 } 372 #else 373 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 374 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 375 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 377 #endif 378 PetscFunctionReturn(0); 379 } 380 381 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 382 { \ 383 if (col <= lastcol1) low1 = 0; \ 384 else high1 = nrow1; \ 385 lastcol1 = col;\ 386 while (high1-low1 > 5) { \ 387 t = (low1+high1)/2; \ 388 if (rp1[t] > col) high1 = t; \ 389 else low1 = t; \ 390 } \ 391 for (_i=low1; _i<high1; _i++) { \ 392 if (rp1[_i] > col) break; \ 393 if (rp1[_i] == col) { \ 394 if (addv == ADD_VALUES) ap1[_i] += value; \ 395 else ap1[_i] = value; \ 396 goto a_noinsert; \ 397 } \ 398 } \ 399 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 400 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 401 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 402 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 403 N = nrow1++ - 1; a->nz++; high1++; \ 404 /* shift up all the later entries in this row */ \ 405 for (ii=N; ii>=_i; ii--) { \ 406 rp1[ii+1] = rp1[ii]; \ 407 ap1[ii+1] = ap1[ii]; \ 408 } \ 409 rp1[_i] = col; \ 410 ap1[_i] = value; \ 411 a_noinsert: ; \ 412 ailen[row] = nrow1; \ 413 } 414 415 416 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 417 { \ 418 if (col <= lastcol2) low2 = 0; \ 419 else high2 = nrow2; \ 420 lastcol2 = col; \ 421 while (high2-low2 > 5) { \ 422 t = (low2+high2)/2; \ 423 if (rp2[t] > col) high2 = t; \ 424 else low2 = t; \ 425 } \ 426 for (_i=low2; _i<high2; _i++) { \ 427 if (rp2[_i] > col) break; \ 428 if (rp2[_i] == col) { \ 429 if (addv == ADD_VALUES) ap2[_i] += value; \ 430 else ap2[_i] = value; \ 431 goto b_noinsert; \ 432 } \ 433 } \ 434 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 435 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 437 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 438 N = nrow2++ - 1; b->nz++; high2++; \ 439 /* shift up all the later entries in this row */ \ 440 for (ii=N; ii>=_i; ii--) { \ 441 rp2[ii+1] = rp2[ii]; \ 442 ap2[ii+1] = ap2[ii]; \ 443 } \ 444 rp2[_i] = col; \ 445 ap2[_i] = value; \ 446 b_noinsert: ; \ 447 bilen[row] = nrow2; \ 448 } 449 450 
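/*
    Both macros above insert one (row,col,value) entry into the sorted column list of a
    local row of the diagonal (A) or off-diagonal (B) block.  Ignoring reallocation and
    the nonew/ignorezeroentries options, the search-and-insert logic is roughly the
    following sketch (illustrative only, not compiled):

       low = 0; high = nrow;
       while (high - low > 5) {              binary search while the window is wide
         t = (low + high)/2;
         if (rp[t] > col) high = t; else low = t;
       }
       for (i=low; i<high; i++) {            finish with a short linear scan
         if (rp[i] == col) { ap[i] = (addv == ADD_VALUES) ? ap[i] + value : value; done; }
         if (rp[i] > col) break;
       }
       shift rp[i..nrow-1] and ap[i..nrow-1] up one slot, then rp[i] = col; ap[i] = value;

    The lastcol/low/high variables persist across the columns of one MatSetValues() call,
    so monotonically increasing column indices do not restart the search from scratch.
*/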
#undef __FUNCT__ 451 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 452 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 453 { 454 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 456 PetscErrorCode ierr; 457 PetscInt l,*garray = mat->garray,diag; 458 459 PetscFunctionBegin; 460 /* code only works for square matrices A */ 461 462 /* find size of row to the left of the diagonal part */ 463 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 464 row = row - diag; 465 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 466 if (garray[b->j[b->i[row]+l]] > diag) break; 467 } 468 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* diagonal part */ 471 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 472 473 /* right of diagonal part */ 474 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatSetValues_MPIAIJ" 480 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 481 { 482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 483 PetscScalar value; 484 PetscErrorCode ierr; 485 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 486 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 487 PetscBool roworiented = aij->roworiented; 488 489 /* Some Variables required in the macro */ 490 Mat A = aij->A; 491 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 492 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 493 MatScalar *aa = a->a; 494 PetscBool ignorezeroentries = a->ignorezeroentries; 495 Mat B = aij->B; 496 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 497 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 498 MatScalar *ba = b->a; 499 500 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 501 PetscInt nonew; 502 MatScalar *ap1,*ap2; 503 504 PetscFunctionBegin; 505 if (v) PetscValidScalarPointer(v,6); 506 for (i=0; i<m; i++) { 507 if (im[i] < 0) continue; 508 #if defined(PETSC_USE_DEBUG) 509 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 510 #endif 511 if (im[i] >= rstart && im[i] < rend) { 512 row = im[i] - rstart; 513 lastcol1 = -1; 514 rp1 = aj + ai[row]; 515 ap1 = aa + ai[row]; 516 rmax1 = aimax[row]; 517 nrow1 = ailen[row]; 518 low1 = 0; 519 high1 = nrow1; 520 lastcol2 = -1; 521 rp2 = bj + bi[row]; 522 ap2 = ba + bi[row]; 523 rmax2 = bimax[row]; 524 nrow2 = bilen[row]; 525 low2 = 0; 526 high2 = nrow2; 527 528 for (j=0; j<n; j++) { 529 if (v) { 530 if (roworiented) value = v[i*n+j]; 531 else value = v[i+j*m]; 532 } else value = 0.0; 533 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 534 if (in[j] >= cstart && in[j] < cend) { 535 col = in[j] - cstart; 536 nonew = a->nonew; 537 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 538 } else if (in[j] < 0) continue; 539 #if defined(PETSC_USE_DEBUG) 540 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 541 #endif 542 else { 543 if (mat->was_assembled) { 544 if (!aij->colmap) { 545 ierr = 
MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 546 } 547 #if defined(PETSC_USE_CTABLE) 548 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 549 col--; 550 #else 551 col = aij->colmap[in[j]] - 1; 552 #endif 553 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 554 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 555 col = in[j]; 556 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 557 B = aij->B; 558 b = (Mat_SeqAIJ*)B->data; 559 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 560 rp2 = bj + bi[row]; 561 ap2 = ba + bi[row]; 562 rmax2 = bimax[row]; 563 nrow2 = bilen[row]; 564 low2 = 0; 565 high2 = nrow2; 566 bm = aij->B->rmap->n; 567 ba = b->a; 568 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 569 } else col = in[j]; 570 nonew = b->nonew; 571 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 572 } 573 } 574 } else { 575 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 576 if (!aij->donotstash) { 577 mat->assembled = PETSC_FALSE; 578 if (roworiented) { 579 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 580 } else { 581 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 582 } 583 } 584 } 585 } 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatGetValues_MPIAIJ" 591 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 592 { 593 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 594 PetscErrorCode ierr; 595 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 596 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 597 598 PetscFunctionBegin; 599 for (i=0; i<m; i++) { 600 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 601 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 602 if (idxm[i] >= rstart && idxm[i] < rend) { 603 row = idxm[i] - rstart; 604 for (j=0; j<n; j++) { 605 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 606 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 607 if (idxn[j] >= cstart && idxn[j] < cend) { 608 col = idxn[j] - cstart; 609 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 610 } else { 611 if (!aij->colmap) { 612 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 613 } 614 #if defined(PETSC_USE_CTABLE) 615 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 616 col--; 617 #else 618 col = aij->colmap[idxn[j]] - 1; 619 #endif 620 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 621 else { 622 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 623 } 624 } 625 } 626 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 635 
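/*
    Reached through the standard public assembly sequence, shown here only for orientation;
    values set for rows owned by another process sit in mat->stash until assembly:

       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   starts the stash scatter below
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     receives and inserts the stashed values
*/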
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 PetscErrorCode ierr; 639 PetscInt nstash,reallocs; 640 InsertMode addv; 641 642 PetscFunctionBegin; 643 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 644 645 /* make sure all processors are either in INSERTMODE or ADDMODE */ 646 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 647 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 648 mat->insertmode = addv; /* in case this processor had no cache */ 649 650 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 651 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 652 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 658 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 659 { 660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 661 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 662 PetscErrorCode ierr; 663 PetscMPIInt n; 664 PetscInt i,j,rstart,ncols,flg; 665 PetscInt *row,*col; 666 PetscBool other_disassembled; 667 PetscScalar *val; 668 InsertMode addv = mat->insertmode; 669 670 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 671 672 PetscFunctionBegin; 673 if (!aij->donotstash && !mat->nooffprocentries) { 674 while (1) { 675 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 676 if (!flg) break; 677 678 for (i=0; i<n; ) { 679 /* Now identify the consecutive vals belonging to the same row */ 680 for (j=i,rstart=row[j]; j<n; j++) { 681 if (row[j] != rstart) break; 682 } 683 if (j < n) ncols = j-i; 684 else ncols = n-i; 685 /* Now assemble all these values with a single function call */ 686 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 687 688 i = j; 689 } 690 } 691 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 692 } 693 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 695 696 /* determine if any processor has disassembled, if so we must 697 also disassemble ourselfs, in order that we may reassemble. 
*/ 698 /* 699 if nonzero structure of submatrix B cannot change then we know that 700 no processor disassembled thus we can skip this stuff 701 */ 702 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 703 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 704 if (mat->was_assembled && !other_disassembled) { 705 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 706 } 707 } 708 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 709 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 710 } 711 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 712 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 714 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 715 716 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 717 718 aij->rowvalues = 0; 719 720 /* used by MatAXPY() */ 721 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 722 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 723 724 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 725 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 747 PetscErrorCode ierr; 748 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 749 PetscInt i,*owners = A->rmap->range; 750 PetscInt *nprocs,j,idx,nsends,row; 751 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 752 PetscInt *rvalues,count,base,slen,*source; 753 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 754 MPI_Comm comm; 755 MPI_Request *send_waits,*recv_waits; 756 MPI_Status recv_status,*send_status; 757 const PetscScalar *xx; 758 PetscScalar *bb; 759 #if defined(PETSC_DEBUG) 760 PetscBool found = PETSC_FALSE; 761 #endif 762 763 PetscFunctionBegin; 764 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 765 /* first count number of contributors to each processor */ 766 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 767 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 768 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 769 j = 0; 770 for (i=0; i<N; i++) { 771 if (lastidx > (idx = rows[i])) j = 0; 772 lastidx = idx; 773 for (; j<size; j++) { 774 if (idx >= owners[j] && idx < owners[j+1]) { 775 nprocs[2*j]++; 776 nprocs[2*j+1] = 1; 777 owner[i] = j; 778 #if defined(PETSC_DEBUG) 779 found = PETSC_TRUE; 780 #endif 781 break; 782 } 783 } 784 #if defined(PETSC_DEBUG) 785 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 786 found = PETSC_FALSE; 787 #endif 788 } 789 nsends = 0; 790 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 791 792 if (A->nooffproczerorows) { 793 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off 
process zero row"); 794 nrecvs = nsends; 795 nmax = N; 796 } else { 797 /* inform other processors of number of messages and max length*/ 798 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 799 } 800 801 /* post receives: */ 802 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 803 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 804 for (i=0; i<nrecvs; i++) { 805 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 806 } 807 808 /* do sends: 809 1) starts[i] gives the starting index in svalues for stuff going to 810 the ith processor 811 */ 812 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 813 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 814 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 815 816 starts[0] = 0; 817 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 818 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 819 820 starts[0] = 0; 821 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 822 count = 0; 823 for (i=0; i<size; i++) { 824 if (nprocs[2*i+1]) { 825 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 826 } 827 } 828 ierr = PetscFree(starts);CHKERRQ(ierr); 829 830 base = owners[rank]; 831 832 /* wait on receives */ 833 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 834 count = nrecvs; slen = 0; 835 while (count) { 836 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 837 /* unpack receives into our local space */ 838 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 839 840 source[imdex] = recv_status.MPI_SOURCE; 841 lens[imdex] = n; 842 slen += n; 843 count--; 844 } 845 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 846 847 /* move the data into the send scatter */ 848 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 849 count = 0; 850 for (i=0; i<nrecvs; i++) { 851 values = rvalues + i*nmax; 852 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 853 } 854 ierr = PetscFree(rvalues);CHKERRQ(ierr); 855 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 856 ierr = PetscFree(owner);CHKERRQ(ierr); 857 ierr = PetscFree(nprocs);CHKERRQ(ierr); 858 859 /* fix right hand side if needed */ 860 if (x && b) { 861 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 862 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 863 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 864 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 865 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 866 } 867 /* 868 Zero the required rows. If the "diagonal block" of the matrix 869 is square and the user wishes to set the diagonal we use separate 870 code so that MatSetValues() is not called for each diagonal allocating 871 new memory, thus calling lots of mallocs and slowing things down. 
872 873 */ 874 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 875 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 876 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 877 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 878 } else if (diag != 0.0) { 879 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 880 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 881 for (i = 0; i < slen; i++) { 882 row = lrows[i] + rstart; 883 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 884 } 885 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 886 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 } else { 888 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(lrows);CHKERRQ(ierr); 891 892 /* wait on sends */ 893 if (nsends) { 894 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 895 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 896 ierr = PetscFree(send_status);CHKERRQ(ierr); 897 } 898 ierr = PetscFree(send_waits);CHKERRQ(ierr); 899 ierr = PetscFree(svalues);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 905 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 906 { 907 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 910 PetscInt i,*owners = A->rmap->range; 911 PetscInt *nprocs,j,idx,nsends; 912 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 913 PetscInt *rvalues,count,base,slen,*source; 914 PetscInt *lens,*lrows,*values,m; 915 MPI_Comm comm; 916 MPI_Request *send_waits,*recv_waits; 917 MPI_Status recv_status,*send_status; 918 const PetscScalar *xx; 919 PetscScalar *bb,*mask; 920 Vec xmask,lmask; 921 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 922 const PetscInt *aj, *ii,*ridx; 923 PetscScalar *aa; 924 #if defined(PETSC_DEBUG) 925 PetscBool found = PETSC_FALSE; 926 #endif 927 928 PetscFunctionBegin; 929 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 930 /* first count number of contributors to each processor */ 931 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 932 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 934 j = 0; 935 for (i=0; i<N; i++) { 936 if (lastidx > (idx = rows[i])) j = 0; 937 lastidx = idx; 938 for (; j<size; j++) { 939 if (idx >= owners[j] && idx < owners[j+1]) { 940 nprocs[2*j]++; 941 nprocs[2*j+1] = 1; 942 owner[i] = j; 943 #if defined(PETSC_DEBUG) 944 found = PETSC_TRUE; 945 #endif 946 break; 947 } 948 } 949 #if defined(PETSC_DEBUG) 950 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 951 found = PETSC_FALSE; 952 #endif 953 } 954 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 955 956 /* inform other processors of number of messages and max length*/ 957 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 958 959 /* post receives: */ 960 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 961 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 962 for (i=0; i<nrecvs; i++) { 963 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 964 } 965 966 /* do sends: 967 1) starts[i] gives the starting index in svalues for stuff going to 968 the ith processor 969 */ 970 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 971 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 972 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 973 974 starts[0] = 0; 975 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 976 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 977 978 starts[0] = 0; 979 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 980 count = 0; 981 for (i=0; i<size; i++) { 982 if (nprocs[2*i+1]) { 983 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 984 } 985 } 986 ierr = PetscFree(starts);CHKERRQ(ierr); 987 988 base = owners[rank]; 989 990 /* wait on receives */ 991 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 992 count = nrecvs; slen = 0; 993 while (count) { 994 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 995 /* unpack receives into our local space */ 996 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 997 998 source[imdex] = recv_status.MPI_SOURCE; 999 lens[imdex] = n; 1000 slen += n; 1001 count--; 1002 } 1003 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1004 1005 /* move the data into the send scatter */ 1006 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1007 count = 0; 1008 for (i=0; i<nrecvs; i++) { 1009 values = rvalues + i*nmax; 1010 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1011 } 1012 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1013 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1014 ierr = PetscFree(owner);CHKERRQ(ierr); 1015 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1016 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1017 1018 /* zero diagonal part of matrix */ 1019 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1020 1021 /* handle off diagonal part of matrix */ 1022 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1023 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1024 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1025 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1026 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1027 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1030 if (x) { 1031 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1034 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1035 } 1036 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1037 1038 /* remove zeroed rows of off diagonal matrix */ 1039 ii = aij->i; 1040 for (i=0; i<slen; i++) { 1041 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1042 } 1043 1044 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1045 if (aij->compressedrow.use) { 1046 m = aij->compressedrow.nrows; 1047 ii = aij->compressedrow.i; 
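    /* In compressed row storage only the rows of B that contain nonzeros are stored:
       row k of the compressed structure is true local row ridx[k], and its entries lie in
       positions ii[k] through ii[k+1]-1.  The loop below visits each such entry and, when
       its column is flagged in mask, subtracts its contribution from the right hand side
       (if b was provided) and zeros the matrix entry. */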
1048 ridx = aij->compressedrow.rindex; 1049 for (i=0; i<m; i++) { 1050 n = ii[i+1] - ii[i]; 1051 aj = aij->j + ii[i]; 1052 aa = aij->a + ii[i]; 1053 1054 for (j=0; j<n; j++) { 1055 if (PetscAbsScalar(mask[*aj])) { 1056 if (b) bb[*ridx] -= *aa*xx[*aj]; 1057 *aa = 0.0; 1058 } 1059 aa++; 1060 aj++; 1061 } 1062 ridx++; 1063 } 1064 } else { /* do not use compressed row format */ 1065 m = l->B->rmap->n; 1066 for (i=0; i<m; i++) { 1067 n = ii[i+1] - ii[i]; 1068 aj = aij->j + ii[i]; 1069 aa = aij->a + ii[i]; 1070 for (j=0; j<n; j++) { 1071 if (PetscAbsScalar(mask[*aj])) { 1072 if (b) bb[i] -= *aa*xx[*aj]; 1073 *aa = 0.0; 1074 } 1075 aa++; 1076 aj++; 1077 } 1078 } 1079 } 1080 if (x) { 1081 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1082 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1083 } 1084 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1085 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1086 ierr = PetscFree(lrows);CHKERRQ(ierr); 1087 1088 /* wait on sends */ 1089 if (nsends) { 1090 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1091 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1092 ierr = PetscFree(send_status);CHKERRQ(ierr); 1093 } 1094 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1095 ierr = PetscFree(svalues);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMult_MPIAIJ" 1101 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 PetscInt nt; 1106 1107 PetscFunctionBegin; 1108 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1109 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1110 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1111 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1119 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNCT__ 1130 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1139 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 PetscBool merged; 1151 1152 PetscFunctionBegin; 1153 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1154 /* do nondiagonal part */ 1155 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1156 if (!merged) { 1157 /* send it on its way */ 1158 
ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 /* do local part */ 1160 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1161 /* receive remote parts: note this assumes the values are not actually */ 1162 /* added in yy until the next line, */ 1163 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 } else { 1165 /* do local part */ 1166 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1167 /* send it on its way */ 1168 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 /* values actually were received in the Begin() but we need to call this nop */ 1170 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 EXTERN_C_BEGIN 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 EXTERN_C_END 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1221 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1224 PetscErrorCode ierr; 1225 1226 PetscFunctionBegin; 1227 /* do nondiagonal part */ 1228 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1229 /* send it on its way */ 1230 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 /* do local part */ 1232 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1233 /* receive remote parts */ 1234 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 /* 1239 This only works 
correctly for square matrices where the subblock A->A is the 1240 diagonal block 1241 */ 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1244 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1245 { 1246 PetscErrorCode ierr; 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 1249 PetscFunctionBegin; 1250 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1251 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1252 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatScale_MPIAIJ" 1258 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1259 { 1260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1261 PetscErrorCode ierr; 1262 1263 PetscFunctionBegin; 1264 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1265 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIAIJ" 1271 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1272 { 1273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1294 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1295 1296 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 PetscErrorCode ierr; 1316 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1317 int fd; 1318 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1319 PetscInt 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1320 PetscScalar *column_values; 1321 PetscInt message_count,flowcontrolcount; 1322 FILE *file; 1323 1324 PetscFunctionBegin; 1325 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1326 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1327 nz = A->nz + B->nz; 1328 if (!rank) { 1329 header[0] = MAT_FILE_CLASSID; 1330 header[1] = mat->rmap->N; 1331 header[2] = mat->cmap->N; 1332 1333 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1335 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 /* get largest number of rows any processor has */ 1337 rlen = mat->rmap->n; 1338 range = mat->rmap->range; 1339 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1340 } else { 1341 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1342 rlen = mat->rmap->n; 1343 } 1344 1345 /* load up the local row counts */ 1346 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1347 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1348 1349 /* store the row lengths to the file */ 1350 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1353 for (i=1; i<size; i++) { 1354 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1355 rlen = range[i+1] - range[i]; 1356 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1358 } 1359 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1360 } else { 1361 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1362 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1363 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1364 } 1365 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1366 1367 /* load up the local column indices */ 1368 nzmax = nz; /* th processor needs space a largest processor needs */ 1369 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1370 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1371 cnt = 0; 1372 for (i=0; i<mat->rmap->n; i++) { 1373 for (j=B->i[i]; j<B->i[i+1]; j++) { 1374 if ((col = garray[B->j[j]]) > cstart) break; 1375 column_indices[cnt++] = col; 1376 } 1377 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1378 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1379 } 1380 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1381 1382 /* store the column indices to the file */ 1383 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1384 if (!rank) { 1385 MPI_Status status; 1386 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1387 for (i=1; i<size; i++) { 1388 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1389 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1390 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1391 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1393 } 1394 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1395 } else { 1396 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1397 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1400 } 1401 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1402 1403 /* load up the local column values */ 1404 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1405 cnt = 0; 1406 for (i=0; i<mat->rmap->n; i++) { 1407 for (j=B->i[i]; j<B->i[i+1]; j++) { 1408 if (garray[B->j[j]] > cstart) break; 1409 column_values[cnt++] = B->a[j]; 1410 } 1411 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1412 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1413 } 1414 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1415 1416 /* store the column values to the file */ 1417 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1418 if (!rank) { 1419 MPI_Status status; 1420 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1421 for (i=1; i<size; i++) { 1422 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1423 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1424 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1425 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1426 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1427 } 1428 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1429 } else { 1430 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1431 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1433 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1434 } 1435 ierr = PetscFree(column_values);CHKERRQ(ierr); 1436 1437 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1438 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 
PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. 
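   Note (explanatory, behaviour unchanged): this gathers a copy of the whole matrix onto
   process 0, so it is intended only for small problems; the -mat_ascii_output_large check
   below refuses matrices with more than 1024 rows unless that option is set.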
*/ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 
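   /* Explanatory note (behaviour unchanged): the backward sweep above produces an initial x
      from the local diagonal block only; the scatter below brings in the off-process entries
      of x so that the modified right-hand side bb1 = bb + ((omega-2)/omega)*D*x + B*lvec can
      be formed before the local forward sweep that completes Eisenstat's trick. */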
1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 
info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = 
amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. 
*/ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type 
*lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2511 PetscInt nzlocal,nsends,nrecvs; 2512 PetscMPIInt *send_rank,*recv_rank; 2513 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2514 PetscScalar *sbuf_a,**rbuf_a; 2515 PetscErrorCode (*Destroy)(Mat); 2516 } Mat_Redundant; 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2520 
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521 {
2522 PetscErrorCode ierr;
2523 Mat_Redundant *redund=(Mat_Redundant*)ptr;
2524 PetscInt i;
2525
2526 PetscFunctionBegin;
2527 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2528 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2529 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2530 for (i=0; i<redund->nrecvs; i++) {
2531 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2532 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2533 }
2534 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2535 ierr = PetscFree(redund);CHKERRQ(ierr);
2536 PetscFunctionReturn(0);
2537 }
2538
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatDestroy_MatRedundant"
2541 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542 {
2543 PetscErrorCode ierr;
2544 PetscContainer container;
2545 Mat_Redundant *redund=NULL;
2546
2547 PetscFunctionBegin;
2548 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2549 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2550 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2551
2552 A->ops->destroy = redund->Destroy;
2553
2554 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2555 if (A->ops->destroy) {
2556 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2557 }
2558 PetscFunctionReturn(0);
2559 }
2560
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2564 {
2565 PetscMPIInt rank,size;
2566 MPI_Comm comm;
2567 PetscErrorCode ierr;
2568 PetscInt nsends = 0,nrecvs=0,i,rownz_max=0;
2569 PetscMPIInt *send_rank= NULL,*recv_rank=NULL;
2570 PetscInt *rowrange = mat->rmap->range;
2571 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2572 Mat A = aij->A,B=aij->B,C=*matredundant;
2573 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574 PetscScalar *sbuf_a;
2575 PetscInt nzlocal=a->nz+b->nz;
2576 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2578 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579 MatScalar *aworkA,*aworkB;
2580 PetscScalar *vals;
2581 PetscMPIInt tag1,tag2,tag3,imdex;
2582 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584 MPI_Status recv_status,*send_status;
2585 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586 PetscInt **rbuf_j=NULL;
2587 PetscScalar **rbuf_a=NULL;
2588 Mat_Redundant *redund =NULL;
2589 PetscContainer container;
2590
2591 PetscFunctionBegin;
2592 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2595
2596 if (reuse == MAT_REUSE_MATRIX) {
2597 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2598 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2599 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2600 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix.
Wrong local size"); 2601 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr); 2602 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2603 ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr); 2604 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2605 2606 nsends = redund->nsends; 2607 nrecvs = redund->nrecvs; 2608 send_rank = redund->send_rank; 2609 recv_rank = redund->recv_rank; 2610 sbuf_nz = redund->sbuf_nz; 2611 rbuf_nz = redund->rbuf_nz; 2612 sbuf_j = redund->sbuf_j; 2613 sbuf_a = redund->sbuf_a; 2614 rbuf_j = redund->rbuf_j; 2615 rbuf_a = redund->rbuf_a; 2616 } 2617 2618 if (reuse == MAT_INITIAL_MATRIX) { 2619 PetscMPIInt subrank,subsize; 2620 PetscInt nleftover,np_subcomm; 2621 /* get the destination processors' id send_rank, nsends and nrecvs */ 2622 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2623 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2624 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2625 2626 np_subcomm = size/nsubcomm; 2627 nleftover = size - nsubcomm*np_subcomm; 2628 2629 nsends = 0; nrecvs = 0; 2630 for (i=0; i<size; i++) { /* i=rank*/ 2631 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2632 send_rank[nsends] = i; nsends++; 2633 recv_rank[nrecvs++] = i; 2634 } 2635 } 2636 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2637 i = size-nleftover-1; 2638 j = 0; 2639 while (j < nsubcomm - nleftover) { 2640 send_rank[nsends++] = i; 2641 i--; j++; 2642 } 2643 } 2644 2645 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2646 for (i=0; i<nleftover; i++) { 2647 recv_rank[nrecvs++] = size-nleftover+i; 2648 } 2649 } 2650 2651 /* allocate sbuf_j, sbuf_a */ 2652 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2653 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2654 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2655 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2656 2657 /* copy mat's local entries into the buffers */ 2658 if (reuse == MAT_INITIAL_MATRIX) { 2659 rownz_max = 0; 2660 rptr = sbuf_j; 2661 cols = sbuf_j + rend-rstart + 1; 2662 vals = sbuf_a; 2663 rptr[0] = 0; 2664 for (i=0; i<rend-rstart; i++) { 2665 row = i + rstart; 2666 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2667 ncols = nzA + nzB; 2668 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2669 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2670 /* load the column indices for this row into cols */ 2671 lwrite = 0; 2672 for (l=0; l<nzB; l++) { 2673 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2674 vals[lwrite] = aworkB[l]; 2675 cols[lwrite++] = ctmp; 2676 } 2677 } 2678 for (l=0; l<nzA; l++) { 2679 vals[lwrite] = aworkA[l]; 2680 cols[lwrite++] = cstart + cworkA[l]; 2681 } 2682 for (l=0; l<nzB; l++) { 2683 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2684 vals[lwrite] = aworkB[l]; 2685 cols[lwrite++] = ctmp; 2686 } 2687 } 2688 vals += ncols; 2689 cols += ncols; 2690 rptr[i+1] = rptr[i] + ncols; 2691 if (rownz_max < ncols) rownz_max = ncols; 2692 } 2693 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2694 } else { /* only copy matrix values into sbuf_a */ 2695 rptr = sbuf_j; 2696 vals = sbuf_a; 2697 rptr[0] = 0; 2698 for (i=0; 
i<rend-rstart; i++) { 2699 row = i + rstart; 2700 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2701 ncols = nzA + nzB; 2702 cworkB = b->j + b->i[i]; 2703 aworkA = a->a + a->i[i]; 2704 aworkB = b->a + b->i[i]; 2705 lwrite = 0; 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2708 } 2709 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2710 for (l=0; l<nzB; l++) { 2711 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2712 } 2713 vals += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 } 2716 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2717 2718 /* send nzlocal to others, and recv other's nzlocal */ 2719 /*--------------------------------------------------*/ 2720 if (reuse == MAT_INITIAL_MATRIX) { 2721 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2722 2723 s_waits2 = s_waits3 + nsends; 2724 s_waits1 = s_waits2 + nsends; 2725 r_waits1 = s_waits1 + nsends; 2726 r_waits2 = r_waits1 + nrecvs; 2727 r_waits3 = r_waits2 + nrecvs; 2728 } else { 2729 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2730 2731 r_waits3 = s_waits3 + nsends; 2732 } 2733 2734 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2735 if (reuse == MAT_INITIAL_MATRIX) { 2736 /* get new tags to keep the communication clean */ 2737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2739 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2740 2741 /* post receives of other's nzlocal */ 2742 for (i=0; i<nrecvs; i++) { 2743 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2744 } 2745 /* send nzlocal to others */ 2746 for (i=0; i<nsends; i++) { 2747 sbuf_nz[i] = nzlocal; 2748 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2749 } 2750 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2751 count = nrecvs; 2752 while (count) { 2753 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2754 2755 recv_rank[imdex] = recv_status.MPI_SOURCE; 2756 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2757 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2758 2759 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2760 2761 rbuf_nz[imdex] += i + 2; 2762 2763 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2764 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2765 count--; 2766 } 2767 /* wait on sends of nzlocal */ 2768 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2769 /* send mat->i,j to others, and recv from other's */ 2770 /*------------------------------------------------*/ 2771 for (i=0; i<nsends; i++) { 2772 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2773 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2774 } 2775 /* wait on receives of mat->i,j */ 2776 /*------------------------------*/ 2777 count = nrecvs; 2778 while (count) { 2779 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2780 if (recv_rank[imdex] != 
recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2781 count--; 2782 } 2783 /* wait on sends of mat->i,j */ 2784 /*---------------------------*/ 2785 if (nsends) { 2786 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2787 } 2788 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2789 2790 /* post receives, send and receive mat->a */ 2791 /*----------------------------------------*/ 2792 for (imdex=0; imdex<nrecvs; imdex++) { 2793 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2794 } 2795 for (i=0; i<nsends; i++) { 2796 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2797 } 2798 count = nrecvs; 2799 while (count) { 2800 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2801 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2802 count--; 2803 } 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2806 } 2807 2808 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2809 2810 /* create redundant matrix */ 2811 /*-------------------------*/ 2812 if (reuse == MAT_INITIAL_MATRIX) { 2813 /* compute rownz_max for preallocation */ 2814 for (imdex=0; imdex<nrecvs; imdex++) { 2815 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2816 rptr = rbuf_j[imdex]; 2817 for (i=0; i<j; i++) { 2818 ncols = rptr[i+1] - rptr[i]; 2819 if (rownz_max < ncols) rownz_max = ncols; 2820 } 2821 } 2822 2823 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2824 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2825 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2826 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2827 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2828 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2829 } else { 2830 C = *matredundant; 2831 } 2832 2833 /* insert local matrix entries */ 2834 rptr = sbuf_j; 2835 cols = sbuf_j + rend-rstart + 1; 2836 vals = sbuf_a; 2837 for (i=0; i<rend-rstart; i++) { 2838 row = i + rstart; 2839 ncols = rptr[i+1] - rptr[i]; 2840 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2841 vals += ncols; 2842 cols += ncols; 2843 } 2844 /* insert received matrix entries */ 2845 for (imdex=0; imdex<nrecvs; imdex++) { 2846 rstart = rowrange[recv_rank[imdex]]; 2847 rend = rowrange[recv_rank[imdex]+1]; 2848 rptr = rbuf_j[imdex]; 2849 cols = rbuf_j[imdex] + rend-rstart + 1; 2850 vals = rbuf_a[imdex]; 2851 for (i=0; i<rend-rstart; i++) { 2852 row = i + rstart; 2853 ncols = rptr[i+1] - rptr[i]; 2854 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2855 vals += ncols; 2856 cols += ncols; 2857 } 2858 } 2859 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2862 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2863 if (reuse == MAT_INITIAL_MATRIX) { 2864 PetscContainer container; 2865 *matredundant = C; 2866 /* create a supporting struct and attach it to C for reuse */ 2867 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2868 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2869 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2870 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2871 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2872 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2873 2874 redund->nzlocal = nzlocal; 2875 redund->nsends = nsends; 2876 redund->nrecvs = nrecvs; 2877 redund->send_rank = send_rank; 2878 redund->recv_rank = recv_rank; 2879 redund->sbuf_nz = sbuf_nz; 2880 redund->rbuf_nz = rbuf_nz; 2881 redund->sbuf_j = sbuf_j; 2882 redund->sbuf_a = sbuf_a; 2883 redund->rbuf_j = rbuf_j; 2884 redund->rbuf_a = rbuf_a; 2885 2886 redund->Destroy = C->ops->destroy; 2887 C->ops->destroy = MatDestroy_MatRedundant; 2888 } 2889 PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2894 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2895 { 2896 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2897 PetscErrorCode ierr; 2898 PetscInt i,*idxb = 0; 2899 PetscScalar *va,*vb; 2900 Vec vtmp; 2901 2902 PetscFunctionBegin; 2903 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2904 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2905 if (idx) { 2906 for (i=0; i<A->rmap->n; i++) { 2907 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2908 } 2909 } 2910 2911 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2912 if (idx) { 2913 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2914 } 2915 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2916 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2917 2918 for (i=0; i<A->rmap->n; i++) { 2919 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2920 va[i] = vb[i]; 2921 if (idx) idx[i] = a->garray[idxb[i]]; 2922 } 2923 } 2924 2925 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2927 ierr = PetscFree(idxb);CHKERRQ(ierr); 2928 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2934 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2935 { 2936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2937 PetscErrorCode ierr; 2938 PetscInt i,*idxb = 0; 2939 PetscScalar *va,*vb; 2940 Vec vtmp; 2941 2942 PetscFunctionBegin; 2943 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2944 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2945 if (idx) { 2946 for (i=0; i<A->cmap->n; i++) { 2947 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2948 } 2949 } 2950 2951 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2952 if (idx) { 2953 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2954 } 2955 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2956 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2957 2958 for (i=0; i<A->rmap->n; i++) { 2959 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2960 va[i] = vb[i]; 2961 if (idx) idx[i] = a->garray[idxb[i]]; 2962 } 2963 } 2964 2965 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2966 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2967 ierr = PetscFree(idxb);CHKERRQ(ierr); 2968 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2974 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2975 { 2976 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2977 PetscInt n = A->rmap->n; 2978 PetscInt cstart = A->cmap->rstart; 2979 PetscInt *cmap = mat->garray; 2980 PetscInt *diagIdx, *offdiagIdx; 2981 Vec diagV, offdiagV; 2982 PetscScalar *a, *diagA, *offdiagA; 2983 PetscInt r; 2984 PetscErrorCode ierr; 2985 2986 PetscFunctionBegin; 2987 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* work vectors are purely local, so they must be sequential */ 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2990 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2991 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2992 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2993 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2994 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2995 for (r = 0; r < n; ++r) { 2996 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2997 a[r] = diagA[r]; 2998 idx[r] = cstart + diagIdx[r]; 2999 } else { 3000 a[r] = offdiagA[r]; 3001 idx[r] = cmap[offdiagIdx[r]]; 3002 } 3003 } 3004 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3007 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3008 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3009 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3015 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3016 { 3017 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3018 PetscInt n = A->rmap->n; 3019 PetscInt cstart = A->cmap->rstart; 3020 PetscInt *cmap = mat->garray; 3021 PetscInt *diagIdx, *offdiagIdx; 3022 Vec diagV, offdiagV; 3023 PetscScalar *a, *diagA, *offdiagA; 3024 PetscInt r; 3025 PetscErrorCode ierr; 3026 3027 PetscFunctionBegin; 3028 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3029 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3030 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3031 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3032 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3033 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3034 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3035 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3036 for (r = 0; r < n; ++r) { 3037 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3038 a[r] = diagA[r]; 3039 idx[r] = cstart + diagIdx[r]; 3040 } else { 3041 a[r] = offdiagA[r]; 3042 idx[r] = cmap[offdiagIdx[r]]; 3043 } 3044 } 3045 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3048 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3049 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3050 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3056 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3057 { 3058 PetscErrorCode ierr; 3059 Mat *dummy; 3060 3061 PetscFunctionBegin; 3062 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3063 *newmat = *dummy; 3064 ierr = PetscFree(dummy);CHKERRQ(ierr); 3065 PetscFunctionReturn(0); 3066 } 3067 3068 extern PetscErrorCode
MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3072 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3073 { 3074 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3075 PetscErrorCode ierr; 3076 3077 PetscFunctionBegin; 3078 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3079 PetscFunctionReturn(0); 3080 } 3081 3082 #undef __FUNCT__ 3083 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3084 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3085 { 3086 PetscErrorCode ierr; 3087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3088 3089 PetscFunctionBegin; 3090 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3091 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3092 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 /* -------------------------------------------------------------------*/ 3098 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3099 MatGetRow_MPIAIJ, 3100 MatRestoreRow_MPIAIJ, 3101 MatMult_MPIAIJ, 3102 /* 4*/ MatMultAdd_MPIAIJ, 3103 MatMultTranspose_MPIAIJ, 3104 MatMultTransposeAdd_MPIAIJ, 3105 #if defined(PETSC_HAVE_PBGL) 3106 MatSolve_MPIAIJ, 3107 #else 3108 0, 3109 #endif 3110 0, 3111 0, 3112 /*10*/ 0, 3113 0, 3114 0, 3115 MatSOR_MPIAIJ, 3116 MatTranspose_MPIAIJ, 3117 /*15*/ MatGetInfo_MPIAIJ, 3118 MatEqual_MPIAIJ, 3119 MatGetDiagonal_MPIAIJ, 3120 MatDiagonalScale_MPIAIJ, 3121 MatNorm_MPIAIJ, 3122 /*20*/ MatAssemblyBegin_MPIAIJ, 3123 MatAssemblyEnd_MPIAIJ, 3124 MatSetOption_MPIAIJ, 3125 MatZeroEntries_MPIAIJ, 3126 /*24*/ MatZeroRows_MPIAIJ, 3127 0, 3128 #if defined(PETSC_HAVE_PBGL) 3129 0, 3130 #else 3131 0, 3132 #endif 3133 0, 3134 0, 3135 /*29*/ MatSetUp_MPIAIJ, 3136 #if defined(PETSC_HAVE_PBGL) 3137 0, 3138 #else 3139 0, 3140 #endif 3141 0, 3142 0, 3143 0, 3144 /*34*/ MatDuplicate_MPIAIJ, 3145 0, 3146 0, 3147 0, 3148 0, 3149 /*39*/ MatAXPY_MPIAIJ, 3150 MatGetSubMatrices_MPIAIJ, 3151 MatIncreaseOverlap_MPIAIJ, 3152 MatGetValues_MPIAIJ, 3153 MatCopy_MPIAIJ, 3154 /*44*/ MatGetRowMax_MPIAIJ, 3155 MatScale_MPIAIJ, 3156 0, 3157 0, 3158 MatZeroRowsColumns_MPIAIJ, 3159 /*49*/ MatSetRandom_MPIAIJ, 3160 0, 3161 0, 3162 0, 3163 0, 3164 /*54*/ MatFDColoringCreate_MPIAIJ, 3165 0, 3166 MatSetUnfactored_MPIAIJ, 3167 MatPermute_MPIAIJ, 3168 0, 3169 /*59*/ MatGetSubMatrix_MPIAIJ, 3170 MatDestroy_MPIAIJ, 3171 MatView_MPIAIJ, 3172 0, 3173 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3174 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3175 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3176 0, 3177 0, 3178 0, 3179 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3180 MatGetRowMinAbs_MPIAIJ, 3181 0, 3182 MatSetColoring_MPIAIJ, 3183 0, 3184 MatSetValuesAdifor_MPIAIJ, 3185 /*75*/ MatFDColoringApply_AIJ, 3186 0, 3187 0, 3188 0, 3189 MatFindZeroDiagonals_MPIAIJ, 3190 /*80*/ 0, 3191 0, 3192 0, 3193 /*83*/ MatLoad_MPIAIJ, 3194 0, 3195 0, 3196 0, 3197 0, 3198 0, 3199 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3200 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3201 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3202 MatPtAP_MPIAIJ_MPIAIJ, 3203 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3204 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3205 0, 3206 0, 3207 0, 3208 0, 3209 /*99*/ 0, 3210 0, 3211 0, 3212 MatConjugate_MPIAIJ, 3213 0, 3214 /*104*/MatSetValuesRow_MPIAIJ, 3215 MatRealPart_MPIAIJ, 3216 MatImaginaryPart_MPIAIJ, 3217 0, 3218 0, 3219 /*109*/0, 3220 MatGetRedundantMatrix_MPIAIJ, 3221 MatGetRowMin_MPIAIJ, 3222 0, 3223 0, 
3224 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3225 0, 3226 0, 3227 0, 3228 0, 3229 /*119*/0, 3230 0, 3231 0, 3232 0, 3233 MatGetMultiProcBlock_MPIAIJ, 3234 /*124*/MatFindNonzeroRows_MPIAIJ, 3235 MatGetColumnNorms_MPIAIJ, 3236 MatInvertBlockDiagonal_MPIAIJ, 3237 0, 3238 MatGetSubMatricesParallel_MPIAIJ, 3239 /*129*/0, 3240 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3241 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3242 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3243 0, 3244 /*134*/0, 3245 0, 3246 0, 3247 0, 3248 0 3249 }; 3250 3251 /* ----------------------------------------------------------------------------------------*/ 3252 3253 EXTERN_C_BEGIN 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3256 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3257 { 3258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3259 PetscErrorCode ierr; 3260 3261 PetscFunctionBegin; 3262 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3263 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 EXTERN_C_END 3267 3268 EXTERN_C_BEGIN 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3271 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3272 { 3273 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3274 PetscErrorCode ierr; 3275 3276 PetscFunctionBegin; 3277 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3278 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3279 PetscFunctionReturn(0); 3280 } 3281 EXTERN_C_END 3282 3283 EXTERN_C_BEGIN 3284 #undef __FUNCT__ 3285 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3286 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3287 { 3288 Mat_MPIAIJ *b; 3289 PetscErrorCode ierr; 3290 PetscInt i; 3291 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3292 3293 PetscFunctionBegin; 3294 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3295 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3296 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3297 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3298 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3299 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3300 3301 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3302 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3303 if (d_nnz) { 3304 for (i=0; i<B->rmap->n; i++) { 3305 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3306 } 3307 } 3308 if (o_nnz) { 3309 for (i=0; i<B->rmap->n; i++) { 3310 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3311 } 3312 } 3313 b = (Mat_MPIAIJ*)B->data; 3314 3315 if (!B->preallocated) { 3316 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3317 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3318 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3319 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3320 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3321 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3322 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3323 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3324 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3325 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3326 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3327 } 3328 3329 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3330 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3331 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3332 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3333 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3334 B->preallocated = PETSC_TRUE; 3335 PetscFunctionReturn(0); 3336 } 3337 EXTERN_C_END 3338 3339 #undef __FUNCT__ 3340 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3341 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3342 { 3343 Mat mat; 3344 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3345 PetscErrorCode ierr; 3346 3347 PetscFunctionBegin; 3348 *newmat = 0; 3349 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3350 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3351 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3352 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3353 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3354 a = (Mat_MPIAIJ*)mat->data; 3355 3356 mat->factortype = matin->factortype; 3357 mat->rmap->bs = matin->rmap->bs; 3358 mat->cmap->bs = matin->cmap->bs; 3359 mat->assembled = PETSC_TRUE; 3360 mat->insertmode = NOT_SET_VALUES; 3361 mat->preallocated = PETSC_TRUE; 3362 3363 a->size = oldmat->size; 3364 a->rank = oldmat->rank; 3365 a->donotstash = oldmat->donotstash; 3366 a->roworiented = oldmat->roworiented; 3367 a->rowindices = 0; 3368 a->rowvalues = 0; 3369 a->getrowactive = PETSC_FALSE; 3370 3371 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3372 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3373 3374 if (oldmat->colmap) { 3375 #if defined(PETSC_USE_CTABLE) 3376 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3377 #else 3378 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3379 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3380 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3381 #endif 3382 } else a->colmap = 0; 3383 if (oldmat->garray) { 3384 PetscInt len; 3385 len = oldmat->B->cmap->n; 3386 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3387 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3388 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3389 } else a->garray = 0; 3390 3391 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3392 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3393 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3394 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3395 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3396 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3397 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3398 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3399 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3400 *newmat = mat; 3401 PetscFunctionReturn(0); 3402 } 3403 3404 3405 3406 #undef __FUNCT__ 3407 #define __FUNCT__ "MatLoad_MPIAIJ" 3408 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3409 { 3410 PetscScalar *vals,*svals; 3411 MPI_Comm comm; 3412 PetscErrorCode ierr; 3413 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3414 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3415 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3416 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3417 PetscInt cend,cstart,n,*rowners,sizesset=1; 3418 int fd; 3419 PetscInt bs = 1; 3420 3421 PetscFunctionBegin; 3422 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3423 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3425 if (!rank) { 3426 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3427 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3428 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3429 } 3430 3431 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3432 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3433 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3434 3435 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3436 3437 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3438 M = header[1]; N = header[2]; 3439 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3440 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3441 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3442 3443 /* If global sizes are set, check if they are consistent with that given in the file */ 3444 if (sizesset) { 3445 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3446 } 3447 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3448 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3449 3450 /* determine ownership of all (block) rows */ 3451 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3452 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3453 else m = newMat->rmap->n; /* Set by user */ 3454 3455 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3456 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3457 3458 /* First process needs enough room for process with most rows */ 3459 if (!rank) { 3460 mmax = rowners[1]; 3461 for (i=2; 
i<=size; i++) { 3462 mmax = PetscMax(mmax, rowners[i]); 3463 } 3464 } else mmax = m; 3465 3466 rowners[0] = 0; 3467 for (i=2; i<=size; i++) { 3468 rowners[i] += rowners[i-1]; 3469 } 3470 rstart = rowners[rank]; 3471 rend = rowners[rank+1]; 3472 3473 /* distribute row lengths to all processors */ 3474 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3475 if (!rank) { 3476 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3477 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3478 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3479 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3480 for (j=0; j<m; j++) { 3481 procsnz[0] += ourlens[j]; 3482 } 3483 for (i=1; i<size; i++) { 3484 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3485 /* calculate the number of nonzeros on each processor */ 3486 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3487 procsnz[i] += rowlengths[j]; 3488 } 3489 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3490 } 3491 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3492 } else { 3493 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3494 } 3495 3496 if (!rank) { 3497 /* determine max buffer needed and allocate it */ 3498 maxnz = 0; 3499 for (i=0; i<size; i++) { 3500 maxnz = PetscMax(maxnz,procsnz[i]); 3501 } 3502 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3503 3504 /* read in my part of the matrix column indices */ 3505 nz = procsnz[0]; 3506 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3507 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3508 3509 /* read in every one elses and ship off */ 3510 for (i=1; i<size; i++) { 3511 nz = procsnz[i]; 3512 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3513 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3514 } 3515 ierr = PetscFree(cols);CHKERRQ(ierr); 3516 } else { 3517 /* determine buffer space needed for message */ 3518 nz = 0; 3519 for (i=0; i<m; i++) { 3520 nz += ourlens[i]; 3521 } 3522 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3523 3524 /* receive message of column indices*/ 3525 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3526 } 3527 3528 /* determine column ownership if matrix is not square */ 3529 if (N != M) { 3530 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3531 else n = newMat->cmap->n; 3532 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3533 cstart = cend - n; 3534 } else { 3535 cstart = rstart; 3536 cend = rend; 3537 n = cend - cstart; 3538 } 3539 3540 /* loop over local rows, determining number of off diagonal entries */ 3541 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3542 jj = 0; 3543 for (i=0; i<m; i++) { 3544 for (j=0; j<ourlens[i]; j++) { 3545 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3546 jj++; 3547 } 3548 } 3549 3550 for (i=0; i<m; i++) { 3551 ourlens[i] -= offlens[i]; 3552 } 3553 if (!sizesset) { 3554 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3555 } 3556 3557 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3558 3559 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3560 3561 for (i=0; i<m; i++) { 3562 ourlens[i] += offlens[i]; 3563 } 3564 3565 if (!rank) { 3566 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3567 3568 /* read in my part of the matrix 
numerical values */ 3569 nz = procsnz[0]; 3570 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3571 3572 /* insert into matrix */ 3573 jj = rstart; 3574 smycols = mycols; 3575 svals = vals; 3576 for (i=0; i<m; i++) { 3577 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3578 smycols += ourlens[i]; 3579 svals += ourlens[i]; 3580 jj++; 3581 } 3582 3583 /* read in other processors and ship out */ 3584 for (i=1; i<size; i++) { 3585 nz = procsnz[i]; 3586 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3587 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3588 } 3589 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3590 } else { 3591 /* receive numeric values */ 3592 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3593 3594 /* receive message of values*/ 3595 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3596 3597 /* insert into matrix */ 3598 jj = rstart; 3599 smycols = mycols; 3600 svals = vals; 3601 for (i=0; i<m; i++) { 3602 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3603 smycols += ourlens[i]; 3604 svals += ourlens[i]; 3605 jj++; 3606 } 3607 } 3608 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3609 ierr = PetscFree(vals);CHKERRQ(ierr); 3610 ierr = PetscFree(mycols);CHKERRQ(ierr); 3611 ierr = PetscFree(rowners);CHKERRQ(ierr); 3612 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3614 PetscFunctionReturn(0); 3615 } 3616 3617 #undef __FUNCT__ 3618 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3619 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3620 { 3621 PetscErrorCode ierr; 3622 IS iscol_local; 3623 PetscInt csize; 3624 3625 PetscFunctionBegin; 3626 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3627 if (call == MAT_REUSE_MATRIX) { 3628 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3629 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3630 } else { 3631 PetscInt cbs; 3632 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3633 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3634 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3635 } 3636 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3637 if (call == MAT_INITIAL_MATRIX) { 3638 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3639 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3640 } 3641 PetscFunctionReturn(0); 3642 } 3643 3644 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3647 /* 3648 Not great since it makes two copies of the submatrix, first an SeqAIJ 3649 in local and then by concatenating the local matrices the end result. 3650 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3651 3652 Note: This requires a sequential iscol with all indices. 
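      Such an iscol is what the public wrapper MatGetSubMatrix_MPIAIJ() above builds before
      calling this routine; a sketch of the relevant calls, as they appear above, is:

         ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
         ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
         ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);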
3653 */ 3654 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3655 { 3656 PetscErrorCode ierr; 3657 PetscMPIInt rank,size; 3658 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3659 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3660 PetscBool allcolumns, colflag; 3661 Mat M,Mreuse; 3662 MatScalar *vwork,*aa; 3663 MPI_Comm comm; 3664 Mat_SeqAIJ *aij; 3665 3666 PetscFunctionBegin; 3667 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3668 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3669 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3670 3671 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3672 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3673 if (colflag && ncol == mat->cmap->N) { 3674 allcolumns = PETSC_TRUE; 3675 } else { 3676 allcolumns = PETSC_FALSE; 3677 } 3678 if (call == MAT_REUSE_MATRIX) { 3679 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3680 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3681 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3682 } else { 3683 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3684 } 3685 3686 /* 3687 m - number of local rows 3688 n - number of columns (same on all processors) 3689 rstart - first row in new global matrix generated 3690 */ 3691 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3692 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3693 if (call == MAT_INITIAL_MATRIX) { 3694 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3695 ii = aij->i; 3696 jj = aij->j; 3697 3698 /* 3699 Determine the number of non-zeros in the diagonal and off-diagonal 3700 portions of the matrix in order to do correct preallocation 3701 */ 3702 3703 /* first get start and end of "diagonal" columns */ 3704 if (csize == PETSC_DECIDE) { 3705 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3706 if (mglobal == n) { /* square matrix */ 3707 nlocal = m; 3708 } else { 3709 nlocal = n/size + ((n % size) > rank); 3710 } 3711 } else { 3712 nlocal = csize; 3713 } 3714 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3715 rstart = rend - nlocal; 3716 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3717 3718 /* next, compute all the lengths */ 3719 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3720 olens = dlens + m; 3721 for (i=0; i<m; i++) { 3722 jend = ii[i+1] - ii[i]; 3723 olen = 0; 3724 dlen = 0; 3725 for (j=0; j<jend; j++) { 3726 if (*jj < rstart || *jj >= rend) olen++; 3727 else dlen++; 3728 jj++; 3729 } 3730 olens[i] = olen; 3731 dlens[i] = dlen; 3732 } 3733 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3734 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3735 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3736 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3737 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3738 ierr = PetscFree(dlens);CHKERRQ(ierr); 3739 } else { 3740 PetscInt ml,nl; 3741 3742 M = *newmat; 3743 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3744 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3745 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3746 /* 
3747 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3748 rather than the slower MatSetValues(). 3749 */ 3750 M->was_assembled = PETSC_TRUE; 3751 M->assembled = PETSC_FALSE; 3752 } 3753 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3754 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3755 ii = aij->i; 3756 jj = aij->j; 3757 aa = aij->a; 3758 for (i=0; i<m; i++) { 3759 row = rstart + i; 3760 nz = ii[i+1] - ii[i]; 3761 cwork = jj; jj += nz; 3762 vwork = aa; aa += nz; 3763 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3764 } 3765 3766 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 *newmat = M; 3769 3770 /* save submatrix used in processor for next request */ 3771 if (call == MAT_INITIAL_MATRIX) { 3772 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3773 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3774 } 3775 PetscFunctionReturn(0); 3776 } 3777 3778 EXTERN_C_BEGIN 3779 #undef __FUNCT__ 3780 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3781 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3782 { 3783 PetscInt m,cstart, cend,j,nnz,i,d; 3784 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3785 const PetscInt *JJ; 3786 PetscScalar *values; 3787 PetscErrorCode ierr; 3788 3789 PetscFunctionBegin; 3790 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3791 3792 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3793 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3794 m = B->rmap->n; 3795 cstart = B->cmap->rstart; 3796 cend = B->cmap->rend; 3797 rstart = B->rmap->rstart; 3798 3799 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3800 3801 #if defined(PETSC_USE_DEBUG) 3802 for (i=0; i<m; i++) { 3803 nnz = Ii[i+1]- Ii[i]; 3804 JJ = J + Ii[i]; 3805 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3806 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3807 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3808 } 3809 #endif 3810 3811 for (i=0; i<m; i++) { 3812 nnz = Ii[i+1]- Ii[i]; 3813 JJ = J + Ii[i]; 3814 nnz_max = PetscMax(nnz_max,nnz); 3815 d = 0; 3816 for (j=0; j<nnz; j++) { 3817 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3818 } 3819 d_nnz[i] = d; 3820 o_nnz[i] = nnz - d; 3821 } 3822 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3823 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3824 3825 if (v) values = (PetscScalar*)v; 3826 else { 3827 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3828 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3829 } 3830 3831 for (i=0; i<m; i++) { 3832 ii = i + rstart; 3833 nnz = Ii[i+1]- Ii[i]; 3834 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3835 } 3836 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3837 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3838 3839 if (!v) { 3840 ierr = PetscFree(values);CHKERRQ(ierr); 3841 } 3842 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3843 PetscFunctionReturn(0); 3844 } 3845 EXTERN_C_END 3846 3847 #undef __FUNCT__ 3848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3849 /*@ 3850 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3851 (the default parallel PETSc format). 3852 3853 Collective on MPI_Comm 3854 3855 Input Parameters: 3856 + B - the matrix 3857 . i - the indices into j for the start of each local row (starts with zero) 3858 . j - the column indices for each local row (starts with zero) 3859 - v - optional values in the matrix 3860 3861 Level: developer 3862 3863 Notes: 3864 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3865 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3866 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3867 3868 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3869 3870 The format which is used for the sparse matrix input, is equivalent to a 3871 row-major ordering.. i.e for the following matrix, the input data expected is 3872 as shown: 3873 3874 1 0 0 3875 2 0 3 P0 3876 ------- 3877 4 5 6 P1 3878 3879 Process0 [P0]: rows_owned=[0,1] 3880 i = {0,1,3} [size = nrow+1 = 2+1] 3881 j = {0,0,2} [size = nz = 6] 3882 v = {1,2,3} [size = nz = 6] 3883 3884 Process1 [P1]: rows_owned=[2] 3885 i = {0,3} [size = nrow+1 = 1+1] 3886 j = {0,1,2} [size = nz = 6] 3887 v = {4,5,6} [size = nz = 6] 3888 3889 .keywords: matrix, aij, compressed row, sparse, parallel 3890 3891 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3892 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3893 @*/ 3894 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3895 { 3896 PetscErrorCode ierr; 3897 3898 PetscFunctionBegin; 3899 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3900 PetscFunctionReturn(0); 3901 } 3902 3903 #undef __FUNCT__ 3904 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3905 /*@C 3906 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3907 (the default parallel PETSc format). For good matrix assembly performance 3908 the user should preallocate the matrix storage by setting the parameters 3909 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3910 performance can be increased by more than a factor of 50. 3911 3912 Collective on MPI_Comm 3913 3914 Input Parameters: 3915 + A - the matrix 3916 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3917 (same value is used for all local rows) 3918 . d_nnz - array containing the number of nonzeros in the various rows of the 3919 DIAGONAL portion of the local submatrix (possibly different for each row) 3920 or NULL, if d_nz is used to specify the nonzero structure. 3921 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set) 3923 the diagonal entry even if it is zero. 3924 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3925 submatrix (same value is used for all local rows). 3926 - o_nnz - array containing the number of nonzeros in the various rows of the 3927 OFF-DIAGONAL portion of the local submatrix (possibly different for 3928 each row) or NULL, if o_nz is used to specify the nonzero 3929 structure. The size of this array is equal to the number 3930 of local rows, i.e. 'm'. 3931 3932 If the *_nnz parameter is given then the *_nz parameter is ignored. 3933 3934 The AIJ format (also called the Yale sparse matrix format or 3935 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3936 storage. The stored row and column indices begin with zero. 3937 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3938 3939 The parallel matrix is partitioned such that the first m0 rows belong to 3940 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3941 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3942 3943 The DIAGONAL portion of the local submatrix of a processor can be defined 3944 as the submatrix which is obtained by extracting the part corresponding to 3945 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3946 first row that belongs to the processor, r2 is the last row belonging to 3947 this processor, and c1-c2 is the range of indices of the local part of a 3948 vector suitable for applying the matrix to. This is an mxn matrix. In the 3949 common case of a square matrix, the row and column ranges are the same and 3950 the DIAGONAL part is also square. The remaining portion of the local 3951 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3952 3953 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3954 3955 You can call MatGetInfo() to get information on how effective the preallocation was; 3956 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3957 You can also run with the option -info and look for messages with the string 3958 malloc in them to see if additional memory allocation was needed. 3959 3960 Example usage: 3961 3962 Consider the following 8x8 matrix with 34 non-zero values, that is 3963 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3964 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3965 as follows: 3966 3967 .vb 3968 1 2 0 | 0 3 0 | 0 4 3969 Proc0 0 5 6 | 7 0 0 | 8 0 3970 9 0 10 | 11 0 0 | 12 0 3971 ------------------------------------- 3972 13 0 14 | 15 16 17 | 0 0 3973 Proc1 0 18 0 | 19 20 21 | 0 0 3974 0 0 0 | 22 23 0 | 24 0 3975 ------------------------------------- 3976 Proc2 25 26 27 | 0 0 28 | 29 0 3977 30 0 0 | 31 32 33 | 0 34 3978 .ve 3979 3980 This can be represented as a collection of submatrices as: 3981 3982 .vb 3983 A B C 3984 D E F 3985 G H I 3986 .ve 3987 3988 The submatrices A,B,C are owned by proc0, D,E,F are 3989 owned by proc1, and G,H,I are owned by proc2. 3990 3991 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3992 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3993 The 'M','N' parameters are 8,8, and have the same values on all procs. 3994 3995 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3996 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 3997 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3998 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3999 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4000 matrix and [DF] as another SeqAIJ matrix. 4001 4002 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4003 allocated for every row of the local diagonal submatrix, and o_nz 4004 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4005 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4006 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4007 In this case, the values of d_nz,o_nz are: 4008 .vb 4009 proc0 : d_nz = 2, o_nz = 2 4010 proc1 : d_nz = 3, o_nz = 2 4011 proc2 : d_nz = 1, o_nz = 4 4012 .ve 4013 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4014 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4015 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4016 34 values. 4017 4018 When the d_nnz, o_nnz parameters are specified, the storage is specified 4019 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4020 In the above case the values for d_nnz,o_nnz are: 4021 .vb 4022 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4023 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4024 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4025 .ve 4026 Here the space allocated is the sum of all the above values, i.e. 34, and 4027 hence the preallocation is perfect. 4028 4029 Level: intermediate 4030 4031 .keywords: matrix, aij, compressed row, sparse, parallel 4032 4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4034 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4035 @*/ 4036 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4037 { 4038 PetscErrorCode ierr; 4039 4040 PetscFunctionBegin; 4041 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4042 PetscValidType(B,1); 4043 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4044 PetscFunctionReturn(0); 4045 } 4046 4047 #undef __FUNCT__ 4048 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4049 /*@ 4050 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4051 in standard CSR format. 4052 4053 Collective on MPI_Comm 4054 4055 Input Parameters: 4056 + comm - MPI communicator 4057 . m - number of local rows (cannot be PETSC_DECIDE) 4058 . n - This value should be the same as the local size used in creating the 4059 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4060 calculated if N is given) For square matrices n is almost always m. 4061 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4062 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4063 . i - row indices 4064 . j - column indices 4065 - a - matrix values 4066 4067 Output Parameter: 4068 . mat - the matrix 4069 4070 Level: intermediate 4071 4072 Notes: 4073 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4074 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4075 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
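   A minimal calling sketch for the 3x3 example below; the communicator and the variable
   names are illustrative only, and each process passes its own local row block (here,
   what process 0 would pass):
.vb
      PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve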
4076 4077 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4078 4079 The format which is used for the sparse matrix input, is equivalent to a 4080 row-major ordering.. i.e for the following matrix, the input data expected is 4081 as shown: 4082 4083 1 0 0 4084 2 0 3 P0 4085 ------- 4086 4 5 6 P1 4087 4088 Process0 [P0]: rows_owned=[0,1] 4089 i = {0,1,3} [size = nrow+1 = 2+1] 4090 j = {0,0,2} [size = nz = 6] 4091 v = {1,2,3} [size = nz = 6] 4092 4093 Process1 [P1]: rows_owned=[2] 4094 i = {0,3} [size = nrow+1 = 1+1] 4095 j = {0,1,2} [size = nz = 6] 4096 v = {4,5,6} [size = nz = 6] 4097 4098 .keywords: matrix, aij, compressed row, sparse, parallel 4099 4100 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4101 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4102 @*/ 4103 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4104 { 4105 PetscErrorCode ierr; 4106 4107 PetscFunctionBegin; 4108 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4109 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4110 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4111 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4112 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4113 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4114 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4115 PetscFunctionReturn(0); 4116 } 4117 4118 #undef __FUNCT__ 4119 #define __FUNCT__ "MatCreateAIJ" 4120 /*@C 4121 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4122 (the default parallel PETSc format). For good matrix assembly performance 4123 the user should preallocate the matrix storage by setting the parameters 4124 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4125 performance can be increased by more than a factor of 50. 4126 4127 Collective on MPI_Comm 4128 4129 Input Parameters: 4130 + comm - MPI communicator 4131 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4132 This value should be the same as the local size used in creating the 4133 y vector for the matrix-vector product y = Ax. 4134 . n - This value should be the same as the local size used in creating the 4135 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4136 calculated if N is given) For square matrices n is almost always m. 4137 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4138 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4139 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4140 (same value is used for all local rows) 4141 . d_nnz - array containing the number of nonzeros in the various rows of the 4142 DIAGONAL portion of the local submatrix (possibly different for each row) 4143 or NULL, if d_nz is used to specify the nonzero structure. 4144 The size of this array is equal to the number of local rows, i.e 'm'. 4145 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4146 submatrix (same value is used for all local rows). 
4147 - o_nnz - array containing the number of nonzeros in the various rows of the 4148 OFF-DIAGONAL portion of the local submatrix (possibly different for 4149 each row) or NULL, if o_nz is used to specify the nonzero 4150 structure. The size of this array is equal to the number 4151 of local rows, i.e. 'm'. 4152 4153 Output Parameter: 4154 . A - the matrix 4155 4156 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4157 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4158 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4159 4160 Notes: 4161 If the *_nnz parameter is given then the *_nz parameter is ignored. 4162 4163 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4164 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4165 storage requirements for this matrix. 4166 4167 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4168 processor then it must be used on all processors that share the object for 4169 that argument. 4170 4171 The user MUST specify either the local or global matrix dimensions 4172 (possibly both). 4173 4174 The parallel matrix is partitioned across processors such that the 4175 first m0 rows belong to process 0, the next m1 rows belong to 4176 process 1, the next m2 rows belong to process 2 etc., where 4177 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4178 values corresponding to an [m x N] submatrix. 4179 4180 The columns are logically partitioned with the n0 columns belonging 4181 to the 0th partition, the next n1 columns belonging to the next 4182 partition etc., where n0,n1,n2... are the input parameter 'n'. 4183 4184 The DIAGONAL portion of the local submatrix on any given processor 4185 is the submatrix corresponding to the rows and columns m,n 4186 corresponding to the given processor, i.e. the diagonal matrix on 4187 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4188 etc. The remaining portion of the local submatrix [m x (N-n)] 4189 constitutes the OFF-DIAGONAL portion. The example below better 4190 illustrates this concept. 4191 4192 For a square global matrix we define each processor's diagonal portion 4193 to be its local rows and the corresponding columns (a square submatrix); 4194 each processor's off-diagonal portion encompasses the remainder of the 4195 local matrix (a rectangular submatrix). 4196 4197 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4198 4199 When calling this routine with a single process communicator, a matrix of 4200 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4201 type of communicator, use the construction mechanism: 4202 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4203 4204 By default, this format uses inodes (identical nodes) when possible. 4205 We search for consecutive rows with the same nonzero structure, thereby 4206 reusing matrix information to achieve increased efficiency. 4207 4208 Options Database Keys: 4209 + -mat_no_inode - Do not use inodes 4210 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4211 - -mat_aij_oneindex - Internally use indexing starting at 1 4212 rather than 0. Note that when calling MatSetValues(), 4213 the user still MUST index entries starting at 0! 4214 4215 4216 Example usage: 4217 4218 Consider the following 8x8 matrix with 34 non-zero values, that is 4219 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4220 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4221 as follows: 4222 4223 .vb 4224 1 2 0 | 0 3 0 | 0 4 4225 Proc0 0 5 6 | 7 0 0 | 8 0 4226 9 0 10 | 11 0 0 | 12 0 4227 ------------------------------------- 4228 13 0 14 | 15 16 17 | 0 0 4229 Proc1 0 18 0 | 19 20 21 | 0 0 4230 0 0 0 | 22 23 0 | 24 0 4231 ------------------------------------- 4232 Proc2 25 26 27 | 0 0 28 | 29 0 4233 30 0 0 | 31 32 33 | 0 34 4234 .ve 4235 4236 This can be represented as a collection of submatrices as: 4237 4238 .vb 4239 A B C 4240 D E F 4241 G H I 4242 .ve 4243 4244 The submatrices A,B,C are owned by proc0, D,E,F are 4245 owned by proc1, and G,H,I are owned by proc2. 4246 4247 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4248 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4249 The 'M','N' parameters are 8,8, and have the same values on all procs. 4250 4251 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4252 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4253 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4254 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4255 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4256 matrix and [DF] as another SeqAIJ matrix. 4257 4258 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4259 allocated for every row of the local diagonal submatrix, and o_nz 4260 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4261 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4262 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4263 In this case, the values of d_nz,o_nz are: 4264 .vb 4265 proc0 : d_nz = 2, o_nz = 2 4266 proc1 : d_nz = 3, o_nz = 2 4267 proc2 : d_nz = 1, o_nz = 4 4268 .ve 4269 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4270 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4271 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4272 34 values. 4273 4274 When the d_nnz, o_nnz parameters are specified, the storage is specified 4275 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4276 In the above case the values for d_nnz,o_nnz are: 4277 .vb 4278 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4279 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4280 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4281 .ve 4282 Here the space allocated is the sum of all the above values, i.e. 34, and 4283 hence the preallocation is perfect.
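   For the example above, a call on proc0 could look like the following sketch; the
   communicator and the variable names are illustrative only, and each process passes
   its own local sizes and nnz arrays:
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* the proc0 values listed above */
      Mat      A;

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve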
4284 4285 Level: intermediate 4286 4287 .keywords: matrix, aij, compressed row, sparse, parallel 4288 4289 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4290 MPIAIJ, MatCreateMPIAIJWithArrays() 4291 @*/ 4292 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4293 { 4294 PetscErrorCode ierr; 4295 PetscMPIInt size; 4296 4297 PetscFunctionBegin; 4298 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4299 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4300 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4301 if (size > 1) { 4302 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4303 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4304 } else { 4305 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4306 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4307 } 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #undef __FUNCT__ 4312 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4313 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4314 { 4315 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4316 4317 PetscFunctionBegin; 4318 *Ad = a->A; 4319 *Ao = a->B; 4320 *colmap = a->garray; 4321 PetscFunctionReturn(0); 4322 } 4323 4324 #undef __FUNCT__ 4325 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4326 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4327 { 4328 PetscErrorCode ierr; 4329 PetscInt i; 4330 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4331 4332 PetscFunctionBegin; 4333 if (coloring->ctype == IS_COLORING_GLOBAL) { 4334 ISColoringValue *allcolors,*colors; 4335 ISColoring ocoloring; 4336 4337 /* set coloring for diagonal portion */ 4338 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4339 4340 /* set coloring for off-diagonal portion */ 4341 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4342 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4343 for (i=0; i<a->B->cmap->n; i++) { 4344 colors[i] = allcolors[a->garray[i]]; 4345 } 4346 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4347 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4348 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4349 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4350 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4351 ISColoringValue *colors; 4352 PetscInt *larray; 4353 ISColoring ocoloring; 4354 4355 /* set coloring for diagonal portion */ 4356 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4357 for (i=0; i<a->A->cmap->n; i++) { 4358 larray[i] = i + A->cmap->rstart; 4359 } 4360 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4361 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4362 for (i=0; i<a->A->cmap->n; i++) { 4363 colors[i] = coloring->colors[larray[i]]; 4364 } 4365 ierr = PetscFree(larray);CHKERRQ(ierr); 4366 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4367 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4368 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4369 4370 /* set coloring for off-diagonal portion */ 4371 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4372 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4373 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4374 for (i=0; i<a->B->cmap->n; i++) { 4375 colors[i] = coloring->colors[larray[i]]; 4376 } 4377 ierr = PetscFree(larray);CHKERRQ(ierr); 4378 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4379 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4380 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4381 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 #undef __FUNCT__ 4386 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4387 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4388 { 4389 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4390 PetscErrorCode ierr; 4391 4392 PetscFunctionBegin; 4393 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4394 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 #undef __FUNCT__ 4399 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4400 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4401 { 4402 PetscErrorCode ierr; 4403 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4404 PetscInt *indx; 4405 4406 PetscFunctionBegin; 4407 /* This routine will ONLY return MPIAIJ type matrix */ 4408 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4409 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4410 if (n == PETSC_DECIDE) { 4411 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4412 } 4413 /* Check sum(n) = N */ 4414 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4415 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4416 4417 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4418 rstart -= m; 4419 4420 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4421 for (i=0; i<m; i++) { 4422 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4423 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4424 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4425 } 4426 4427 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4428 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4429 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4430 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4431 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4432 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #undef __FUNCT__ 4437 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4438 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4439 { 4440 PetscErrorCode ierr; 4441 PetscInt m,N,i,rstart,nnz,Ii; 4442 PetscInt *indx; 4443 PetscScalar *values; 4444 4445 PetscFunctionBegin; 4446 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4447 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4448 for (i=0; i<m; i++) { 4449 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4450 Ii = i + rstart; 4451 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4452 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4453 } 4454 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4455 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4461 /*@ 4462 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4463 matrices from each processor 4464 4465 Collective on MPI_Comm 4466 4467 Input Parameters: 4468 + comm - the communicators the parallel matrix will live on 4469 . inmat - the input sequential matrices 4470 . n - number of local columns (or PETSC_DECIDE) 4471 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4472 4473 Output Parameter: 4474 . outmat - the parallel matrix generated 4475 4476 Level: advanced 4477 4478 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4479 4480 @*/ 4481 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4482 { 4483 PetscErrorCode ierr; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4487 if (scall == MAT_INITIAL_MATRIX) { 4488 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4489 } 4490 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4491 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "MatFileSplit" 4497 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4498 { 4499 PetscErrorCode ierr; 4500 PetscMPIInt rank; 4501 PetscInt m,N,i,rstart,nnz; 4502 size_t len; 4503 const PetscInt *indx; 4504 PetscViewer out; 4505 char *name; 4506 Mat B; 4507 const PetscScalar *values; 4508 4509 PetscFunctionBegin; 4510 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4511 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4512 /* Should this be the type of the diagonal block of A? 
*/ 4513 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4514 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4515 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4516 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4517 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4518 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4519 for (i=0; i<m; i++) { 4520 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4521 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4522 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4523 } 4524 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 4527 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4528 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4529 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4530 sprintf(name,"%s.%d",outfile,rank); 4531 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4532 ierr = PetscFree(name);CHKERRQ(ierr); 4533 ierr = MatView(B,out);CHKERRQ(ierr); 4534 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4535 ierr = MatDestroy(&B);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4540 #undef __FUNCT__ 4541 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4542 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4543 { 4544 PetscErrorCode ierr; 4545 Mat_Merge_SeqsToMPI *merge; 4546 PetscContainer container; 4547 4548 PetscFunctionBegin; 4549 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4550 if (container) { 4551 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4564 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4565 ierr = PetscFree(merge);CHKERRQ(ierr); 4566 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4567 } 4568 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 #include <../src/mat/utils/freespace.h> 4573 #include <petscbt.h> 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4577 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4578 { 4579 PetscErrorCode ierr; 4580 MPI_Comm comm; 4581 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4582 PetscMPIInt size,rank,taga,*len_s; 4583 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4584 PetscInt proc,m; 4585 PetscInt **buf_ri,**buf_rj; 4586 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4587 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4588 MPI_Request *s_waits,*r_waits; 4589 MPI_Status *status; 4590 MatScalar *aa=a->a; 4591 MatScalar **abuf_r,*ba_i; 4592 Mat_Merge_SeqsToMPI *merge; 4593 PetscContainer container; 4594 4595 PetscFunctionBegin; 4596 ierr = 
PetscObjectGetComm((PetscObject)seqmat,&comm);CHKERRQ(ierr); 4597 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4598 4599 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4600 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4601 4602 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4603 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4604 4605 bi = merge->bi; 4606 bj = merge->bj; 4607 buf_ri = merge->buf_ri; 4608 buf_rj = merge->buf_rj; 4609 4610 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4611 owners = merge->rowmap->range; 4612 len_s = merge->len_s; 4613 4614 /* send and recv matrix values */ 4615 /*-----------------------------*/ 4616 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4617 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4618 4619 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4620 for (proc=0,k=0; proc<size; proc++) { 4621 if (!len_s[proc]) continue; 4622 i = owners[proc]; 4623 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4624 k++; 4625 } 4626 4627 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4628 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4629 ierr = PetscFree(status);CHKERRQ(ierr); 4630 4631 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4632 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4633 4634 /* insert mat values of mpimat */ 4635 /*----------------------------*/ 4636 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4637 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4638 4639 for (k=0; k<merge->nrecv; k++) { 4640 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4641 nrows = *(buf_ri_k[k]); 4642 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4643 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4644 } 4645 4646 /* set values of ba */ 4647 m = merge->rowmap->n; 4648 for (i=0; i<m; i++) { 4649 arow = owners[rank] + i; 4650 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4651 bnzi = bi[i+1] - bi[i]; 4652 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4653 4654 /* add local non-zero vals of this proc's seqmat into ba */ 4655 anzi = ai[arow+1] - ai[arow]; 4656 aj = a->j + ai[arow]; 4657 aa = a->a + ai[arow]; 4658 nextaj = 0; 4659 for (j=0; nextaj<anzi; j++) { 4660 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4661 ba_i[j] += aa[nextaj++]; 4662 } 4663 } 4664 4665 /* add received vals into ba */ 4666 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4667 /* i-th row */ 4668 if (i == *nextrow[k]) { 4669 anzi = *(nextai[k]+1) - *nextai[k]; 4670 aj = buf_rj[k] + *(nextai[k]); 4671 aa = abuf_r[k] + *(nextai[k]); 4672 nextaj = 0; 4673 for (j=0; nextaj<anzi; j++) { 4674 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4675 ba_i[j] += aa[nextaj++]; 4676 } 4677 } 4678 nextrow[k]++; nextai[k]++; 4679 } 4680 } 4681 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4682 } 4683 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4684 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4685 4686 ierr = 
PetscFree(abuf_r[0]);CHKERRQ(ierr); 4687 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4688 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4689 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4690 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4691 PetscFunctionReturn(0); 4692 } 4693 4694 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4695 4696 #undef __FUNCT__ 4697 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4698 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4699 { 4700 PetscErrorCode ierr; 4701 Mat B_mpi; 4702 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4703 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4704 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4705 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4706 PetscInt len,proc,*dnz,*onz,bs,cbs; 4707 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4708 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4709 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4710 MPI_Status *status; 4711 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4712 PetscBT lnkbt; 4713 Mat_Merge_SeqsToMPI *merge; 4714 PetscContainer container; 4715 4716 PetscFunctionBegin; 4717 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4718 4719 /* make sure it is a PETSc comm */ 4720 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4721 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4722 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4723 4724 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4725 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4726 4727 /* determine row ownership */ 4728 /*---------------------------------------------------------*/ 4729 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4730 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4731 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4732 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4733 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4734 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4735 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4736 4737 m = merge->rowmap->n; 4738 owners = merge->rowmap->range; 4739 4740 /* determine the number of messages to send, their lengths */ 4741 /*---------------------------------------------------------*/ 4742 len_s = merge->len_s; 4743 4744 len = 0; /* length of buf_si[] */ 4745 merge->nsend = 0; 4746 for (proc=0; proc<size; proc++) { 4747 len_si[proc] = 0; 4748 if (proc == rank) { 4749 len_s[proc] = 0; 4750 } else { 4751 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4752 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4753 } 4754 if (len_s[proc]) { 4755 merge->nsend++; 4756 nrows = 0; 4757 for (i=owners[proc]; i<owners[proc+1]; i++) { 4758 if (ai[i+1] > ai[i]) nrows++; 4759 } 4760 len_si[proc] = 2*(nrows+1); 4761 len += len_si[proc]; 4762 } 4763 } 4764 4765 /* determine the number and length of messages to receive for ij-structure */ 4766 /*-------------------------------------------------------------------------*/ 4767 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4768 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4769 4770 /* post the Irecv of j-structure */ 4771 
/*-------------------------------*/ 4772 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4773 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4774 4775 /* post the Isend of j-structure */ 4776 /*--------------------------------*/ 4777 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4778 4779 for (proc=0, k=0; proc<size; proc++) { 4780 if (!len_s[proc]) continue; 4781 i = owners[proc]; 4782 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4783 k++; 4784 } 4785 4786 /* receives and sends of j-structure are complete */ 4787 /*------------------------------------------------*/ 4788 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4789 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4790 4791 /* send and recv i-structure */ 4792 /*---------------------------*/ 4793 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4794 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4795 4796 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4797 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4798 for (proc=0,k=0; proc<size; proc++) { 4799 if (!len_s[proc]) continue; 4800 /* form outgoing message for i-structure: 4801 buf_si[0]: nrows to be sent 4802 [1:nrows]: row index (global) 4803 [nrows+1:2*nrows+1]: i-structure index 4804 */ 4805 /*-------------------------------------------*/ 4806 nrows = len_si[proc]/2 - 1; 4807 buf_si_i = buf_si + nrows+1; 4808 buf_si[0] = nrows; 4809 buf_si_i[0] = 0; 4810 nrows = 0; 4811 for (i=owners[proc]; i<owners[proc+1]; i++) { 4812 anzi = ai[i+1] - ai[i]; 4813 if (anzi) { 4814 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4815 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4816 nrows++; 4817 } 4818 } 4819 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4820 k++; 4821 buf_si += len_si[proc]; 4822 } 4823 4824 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4825 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4826 4827 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4828 for (i=0; i<merge->nrecv; i++) { 4829 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4830 } 4831 4832 ierr = PetscFree(len_si);CHKERRQ(ierr); 4833 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4834 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4835 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4836 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4837 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4838 ierr = PetscFree(status);CHKERRQ(ierr); 4839 4840 /* compute a local seq matrix in each processor */ 4841 /*----------------------------------------------*/ 4842 /* allocate bi array and free space for accumulating nonzero column info */ 4843 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4844 bi[0] = 0; 4845 4846 /* create and initialize a linked list */ 4847 nlnk = N+1; 4848 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4849 4850 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4851 len = ai[owners[rank+1]] - ai[owners[rank]]; 4852 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4853 4854 
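  /* Overview of the loop below: current_space walks a linked list of free-space chunks.
     For each local row the merged column indices are first accumulated in the sorted
     linked list lnk and then copied into current_space->array; when a row does not fit
     in the remaining space another chunk is obtained with PetscFreeSpaceGet() (counted
     in nspacedouble). */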
current_space = free_space; 4855 4856 /* determine symbolic info for each local row */ 4857 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4858 4859 for (k=0; k<merge->nrecv; k++) { 4860 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4861 nrows = *buf_ri_k[k]; 4862 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4863 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4864 } 4865 4866 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4867 len = 0; 4868 for (i=0; i<m; i++) { 4869 bnzi = 0; 4870 /* add local non-zero cols of this proc's seqmat into lnk */ 4871 arow = owners[rank] + i; 4872 anzi = ai[arow+1] - ai[arow]; 4873 aj = a->j + ai[arow]; 4874 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4875 bnzi += nlnk; 4876 /* add received col data into lnk */ 4877 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4878 if (i == *nextrow[k]) { /* i-th row */ 4879 anzi = *(nextai[k]+1) - *nextai[k]; 4880 aj = buf_rj[k] + *nextai[k]; 4881 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4882 bnzi += nlnk; 4883 nextrow[k]++; nextai[k]++; 4884 } 4885 } 4886 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4887 4888 /* if free space is not available, make more free space */ 4889 if (current_space->local_remaining<bnzi) { 4890 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4891 nspacedouble++; 4892 } 4893 /* copy data into free space, then initialize lnk */ 4894 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4895 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4896 4897 current_space->array += bnzi; 4898 current_space->local_used += bnzi; 4899 current_space->local_remaining -= bnzi; 4900 4901 bi[i+1] = bi[i] + bnzi; 4902 } 4903 4904 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4905 4906 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4907 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4908 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4909 4910 /* create symbolic parallel matrix B_mpi */ 4911 /*---------------------------------------*/ 4912 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4913 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4914 if (n==PETSC_DECIDE) { 4915 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4916 } else { 4917 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4918 } 4919 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4920 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4921 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4922 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4923 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4924 4925 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4926 B_mpi->assembled = PETSC_FALSE; 4927 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4928 merge->bi = bi; 4929 merge->bj = bj; 4930 merge->buf_ri = buf_ri; 4931 merge->buf_rj = buf_rj; 4932 merge->coi = NULL; 4933 merge->coj = NULL; 4934 merge->owners_co = NULL; 4935 4936 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4937 4938 /* attach the supporting struct to B_mpi for reuse */ 4939 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4940 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4941 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4942 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4943 *mpimat = B_mpi; 4944 4945 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4946 PetscFunctionReturn(0); 4947 } 4948 4949 #undef __FUNCT__ 4950 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4951 /*@C 4952 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4953 matrices from each processor 4954 4955 Collective on MPI_Comm 4956 4957 Input Parameters: 4958 + comm - the communicators the parallel matrix will live on 4959 . seqmat - the input sequential matrices 4960 . m - number of local rows (or PETSC_DECIDE) 4961 . n - number of local columns (or PETSC_DECIDE) 4962 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4963 4964 Output Parameter: 4965 . mpimat - the parallel matrix generated 4966 4967 Level: advanced 4968 4969 Notes: 4970 The dimensions of the sequential matrix in each processor MUST be the same. 4971 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4972 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4973 @*/ 4974 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4975 { 4976 PetscErrorCode ierr; 4977 PetscMPIInt size; 4978 4979 PetscFunctionBegin; 4980 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4981 if (size == 1) { 4982 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4983 if (scall == MAT_INITIAL_MATRIX) { 4984 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4985 } else { 4986 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4987 } 4988 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4989 PetscFunctionReturn(0); 4990 } 4991 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4992 if (scall == MAT_INITIAL_MATRIX) { 4993 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4994 } 4995 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4996 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4997 PetscFunctionReturn(0); 4998 } 4999 5000 #undef __FUNCT__ 5001 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5002 /*@ 5003 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5004 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5005 with MatGetSize() 5006 5007 Not Collective 5008 5009 Input Parameters: 5010 + A - the matrix 5011 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5012 5013 Output Parameter: 5014 . 
A_loc - the local sequential matrix generated 5015 5016 Level: developer 5017 5018 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5019 5020 @*/ 5021 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5022 { 5023 PetscErrorCode ierr; 5024 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5025 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5026 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5027 MatScalar *aa=a->a,*ba=b->a,*cam; 5028 PetscScalar *ca; 5029 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5030 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5031 PetscBool match; 5032 5033 PetscFunctionBegin; 5034 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5035 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5036 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5037 if (scall == MAT_INITIAL_MATRIX) { 5038 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5039 ci[0] = 0; 5040 for (i=0; i<am; i++) { 5041 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5042 } 5043 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5044 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5045 k = 0; 5046 for (i=0; i<am; i++) { 5047 ncols_o = bi[i+1] - bi[i]; 5048 ncols_d = ai[i+1] - ai[i]; 5049 /* off-diagonal portion of A */ 5050 for (jo=0; jo<ncols_o; jo++) { 5051 col = cmap[*bj]; 5052 if (col >= cstart) break; 5053 cj[k] = col; bj++; 5054 ca[k++] = *ba++; 5055 } 5056 /* diagonal portion of A */ 5057 for (j=0; j<ncols_d; j++) { 5058 cj[k] = cstart + *aj++; 5059 ca[k++] = *aa++; 5060 } 5061 /* off-diagonal portion of A */ 5062 for (j=jo; j<ncols_o; j++) { 5063 cj[k] = cmap[*bj++]; 5064 ca[k++] = *ba++; 5065 } 5066 } 5067 /* put together the new matrix */ 5068 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5069 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5070 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5071 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5072 mat->free_a = PETSC_TRUE; 5073 mat->free_ij = PETSC_TRUE; 5074 mat->nonew = 0; 5075 } else if (scall == MAT_REUSE_MATRIX) { 5076 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5077 ci = mat->i; cj = mat->j; cam = mat->a; 5078 for (i=0; i<am; i++) { 5079 /* off-diagonal portion of A */ 5080 ncols_o = bi[i+1] - bi[i]; 5081 for (jo=0; jo<ncols_o; jo++) { 5082 col = cmap[*bj]; 5083 if (col >= cstart) break; 5084 *cam++ = *ba++; bj++; 5085 } 5086 /* diagonal portion of A */ 5087 ncols_d = ai[i+1] - ai[i]; 5088 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5089 /* off-diagonal portion of A */ 5090 for (j=jo; j<ncols_o; j++) { 5091 *cam++ = *ba++; bj++; 5092 } 5093 } 5094 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5095 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 #undef __FUNCT__ 5100 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5101 /*@C 5102 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5103 5104 Not Collective 5105 5106 Input Parameters: 5107 + A - the matrix 5108 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5109 - row, col - index sets of rows and columns to extract (or NULL) 5110 5111 Output Parameter: 5112 . 
A_loc - the local sequential matrix generated 5113 5114 Level: developer 5115 5116 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5117 5118 @*/ 5119 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5120 { 5121 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5122 PetscErrorCode ierr; 5123 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5124 IS isrowa,iscola; 5125 Mat *aloc; 5126 PetscBool match; 5127 5128 PetscFunctionBegin; 5129 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5130 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5131 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5132 if (!row) { 5133 start = A->rmap->rstart; end = A->rmap->rend; 5134 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5135 } else { 5136 isrowa = *row; 5137 } 5138 if (!col) { 5139 start = A->cmap->rstart; 5140 cmap = a->garray; 5141 nzA = a->A->cmap->n; 5142 nzB = a->B->cmap->n; 5143 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5144 ncols = 0; 5145 for (i=0; i<nzB; i++) { 5146 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5147 else break; 5148 } 5149 imark = i; 5150 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5151 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5152 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5153 } else { 5154 iscola = *col; 5155 } 5156 if (scall != MAT_INITIAL_MATRIX) { 5157 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5158 aloc[0] = *A_loc; 5159 } 5160 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5161 *A_loc = aloc[0]; 5162 ierr = PetscFree(aloc);CHKERRQ(ierr); 5163 if (!row) { 5164 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5165 } 5166 if (!col) { 5167 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5168 } 5169 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5170 PetscFunctionReturn(0); 5171 } 5172 5173 #undef __FUNCT__ 5174 #define __FUNCT__ "MatGetBrowsOfAcols" 5175 /*@C 5176 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5177 5178 Collective on Mat 5179 5180 Input Parameters: 5181 + A,B - the matrices in mpiaij format 5182 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5183 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5184 5185 Output Parameter: 5186 + rowb, colb - index sets of rows and columns of B to extract 5187 - B_seq - the sequential matrix generated 5188 5189 Level: developer 5190 5191 @*/ 5192 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5193 { 5194 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5195 PetscErrorCode ierr; 5196 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5197 IS isrowb,iscolb; 5198 Mat *bseq=NULL; 5199 5200 PetscFunctionBegin; 5201 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5202 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5203 } 5204 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5205 5206 if (scall == MAT_INITIAL_MATRIX) { 5207 start = A->cmap->rstart; 5208 cmap = a->garray; 5209 nzA = a->A->cmap->n; 5210 nzB = a->B->cmap->n; 5211 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5212 ncols = 0; 5213 for (i=0; i<nzB; i++) { /* row < local row index */ 5214 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5215 else break; 5216 } 5217 imark = i; 5218 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5219 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5220 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5221 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5222 } else { 5223 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5224 isrowb = *rowb; iscolb = *colb; 5225 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5226 bseq[0] = *B_seq; 5227 } 5228 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5229 *B_seq = bseq[0]; 5230 ierr = PetscFree(bseq);CHKERRQ(ierr); 5231 if (!rowb) { 5232 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5233 } else { 5234 *rowb = isrowb; 5235 } 5236 if (!colb) { 5237 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5238 } else { 5239 *colb = iscolb; 5240 } 5241 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5242 PetscFunctionReturn(0); 5243 } 5244 5245 #undef __FUNCT__ 5246 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5247 /* 5248 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5249 of the OFF-DIAGONAL portion of local A 5250 5251 Collective on Mat 5252 5253 Input Parameters: 5254 + A,B - the matrices in mpiaij format 5255 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5256 5257 Output Parameter: 5258 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5259 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5260 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5261 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5262 5263 Level: developer 5264 5265 */ 5266 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5267 { 5268 VecScatter_MPI_General *gen_to,*gen_from; 5269 PetscErrorCode ierr; 5270 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5271 Mat_SeqAIJ *b_oth; 5272 VecScatter ctx =a->Mvctx; 5273 MPI_Comm comm; 5274 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5275 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5276 PetscScalar *rvalues,*svalues; 5277 MatScalar *b_otha,*bufa,*bufA; 5278 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5279 MPI_Request *rwaits = NULL,*swaits = NULL; 5280 MPI_Status *sstatus,rstatus; 5281 PetscMPIInt jj; 5282 PetscInt *cols,sbs,rbs; 5283 PetscScalar *vals; 5284 5285 PetscFunctionBegin; 5286 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5287 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5288 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5289 } 5290 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5291 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5292 5293 gen_to = (VecScatter_MPI_General*)ctx->todata; 5294 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5295 rvalues = gen_from->values; /* holds the length of receiving row */ 5296 svalues = gen_to->values; /* holds the length of sending row */ 5297 nrecvs = gen_from->n; 5298 nsends = gen_to->n; 5299 5300 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5301 srow = gen_to->indices; /* local row index to be sent */ 5302 sstarts = gen_to->starts; 5303 sprocs = gen_to->procs; 5304 sstatus = gen_to->sstatus; 5305 sbs = gen_to->bs; 5306 rstarts = gen_from->starts; 5307 rprocs = gen_from->procs; 5308 rbs = gen_from->bs; 5309 5310 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5311 if (scall == MAT_INITIAL_MATRIX) { 5312 /* i-array */ 5313 /*---------*/ 5314 /* post receives */ 5315 for (i=0; i<nrecvs; i++) { 5316 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5317 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5318 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5319 } 5320 5321 /* pack the outgoing message */ 5322 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5323 5324 sstartsj[0] = 0; 5325 rstartsj[0] = 0; 5326 len = 0; /* total length of j or a array to be sent */ 5327 k = 0; 5328 for (i=0; i<nsends; i++) { 5329 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5330 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5331 for (j=0; j<nrows; j++) { 5332 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5333 for (l=0; l<sbs; l++) { 5334 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5335 5336 rowlen[j*sbs+l] = ncols; 5337 5338 len += ncols; 5339 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5340 } 5341 k++; 5342 } 5343 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5344 5345 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5346 } 5347 /* recvs and sends of i-array are completed */ 5348 i = nrecvs; 5349 while (i--) { 5350 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5351 } 5352 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5353 5354 /* allocate buffers for sending j and a arrays */ 5355 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5356 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5357 5358 /* create i-array of B_oth */ 5359 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5360 5361 b_othi[0] = 0; 5362 len = 0; /* total length of j or a array to be received */ 5363 k = 0; 5364 for (i=0; i<nrecvs; i++) { 5365 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5366 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5367 for (j=0; j<nrows; j++) { 5368 b_othi[k+1] = b_othi[k] + rowlen[j]; 5369 len += rowlen[j]; k++; 5370 } 5371 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5372 } 5373 5374 /* allocate space for j and a arrrays of B_oth */ 5375 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5376 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5377 5378 /* j-array */ 5379 /*---------*/ 5380 /* post receives of j-array */ 5381 for (i=0; i<nrecvs; i++) { 5382 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5383 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5384 } 5385 5386 /* pack the outgoing message j-array */ 5387 k = 0; 5388 for (i=0; i<nsends; i++) { 5389 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5390 bufJ = bufj+sstartsj[i]; 5391 for (j=0; j<nrows; j++) { 5392 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5393 for (ll=0; ll<sbs; ll++) { 5394 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5395 for (l=0; l<ncols; l++) { 5396 *bufJ++ = cols[l]; 5397 } 5398 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5399 } 5400 } 5401 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5402 } 5403 5404 /* recvs and sends of j-array are completed */ 5405 i = nrecvs; 5406 while (i--) { 5407 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5408 } 5409 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5410 } else if (scall == MAT_REUSE_MATRIX) { 5411 sstartsj = *startsj_s; 5412 rstartsj = *startsj_r; 5413 bufa = *bufa_ptr; 5414 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5415 b_otha = b_oth->a; 5416 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5417 5418 /* a-array */ 5419 /*---------*/ 5420 /* post receives of a-array */ 5421 for (i=0; i<nrecvs; i++) { 5422 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5423 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5424 } 5425 5426 /* pack the outgoing message a-array */ 5427 k = 0; 5428 for (i=0; i<nsends; i++) { 5429 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5430 bufA = bufa+sstartsj[i]; 5431 for (j=0; j<nrows; j++) { 5432 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5433 for (ll=0; ll<sbs; ll++) { 5434 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5435 for (l=0; l<ncols; l++) { 5436 *bufA++ = vals[l]; 5437 } 5438 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5439 } 5440 } 5441 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5442 } 5443 /* recvs and sends of a-array are completed */ 5444 i = nrecvs; 5445 while (i--) { 5446 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5447 } 5448 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5449 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5450 5451 if (scall == MAT_INITIAL_MATRIX) { 5452 /* put together the new matrix */ 5453 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5454 5455 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5456 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5457 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5458 b_oth->free_a = PETSC_TRUE; 5459 b_oth->free_ij = PETSC_TRUE; 5460 b_oth->nonew = 0; 5461 5462 ierr = PetscFree(bufj);CHKERRQ(ierr); 5463 if (!startsj_s || !bufa_ptr) { 5464 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5465 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5466 } else { 5467 *startsj_s = sstartsj; 5468 *startsj_r = rstartsj; 5469 *bufa_ptr = bufa; 5470 } 5471 } 5472 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5473 PetscFunctionReturn(0); 5474 } 5475 5476 #undef __FUNCT__ 5477 #define __FUNCT__ "MatGetCommunicationStructs" 5478 /*@C 5479 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5480 5481 Not Collective 5482 5483 Input Parameters: 5484 . A - The matrix in mpiaij format 5485 5486 Output Parameter: 5487 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5488 . 
colmap - A map from global column index to local index into lvec 5489 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5490 5491 Level: developer 5492 5493 @*/ 5494 #if defined(PETSC_USE_CTABLE) 5495 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5496 #else 5497 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5498 #endif 5499 { 5500 Mat_MPIAIJ *a; 5501 5502 PetscFunctionBegin; 5503 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5504 PetscValidPointer(lvec, 2); 5505 PetscValidPointer(colmap, 3); 5506 PetscValidPointer(multScatter, 4); 5507 a = (Mat_MPIAIJ*) A->data; 5508 if (lvec) *lvec = a->lvec; 5509 if (colmap) *colmap = a->colmap; 5510 if (multScatter) *multScatter = a->Mvctx; 5511 PetscFunctionReturn(0); 5512 } 5513 5514 EXTERN_C_BEGIN 5515 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5516 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5517 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5518 EXTERN_C_END 5519 5520 #undef __FUNCT__ 5521 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5522 /* 5523 Computes (B'*A')' since computing B*A directly is untenable 5524 5525 n p p 5526 ( ) ( ) ( ) 5527 m ( A ) * n ( B ) = m ( C ) 5528 ( ) ( ) ( ) 5529 5530 */ 5531 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5532 { 5533 PetscErrorCode ierr; 5534 Mat At,Bt,Ct; 5535 5536 PetscFunctionBegin; 5537 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5538 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5539 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5540 ierr = MatDestroy(&At);CHKERRQ(ierr); 5541 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5542 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5543 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5544 PetscFunctionReturn(0); 5545 } 5546 5547 #undef __FUNCT__ 5548 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5549 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5550 { 5551 PetscErrorCode ierr; 5552 PetscInt m=A->rmap->n,n=B->cmap->n; 5553 Mat Cmat; 5554 5555 PetscFunctionBegin; 5556 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5557 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5558 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5559 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5560 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5561 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5562 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5563 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5564 5565 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5566 5567 *C = Cmat; 5568 PetscFunctionReturn(0); 5569 } 5570 5571 /* ----------------------------------------------------------------*/ 5572 #undef __FUNCT__ 5573 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5574 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5575 { 5576 PetscErrorCode ierr; 5577 5578 PetscFunctionBegin; 5579 if (scall == MAT_INITIAL_MATRIX) { 5580 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5581 } 5582 ierr = 
MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5583 PetscFunctionReturn(0); 5584 } 5585 5586 EXTERN_C_BEGIN 5587 #if defined(PETSC_HAVE_MUMPS) 5588 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5589 #endif 5590 #if defined(PETSC_HAVE_PASTIX) 5591 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5592 #endif 5593 #if defined(PETSC_HAVE_SUPERLU_DIST) 5594 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5595 #endif 5596 #if defined(PETSC_HAVE_CLIQUE) 5597 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5598 #endif 5599 EXTERN_C_END 5600 5601 /*MC 5602 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5603 5604 Options Database Keys: 5605 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5606 5607 Level: beginner 5608 5609 .seealso: MatCreateAIJ() 5610 M*/ 5611 5612 EXTERN_C_BEGIN 5613 #undef __FUNCT__ 5614 #define __FUNCT__ "MatCreate_MPIAIJ" 5615 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5616 { 5617 Mat_MPIAIJ *b; 5618 PetscErrorCode ierr; 5619 PetscMPIInt size; 5620 5621 PetscFunctionBegin; 5622 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5623 5624 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5625 B->data = (void*)b; 5626 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5627 B->assembled = PETSC_FALSE; 5628 B->insertmode = NOT_SET_VALUES; 5629 b->size = size; 5630 5631 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5632 5633 /* build cache for off array entries formed */ 5634 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5635 5636 b->donotstash = PETSC_FALSE; 5637 b->colmap = 0; 5638 b->garray = 0; 5639 b->roworiented = PETSC_TRUE; 5640 5641 /* stuff used for matrix vector multiply */ 5642 b->lvec = NULL; 5643 b->Mvctx = NULL; 5644 5645 /* stuff for MatGetRow() */ 5646 b->rowindices = 0; 5647 b->rowvalues = 0; 5648 b->getrowactive = PETSC_FALSE; 5649 5650 /* flexible pointer used in CUSP/CUSPARSE classes */ 5651 b->spptr = NULL; 5652 5653 #if defined(PETSC_HAVE_MUMPS) 5654 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5655 "MatGetFactor_aij_mumps", 5656 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5657 #endif 5658 #if defined(PETSC_HAVE_PASTIX) 5659 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5660 "MatGetFactor_mpiaij_pastix", 5661 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5662 #endif 5663 #if defined(PETSC_HAVE_SUPERLU_DIST) 5664 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5665 "MatGetFactor_mpiaij_superlu_dist", 5666 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5667 #endif 5668 #if defined(PETSC_HAVE_CLIQUE) 5669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5670 "MatGetFactor_aij_clique", 5671 MatGetFactor_aij_clique);CHKERRQ(ierr); 5672 #endif 5673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5674 "MatStoreValues_MPIAIJ", 5675 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5677 "MatRetrieveValues_MPIAIJ", 5678 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5680 "MatGetDiagonalBlock_MPIAIJ", 5681 
MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5683 "MatIsTranspose_MPIAIJ", 5684 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5686 "MatMPIAIJSetPreallocation_MPIAIJ", 5687 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5689 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5690 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5692 "MatDiagonalScaleLocal_MPIAIJ", 5693 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5695 "MatConvert_MPIAIJ_MPIAIJPERM", 5696 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5697 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5698 "MatConvert_MPIAIJ_MPIAIJCRL", 5699 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5701 "MatConvert_MPIAIJ_MPISBAIJ", 5702 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5703 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5704 "MatMatMult_MPIDense_MPIAIJ", 5705 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5706 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5707 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5708 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5709 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5710 "MatMatMultNumeric_MPIDense_MPIAIJ", 5711 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5712 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5713 PetscFunctionReturn(0); 5714 } 5715 EXTERN_C_END 5716 5717 #undef __FUNCT__ 5718 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5719 /*@ 5720 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5721 and "off-diagonal" part of the matrix in CSR format. 5722 5723 Collective on MPI_Comm 5724 5725 Input Parameters: 5726 + comm - MPI communicator 5727 . m - number of local rows (Cannot be PETSC_DECIDE) 5728 . n - This value should be the same as the local size used in creating the 5729 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5730 calculated if N is given) For square matrices n is almost always m. 5731 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5732 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5733 . i - row indices for "diagonal" portion of matrix 5734 . j - column indices 5735 . a - matrix values 5736 . oi - row indices for "off-diagonal" portion of matrix 5737 . oj - column indices 5738 - oa - matrix values 5739 5740 Output Parameter: 5741 . mat - the matrix 5742 5743 Level: advanced 5744 5745 Notes: 5746 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5747 must free the arrays once the matrix has been destroyed and not before. 5748 5749 The i and j indices are 0 based 5750 5751 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5752 5753 This sets local rows and cannot be used to set off-processor values. 
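
    For example, proc0 of the 8 x 8 matrix used to illustrate MatCreateAIJ() (rows 0-2) could
    be set up with the arrays in the sketch below. This is only a sketch, and it assumes (as
    the calls to MatCreateSeqAIJWithArrays() in the implementation below suggest) that j holds
    column indices local to the "diagonal" block while oj holds global column indices; the
    arrays must stay allocated for the lifetime of the matrix.

.vb
     PetscInt    i[]  = {0,2,4,6};           /* diagonal block: CSR row offsets       */
     PetscInt    j[]  = {0,1, 1,2, 0,2};     /* diagonal block: column indices        */
     PetscScalar a[]  = {1,2, 5,6, 9,10};    /* diagonal block: values                */
     PetscInt    oi[] = {0,2,4,6};           /* off-diagonal block: CSR row offsets   */
     PetscInt    oj[] = {4,7, 3,6, 3,6};     /* off-diagonal block: global columns    */
     PetscScalar oa[] = {3,4, 7,8, 11,12};   /* off-diagonal block: values            */
     Mat         A;

     MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,3,3,8,8,i,j,a,oi,oj,oa,&A);
     /* proc1 and proc2 participate in the same collective call with their own arrays */
.ve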
5754 5755 You cannot later use MatSetValues() to change values in this matrix. 5756 5757 .keywords: matrix, aij, compressed row, sparse, parallel 5758 5759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5760 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5761 @*/ 5762 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5763 { 5764 PetscErrorCode ierr; 5765 Mat_MPIAIJ *maij; 5766 5767 PetscFunctionBegin; 5768 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5769 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5770 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5771 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5772 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5773 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5774 maij = (Mat_MPIAIJ*) (*mat)->data; 5775 5776 maij->donotstash = PETSC_TRUE; 5777 (*mat)->preallocated = PETSC_TRUE; 5778 5779 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5780 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5781 5782 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5783 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5784 5785 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5786 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5787 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5788 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5789 5790 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5791 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5792 PetscFunctionReturn(0); 5793 } 5794 5795 /* 5796 Special version for direct calls from Fortran 5797 */ 5798 #include <petsc-private/fortranimpl.h> 5799 5800 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5801 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5802 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5803 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5804 #endif 5805 5806 /* Change these macros so can be used in void function */ 5807 #undef CHKERRQ 5808 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5809 #undef SETERRQ2 5810 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5811 #undef SETERRQ3 5812 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5813 #undef SETERRQ 5814 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5815 5816 EXTERN_C_BEGIN 5817 #undef __FUNCT__ 5818 #define __FUNCT__ "matsetvaluesmpiaij_" 5819 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5820 { 5821 Mat mat = *mmat; 5822 PetscInt m = *mm, n = *mn; 5823 InsertMode addv = *maddv; 5824 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5825 PetscScalar value; 5826 PetscErrorCode ierr; 5827 5828 MatCheckPreallocated(mat,1); 5829 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5830 5831 #if defined(PETSC_USE_DEBUG) 5832 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5833 #endif 5834 { 5835 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5836 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5837 PetscBool roworiented = aij->roworiented; 5838 5839 /* Some Variables required in the macro */ 5840 Mat A = aij->A; 5841 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5842 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5843 MatScalar *aa = a->a; 5844 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5845 Mat B = aij->B; 5846 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5847 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5848 MatScalar *ba = b->a; 5849 5850 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5851 PetscInt nonew = a->nonew; 5852 MatScalar *ap1,*ap2; 5853 5854 PetscFunctionBegin; 5855 for (i=0; i<m; i++) { 5856 if (im[i] < 0) continue; 5857 #if defined(PETSC_USE_DEBUG) 5858 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5859 #endif 5860 if (im[i] >= rstart && im[i] < rend) { 5861 row = im[i] - rstart; 5862 lastcol1 = -1; 5863 rp1 = aj + ai[row]; 5864 ap1 = aa + ai[row]; 5865 rmax1 = aimax[row]; 5866 nrow1 = ailen[row]; 5867 low1 = 0; 5868 high1 = nrow1; 5869 lastcol2 = -1; 5870 rp2 = bj + bi[row]; 5871 ap2 = ba + bi[row]; 5872 rmax2 = bimax[row]; 5873 nrow2 = bilen[row]; 5874 low2 = 0; 5875 high2 = nrow2; 5876 5877 for (j=0; j<n; j++) { 5878 if (roworiented) value = v[i*n+j]; 5879 else value = v[i+j*m]; 5880 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5881 if (in[j] >= cstart && in[j] < cend) { 5882 col = in[j] - cstart; 5883 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5884 } else if (in[j] < 0) continue; 5885 #if defined(PETSC_USE_DEBUG) 5886 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5887 #endif 5888 else { 5889 if (mat->was_assembled) { 5890 if (!aij->colmap) { 5891 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5892 } 5893 #if defined(PETSC_USE_CTABLE) 5894 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5895 col--; 5896 #else 5897 col = aij->colmap[in[j]] - 1; 5898 #endif 5899 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5900 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5901 col = in[j]; 5902 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5903 B = aij->B; 5904 b = (Mat_SeqAIJ*)B->data; 5905 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5906 rp2 = bj + bi[row]; 5907 ap2 = ba + bi[row]; 5908 rmax2 = bimax[row]; 5909 nrow2 = bilen[row]; 5910 low2 = 0; 5911 high2 = nrow2; 5912 bm = aij->B->rmap->n; 5913 ba = b->a; 5914 } 5915 } else col = in[j]; 5916 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5917 } 5918 } 5919 } else if (!aij->donotstash) { 5920 if (roworiented) { 5921 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5922 } else { 5923 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5924 } 5925 } 5926 } 5927 } 5928 PetscFunctionReturnVoid(); 5929 } 5930 EXTERN_C_END 5931 5932