#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
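/*
   Example (an illustrative sketch, not part of the original source): creating an "aij"
   matrix and calling both preallocation routines as the manual pages above recommend,
   so the same code works whether the communicator has one process or many.  The global
   size and the per-row nonzero estimates used here are placeholders.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Only the call matching the actual type takes effect; the other is ignored.
*/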
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
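/*
   Example (sketch, assuming A is an assembled parallel AIJ matrix): computing the
   largest absolute entry of every column with the routine above.  Each process
   receives the complete length-N result, because the routine finishes with an
   MPI_Allreduce() over the work array.

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_INFINITY,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/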
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the numbers of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the numbers of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr  = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
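/*
   Sketch of the ownership-range computation used above (ComputeRowners is a
   hypothetical helper written only for illustration): every process contributes
   its local row count m through MPI_Allgather(), and a running sum turns the
   counts into the ranges [rowners[rank],rowners[rank+1]).  rowners[1] needs no
   update in the loop because rowners[0] is zero.

      PetscErrorCode ComputeRowners(MPI_Comm comm,PetscInt m,PetscInt rowners[])
      {
        PetscErrorCode ierr;
        PetscMPIInt    size;
        PetscInt       i;
        ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
        ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
        rowners[0] = 0;
        for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
        return 0;
      }
*/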
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it it is not scalable (each
  process has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
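/*
   Sketch of the search strategy the two macros above share (FindColumn is a
   stand-alone illustration, not used anywhere in this file): bisection narrows
   the window while it holds more than 5 entries, then a short linear scan
   finishes.  The caller caches the last column inserted (lastcol1/lastcol2
   above) so that monotonically increasing insertions restart from the previous
   window instead of from the beginning of the row.  The returned index is
   either the match or the insertion point; the caller distinguishes the two by
   testing rp[i] == col.

      static PetscInt FindColumn(const PetscInt rp[],PetscInt low,PetscInt high,PetscInt col)
      {
        PetscInt t,i;
        while (high-low > 5) {
          t = (low+high)/2;
          if (rp[t] > col) high = t;
          else             low  = t;
        }
        for (i=low; i<high; i++) {
          if (rp[i] >= col) return i;
        }
        return high;
      }
*/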
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
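/*
   Example (sketch): replacing every stored value of one locally owned global row
   in a single call.  The values must be ordered exactly as the copies above
   store them: off-diagonal entries left of the diagonal block first, then the
   diagonal block, then the remaining off-diagonal entries.  Here v is assumed
   to hold one value per stored nonzero of the row.

      ierr = MatSetValuesRow(A,row,v);CHKERRQ(ierr);
*/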
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
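/*
   Example (sketch): entries whose global row is not owned by the calling process
   are legal in MatSetValues(); the code above simply caches them in mat->stash
   and they are communicated to their owners during assembly.  Every process must
   still participate in the (collective) assembly calls.

      ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/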
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
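/*
   Sketch of the consistency check above (stand-alone illustration with made-up
   local values): each process contributes its insert mode to a bitwise-OR
   reduction.  If some processes used ADD_VALUES and others INSERT_VALUES, the
   reduced value is the OR of the two distinct enum values, which matches
   neither, so the error is raised collectively.

      InsertMode mine = INSERT_VALUES,all;
      ierr = MPI_Allreduce((PetscEnum*)&mine,(PetscEnum*)&all,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
      if (all == (ADD_VALUES|INSERT_VALUES)) SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
*/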
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then no process
     disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
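/*
   Example (sketch): a typical use of MatZeroRows(), implemented below for MPIAIJ,
   to impose Dirichlet boundary conditions.  The listed global rows (placeholders
   here) are zeroed, diag is placed on their diagonal, and when the optional
   vectors are passed the right-hand side is fixed up as b[row] = diag*x[row].

      PetscInt rows[] = {0,5,9};
      ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);
*/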
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows.  If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, causing many mallocs and
     slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
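/*
   Example (sketch): MatZeroRowsColumns(), implemented below, additionally zeros
   the matching columns, which preserves symmetry; with x and b supplied, each
   eliminated coefficient a(i,row) contributes -a(i,row)*x[row] to b[i], exactly
   as the off-diagonal loops below compute.

      ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/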
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
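/*
   Example (sketch): the Mult routines above overlap communication with
   computation: the scatter of the off-process entries of xx is started, the
   product with the local diagonal block proceeds while messages are in flight,
   and only afterwards is the off-diagonal block applied.  From the user side,
   creating the vectors with MatGetVecs() guarantees the layout compatibility
   that MatMult_MPIAIJ() checks with VecGetLocalSize().

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
*/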
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
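/*
   Example (sketch): querying whether B equals the transpose of A to a given
   tolerance.  The routine above first compares the diagonal blocks, and only
   when those pass does it gather the off-diagonal pieces with
   MatGetSubMatrices() for the expensive part of the test.

      PetscBool flg;
      ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
*/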
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
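/*
   Example (sketch): saving a parallel matrix in PETSc binary format; for an
   MPIAIJ matrix MatView() dispatches to MatView_MPIAIJ_Binary() below, where
   rank 0 writes the header, row lengths, column indices, and values, receiving
   the other ranks' pieces under flow control.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/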
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz;   /* 0th process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }

  PetscFunctionReturn(0);
}
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
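/*
   Usage sketch (illustration only): the PETSC_VIEWER_ASCII_INFO_DETAIL branch
   above is reached by pushing that format onto an ASCII viewer before calling
   MatView(), for example

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Full ASCII output of the entries goes through the gather-to-process-0 path
   above, and is therefore disabled beyond 1024 rows unless
   -mat_ascii_output_large is given.
*/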
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
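/*
   Usage sketch (illustration only; assumes a KSP ksp has already been created
   with the matrix attached): MatSOR_MPIAIJ() is normally reached through PCSOR,
   whose default in parallel is the processor-local sweeps implemented above:

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = PCSORSetOmega(pc,1.5);CHKERRQ(ierr);
       ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   Each outer iteration scatters the current x into mat->lvec, forms
   bb1 = bb - B*x with the off-diagonal block, and runs the requested sweep on
   the diagonal block only; a true global parallel sweep is not supported.
*/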
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = PETSC_NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
  ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
    ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
    rowlen = bi[i+1] - bi[i];
    for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
    ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
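/*
   Usage sketch (illustration only): the options handled by MatSetOption_MPIAIJ()
   above are set by the caller before assembly or solves, e.g.

       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   Note that, as coded above, MAT_SPD also marks the matrix symmetric and
   structurally symmetric, while MAT_SYMMETRIC, MAT_HERMITIAN and friends are
   forwarded only to the sequential diagonal block a->A.
*/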
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
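/*
   Usage sketch (illustration only; assumes an assembled MPIAIJ matrix mat):
   MatGetRow_MPIAIJ() above merges one row of the diagonal block A and the
   off-diagonal block B into a single row, sorted by global column index; only
   locally owned rows may be requested, and each MatGetRow() must be paired
   with MatRestoreRow() before the next row is fetched:

       PetscInt          rstart,rend,row,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/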
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt    *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode *oloc;
    PetscSF     sf;

    ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
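/*
   Usage sketch (illustration only): MatTranspose_MPIAIJ() above supports both
   out-of-place and, for square matrices, in-place transposition:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       ierr = MatDestroy(&At);CHKERRQ(ierr);

       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);

   The MAT_INITIAL_MATRIX path precomputes d_nnz and o_nnz with a PetscSF
   reduction over the column map, so the transposed matrix is fully
   preallocated before any MatSetValues() call.
*/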
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices. */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       i,m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
  const PetscInt *xi = x->i,*yi = y->i;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
    const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                 /* Point in X */
      for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
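/*
   Usage sketch (illustration only): the three branches of MatAXPY_MPIAIJ()
   above correspond to the MatStructure argument of MatAXPY(); the cheapest is
   SAME_NONZERO_PATTERN, which reduces to two BLAS axpy calls on the stored
   values:

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
       ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
       ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   In the DIFFERENT_NONZERO_PATTERN case Y is rebuilt with the preallocation
   computed by MatAXPYGetPreallocation_MPIAIJ() and swapped in through
   MatHeaderReplace().
*/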
#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity,col_identity;
  PetscContainer c;
  PetscInt       m,n,M,N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type *lgraph_p    = new lgraph_type(petsc::num_global_vertices(A),pg,petsc::matrix_distribution(A,pg));
  lgraph_type &level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type &graph(level_graph.graph);

  petsc::read_matrix(A,graph,get(boost::edge_weight,graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(fact,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(((PetscObject)A)->comm,&c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c,lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)fact,"graph",(PetscObject)c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A,Vec b,Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"graph",(PetscObject*)&c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c,(void**)&lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b,x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x,&array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x,&sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b,&array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b,&sb);CHKERRQ(ierr);

  lgraph_type &level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type &graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar,1> array_ref_type;
  array_ref_type ref_b(array_b,boost::extents[num_vertices(graph)]),
                 ref_x(array_x,boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type,boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(),get(boost::vertex_index,graph)),
               vector_x(ref_x.begin(),get(boost::vertex_index,graph));

  ilu_set_solve(*lgraph_p,vector_b,vector_x);

  PetscFunctionReturn(0);
}
#endif

typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;
  PetscMPIInt    *send_rank,*recv_rank;
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
  PetscScalar    *sbuf_a,**rbuf_a;
  PetscErrorCode (*Destroy)(Mat);
} Mat_Redundant;

#undef __FUNCT__
#define __FUNCT__ "PetscContainerDestroy_MatRedundant"
PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = (Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++) {
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund = PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
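/*
   Usage sketch (illustration only; nsubcomm, subcomm and mlocal_sub as in the
   routine below): Mat_Redundant caches the ranks and message buffers so that a
   second call with MAT_REUSE_MATRIX only exchanges numerical values:

       Mat red;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
       ierr = MatDestroy(&red);CHKERRQ(ierr);

   The container attached to the redundant matrix is torn down by
   MatDestroy_MatRedundant() above, which restores and then invokes the
   original destroy routine.
*/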
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends = 0,nrecvs = 0,i,rownz_max = 0;
  PetscMPIInt    *send_rank = PETSC_NULL,*recv_rank = PETSC_NULL;
  PetscInt       *rowrange = mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A = aij->A,B = aij->B,C = *matredundant;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal = a->nz + b->nz;
  PetscInt       j,cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart = mat->rmap->rstart,rend = mat->rmap->rend,*bmap = aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1 = PETSC_NULL,*s_waits2 = PETSC_NULL,*s_waits3 = PETSC_NULL,
                 *r_waits1 = PETSC_NULL,*r_waits2 = PETSC_NULL,*r_waits3 = PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz = PETSC_NULL,*rbuf_nz = PETSC_NULL,count;
  PetscInt       **rbuf_j = PETSC_NULL;
  PetscScalar    **rbuf_a = PETSC_NULL;
  Mat_Redundant  *redund = PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
    if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank;
    recv_rank = redund->recv_rank;
    sbuf_nz   = redund->sbuf_nz;
    rbuf_nz   = redund->rbuf_nz;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++) { /* i = rank */
      if (subrank == i/nsubcomm && rank != i) { /* my subrank == other's subrank */
        send_rank[nsends]   = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover) { /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover) {
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank == subsize-1) { /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++) {
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX) {
    rownz_max = 0;
    rptr      = sbuf_j;
    cols      = sbuf_j + rend-rstart + 1;
    vals      = sbuf_a;
    rptr[0]   = 0;
    for (i=0; i<rend-rstart; i++) {
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++) {
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i];
      aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++) {
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++) {
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr   = rbuf_j[imdex];
    cols   = rbuf_j[imdex] + rend-rstart + 1;
    vals   = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
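/*
   Usage sketch (illustration only): the row max/min kernels above take the
   blockwise result of the diagonal part a->A and then merge in the
   off-diagonal part a->B, translating its compressed column indices through
   garray.  For example, the largest absolute entry of each local row together
   with its global column index:

       Vec      rowmax;
       PetscInt m,*loc;
       ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
       ierr = PetscFree(loc);CHKERRQ(ierr);
       ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/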
A->cmap->rstart; 2987 PetscInt *cmap = mat->garray; 2988 PetscInt *diagIdx, *offdiagIdx; 2989 Vec diagV, offdiagV; 2990 PetscScalar *a, *diagA, *offdiagA; 2991 PetscInt r; 2992 PetscErrorCode ierr; 2993 2994 PetscFunctionBegin; 2995 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2996 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2997 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2998 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2999 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3000 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3001 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3002 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3003 for (r = 0; r < n; ++r) { 3004 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3005 a[r] = diagA[r]; 3006 idx[r] = cstart + diagIdx[r]; 3007 } else { 3008 a[r] = offdiagA[r]; 3009 idx[r] = cmap[offdiagIdx[r]]; 3010 } 3011 } 3012 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3013 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3014 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3015 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3016 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3017 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3023 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3024 { 3025 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3026 PetscInt n = A->rmap->n; 3027 PetscInt cstart = A->cmap->rstart; 3028 PetscInt *cmap = mat->garray; 3029 PetscInt *diagIdx, *offdiagIdx; 3030 Vec diagV, offdiagV; 3031 PetscScalar *a, *diagA, *offdiagA; 3032 PetscInt r; 3033 PetscErrorCode ierr; 3034 3035 PetscFunctionBegin; 3036 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3037 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3038 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3039 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3040 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3041 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3042 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3043 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3044 for (r = 0; r < n; ++r) { 3045 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3046 a[r] = diagA[r]; 3047 idx[r] = cstart + diagIdx[r]; 3048 } else { 3049 a[r] = offdiagA[r]; 3050 idx[r] = cmap[offdiagIdx[r]]; 3051 } 3052 } 3053 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3054 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3055 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3056 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3057 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3058 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3059 PetscFunctionReturn(0); 3060 } 3061 3062 #undef __FUNCT__ 3063 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3064 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3065 { 3066 PetscErrorCode ierr; 3067 Mat *dummy; 3068 3069 PetscFunctionBegin; 3070 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3071 *newmat = *dummy; 3072 ierr = PetscFree(dummy);CHKERRQ(ierr); 3073 PetscFunctionReturn(0); 3074 } 3075 3076 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3077 3078 #undef __FUNCT__ 3079 #define
__FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3080 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3081 { 3082 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3083 PetscErrorCode ierr; 3084 3085 PetscFunctionBegin; 3086 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3087 PetscFunctionReturn(0); 3088 } 3089 3090 #undef __FUNCT__ 3091 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3092 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3093 { 3094 PetscErrorCode ierr; 3095 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3096 3097 PetscFunctionBegin; 3098 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3099 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3100 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3101 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3102 PetscFunctionReturn(0); 3103 } 3104 3105 /* -------------------------------------------------------------------*/ 3106 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3107 MatGetRow_MPIAIJ, 3108 MatRestoreRow_MPIAIJ, 3109 MatMult_MPIAIJ, 3110 /* 4*/ MatMultAdd_MPIAIJ, 3111 MatMultTranspose_MPIAIJ, 3112 MatMultTransposeAdd_MPIAIJ, 3113 #ifdef PETSC_HAVE_PBGL 3114 MatSolve_MPIAIJ, 3115 #else 3116 0, 3117 #endif 3118 0, 3119 0, 3120 /*10*/ 0, 3121 0, 3122 0, 3123 MatSOR_MPIAIJ, 3124 MatTranspose_MPIAIJ, 3125 /*15*/ MatGetInfo_MPIAIJ, 3126 MatEqual_MPIAIJ, 3127 MatGetDiagonal_MPIAIJ, 3128 MatDiagonalScale_MPIAIJ, 3129 MatNorm_MPIAIJ, 3130 /*20*/ MatAssemblyBegin_MPIAIJ, 3131 MatAssemblyEnd_MPIAIJ, 3132 MatSetOption_MPIAIJ, 3133 MatZeroEntries_MPIAIJ, 3134 /*24*/ MatZeroRows_MPIAIJ, 3135 0, 3136 #ifdef PETSC_HAVE_PBGL 3137 0, 3138 #else 3139 0, 3140 #endif 3141 0, 3142 0, 3143 /*29*/ MatSetUp_MPIAIJ, 3144 #ifdef PETSC_HAVE_PBGL 3145 0, 3146 #else 3147 0, 3148 #endif 3149 0, 3150 0, 3151 0, 3152 /*34*/ MatDuplicate_MPIAIJ, 3153 0, 3154 0, 3155 0, 3156 0, 3157 /*39*/ MatAXPY_MPIAIJ, 3158 MatGetSubMatrices_MPIAIJ, 3159 MatIncreaseOverlap_MPIAIJ, 3160 MatGetValues_MPIAIJ, 3161 MatCopy_MPIAIJ, 3162 /*44*/ MatGetRowMax_MPIAIJ, 3163 MatScale_MPIAIJ, 3164 0, 3165 0, 3166 MatZeroRowsColumns_MPIAIJ, 3167 /*49*/ MatSetRandom_MPIAIJ, 3168 0, 3169 0, 3170 0, 3171 0, 3172 /*54*/ MatFDColoringCreate_MPIAIJ, 3173 0, 3174 MatSetUnfactored_MPIAIJ, 3175 MatPermute_MPIAIJ, 3176 0, 3177 /*59*/ MatGetSubMatrix_MPIAIJ, 3178 MatDestroy_MPIAIJ, 3179 MatView_MPIAIJ, 3180 0, 3181 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3182 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3183 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3184 0, 3185 0, 3186 0, 3187 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3188 MatGetRowMinAbs_MPIAIJ, 3189 0, 3190 MatSetColoring_MPIAIJ, 3191 0, 3192 MatSetValuesAdifor_MPIAIJ, 3193 /*75*/ MatFDColoringApply_AIJ, 3194 0, 3195 0, 3196 0, 3197 MatFindZeroDiagonals_MPIAIJ, 3198 /*80*/ 0, 3199 0, 3200 0, 3201 /*83*/ MatLoad_MPIAIJ, 3202 0, 3203 0, 3204 0, 3205 0, 3206 0, 3207 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3208 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3209 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3210 MatPtAP_MPIAIJ_MPIAIJ, 3211 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3212 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3213 0, 3214 0, 3215 0, 3216 0, 3217 /*99*/ 0, 3218 0, 3219 0, 3220 MatConjugate_MPIAIJ, 3221 0, 3222 /*104*/MatSetValuesRow_MPIAIJ, 3223 MatRealPart_MPIAIJ, 3224 MatImaginaryPart_MPIAIJ, 3225 0, 3226 0, 3227 /*109*/0, 3228 MatGetRedundantMatrix_MPIAIJ, 3229 MatGetRowMin_MPIAIJ, 3230 0, 3231 0, 3232 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3233 0, 3234 0, 3235 0, 3236 0, 3237 /*119*/0, 3238 0, 3239 0, 3240 0, 3241 
MatGetMultiProcBlock_MPIAIJ, 3242 /*124*/MatFindNonzeroRows_MPIAIJ, 3243 MatGetColumnNorms_MPIAIJ, 3244 MatInvertBlockDiagonal_MPIAIJ, 3245 0, 3246 MatGetSubMatricesParallel_MPIAIJ, 3247 /*129*/0, 3248 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3249 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3250 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3251 0, 3252 /*134*/0, 3253 0, 3254 0, 3255 0, 3256 0 3257 }; 3258 3259 /* ----------------------------------------------------------------------------------------*/ 3260 3261 EXTERN_C_BEGIN 3262 #undef __FUNCT__ 3263 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3264 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3265 { 3266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3271 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3272 PetscFunctionReturn(0); 3273 } 3274 EXTERN_C_END 3275 3276 EXTERN_C_BEGIN 3277 #undef __FUNCT__ 3278 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3279 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3280 { 3281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3282 PetscErrorCode ierr; 3283 3284 PetscFunctionBegin; 3285 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3286 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } 3289 EXTERN_C_END 3290 3291 EXTERN_C_BEGIN 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3294 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3295 { 3296 Mat_MPIAIJ *b; 3297 PetscErrorCode ierr; 3298 PetscInt i; 3299 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3300 3301 PetscFunctionBegin; 3302 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3303 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3304 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3305 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3306 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3307 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3308 3309 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3310 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3311 if (d_nnz) { 3312 for (i=0; i<B->rmap->n; i++) { 3313 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3314 } 3315 } 3316 if (o_nnz) { 3317 for (i=0; i<B->rmap->n; i++) { 3318 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3319 } 3320 } 3321 b = (Mat_MPIAIJ*)B->data; 3322 3323 if (!B->preallocated) { 3324 /* Explicitly create 2 MATSEQAIJ matrices. 
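       b->A will hold the "diagonal" block (local rows by local columns) and b->B the
       "off-diagonal" block (local rows by all global columns; its column space is
       compacted later through garray). Note that both are created on PETSC_COMM_SELF.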
*/ 3325 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3326 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3327 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3328 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3330 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3331 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3332 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3333 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3334 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3335 } 3336 3337 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3338 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3339 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3340 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3341 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3342 B->preallocated = PETSC_TRUE; 3343 PetscFunctionReturn(0); 3344 } 3345 EXTERN_C_END 3346 3347 #undef __FUNCT__ 3348 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3349 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3350 { 3351 Mat mat; 3352 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3353 PetscErrorCode ierr; 3354 3355 PetscFunctionBegin; 3356 *newmat = 0; 3357 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3358 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3359 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3360 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3361 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3362 a = (Mat_MPIAIJ*)mat->data; 3363 3364 mat->factortype = matin->factortype; 3365 mat->rmap->bs = matin->rmap->bs; 3366 mat->cmap->bs = matin->cmap->bs; 3367 mat->assembled = PETSC_TRUE; 3368 mat->insertmode = NOT_SET_VALUES; 3369 mat->preallocated = PETSC_TRUE; 3370 3371 a->size = oldmat->size; 3372 a->rank = oldmat->rank; 3373 a->donotstash = oldmat->donotstash; 3374 a->roworiented = oldmat->roworiented; 3375 a->rowindices = 0; 3376 a->rowvalues = 0; 3377 a->getrowactive = PETSC_FALSE; 3378 3379 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3380 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3381 3382 if (oldmat->colmap) { 3383 #if defined (PETSC_USE_CTABLE) 3384 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3385 #else 3386 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3387 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3388 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3389 #endif 3390 } else a->colmap = 0; 3391 if (oldmat->garray) { 3392 PetscInt len; 3393 len = oldmat->B->cmap->n; 3394 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3395 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3396 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3397 } else a->garray = 0; 3398 3399 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3400 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3401 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3402 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3403 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3404 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3405 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3406 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3407 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3408 *newmat = mat; 3409 PetscFunctionReturn(0); 3410 } 3411 3412 3413 3414 #undef __FUNCT__ 3415 #define __FUNCT__ "MatLoad_MPIAIJ" 3416 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3417 { 3418 PetscScalar *vals,*svals; 3419 MPI_Comm comm = ((PetscObject)viewer)->comm; 3420 PetscErrorCode ierr; 3421 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3422 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3423 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3424 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3425 PetscInt cend,cstart,n,*rowners,sizesset=1; 3426 int fd; 3427 PetscInt bs = 1; 3428 3429 PetscFunctionBegin; 3430 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3431 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3432 if (!rank) { 3433 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3434 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3435 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 3436 } 3437 3438 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 3439 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3440 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3441 3442 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3443 3444 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3445 M = header[1]; N = header[2]; 3446 /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */ 3447 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3448 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3449 3450 /* If global sizes are set, check that they are consistent with those given in the file */ 3451 if (sizesset) { 3452 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3453 } 3454 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows); 3455 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3456 3457 /* determine ownership of all (block) rows */ 3458 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3459 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3460 else m = newMat->rmap->n; /* Set by user */ 3461 3462 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3463 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3464 3465 /* First process needs enough room for process with most rows */ 3466 if (!rank) { 3467 mmax = rowners[1]; 3468 for (i=2; i<=size; i++) { 3469
mmax = PetscMax(mmax, rowners[i]); 3470 } 3471 } else mmax = m; 3472 3473 rowners[0] = 0; 3474 for (i=2; i<=size; i++) { 3475 rowners[i] += rowners[i-1]; 3476 } 3477 rstart = rowners[rank]; 3478 rend = rowners[rank+1]; 3479 3480 /* distribute row lengths to all processors */ 3481 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3482 if (!rank) { 3483 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3484 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3485 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3486 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3487 for (j=0; j<m; j++) { 3488 procsnz[0] += ourlens[j]; 3489 } 3490 for (i=1; i<size; i++) { 3491 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3492 /* calculate the number of nonzeros on each processor */ 3493 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3494 procsnz[i] += rowlengths[j]; 3495 } 3496 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3497 } 3498 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3499 } else { 3500 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3501 } 3502 3503 if (!rank) { 3504 /* determine max buffer needed and allocate it */ 3505 maxnz = 0; 3506 for (i=0; i<size; i++) { 3507 maxnz = PetscMax(maxnz,procsnz[i]); 3508 } 3509 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3510 3511 /* read in my part of the matrix column indices */ 3512 nz = procsnz[0]; 3513 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3514 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3515 3516 /* read in everyone else's and ship them off */ 3517 for (i=1; i<size; i++) { 3518 nz = procsnz[i]; 3519 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3520 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3521 } 3522 ierr = PetscFree(cols);CHKERRQ(ierr); 3523 } else { 3524 /* determine buffer space needed for message */ 3525 nz = 0; 3526 for (i=0; i<m; i++) { 3527 nz += ourlens[i]; 3528 } 3529 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3530 3531 /* receive message of column indices */ 3532 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3533 } 3534 3535 /* determine column ownership if matrix is not square */ 3536 if (N != M) { 3537 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3538 else n = newMat->cmap->n; 3539 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3540 cstart = cend - n; 3541 } else { 3542 cstart = rstart; 3543 cend = rend; 3544 n = cend - cstart; 3545 } 3546 3547 /* loop over local rows, determining number of off-diagonal entries */ 3548 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3549 jj = 0; 3550 for (i=0; i<m; i++) { 3551 for (j=0; j<ourlens[i]; j++) { 3552 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3553 jj++; 3554 } 3555 } 3556 3557 for (i=0; i<m; i++) { 3558 ourlens[i] -= offlens[i]; 3559 } 3560 if (!sizesset) { 3561 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3562 } 3563 3564 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3565 3566 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3567 3568 for (i=0; i<m; i++) { 3569 ourlens[i] += offlens[i]; 3570 } 3571 3572 if (!rank) { 3573 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3574 3575 /* read in my part of the matrix numerical values */
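    /* (rank 0 then reads each remaining process's block of values into the same buffer and forwards it with MPIULong_Send below) */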
3576 nz = procsnz[0]; 3577 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3578 3579 /* insert into matrix */ 3580 jj = rstart; 3581 smycols = mycols; 3582 svals = vals; 3583 for (i=0; i<m; i++) { 3584 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3585 smycols += ourlens[i]; 3586 svals += ourlens[i]; 3587 jj++; 3588 } 3589 3590 /* read in the other processors' values and ship them out */ 3591 for (i=1; i<size; i++) { 3592 nz = procsnz[i]; 3593 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3594 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3595 } 3596 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3597 } else { 3598 /* receive numeric values */ 3599 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3600 3601 /* receive message of values */ 3602 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3603 3604 /* insert into matrix */ 3605 jj = rstart; 3606 smycols = mycols; 3607 svals = vals; 3608 for (i=0; i<m; i++) { 3609 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3610 smycols += ourlens[i]; 3611 svals += ourlens[i]; 3612 jj++; 3613 } 3614 } 3615 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3616 ierr = PetscFree(vals);CHKERRQ(ierr); 3617 ierr = PetscFree(mycols);CHKERRQ(ierr); 3618 ierr = PetscFree(rowners);CHKERRQ(ierr); 3619 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3620 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3621 3622 PetscFunctionReturn(0); 3623 } 3624 3625 #undef __FUNCT__ 3626 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3627 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3628 { 3629 PetscErrorCode ierr; 3630 IS iscol_local; 3631 PetscInt csize; 3632 3633 PetscFunctionBegin; 3634 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3635 if (call == MAT_REUSE_MATRIX) { 3636 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3637 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3638 } else { 3639 PetscInt cbs; 3640 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3641 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3642 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3643 } 3644 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3645 if (call == MAT_INITIAL_MATRIX) { 3646 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3647 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3648 } 3649 PetscFunctionReturn(0); 3650 } 3651 3652 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3653 #undef __FUNCT__ 3654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3655 /* 3656 Not great since it makes two copies of the submatrix: first a SeqAIJ 3657 locally, then the end result by concatenating the local matrices. 3658 Writing it directly would be much like MatGetSubMatrices_MPIAIJ(). 3659 3660 Note: This requires a sequential iscol with all indices.
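    (The caller, MatGetSubMatrix_MPIAIJ() above, builds such an iscol with ISAllGather().)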
3661 */ 3662 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3663 { 3664 PetscErrorCode ierr; 3665 PetscMPIInt rank,size; 3666 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3667 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3668 PetscBool allcolumns, colflag; 3669 Mat M,Mreuse; 3670 MatScalar *vwork,*aa; 3671 MPI_Comm comm = ((PetscObject)mat)->comm; 3672 Mat_SeqAIJ *aij; 3673 3674 3675 PetscFunctionBegin; 3676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3677 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3678 3679 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3680 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3681 if (colflag && ncol == mat->cmap->N){ 3682 allcolumns = PETSC_TRUE; 3683 } else { 3684 allcolumns = PETSC_FALSE; 3685 } 3686 if (call == MAT_REUSE_MATRIX) { 3687 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3688 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3689 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3690 } else { 3691 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3692 } 3693 3694 /* 3695 m - number of local rows 3696 n - number of columns (same on all processors) 3697 rstart - first row in new global matrix generated 3698 */ 3699 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3700 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3701 if (call == MAT_INITIAL_MATRIX) { 3702 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3703 ii = aij->i; 3704 jj = aij->j; 3705 3706 /* 3707 Determine the number of non-zeros in the diagonal and off-diagonal 3708 portions of the matrix in order to do correct preallocation 3709 */ 3710 3711 /* first get start and end of "diagonal" columns */ 3712 if (csize == PETSC_DECIDE) { 3713 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3714 if (mglobal == n) { /* square matrix */ 3715 nlocal = m; 3716 } else { 3717 nlocal = n/size + ((n % size) > rank); 3718 } 3719 } else { 3720 nlocal = csize; 3721 } 3722 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3723 rstart = rend - nlocal; 3724 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3725 3726 /* next, compute all the lengths */ 3727 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3728 olens = dlens + m; 3729 for (i=0; i<m; i++) { 3730 jend = ii[i+1] - ii[i]; 3731 olen = 0; 3732 dlen = 0; 3733 for (j=0; j<jend; j++) { 3734 if (*jj < rstart || *jj >= rend) olen++; 3735 else dlen++; 3736 jj++; 3737 } 3738 olens[i] = olen; 3739 dlens[i] = dlen; 3740 } 3741 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3742 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3743 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3744 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3745 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3746 ierr = PetscFree(dlens);CHKERRQ(ierr); 3747 } else { 3748 PetscInt ml,nl; 3749 3750 M = *newmat; 3751 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3752 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3753 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3754 /* 3755 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3756 rather than the slower MatSetValues(). 3757 */ 3758 M->was_assembled = PETSC_TRUE; 3759 M->assembled = PETSC_FALSE; 3760 } 3761 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3762 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3763 ii = aij->i; 3764 jj = aij->j; 3765 aa = aij->a; 3766 for (i=0; i<m; i++) { 3767 row = rstart + i; 3768 nz = ii[i+1] - ii[i]; 3769 cwork = jj; jj += nz; 3770 vwork = aa; aa += nz; 3771 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3772 } 3773 3774 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 *newmat = M; 3777 3778 /* save submatrix used in processor for next request */ 3779 if (call == MAT_INITIAL_MATRIX) { 3780 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3781 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3782 } 3783 3784 PetscFunctionReturn(0); 3785 } 3786 3787 EXTERN_C_BEGIN 3788 #undef __FUNCT__ 3789 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3790 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3791 { 3792 PetscInt m,cstart, cend,j,nnz,i,d; 3793 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3794 const PetscInt *JJ; 3795 PetscScalar *values; 3796 PetscErrorCode ierr; 3797 3798 PetscFunctionBegin; 3799 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3800 3801 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3802 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3803 m = B->rmap->n; 3804 cstart = B->cmap->rstart; 3805 cend = B->cmap->rend; 3806 rstart = B->rmap->rstart; 3807 3808 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3809 3810 #if defined(PETSC_USE_DEBUG) 3811 for (i=0; i<m; i++) { 3812 nnz = Ii[i+1]- Ii[i]; 3813 JJ = J + Ii[i]; 3814 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3815 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3816 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3817 } 3818 #endif 3819 3820 for (i=0; i<m; i++) { 3821 nnz = Ii[i+1]- Ii[i]; 3822 JJ = J + Ii[i]; 3823 nnz_max = PetscMax(nnz_max,nnz); 3824 d = 0; 3825 for (j=0; j<nnz; j++) { 3826 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3827 } 3828 d_nnz[i] = d; 3829 o_nnz[i] = nnz - d; 3830 } 3831 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3832 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3833 3834 if (v) values = (PetscScalar*)v; 3835 else { 3836 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3837 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3838 } 3839 3840 for (i=0; i<m; i++) { 3841 ii = i + rstart; 3842 nnz = Ii[i+1]- Ii[i]; 3843 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3844 } 3845 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3846 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3847 3848 if (!v) { 3849 ierr = PetscFree(values);CHKERRQ(ierr); 3850 } 3851 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3852 PetscFunctionReturn(0); 3853 } 3854 EXTERN_C_END 3855 3856 #undef __FUNCT__ 3857 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3858 /*@ 3859 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3860 (the default parallel PETSc format). 3861 3862 Collective on MPI_Comm 3863 3864 Input Parameters: 3865 + B - the matrix 3866 . i - the indices into j for the start of each local row (starts with zero) 3867 . j - the column indices for each local row (starts with zero) 3868 - v - optional values in the matrix 3869 3870 Level: developer 3871 3872 Notes: 3873 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3874 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3875 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3876 3877 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 3878 3879 The format used for the sparse matrix input is equivalent to a 3880 row-major ordering, i.e., for the following matrix the input data expected is 3881 as shown: 3882 3883 1 0 0 3884 2 0 3 P0 3885 ------- 3886 4 5 6 P1 3887 3888 Process0 [P0]: rows_owned=[0,1] 3889 i = {0,1,3} [size = nrow+1 = 2+1] 3890 j = {0,0,2} [size = nz = 3] 3891 v = {1,2,3} [size = nz = 3] 3892 3893 Process1 [P1]: rows_owned=[2] 3894 i = {0,3} [size = nrow+1 = 1+1] 3895 j = {0,1,2} [size = nz = 3] 3896 v = {4,5,6} [size = nz = 3] 3897 3898 .keywords: matrix, aij, compressed row, sparse, parallel 3899 3900 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3901 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3902 @*/ 3903 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3904 { 3905 PetscErrorCode ierr; 3906 3907 PetscFunctionBegin; 3908 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 3912 #undef __FUNCT__ 3913 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3914 /*@C 3915 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). For good matrix assembly performance 3917 the user should preallocate the matrix storage by setting the parameters 3918 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3919 performance can be increased by more than a factor of 50. 3920 3921 Collective on MPI_Comm 3922 3923 Input Parameters: 3924 + A - the matrix 3925 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3926 (same value is used for all local rows) 3927 . d_nnz - array containing the number of nonzeros in the various rows of the 3928 DIAGONAL portion of the local submatrix (possibly different for each row) 3929 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3930 The size of this array is equal to the number of local rows, i.e., 'm'.
3931 For matrices that will be factored, you must leave room for (and set) 3932 the diagonal entry even if it is zero. 3933 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3934 submatrix (same value is used for all local rows). 3935 - o_nnz - array containing the number of nonzeros in the various rows of the 3936 OFF-DIAGONAL portion of the local submatrix (possibly different for 3937 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3938 structure. The size of this array is equal to the number 3939 of local rows, i.e., 'm'. 3940 3941 If the *_nnz parameter is given then the *_nz parameter is ignored. 3942 3943 The AIJ format (also called the Yale sparse matrix format or 3944 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3945 storage. The stored row and column indices begin with zero. 3946 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3947 3948 The parallel matrix is partitioned such that the first m0 rows belong to 3949 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3950 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3951 3952 The DIAGONAL portion of the local submatrix of a processor can be defined 3953 as the submatrix which is obtained by extracting the part corresponding to 3954 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3955 first row that belongs to the processor, r2 is the last row belonging to 3956 this processor, and c1-c2 is the range of indices of the local part of a 3957 vector suitable for applying the matrix to. This is an mxn matrix. In the 3958 common case of a square matrix, the row and column ranges are the same and 3959 the DIAGONAL part is also square. The remaining portion of the local 3960 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3961 3962 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3963 3964 You can call MatGetInfo() to get information on how effective the preallocation was; 3965 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3966 You can also run with the option -info and look for messages with the string 3967 malloc in them to see if additional memory allocation was needed. 3968 3969 Example usage: 3970 3971 Consider the following 8x8 matrix with 34 non-zero values, 3972 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3973 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3974 as follows: 3975 3976 .vb 3977 1 2 0 | 0 3 0 | 0 4 3978 Proc0 0 5 6 | 7 0 0 | 8 0 3979 9 0 10 | 11 0 0 | 12 0 3980 ------------------------------------- 3981 13 0 14 | 15 16 17 | 0 0 3982 Proc1 0 18 0 | 19 20 21 | 0 0 3983 0 0 0 | 22 23 0 | 24 0 3984 ------------------------------------- 3985 Proc2 25 26 27 | 0 0 28 | 29 0 3986 30 0 0 | 31 32 33 | 0 34 3987 .ve 3988 3989 This can be represented as a collection of submatrices as: 3990 3991 .vb 3992 A B C 3993 D E F 3994 G H I 3995 .ve 3996 3997 Where the submatrices A,B,C are owned by proc0, D,E,F are 3998 owned by proc1, G,H,I are owned by proc2. 3999 4000 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4001 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4002 The 'M','N' parameters are 8,8, and have the same values on all procs. 4003 4004 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4005 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 4006 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4007 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4008 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4009 matrix and [DF] as another SeqAIJ matrix. 4010 4011 When d_nz, o_nz parameters are specified, d_nz storage elements are 4012 allocated for every row of the local diagonal submatrix, and o_nz 4013 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4014 One way to choose d_nz and o_nz is to use the maximum number of nonzeros 4015 per row of the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 4016 In this case, the values of d_nz,o_nz are: 4017 .vb 4018 proc0 : d_nz = 2, o_nz = 2 4019 proc1 : d_nz = 3, o_nz = 2 4020 proc2 : d_nz = 1, o_nz = 4 4021 .ve 4022 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4023 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4024 for proc2. I.e., we are using 12+15+10=37 storage locations to store 4025 34 values. 4026 4027 When d_nnz, o_nnz parameters are specified, the storage is specified 4028 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4029 In the above case the values for d_nnz,o_nnz are: 4030 .vb 4031 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4032 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4033 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4034 .ve 4035 Here the space allocated is the sum of all the above values, i.e., 34, and 4036 hence the preallocation is perfect. 4037 4038 Level: intermediate 4039 4040 .keywords: matrix, aij, compressed row, sparse, parallel 4041 4042 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4043 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4044 @*/ 4045 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4046 { 4047 PetscErrorCode ierr; 4048 4049 PetscFunctionBegin; 4050 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4051 PetscValidType(B,1); 4052 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4053 PetscFunctionReturn(0); 4054 } 4055 4056 #undef __FUNCT__ 4057 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4058 /*@ 4059 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain 4060 the local rows in standard CSR format. 4061 4062 Collective on MPI_Comm 4063 4064 Input Parameters: 4065 + comm - MPI communicator 4066 . m - number of local rows (Cannot be PETSC_DECIDE) 4067 . n - This value should be the same as the local size used in creating the 4068 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4069 calculated if N is given) For square matrices n is almost always m. 4070 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4071 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4072 . i - row indices 4073 . j - column indices 4074 - a - matrix values 4075 4076 Output Parameter: 4077 . mat - the matrix 4078 4079 Level: intermediate 4080 4081 Notes: 4082 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4083 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4084 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
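   As a minimal sketch (assuming a two-process communicator comm), process 0 of the
   example below could create its share of the matrix with:

.vb
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve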
4085 4086 The i and j indices are 0 based, and the entries of i are offsets into the local j array. 4087 4088 The format used for the sparse matrix input is equivalent to a 4089 row-major ordering, i.e., for the following matrix the input data expected is 4090 as shown: 4091 4092 1 0 0 4093 2 0 3 P0 4094 ------- 4095 4 5 6 P1 4096 4097 Process0 [P0]: rows_owned=[0,1] 4098 i = {0,1,3} [size = nrow+1 = 2+1] 4099 j = {0,0,2} [size = nz = 3] 4100 v = {1,2,3} [size = nz = 3] 4101 4102 Process1 [P1]: rows_owned=[2] 4103 i = {0,3} [size = nrow+1 = 1+1] 4104 j = {0,1,2} [size = nz = 3] 4105 v = {4,5,6} [size = nz = 3] 4106 4107 .keywords: matrix, aij, compressed row, sparse, parallel 4108 4109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4110 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4111 @*/ 4112 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4113 { 4114 PetscErrorCode ierr; 4115 4116 PetscFunctionBegin; 4117 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4118 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4119 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4120 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4121 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4122 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4123 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4124 PetscFunctionReturn(0); 4125 } 4126 4127 #undef __FUNCT__ 4128 #define __FUNCT__ "MatCreateAIJ" 4129 /*@C 4130 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4131 (the default parallel PETSc format). For good matrix assembly performance 4132 the user should preallocate the matrix storage by setting the parameters 4133 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4134 performance can be increased by more than a factor of 50. 4135 4136 Collective on MPI_Comm 4137 4138 Input Parameters: 4139 + comm - MPI communicator 4140 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4141 This value should be the same as the local size used in creating the 4142 y vector for the matrix-vector product y = Ax. 4143 . n - This value should be the same as the local size used in creating the 4144 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4145 calculated if N is given) For square matrices n is almost always m. 4146 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4147 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4148 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4149 (same value is used for all local rows) 4150 . d_nnz - array containing the number of nonzeros in the various rows of the 4151 DIAGONAL portion of the local submatrix (possibly different for each row) 4152 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4153 The size of this array is equal to the number of local rows, i.e., 'm'. 4154 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4155 submatrix (same value is used for all local rows).
4156 - o_nnz - array containing the number of nonzeros in the various rows of the 4157 OFF-DIAGONAL portion of the local submatrix (possibly different for 4158 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4159 structure. The size of this array is equal to the number 4160 of local rows, i.e., 'm'. 4161 4162 Output Parameter: 4163 . A - the matrix 4164 4165 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4166 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4167 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4168 4169 Notes: 4170 If the *_nnz parameter is given then the *_nz parameter is ignored. 4171 4172 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4173 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4174 storage requirements for this matrix. 4175 4176 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4177 processor then it must be used on all processors that share the object for 4178 that argument. 4179 4180 The user MUST specify either the local or global matrix dimensions 4181 (possibly both). 4182 4183 The parallel matrix is partitioned across processors such that the 4184 first m0 rows belong to process 0, the next m1 rows belong to 4185 process 1, the next m2 rows belong to process 2, etc., where 4186 m0,m1,m2,.. are the input parameter 'm'. I.e., each processor stores 4187 values corresponding to an [m x N] submatrix. 4188 4189 The columns are logically partitioned with the n0 columns belonging 4190 to the 0th partition, the next n1 columns belonging to the next 4191 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4192 4193 The DIAGONAL portion of the local submatrix on any given processor 4194 is the submatrix corresponding to the rows and columns m,n 4195 corresponding to the given processor. I.e., the diagonal matrix on 4196 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4197 etc. The remaining portion of the local submatrix [m x (N-n)] 4198 constitutes the OFF-DIAGONAL portion. The example below better 4199 illustrates this concept. 4200 4201 For a square global matrix we define each processor's diagonal portion 4202 to be its local rows and the corresponding columns (a square submatrix); 4203 each processor's off-diagonal portion encompasses the remainder of the 4204 local matrix (a rectangular submatrix). 4205 4206 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4207 4208 When calling this routine with a single process communicator, a matrix of 4209 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4210 type of communicator, use the construction mechanism: 4211 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4212 4213 By default, this format uses inodes (identical nodes) when possible. 4214 We search for consecutive rows with the same nonzero structure, thereby 4215 reusing matrix information to achieve increased efficiency. 4216 4217 Options Database Keys: 4218 + -mat_no_inode - Do not use inodes 4219 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4220 - -mat_aij_oneindex - Internally use indexing starting at 1 4221 rather than 0. Note that when calling MatSetValues(), 4222 the user still MUST index entries starting at 0! 4223 4224 4225 Example usage: 4226 4227 Consider the following 8x8 matrix with 34 non-zero values, 4228 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4229 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4230 as follows: 4231 4232 .vb 4233 1 2 0 | 0 3 0 | 0 4 4234 Proc0 0 5 6 | 7 0 0 | 8 0 4235 9 0 10 | 11 0 0 | 12 0 4236 ------------------------------------- 4237 13 0 14 | 15 16 17 | 0 0 4238 Proc1 0 18 0 | 19 20 21 | 0 0 4239 0 0 0 | 22 23 0 | 24 0 4240 ------------------------------------- 4241 Proc2 25 26 27 | 0 0 28 | 29 0 4242 30 0 0 | 31 32 33 | 0 34 4243 .ve 4244 4245 This can be represented as a collection of submatrices as: 4246 4247 .vb 4248 A B C 4249 D E F 4250 G H I 4251 .ve 4252 4253 Where the submatrices A,B,C are owned by proc0, D,E,F are 4254 owned by proc1, G,H,I are owned by proc2. 4255 4256 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4257 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4258 The 'M','N' parameters are 8,8, and have the same values on all procs. 4259 4260 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4261 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4262 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4263 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4264 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4265 matrix and [DF] as another SeqAIJ matrix. 4266 4267 When d_nz, o_nz parameters are specified, d_nz storage elements are 4268 allocated for every row of the local diagonal submatrix, and o_nz 4269 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4270 One way to choose d_nz and o_nz is to use the maximum number of nonzeros 4271 per row of the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 4272 In this case, the values of d_nz,o_nz are: 4273 .vb 4274 proc0 : d_nz = 2, o_nz = 2 4275 proc1 : d_nz = 3, o_nz = 2 4276 proc2 : d_nz = 1, o_nz = 4 4277 .ve 4278 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4279 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4280 for proc2. I.e., we are using 12+15+10=37 storage locations to store 4281 34 values. 4282 4283 When d_nnz, o_nnz parameters are specified, the storage is specified 4284 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4285 In the above case the values for d_nnz,o_nnz are: 4286 .vb 4287 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4288 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4289 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4290 .ve 4291 Here the space allocated is the sum of all the above values, i.e., 34, and 4292 hence the preallocation is perfect.
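   A minimal sketch of the corresponding call on proc0 (assuming a 3-process
   communicator comm; d_nz and o_nz are passed as 0 since the *_nnz arrays
   take precedence):

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      Mat      A;
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve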
4293 4294 Level: intermediate 4295 4296 .keywords: matrix, aij, compressed row, sparse, parallel 4297 4298 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4299 MPIAIJ, MatCreateMPIAIJWithArrays() 4300 @*/ 4301 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4302 { 4303 PetscErrorCode ierr; 4304 PetscMPIInt size; 4305 4306 PetscFunctionBegin; 4307 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4308 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4309 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4310 if (size > 1) { 4311 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4312 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4313 } else { 4314 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4315 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4316 } 4317 PetscFunctionReturn(0); 4318 } 4319 4320 #undef __FUNCT__ 4321 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4322 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4323 { 4324 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4325 4326 PetscFunctionBegin; 4327 *Ad = a->A; 4328 *Ao = a->B; 4329 *colmap = a->garray; 4330 PetscFunctionReturn(0); 4331 } 4332 4333 #undef __FUNCT__ 4334 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4335 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4336 { 4337 PetscErrorCode ierr; 4338 PetscInt i; 4339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4340 4341 PetscFunctionBegin; 4342 if (coloring->ctype == IS_COLORING_GLOBAL) { 4343 ISColoringValue *allcolors,*colors; 4344 ISColoring ocoloring; 4345 4346 /* set coloring for diagonal portion */ 4347 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4348 4349 /* set coloring for off-diagonal portion */ 4350 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4351 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4352 for (i=0; i<a->B->cmap->n; i++) { 4353 colors[i] = allcolors[a->garray[i]]; 4354 } 4355 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4356 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4357 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4358 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4359 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4360 ISColoringValue *colors; 4361 PetscInt *larray; 4362 ISColoring ocoloring; 4363 4364 /* set coloring for diagonal portion */ 4365 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4366 for (i=0; i<a->A->cmap->n; i++) { 4367 larray[i] = i + A->cmap->rstart; 4368 } 4369 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4370 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4371 for (i=0; i<a->A->cmap->n; i++) { 4372 colors[i] = coloring->colors[larray[i]]; 4373 } 4374 ierr = PetscFree(larray);CHKERRQ(ierr); 4375 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4376 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4377 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4378 4379 /* set coloring for off-diagonal portion */ 4380 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4381 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4382 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4383 for (i=0; i<a->B->cmap->n; i++) { 4384 colors[i] = coloring->colors[larray[i]]; 4385 } 4386 ierr = PetscFree(larray);CHKERRQ(ierr); 4387 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4388 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4389 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4390 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4391 4392 PetscFunctionReturn(0); 4393 } 4394 4395 #undef __FUNCT__ 4396 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4397 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4398 { 4399 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4400 PetscErrorCode ierr; 4401 4402 PetscFunctionBegin; 4403 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4404 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4405 PetscFunctionReturn(0); 4406 } 4407 4408 #undef __FUNCT__ 4409 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4410 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4411 { 4412 PetscErrorCode ierr; 4413 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4414 PetscInt *indx; 4415 4416 PetscFunctionBegin; 4417 /* This routine will ONLY return MPIAIJ type matrix */ 4418 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4419 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4420 if (n == PETSC_DECIDE){ 4421 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4422 } 4423 /* Check sum(n) = N */ 4424 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4425 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4426 4427 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4428 rstart -= m; 4429 4430 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4431 for (i=0;i<m;i++) { 4432 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4433 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4434 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4435 } 4436 4437 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4438 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4439 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4440 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4441 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4442 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4443 PetscFunctionReturn(0); 4444 } 4445 4446 #undef __FUNCT__ 4447 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4448 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4449 { 4450 PetscErrorCode ierr; 4451 PetscInt m,N,i,rstart,nnz,Ii; 4452 PetscInt *indx; 4453 PetscScalar *values; 4454 4455 PetscFunctionBegin; 4456 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4457 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4458 for (i=0;i<m;i++) { 4459 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4460 Ii = i + rstart; 4461 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4462 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
        matrix from each process

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator on which the parallel matrix will live
.    inmat - the input sequential matrix on each process
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH process MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
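     (The code below hardcodes MATSEQAIJ, so an AIJ subclass of A would lose its
     specialized type in the per-process files written here.)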
*/ 4523 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4524 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4525 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4526 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4527 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4528 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4529 for (i=0;i<m;i++) { 4530 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4531 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4532 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4533 } 4534 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4535 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4536 4537 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4538 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4539 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4540 sprintf(name,"%s.%d",outfile,rank); 4541 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4542 ierr = PetscFree(name);CHKERRQ(ierr); 4543 ierr = MatView(B,out);CHKERRQ(ierr); 4544 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4545 ierr = MatDestroy(&B);CHKERRQ(ierr); 4546 PetscFunctionReturn(0); 4547 } 4548 4549 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4550 #undef __FUNCT__ 4551 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4552 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4553 { 4554 PetscErrorCode ierr; 4555 Mat_Merge_SeqsToMPI *merge; 4556 PetscContainer container; 4557 4558 PetscFunctionBegin; 4559 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4560 if (container) { 4561 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4574 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4575 ierr = PetscFree(merge);CHKERRQ(ierr); 4576 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4577 } 4578 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4579 PetscFunctionReturn(0); 4580 } 4581 4582 #include <../src/mat/utils/freespace.h> 4583 #include <petscbt.h> 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4587 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4588 { 4589 PetscErrorCode ierr; 4590 MPI_Comm comm=((PetscObject)mpimat)->comm; 4591 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4592 PetscMPIInt size,rank,taga,*len_s; 4593 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4594 PetscInt proc,m; 4595 PetscInt **buf_ri,**buf_rj; 4596 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4597 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4598 MPI_Request *s_waits,*r_waits; 4599 MPI_Status *status; 4600 MatScalar *aa=a->a; 4601 MatScalar **abuf_r,*ba_i; 4602 Mat_Merge_SeqsToMPI *merge; 4603 PetscContainer container; 4604 4605 PetscFunctionBegin; 4606 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4607 4608 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4609 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4610 4611 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4612 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4613 4614 bi = merge->bi; 4615 bj = merge->bj; 4616 buf_ri = merge->buf_ri; 4617 buf_rj = merge->buf_rj; 4618 4619 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4620 owners = merge->rowmap->range; 4621 len_s = merge->len_s; 4622 4623 /* send and recv matrix values */ 4624 /*-----------------------------*/ 4625 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4626 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4627 4628 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4629 for (proc=0,k=0; proc<size; proc++){ 4630 if (!len_s[proc]) continue; 4631 i = owners[proc]; 4632 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4633 k++; 4634 } 4635 4636 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4637 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4638 ierr = PetscFree(status);CHKERRQ(ierr); 4639 4640 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4641 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4642 4643 /* insert mat values of mpimat */ 4644 /*----------------------------*/ 4645 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4646 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4647 4648 for (k=0; k<merge->nrecv; k++){ 4649 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4650 nrows = *(buf_ri_k[k]); 4651 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4652 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4653 } 4654 4655 /* set values of ba */ 4656 m = merge->rowmap->n; 4657 for (i=0; i<m; i++) { 4658 arow = owners[rank] + i; 4659 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4660 bnzi = bi[i+1] - bi[i]; 4661 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4662 4663 /* add local non-zero vals of this proc's seqmat into ba */ 4664 anzi = ai[arow+1] - ai[arow]; 4665 aj = a->j + ai[arow]; 4666 aa = a->a + ai[arow]; 4667 nextaj = 0; 4668 for (j=0; nextaj<anzi; j++){ 4669 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4670 ba_i[j] += aa[nextaj++]; 4671 } 4672 } 4673 4674 /* add received vals into ba */ 4675 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4676 /* i-th row */ 4677 if (i == *nextrow[k]) { 4678 anzi = *(nextai[k]+1) - *nextai[k]; 4679 aj = buf_rj[k] + *(nextai[k]); 4680 aa = abuf_r[k] + *(nextai[k]); 4681 nextaj = 0; 4682 for (j=0; nextaj<anzi; j++){ 4683 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4684 ba_i[j] += aa[nextaj++]; 4685 } 4686 } 4687 nextrow[k]++; nextai[k]++; 4688 } 4689 } 4690 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4691 } 4692 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4693 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4694 4695 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4696 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4697 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4698 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4699 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4700 PetscFunctionReturn(0); 4701 } 4702 4703 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4704 4705 #undef __FUNCT__ 4706 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4707 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4708 { 4709 PetscErrorCode ierr; 4710 Mat B_mpi; 4711 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4712 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4713 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4714 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4715 PetscInt len,proc,*dnz,*onz,bs,cbs; 4716 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4717 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4718 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4719 MPI_Status *status; 4720 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4721 PetscBT lnkbt; 4722 Mat_Merge_SeqsToMPI *merge; 4723 PetscContainer container; 4724 4725 PetscFunctionBegin; 4726 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4727 4728 /* make sure it is a PETSc comm */ 4729 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4730 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4731 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4732 4733 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4734 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4735 4736 /* determine row ownership */ 4737 /*---------------------------------------------------------*/ 4738 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4739 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4740 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4741 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4742 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4743 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4744 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4745 4746 m = merge->rowmap->n; 4747 owners = merge->rowmap->range; 4748 4749 /* determine the number of messages to send, their lengths */ 4750 /*---------------------------------------------------------*/ 4751 len_s = merge->len_s; 4752 4753 len = 0; /* length of buf_si[] */ 4754 merge->nsend = 0; 4755 for (proc=0; proc<size; proc++){ 4756 len_si[proc] = 0; 4757 if (proc == rank){ 4758 len_s[proc] = 0; 4759 } else { 4760 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4761 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4762 } 4763 if (len_s[proc]) { 4764 merge->nsend++; 4765 nrows = 0; 4766 for (i=owners[proc]; i<owners[proc+1]; i++){ 4767 if (ai[i+1] > ai[i]) nrows++; 4768 } 4769 len_si[proc] = 2*(nrows+1); 4770 len += len_si[proc]; 4771 } 4772 } 4773 4774 /* determine the number and length of messages to receive for ij-structure */ 4775 /*-------------------------------------------------------------------------*/ 4776 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4777 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4778 4779 /* post the Irecv of j-structure */ 4780 /*-------------------------------*/ 4781 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4782 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4783 4784 /* post the Isend of j-structure */ 4785 /*--------------------------------*/ 4786 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4787 4788 for (proc=0, k=0; proc<size; proc++){ 4789 if (!len_s[proc]) continue; 4790 i = owners[proc]; 4791 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4792 k++; 4793 } 4794 4795 /* receives and sends of j-structure are complete */ 4796 /*------------------------------------------------*/ 4797 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4798 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4799 4800 /* send and recv i-structure */ 4801 /*---------------------------*/ 4802 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4803 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4804 4805 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4806 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4807 for (proc=0,k=0; proc<size; proc++){ 4808 if (!len_s[proc]) continue; 4809 /* form outgoing message for i-structure: 4810 buf_si[0]: nrows to be sent 4811 [1:nrows]: row index (global) 4812 [nrows+1:2*nrows+1]: i-structure index 4813 */ 4814 /*-------------------------------------------*/ 4815 nrows = len_si[proc]/2 - 1; 4816 buf_si_i = buf_si + nrows+1; 4817 buf_si[0] = nrows; 4818 buf_si_i[0] = 0; 4819 nrows = 0; 4820 for (i=owners[proc]; i<owners[proc+1]; i++){ 4821 anzi = ai[i+1] - ai[i]; 4822 if (anzi) { 4823 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4824 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4825 nrows++; 4826 } 4827 } 4828 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4829 k++; 4830 buf_si += len_si[proc]; 4831 } 4832 4833 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4834 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4835 4836 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4837 for (i=0; i<merge->nrecv; i++){ 4838 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4839 } 4840 4841 ierr = PetscFree(len_si);CHKERRQ(ierr); 4842 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4843 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4844 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4845 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4846 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4847 ierr = PetscFree(status);CHKERRQ(ierr); 4848 4849 /* compute a local seq matrix in each processor */ 4850 /*----------------------------------------------*/ 4851 /* allocate bi array and free space for accumulating nonzero column info */ 4852 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4853 bi[0] = 0; 4854 4855 /* create and initialize a linked list */ 4856 nlnk = N+1; 4857 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4858 4859 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4860 len = ai[owners[rank+1]] - ai[owners[rank]]; 4861 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4862 current_space = free_space; 4863 4864 /* determine symbolic info for 
each local row */ 4865 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4866 4867 for (k=0; k<merge->nrecv; k++){ 4868 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4869 nrows = *buf_ri_k[k]; 4870 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4871 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4872 } 4873 4874 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4875 len = 0; 4876 for (i=0;i<m;i++) { 4877 bnzi = 0; 4878 /* add local non-zero cols of this proc's seqmat into lnk */ 4879 arow = owners[rank] + i; 4880 anzi = ai[arow+1] - ai[arow]; 4881 aj = a->j + ai[arow]; 4882 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4883 bnzi += nlnk; 4884 /* add received col data into lnk */ 4885 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4886 if (i == *nextrow[k]) { /* i-th row */ 4887 anzi = *(nextai[k]+1) - *nextai[k]; 4888 aj = buf_rj[k] + *nextai[k]; 4889 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4890 bnzi += nlnk; 4891 nextrow[k]++; nextai[k]++; 4892 } 4893 } 4894 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4895 4896 /* if free space is not available, make more free space */ 4897 if (current_space->local_remaining<bnzi) { 4898 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4899 nspacedouble++; 4900 } 4901 /* copy data into free space, then initialize lnk */ 4902 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4903 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4904 4905 current_space->array += bnzi; 4906 current_space->local_used += bnzi; 4907 current_space->local_remaining -= bnzi; 4908 4909 bi[i+1] = bi[i] + bnzi; 4910 } 4911 4912 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4913 4914 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4915 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4916 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4917 4918 /* create symbolic parallel matrix B_mpi */ 4919 /*---------------------------------------*/ 4920 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4921 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4922 if (n==PETSC_DECIDE) { 4923 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4924 } else { 4925 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4926 } 4927 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4928 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4929 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4930 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4931 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4932 4933 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4934 B_mpi->assembled = PETSC_FALSE; 4935 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4936 merge->bi = bi; 4937 merge->bj = bj; 4938 merge->buf_ri = buf_ri; 4939 merge->buf_rj = buf_rj; 4940 merge->coi = PETSC_NULL; 4941 merge->coj = PETSC_NULL; 4942 merge->owners_co = PETSC_NULL; 4943 4944 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4945 4946 /* attach the supporting struct to B_mpi for reuse */ 4947 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4948 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
        matrix from each process

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator on which the parallel matrix will live
.    seqmat - the input sequential matrix on each process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1){
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX){
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
       with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
       of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5269 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5270 5271 Level: developer 5272 5273 */ 5274 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5275 { 5276 VecScatter_MPI_General *gen_to,*gen_from; 5277 PetscErrorCode ierr; 5278 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5279 Mat_SeqAIJ *b_oth; 5280 VecScatter ctx=a->Mvctx; 5281 MPI_Comm comm=((PetscObject)ctx)->comm; 5282 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5283 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5284 PetscScalar *rvalues,*svalues; 5285 MatScalar *b_otha,*bufa,*bufA; 5286 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5287 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5288 MPI_Status *sstatus,rstatus; 5289 PetscMPIInt jj; 5290 PetscInt *cols,sbs,rbs; 5291 PetscScalar *vals; 5292 5293 PetscFunctionBegin; 5294 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5295 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5296 } 5297 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5298 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5299 5300 gen_to = (VecScatter_MPI_General*)ctx->todata; 5301 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5302 rvalues = gen_from->values; /* holds the length of receiving row */ 5303 svalues = gen_to->values; /* holds the length of sending row */ 5304 nrecvs = gen_from->n; 5305 nsends = gen_to->n; 5306 5307 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5308 srow = gen_to->indices; /* local row index to be sent */ 5309 sstarts = gen_to->starts; 5310 sprocs = gen_to->procs; 5311 sstatus = gen_to->sstatus; 5312 sbs = gen_to->bs; 5313 rstarts = gen_from->starts; 5314 rprocs = gen_from->procs; 5315 rbs = gen_from->bs; 5316 5317 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5318 if (scall == MAT_INITIAL_MATRIX){ 5319 /* i-array */ 5320 /*---------*/ 5321 /* post receives */ 5322 for (i=0; i<nrecvs; i++){ 5323 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5324 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5325 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5326 } 5327 5328 /* pack the outgoing message */ 5329 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5330 sstartsj[0] = 0; rstartsj[0] = 0; 5331 len = 0; /* total length of j or a array to be sent */ 5332 k = 0; 5333 for (i=0; i<nsends; i++){ 5334 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5335 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5336 for (j=0; j<nrows; j++) { 5337 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5338 for (l=0; l<sbs; l++){ 5339 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5340 rowlen[j*sbs+l] = ncols; 5341 len += ncols; 5342 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5343 } 5344 k++; 5345 } 5346 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5347 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5348 } 5349 /* 
recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++){
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++){
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix B does not possess an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++){
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5442 } 5443 } 5444 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5445 } 5446 /* recvs and sends of a-array are completed */ 5447 i = nrecvs; 5448 while (i--) { 5449 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5450 } 5451 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5452 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5453 5454 if (scall == MAT_INITIAL_MATRIX){ 5455 /* put together the new matrix */ 5456 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5457 5458 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5459 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5460 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5461 b_oth->free_a = PETSC_TRUE; 5462 b_oth->free_ij = PETSC_TRUE; 5463 b_oth->nonew = 0; 5464 5465 ierr = PetscFree(bufj);CHKERRQ(ierr); 5466 if (!startsj_s || !bufa_ptr){ 5467 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5468 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5469 } else { 5470 *startsj_s = sstartsj; 5471 *startsj_r = rstartsj; 5472 *bufa_ptr = bufa; 5473 } 5474 } 5475 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5476 PetscFunctionReturn(0); 5477 } 5478 5479 #undef __FUNCT__ 5480 #define __FUNCT__ "MatGetCommunicationStructs" 5481 /*@C 5482 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5483 5484 Not Collective 5485 5486 Input Parameters: 5487 . A - The matrix in mpiaij format 5488 5489 Output Parameter: 5490 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5491 . 
colmap - A map from global column index to local index into lvec 5492 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5493 5494 Level: developer 5495 5496 @*/ 5497 #if defined (PETSC_USE_CTABLE) 5498 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5499 #else 5500 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5501 #endif 5502 { 5503 Mat_MPIAIJ *a; 5504 5505 PetscFunctionBegin; 5506 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5507 PetscValidPointer(lvec, 2); 5508 PetscValidPointer(colmap, 3); 5509 PetscValidPointer(multScatter, 4); 5510 a = (Mat_MPIAIJ *) A->data; 5511 if (lvec) *lvec = a->lvec; 5512 if (colmap) *colmap = a->colmap; 5513 if (multScatter) *multScatter = a->Mvctx; 5514 PetscFunctionReturn(0); 5515 } 5516 5517 EXTERN_C_BEGIN 5518 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5519 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5520 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5521 EXTERN_C_END 5522 5523 #undef __FUNCT__ 5524 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5525 /* 5526 Computes (B'*A')' since computing B*A directly is untenable 5527 5528 n p p 5529 ( ) ( ) ( ) 5530 m ( A ) * n ( B ) = m ( C ) 5531 ( ) ( ) ( ) 5532 5533 */ 5534 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5535 { 5536 PetscErrorCode ierr; 5537 Mat At,Bt,Ct; 5538 5539 PetscFunctionBegin; 5540 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5541 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5542 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5543 ierr = MatDestroy(&At);CHKERRQ(ierr); 5544 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5545 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5546 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5547 PetscFunctionReturn(0); 5548 } 5549 5550 #undef __FUNCT__ 5551 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5552 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5553 { 5554 PetscErrorCode ierr; 5555 PetscInt m=A->rmap->n,n=B->cmap->n; 5556 Mat Cmat; 5557 5558 PetscFunctionBegin; 5559 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5560 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5561 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5562 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5563 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5564 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5565 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5566 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5567 5568 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5569 *C = Cmat; 5570 PetscFunctionReturn(0); 5571 } 5572 5573 /* ----------------------------------------------------------------*/ 5574 #undef __FUNCT__ 5575 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5576 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5577 { 5578 PetscErrorCode ierr; 5579 5580 PetscFunctionBegin; 5581 if (scall == MAT_INITIAL_MATRIX){ 5582 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5583 } 5584 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
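  /* When scall is MAT_REUSE_MATRIX the symbolic step above is skipped and the
     existing dense C is simply refilled by the numeric step, which forms the
     product as (B'*A')' via explicit transposes (see the note above). */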
5585 PetscFunctionReturn(0); 5586 } 5587 5588 EXTERN_C_BEGIN 5589 #if defined(PETSC_HAVE_MUMPS) 5590 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5591 #endif 5592 #if defined(PETSC_HAVE_PASTIX) 5593 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5594 #endif 5595 #if defined(PETSC_HAVE_SUPERLU_DIST) 5596 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5597 #endif 5598 #if defined(PETSC_HAVE_CLIQUE) 5599 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5600 #endif 5601 EXTERN_C_END 5602 5603 /*MC 5604 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5605 5606 Options Database Keys: 5607 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5608 5609 Level: beginner 5610 5611 .seealso: MatCreateAIJ() 5612 M*/ 5613 5614 EXTERN_C_BEGIN 5615 #undef __FUNCT__ 5616 #define __FUNCT__ "MatCreate_MPIAIJ" 5617 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5618 { 5619 Mat_MPIAIJ *b; 5620 PetscErrorCode ierr; 5621 PetscMPIInt size; 5622 5623 PetscFunctionBegin; 5624 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5625 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5626 B->data = (void*)b; 5627 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5628 B->assembled = PETSC_FALSE; 5629 B->insertmode = NOT_SET_VALUES; 5630 b->size = size; 5631 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5632 5633 /* build cache for off array entries formed */ 5634 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5635 b->donotstash = PETSC_FALSE; 5636 b->colmap = 0; 5637 b->garray = 0; 5638 b->roworiented = PETSC_TRUE; 5639 5640 /* stuff used for matrix vector multiply */ 5641 b->lvec = PETSC_NULL; 5642 b->Mvctx = PETSC_NULL; 5643 5644 /* stuff for MatGetRow() */ 5645 b->rowindices = 0; 5646 b->rowvalues = 0; 5647 b->getrowactive = PETSC_FALSE; 5648 5649 /* flexible pointer used in CUSP/CUSPARSE classes */ 5650 b->spptr = PETSC_NULL; 5651 5652 #if defined(PETSC_HAVE_MUMPS) 5653 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5654 "MatGetFactor_aij_mumps", 5655 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5656 #endif 5657 #if defined(PETSC_HAVE_PASTIX) 5658 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5659 "MatGetFactor_mpiaij_pastix", 5660 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_SUPERLU_DIST) 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5664 "MatGetFactor_mpiaij_superlu_dist", 5665 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5666 #endif 5667 #if defined(PETSC_HAVE_CLIQUE) 5668 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5669 "MatGetFactor_aij_clique", 5670 MatGetFactor_aij_clique);CHKERRQ(ierr); 5671 #endif 5672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5673 "MatStoreValues_MPIAIJ", 5674 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5676 "MatRetrieveValues_MPIAIJ", 5677 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5679 "MatGetDiagonalBlock_MPIAIJ", 5680 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5681 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
       For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values; a usage sketch follows.
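       Example (a minimal sketch, not from the PETSc test suite: two processes assemble the
       2x2 matrix [1 2; 3 4], each owning one row and one column; rank comes from
       MPI_Comm_rank(). Note that j holds LOCAL column indices of the diagonal block while
       oj holds GLOBAL column indices, and all six arrays must stay valid until mat is destroyed):
.vb
       PetscInt    i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
       PetscScalar a[1],oa[1];
       Mat         mat;
       if (!rank) {j[0] = 0; a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}
       else       {j[0] = 0; a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;}
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve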
5753 5754 You cannot later use MatSetValues() to change values in this matrix. 5755 5756 .keywords: matrix, aij, compressed row, sparse, parallel 5757 5758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5759 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5760 @*/ 5761 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5762 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5763 { 5764 PetscErrorCode ierr; 5765 Mat_MPIAIJ *maij; 5766 5767 PetscFunctionBegin; 5768 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5769 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5770 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5771 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5772 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5773 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5774 maij = (Mat_MPIAIJ*) (*mat)->data; 5775 maij->donotstash = PETSC_TRUE; 5776 (*mat)->preallocated = PETSC_TRUE; 5777 5778 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5779 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5780 5781 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5782 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5783 5784 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5785 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5786 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5787 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5788 5789 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5790 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5791 PetscFunctionReturn(0); 5792 } 5793 5794 /* 5795 Special version for direct calls from Fortran 5796 */ 5797 #include <petsc-private/fortranimpl.h> 5798 5799 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5800 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5801 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5802 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5803 #endif 5804 5805 /* Change these macros so can be used in void function */ 5806 #undef CHKERRQ 5807 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5808 #undef SETERRQ2 5809 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5810 #undef SETERRQ3 5811 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5812 #undef SETERRQ 5813 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5814 5815 EXTERN_C_BEGIN 5816 #undef __FUNCT__ 5817 #define __FUNCT__ "matsetvaluesmpiaij_" 5818 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5819 { 5820 Mat mat = *mmat; 5821 PetscInt m = *mm, n = *mn; 5822 InsertMode addv = *maddv; 5823 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5824 PetscScalar value; 5825 PetscErrorCode ierr; 5826 5827 MatCheckPreallocated(mat,1); 5828 if (mat->insertmode == NOT_SET_VALUES) { 5829 mat->insertmode = addv; 5830 } 5831 #if defined(PETSC_USE_DEBUG) 5832 else if (mat->insertmode != addv) { 5833 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5834 } 5835 #endif 5836 { 5837 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5838 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5839 PetscBool roworiented = aij->roworiented; 5840 5841 /* Some Variables required in the macro */ 5842 Mat A = aij->A; 5843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5844 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5845 MatScalar *aa = a->a; 5846 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5847 Mat B = aij->B; 5848 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5849 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5850 MatScalar *ba = b->a; 5851 5852 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5853 PetscInt nonew = a->nonew; 5854 MatScalar *ap1,*ap2; 5855 5856 PetscFunctionBegin; 5857 for (i=0; i<m; i++) { 5858 if (im[i] < 0) continue; 5859 #if defined(PETSC_USE_DEBUG) 5860 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5861 #endif 5862 if (im[i] >= rstart && im[i] < rend) { 5863 row = im[i] - rstart; 5864 lastcol1 = -1; 5865 rp1 = aj + ai[row]; 5866 ap1 = aa + ai[row]; 5867 rmax1 = aimax[row]; 5868 nrow1 = ailen[row]; 5869 low1 = 0; 5870 high1 = nrow1; 5871 lastcol2 = -1; 5872 rp2 = bj + bi[row]; 5873 ap2 = ba + bi[row]; 5874 rmax2 = bimax[row]; 5875 nrow2 = bilen[row]; 5876 low2 = 0; 5877 high2 = nrow2; 5878 5879 for (j=0; j<n; j++) { 5880 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5881 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5882 if (in[j] >= cstart && in[j] < cend){ 5883 col = in[j] - cstart; 5884 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5885 } else if (in[j] < 0) continue; 5886 #if defined(PETSC_USE_DEBUG) 5887 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5888 #endif 5889 else { 5890 if (mat->was_assembled) { 5891 if (!aij->colmap) { 5892 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5893 } 5894 #if defined (PETSC_USE_CTABLE) 5895 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5896 col--; 5897 #else 5898 col = aij->colmap[in[j]] - 1; 5899 #endif 5900 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5901 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5902 col = in[j]; 5903 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5904 B = aij->B; 5905 b = (Mat_SeqAIJ*)B->data; 5906 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5907 rp2 = bj + bi[row]; 5908 ap2 = ba + bi[row]; 5909 rmax2 = bimax[row]; 5910 nrow2 = bilen[row]; 5911 low2 = 0; 5912 high2 = nrow2; 5913 bm = aij->B->rmap->n; 5914 ba = b->a; 5915 } 5916 } else col = in[j]; 5917 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5918 } 5919 } 5920 } else { 5921 if (!aij->donotstash) { 5922 if (roworiented) { 5923 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5924 } else { 5925 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5926 } 5927 } 5928 } 5929 }} 5930 PetscFunctionReturnVoid(); 5931 } 5932 EXTERN_C_END 5933 5934
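
/*
   Usage sketch for MatMPIAIJGetLocalMat() (illustrative only; assumes an assembled
   MPIAIJ matrix A and the usual ierr error-checking conventions):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use the local rows of A as an ordinary SeqAIJ matrix ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

   MAT_INITIAL_MATRIX allocates the merged CSR arrays (freed by MatDestroy() since the
   routine marks them as PETSc-owned); MAT_REUSE_MATRIX only copies values, so the
   nonzero pattern of A must be unchanged between calls.
*/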