
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the AIJ type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt
i,rstart,nrows,*rows; 112 113 PetscFunctionBegin; 114 *zrows = PETSC_NULL; 115 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 116 ierr = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr); 117 for (i=0; i<nrows; i++) rows[i] += rstart; 118 ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 124 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 125 { 126 PetscErrorCode ierr; 127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 128 PetscInt i,n,*garray = aij->garray; 129 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 130 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 131 PetscReal *work; 132 133 PetscFunctionBegin; 134 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 135 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 136 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 137 if (type == NORM_2) { 138 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 139 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 140 } 141 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 142 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 143 } 144 } else if (type == NORM_1) { 145 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 146 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 147 } 148 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 149 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 150 } 151 } else if (type == NORM_INFINITY) { 152 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 153 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 154 } 155 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 156 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 157 } 158 159 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 160 if (type == NORM_INFINITY) { 161 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 162 } else { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 164 } 165 ierr = PetscFree(work);CHKERRQ(ierr); 166 if (type == NORM_2) { 167 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatDistribute_MPIAIJ" 174 /* 175 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 176 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
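
   How it works: rank 0 broadcasts the block sizes and then sends each remote rank its
   row lengths, column indices, and numerical values; every rank preallocates from those
   counts and fills its local rows with MatSetValues().

   A minimal calling sketch (hypothetical variable names; the sequential matrix appears
   to be dereferenced only on rank 0):

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,Aseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
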
177 178 Only for square matrices 179 */ 180 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 181 { 182 PetscMPIInt rank,size; 183 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 184 PetscErrorCode ierr; 185 Mat mat; 186 Mat_SeqAIJ *gmata; 187 PetscMPIInt tag; 188 MPI_Status status; 189 PetscBool aij; 190 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 191 192 PetscFunctionBegin; 193 CHKMEMQ; 194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (!rank) { 197 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 198 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 199 } 200 if (reuse == MAT_INITIAL_MATRIX) { 201 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 202 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 203 if (!rank) { 204 bses[0] = gmat->rmap->bs; 205 bses[1] = gmat->cmap->bs; 206 } 207 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 209 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 210 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 211 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 212 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) { 215 rowners[i] += rowners[i-1]; 216 } 217 rstart = rowners[rank]; 218 rend = rowners[rank+1]; 219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 220 if (!rank) { 221 gmata = (Mat_SeqAIJ*) gmat->data; 222 /* send row lengths to all processors */ 223 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 224 for (i=1; i<size; i++) { 225 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 226 } 227 /* determine number diagonal and off-diagonal counts */ 228 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 230 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = 
PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 264 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 265 jj = 0; 266 for (i=0; i<m; i++) { 267 for (j=0; j<dlens[i]; j++) { 268 if (gmataj[jj] < rstart) ld[i]++; 269 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 270 jj++; 271 } 272 } 273 /* receive numerical values */ 274 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 275 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 276 } 277 /* set preallocation */ 278 for (i=0; i<m; i++) { 279 dlens[i] -= olens[i]; 280 } 281 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 282 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 283 284 for (i=0; i<m; i++) { 285 dlens[i] += olens[i]; 286 } 287 cnt = 0; 288 for (i=0; i<m; i++) { 289 row = rstart + i; 290 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 291 cnt += dlens[i]; 292 } 293 if (rank) { 294 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 295 } 296 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 297 ierr = PetscFree(rowners);CHKERRQ(ierr); 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 *inmat = mat; 300 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 301 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 302 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 303 mat = *inmat; 304 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 305 if (!rank) { 306 /* send numerical values to other processes */ 307 gmata = (Mat_SeqAIJ*) gmat->data; 308 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 309 gmataa = gmata->a; 310 for (i=1; i<size; i++) { 311 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 312 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 313 } 314 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 315 } else { 316 /* receive numerical values from process 0*/ 317 nz = Ad->nz + Ao->nz; 318 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 319 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 320 } 321 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 322 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 323 ad = Ad->a; 324 ao = Ao->a; 325 if (mat->rmap->n) { 326 i = 0; 327 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 328 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 329 } 330 for (i=1; i<mat->rmap->n; i++) { 331 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 i--; 335 if (mat->rmap->n) { 336 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 337 } 338 if (rank) { 339 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 CHKMEMQ; 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each process
   has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else              low2  = t;                        \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
}

#undef
__FUNCT__ 449 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 450 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 451 { 452 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 454 PetscErrorCode ierr; 455 PetscInt l,*garray = mat->garray,diag; 456 457 PetscFunctionBegin; 458 /* code only works for square matrices A */ 459 460 /* find size of row to the left of the diagonal part */ 461 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 462 row = row - diag; 463 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 464 if (garray[b->j[b->i[row]+l]] > diag) break; 465 } 466 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 467 468 /* diagonal part */ 469 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* right of diagonal part */ 472 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatSetValues_MPIAIJ" 478 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 479 { 480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 481 PetscScalar value; 482 PetscErrorCode ierr; 483 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 484 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 485 PetscBool roworiented = aij->roworiented; 486 487 /* Some Variables required in the macro */ 488 Mat A = aij->A; 489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 490 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 491 MatScalar *aa = a->a; 492 PetscBool ignorezeroentries = a->ignorezeroentries; 493 Mat B = aij->B; 494 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 495 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 496 MatScalar *ba = b->a; 497 498 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 499 PetscInt nonew; 500 MatScalar *ap1,*ap2; 501 502 PetscFunctionBegin; 503 if (v) PetscValidScalarPointer(v,6); 504 for (i=0; i<m; i++) { 505 if (im[i] < 0) continue; 506 #if defined(PETSC_USE_DEBUG) 507 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 508 #endif 509 if (im[i] >= rstart && im[i] < rend) { 510 row = im[i] - rstart; 511 lastcol1 = -1; 512 rp1 = aj + ai[row]; 513 ap1 = aa + ai[row]; 514 rmax1 = aimax[row]; 515 nrow1 = ailen[row]; 516 low1 = 0; 517 high1 = nrow1; 518 lastcol2 = -1; 519 rp2 = bj + bi[row]; 520 ap2 = ba + bi[row]; 521 rmax2 = bimax[row]; 522 nrow2 = bilen[row]; 523 low2 = 0; 524 high2 = nrow2; 525 526 for (j=0; j<n; j++) { 527 if (v) { 528 if (roworiented) value = v[i*n+j]; 529 else value = v[i+j*m]; 530 } else value = 0.0; 531 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 532 if (in[j] >= cstart && in[j] < cend) { 533 col = in[j] - cstart; 534 nonew = a->nonew; 535 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 536 } else if (in[j] < 0) continue; 537 #if defined(PETSC_USE_DEBUG) 538 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 539 #endif 540 else { 541 if (mat->was_assembled) { 542 if (!aij->colmap) { 543 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 
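            /* colmap maps a global column number to (local off-diagonal column index)+1; it is
               stored as a PetscTable when PETSC_USE_CTABLE is defined and as a dense integer
               array otherwise, and is built lazily here on the first off-process column
               insertion after the matrix has been assembled */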
544 } 545 #if defined (PETSC_USE_CTABLE) 546 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 547 col--; 548 #else 549 col = aij->colmap[in[j]] - 1; 550 #endif 551 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 552 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 553 col = in[j]; 554 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 555 B = aij->B; 556 b = (Mat_SeqAIJ*)B->data; 557 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 558 rp2 = bj + bi[row]; 559 ap2 = ba + bi[row]; 560 rmax2 = bimax[row]; 561 nrow2 = bilen[row]; 562 low2 = 0; 563 high2 = nrow2; 564 bm = aij->B->rmap->n; 565 ba = b->a; 566 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 567 } else col = in[j]; 568 nonew = b->nonew; 569 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 570 } 571 } 572 } else { 573 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 574 if (!aij->donotstash) { 575 mat->assembled = PETSC_FALSE; 576 if (roworiented) { 577 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 578 } else { 579 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 580 } 581 } 582 } 583 } 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatGetValues_MPIAIJ" 589 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 590 { 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 592 PetscErrorCode ierr; 593 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 594 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 595 596 PetscFunctionBegin; 597 for (i=0; i<m; i++) { 598 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 599 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 600 if (idxm[i] >= rstart && idxm[i] < rend) { 601 row = idxm[i] - rstart; 602 for (j=0; j<n; j++) { 603 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 604 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 605 if (idxn[j] >= cstart && idxn[j] < cend) { 606 col = idxn[j] - cstart; 607 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 608 } else { 609 if (!aij->colmap) { 610 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 611 } 612 #if defined (PETSC_USE_CTABLE) 613 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 614 col --; 615 #else 616 col = aij->colmap[idxn[j]] - 1; 617 #endif 618 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 619 else { 620 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 621 } 622 } 623 } 624 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 625 } 626 PetscFunctionReturn(0); 627 } 628 629 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 633 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat 
mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble.
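     Each process contributes its was_assembled flag to the MPI_PROD reduction below;
     the product is true only if no process has disassembled, so an assembled process
     that sees a false result disassembles itself as well before reassembling.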
*/ 695 /* 696 if nonzero structure of submatrix B cannot change then we know that 697 no processor disassembled thus we can skip this stuff 698 */ 699 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 700 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 701 if (mat->was_assembled && !other_disassembled) { 702 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 } 705 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 706 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 709 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 710 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 711 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 712 713 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 714 aij->rowvalues = 0; 715 716 /* used by MatAXPY() */ 717 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 718 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 719 720 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 721 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 728 { 729 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 734 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatZeroRows_MPIAIJ" 740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 741 { 742 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 745 PetscInt i,*owners = A->rmap->range; 746 PetscInt *nprocs,j,idx,nsends,row; 747 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 748 PetscInt *rvalues,count,base,slen,*source; 749 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 750 MPI_Comm comm = ((PetscObject)A)->comm; 751 MPI_Request *send_waits,*recv_waits; 752 MPI_Status recv_status,*send_status; 753 const PetscScalar *xx; 754 PetscScalar *bb; 755 #if defined(PETSC_DEBUG) 756 PetscBool found = PETSC_FALSE; 757 #endif 758 759 PetscFunctionBegin; 760 /* first count number of contributors to each processor */ 761 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 762 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 763 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 764 j = 0; 765 for (i=0; i<N; i++) { 766 if (lastidx > (idx = rows[i])) j = 0; 767 lastidx = idx; 768 for (; j<size; j++) { 769 if (idx >= owners[j] && idx < owners[j+1]) { 770 nprocs[2*j]++; 771 nprocs[2*j+1] = 1; 772 owner[i] = j; 773 #if defined(PETSC_DEBUG) 774 found = PETSC_TRUE; 775 #endif 776 break; 777 } 778 } 779 #if defined(PETSC_DEBUG) 780 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 781 found = PETSC_FALSE; 782 #endif 783 } 784 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 785 786 if (A->nooffproczerorows) { 787 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 788 nrecvs = nsends; 789 nmax = N; 790 } 
else { 791 /* inform other processors of number of messages and max length*/ 792 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 793 } 794 795 /* post receives: */ 796 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 797 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 798 for (i=0; i<nrecvs; i++) { 799 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 800 } 801 802 /* do sends: 803 1) starts[i] gives the starting index in svalues for stuff going to 804 the ith processor 805 */ 806 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 807 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 808 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 809 starts[0] = 0; 810 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 811 for (i=0; i<N; i++) { 812 svalues[starts[owner[i]]++] = rows[i]; 813 } 814 815 starts[0] = 0; 816 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 817 count = 0; 818 for (i=0; i<size; i++) { 819 if (nprocs[2*i+1]) { 820 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 821 } 822 } 823 ierr = PetscFree(starts);CHKERRQ(ierr); 824 825 base = owners[rank]; 826 827 /* wait on receives */ 828 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 829 count = nrecvs; slen = 0; 830 while (count) { 831 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 832 /* unpack receives into our local space */ 833 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 834 source[imdex] = recv_status.MPI_SOURCE; 835 lens[imdex] = n; 836 slen += n; 837 count--; 838 } 839 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 840 841 /* move the data into the send scatter */ 842 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 843 count = 0; 844 for (i=0; i<nrecvs; i++) { 845 values = rvalues + i*nmax; 846 for (j=0; j<lens[i]; j++) { 847 lrows[count++] = values[j] - base; 848 } 849 } 850 ierr = PetscFree(rvalues);CHKERRQ(ierr); 851 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 852 ierr = PetscFree(owner);CHKERRQ(ierr); 853 ierr = PetscFree(nprocs);CHKERRQ(ierr); 854 855 /* fix right hand side if needed */ 856 if (x && b) { 857 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 858 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 859 for (i=0; i<slen; i++) { 860 bb[lrows[i]] = diag*xx[lrows[i]]; 861 } 862 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 864 } 865 /* 866 Zero the required rows. If the "diagonal block" of the matrix 867 is square and the user wishes to set the diagonal we use separate 868 code so that MatSetValues() is not called for each diagonal allocating 869 new memory, thus calling lots of mallocs and slowing things down. 
870 871 */ 872 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 873 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 874 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 875 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 876 } else if (diag != 0.0) { 877 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 878 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 879 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 880 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 881 } 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm = ((PetscObject)A)->comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 /* first count number of contributors to each processor */ 931 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 932 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 933 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 934 j = 0; 935 for (i=0; i<N; i++) { 936 if (lastidx > (idx = rows[i])) j = 0; 937 lastidx = idx; 938 for (; j<size; j++) { 939 if (idx >= owners[j] && idx < owners[j+1]) { 940 nprocs[2*j]++; 941 nprocs[2*j+1] = 1; 942 owner[i] = j; 943 #if defined(PETSC_DEBUG) 944 found = PETSC_TRUE; 945 #endif 946 break; 947 } 948 } 949 #if defined(PETSC_DEBUG) 950 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 951 found = PETSC_FALSE; 952 #endif 953 } 954 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 955 956 /* inform other processors of number of messages and max length*/ 957 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 958 959 /* post receives: */ 960 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 961 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 962 for (i=0; i<nrecvs; i++) { 963 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 964 } 965 966 /* do sends: 967 1) starts[i] gives the starting index in svalues for stuff going to 968 the ith processor 969 */ 970 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 971 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 972 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 973 starts[0] = 0; 974 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 975 for (i=0; i<N; i++) { 976 svalues[starts[owner[i]]++] = rows[i]; 977 } 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 source[imdex] = recv_status.MPI_SOURCE; 999 lens[imdex] = n; 1000 slen += n; 1001 count--; 1002 } 1003 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1004 1005 /* move the data into the send scatter */ 1006 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1007 count = 0; 1008 for (i=0; i<nrecvs; i++) { 1009 values = rvalues + i*nmax; 1010 for (j=0; j<lens[i]; j++) { 1011 lrows[count++] = values[j] - base; 1012 } 1013 } 1014 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1015 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1016 ierr = PetscFree(owner);CHKERRQ(ierr); 1017 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1018 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1019 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1022 1023 /* handle off diagonal part of matrix */ 1024 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 1025 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1026 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1027 for (i=0; i<slen; i++) { 1028 bb[lrows[i]] = 1; 1029 } 1030 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1031 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1034 if (x) { 1035 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1038 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1039 } 1040 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1041 1042 /* remove zeroed rows of off diagonal matrix */ 1043 ii = aij->i; 1044 for (i=0; i<slen; i++) { 1045 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1046 } 1047 1048 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1049 if (aij->compressedrow.use) { 1050 m = 
aij->compressedrow.nrows; 1051 ii = aij->compressedrow.i; 1052 ridx = aij->compressedrow.rindex; 1053 for (i=0; i<m; i++) { 1054 n = ii[i+1] - ii[i]; 1055 aj = aij->j + ii[i]; 1056 aa = aij->a + ii[i]; 1057 1058 for (j=0; j<n; j++) { 1059 if (PetscAbsScalar(mask[*aj])) { 1060 if (b) bb[*ridx] -= *aa*xx[*aj]; 1061 *aa = 0.0; 1062 } 1063 aa++; 1064 aj++; 1065 } 1066 ridx++; 1067 } 1068 } else { /* do not use compressed row format */ 1069 m = l->B->rmap->n; 1070 for (i=0; i<m; i++) { 1071 n = ii[i+1] - ii[i]; 1072 aj = aij->j + ii[i]; 1073 aa = aij->a + ii[i]; 1074 for (j=0; j<n; j++) { 1075 if (PetscAbsScalar(mask[*aj])) { 1076 if (b) bb[i] -= *aa*xx[*aj]; 1077 *aa = 0.0; 1078 } 1079 aa++; 1080 aj++; 1081 } 1082 } 1083 } 1084 if (x) { 1085 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1086 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1087 } 1088 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1089 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1090 ierr = PetscFree(lrows);CHKERRQ(ierr); 1091 1092 /* wait on sends */ 1093 if (nsends) { 1094 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1095 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1096 ierr = PetscFree(send_status);CHKERRQ(ierr); 1097 } 1098 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1099 ierr = PetscFree(svalues);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatMult_MPIAIJ" 1105 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 PetscInt nt; 1110 1111 PetscFunctionBegin; 1112 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1113 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1114 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1123 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1135 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1150 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1151 { 1152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 PetscBool merged; 1155 1156 PetscFunctionBegin; 1157 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1158 /* do nondiagonal part */ 1159 ierr = 
(*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1160 if (!merged) { 1161 /* send it on its way */ 1162 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1163 /* do local part */ 1164 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1165 /* receive remote parts: note this assumes the values are not actually */ 1166 /* added in yy until the next line, */ 1167 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1168 } else { 1169 /* do local part */ 1170 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1171 /* send it on its way */ 1172 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1173 /* values actually were received in the Begin() but we need to call this nop */ 1174 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 EXTERN_C_BEGIN 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1182 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1183 { 1184 MPI_Comm comm; 1185 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1186 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1187 IS Me,Notme; 1188 PetscErrorCode ierr; 1189 PetscInt M,N,first,last,*notme,i; 1190 PetscMPIInt size; 1191 1192 PetscFunctionBegin; 1193 /* Easy test: symmetric diagonal block */ 1194 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1195 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1196 if (!*f) PetscFunctionReturn(0); 1197 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1199 if (size == 1) PetscFunctionReturn(0); 1200 1201 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
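       Each process extracts, as sequential matrices, its owned rows restricted to the
       columns it does not own (from Amat) and the transposed pair of index sets (from
       Bmat), then compares the two with the sequential MatIsTranspose().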
*/ 1202 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1203 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1204 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1205 for (i=0; i<first; i++) notme[i] = i; 1206 for (i=last; i<M; i++) notme[i-last+first] = i; 1207 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1208 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1209 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1210 Aoff = Aoffs[0]; 1211 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1212 Boff = Boffs[0]; 1213 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1214 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1215 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1216 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1217 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1218 ierr = PetscFree(notme);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 EXTERN_C_END 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1225 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1226 { 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 PetscErrorCode ierr; 1229 1230 PetscFunctionBegin; 1231 /* do nondiagonal part */ 1232 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1233 /* send it on its way */ 1234 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1235 /* do local part */ 1236 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1237 /* receive remote parts */ 1238 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 /* 1243 This only works correctly for square matrices where the subblock A->A is the 1244 diagonal block 1245 */ 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1248 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1249 { 1250 PetscErrorCode ierr; 1251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1252 1253 PetscFunctionBegin; 1254 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1255 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1256 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatScale_MPIAIJ" 1262 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1263 { 1264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1269 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNCT__ 1274 #define __FUNCT__ "MatDestroy_MPIAIJ" 1275 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1278 PetscErrorCode ierr; 1279 1280 PetscFunctionBegin; 1281 #if defined(PETSC_USE_LOG) 1282 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1283 #endif 1284 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1285 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1286 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1287 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1288 #if defined (PETSC_USE_CTABLE) 1289 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 
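  /* colmap was built as a PetscTable in this configuration (see MatCreateColmap_MPIAIJ_Private());
     without PETSC_USE_CTABLE it is a dense integer array and is simply freed below */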
1290 #else 1291 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1292 #endif 1293 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1294 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1295 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1296 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1297 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1298 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1299 1300 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1314 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1318 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1319 PetscErrorCode ierr; 1320 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1321 int fd; 1322 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1323 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1324 PetscScalar *column_values; 1325 PetscInt message_count,flowcontrolcount; 1326 FILE *file; 1327 1328 PetscFunctionBegin; 1329 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1330 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1331 nz = A->nz + B->nz; 1332 if (!rank) { 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = mat->rmap->N; 1335 header[2] = mat->cmap->N; 1336 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1337 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1338 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1339 /* get largest number of rows any processor has */ 1340 rlen = mat->rmap->n; 1341 range = mat->rmap->range; 1342 for (i=1; i<size; i++) { 1343 rlen = PetscMax(rlen,range[i+1] - range[i]); 1344 } 1345 } else { 1346 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1347 rlen = mat->rmap->n; 1348 } 1349 1350 /* load up the local row counts */ 1351 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1352 for (i=0; i<mat->rmap->n; i++) { 1353 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1354 } 1355 1356 /* store the row lengths to the file */ 1357 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1358 if (!rank) { 1359 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1360 for (i=1; i<size; i++) { 1361 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much buffer space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr =
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1433 if (!rank) { 1434 MPI_Status status; 1435 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1436 for (i=1; i<size; i++) { 1437 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1438 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1439 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1440 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1441 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1442 } 1443 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1444 } else { 1445 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1446 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1447 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1448 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1449 } 1450 ierr = PetscFree(column_values);CHKERRQ(ierr); 1451 1452 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1453 if (file) { 1454 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1461 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1462 { 1463 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1464 PetscErrorCode ierr; 1465 PetscMPIInt rank = aij->rank,size = aij->size; 1466 PetscBool isdraw,iascii,isbinary; 1467 PetscViewer sviewer; 1468 PetscViewerFormat format; 1469 1470 PetscFunctionBegin; 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1472 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1473 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1474 if (iascii) { 1475 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1476 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1477 MatInfo info; 1478 PetscBool inodes; 1479 1480 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1481 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1482 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1483 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1484 if (!inodes) { 1485 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1486 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1487 } else { 1488 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1489 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1490 } 1491 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1492 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1493 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1494 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D 
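      Rank 0 owns every row of the temporary matrix (the other ranks contribute zero rows);
      the local A and B parts are copied into it with MatSetValues(), with B's column indices
      translated back to global numbering through garray, and only rank 0 then views the
      gathered sequential part through the singleton viewer.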
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1495 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1497 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1498 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1501 PetscInt inodecount,inodelimit,*inodes; 1502 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1503 if (inodes) { 1504 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1505 } else { 1506 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1510 PetscFunctionReturn(0); 1511 } 1512 } else if (isbinary) { 1513 if (size == 1) { 1514 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1515 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1516 } else { 1517 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1518 } 1519 PetscFunctionReturn(0); 1520 } else if (isdraw) { 1521 PetscDraw draw; 1522 PetscBool isnull; 1523 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1524 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1525 } 1526 1527 if (size == 1) { 1528 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1529 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1530 } else { 1531 /* assemble the entire matrix onto first processor. 
*/ 1532 Mat A; 1533 Mat_SeqAIJ *Aloc; 1534 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1535 MatScalar *a; 1536 1537 if (mat->rmap->N > 1024) { 1538 PetscBool flg = PETSC_FALSE; 1539 1540 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1541 if (!flg) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 1544 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1545 if (!rank) { 1546 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1547 } else { 1548 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1549 } 1550 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1551 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1552 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1553 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1554 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1555 1556 /* copy over the A part */ 1557 Aloc = (Mat_SeqAIJ*)aij->A->data; 1558 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1559 row = mat->rmap->rstart; 1560 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1561 for (i=0; i<m; i++) { 1562 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1563 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1564 } 1565 aj = Aloc->j; 1566 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1567 1568 /* copy over the B part */ 1569 Aloc = (Mat_SeqAIJ*)aij->B->data; 1570 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1571 row = mat->rmap->rstart; 1572 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1573 ct = cols; 1574 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1575 for (i=0; i<m; i++) { 1576 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1577 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1578 } 1579 ierr = PetscFree(ct);CHKERRQ(ierr); 1580 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1581 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 /* 1583 Everyone has to call to draw the matrix since the graphics waits are 1584 synchronized across all processors that share the PetscDraw object 1585 */ 1586 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1587 if (!rank) { 1588 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1589 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1590 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1591 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1592 } 1593 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1594 ierr = MatDestroy(&A);CHKERRQ(ierr); 1595 } 1596 PetscFunctionReturn(0); 1597 } 1598 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatView_MPIAIJ" 1601 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1602 { 1603 PetscErrorCode ierr; 1604 PetscBool iascii,isdraw,issocket,isbinary; 1605 1606 PetscFunctionBegin; 1607 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1608 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1609 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1611 if (iascii || isdraw || isbinary || issocket) { 1612 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatSOR_MPIAIJ" 1619 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1620 { 1621 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1622 PetscErrorCode ierr; 1623 Vec bb1 = 0; 1624 PetscBool hasop; 1625 1626 PetscFunctionBegin; 1627 if (flag == SOR_APPLY_UPPER) { 1628 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1629 PetscFunctionReturn(0); 1630 } 1631 1632 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1633 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1634 } 1635 1636 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1637 if (flag & SOR_ZERO_INITIAL_GUESS) { 1638 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1639 its--; 1640 } 1641 1642 while (its--) { 1643 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 1646 /* update rhs: bb1 = bb - B*x */ 1647 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1648 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1649 1650 /* local sweep */ 1651 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1652 } 1653 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1654 if (flag & SOR_ZERO_INITIAL_GUESS) { 1655 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1656 its--; 1657 } 1658 while (its--) { 1659 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 1662 /* update rhs: bb1 = bb - B*x */ 1663 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1664 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1665 1666 /* local sweep */ 1667 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1668 } 1669 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1670 if (flag & SOR_ZERO_INITIAL_GUESS) { 1671 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1672 its--; 1673 } 1674 while (its--) { 1675 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1676 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 1678 /* update rhs: bb1 = bb - B*x */ 1679 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1680 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1681 1682 /* local sweep */ 1683 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1684 } 1685 } else if (flag & SOR_EISENSTAT) { 1686 Vec xx1; 1687 1688 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1689 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1690 
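/* Eisenstat's trick: gather the off-process entries of xx into lvec so that the off-diagonal product B*x needed for the modified right-hand side bb1 can be formed locally before the final forward sweep. */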
1691 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1692 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1693 if (!mat->diag) { 1694 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1695 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1696 } 1697 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1698 if (hasop) { 1699 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1700 } else { 1701 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1702 } 1703 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1704 1705 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1706 1707 /* local sweep */ 1708 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1709 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1710 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1711 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1712 1713 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "MatPermute_MPIAIJ" 1719 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1720 { 1721 Mat aA,aB,Aperm; 1722 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1723 PetscScalar *aa,*ba; 1724 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1725 PetscSF rowsf,sf; 1726 IS parcolp = PETSC_NULL; 1727 PetscBool done; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1732 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1733 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1734 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1735 1736 /* Invert row permutation to find out where my rows should go */ 1737 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1738 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1739 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1740 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1741 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1742 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1743 1744 /* Invert column permutation to find out where my columns should go */ 1745 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1749 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1750 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1751 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1752 1753 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1754 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1755 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1756 1757 /* Find out where my gcols should go */ 1758 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1759 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1760 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1761 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1762 ierr = 
PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1763 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1764 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1766 1767 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1768 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1769 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1771 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1772 for (i=0; i<m; i++) { 1773 PetscInt row = rdest[i],rowner; 1774 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1775 for (j=ai[i]; j<ai[i+1]; j++) { 1776 PetscInt cowner,col = cdest[aj[j]]; 1777 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1778 if (rowner == cowner) dnnz[i]++; 1779 else onnz[i]++; 1780 } 1781 for (j=bi[i]; j<bi[i+1]; j++) { 1782 PetscInt cowner,col = gcdest[bj[j]]; 1783 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1784 if (rowner == cowner) dnnz[i]++; 1785 else onnz[i]++; 1786 } 1787 } 1788 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1789 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1791 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1792 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1793 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1794 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1795 1796 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1797 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1799 for (i=0; i<m; i++) { 1800 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1801 PetscInt rowlen; 1802 rowlen = ai[i+1] - ai[i]; 1803 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1804 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1805 rowlen = bi[i+1] - bi[i]; 1806 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1811 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1812 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1813 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1814 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1815 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1816 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1817 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1818 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1819 *B = Aperm; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1826 { 1827 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1828 Mat A = mat->A,B = mat->B; 1829 PetscErrorCode ierr; 1830 PetscReal isend[5],irecv[5]; 1831 1832 PetscFunctionBegin; 1833 
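/*
     The statistics of the diagonal (A) and off-diagonal (B) blocks are summed below; for
     MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the per-process totals are then reduced over the
     matrix communicator.  A minimal caller-side sketch (assuming an assembled MPIAIJ matrix mat):

        MatInfo info;
        ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g memory %g\n",info.nz_used,info.nz_allocated,info.memory);CHKERRQ(ierr);
  */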
info->block_size = 1.0; 1834 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1835 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1836 isend[3] = info->memory; isend[4] = info->mallocs; 1837 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1839 isend[3] += info->memory; isend[4] += info->mallocs; 1840 if (flag == MAT_LOCAL) { 1841 info->nz_used = isend[0]; 1842 info->nz_allocated = isend[1]; 1843 info->nz_unneeded = isend[2]; 1844 info->memory = isend[3]; 1845 info->mallocs = isend[4]; 1846 } else if (flag == MAT_GLOBAL_MAX) { 1847 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1848 info->nz_used = irecv[0]; 1849 info->nz_allocated = irecv[1]; 1850 info->nz_unneeded = irecv[2]; 1851 info->memory = irecv[3]; 1852 info->mallocs = irecv[4]; 1853 } else if (flag == MAT_GLOBAL_SUM) { 1854 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1855 info->nz_used = irecv[0]; 1856 info->nz_allocated = irecv[1]; 1857 info->nz_unneeded = irecv[2]; 1858 info->memory = irecv[3]; 1859 info->mallocs = irecv[4]; 1860 } 1861 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1862 info->fill_ratio_needed = 0; 1863 info->factor_mallocs = 0; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetOption_MPIAIJ" 1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1870 { 1871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1872 PetscErrorCode ierr; 1873 1874 PetscFunctionBegin; 1875 switch (op) { 1876 case MAT_NEW_NONZERO_LOCATIONS: 1877 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1878 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1879 case MAT_KEEP_NONZERO_PATTERN: 1880 case MAT_NEW_NONZERO_LOCATION_ERR: 1881 case MAT_USE_INODES: 1882 case MAT_IGNORE_ZERO_ENTRIES: 1883 MatCheckPreallocated(A,1); 1884 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1885 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1886 break; 1887 case MAT_ROW_ORIENTED: 1888 a->roworiented = flg; 1889 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1890 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1891 break; 1892 case MAT_NEW_DIAGONALS: 1893 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1894 break; 1895 case MAT_IGNORE_OFF_PROC_ENTRIES: 1896 a->donotstash = flg; 1897 break; 1898 case MAT_SPD: 1899 A->spd_set = PETSC_TRUE; 1900 A->spd = flg; 1901 if (flg) { 1902 A->symmetric = PETSC_TRUE; 1903 A->structurally_symmetric = PETSC_TRUE; 1904 A->symmetric_set = PETSC_TRUE; 1905 A->structurally_symmetric_set = PETSC_TRUE; 1906 } 1907 break; 1908 case MAT_SYMMETRIC: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 case MAT_STRUCTURALLY_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_HERMITIAN: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_SYMMETRY_ETERNAL: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 default: 1921 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatGetRow_MPIAIJ" 1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1929 { 1930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1931 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1932 PetscErrorCode ierr; 1933 
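/*
     A requested global row is produced by merging the diagonal block A (local columns shifted
     by cstart) with the off-diagonal block B (local columns mapped through garray), so the
     indices returned are global and increasing.  A typical caller-side pattern, sketched for a
     locally owned global row "row" (names are illustrative only):

        PetscInt          ncols;
        const PetscInt    *cols;
        const PetscScalar *vals;
        ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... examine the ncols entries of cols[] and vals[] ...
        ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  */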
PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1934 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1935 PetscInt *cmap,*idx_p; 1936 1937 PetscFunctionBegin; 1938 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1939 mat->getrowactive = PETSC_TRUE; 1940 1941 if (!mat->rowvalues && (idx || v)) { 1942 /* 1943 allocate enough space to hold information from the longest row. 1944 */ 1945 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1946 PetscInt max = 1,tmp; 1947 for (i=0; i<matin->rmap->n; i++) { 1948 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1949 if (max < tmp) { max = tmp; } 1950 } 1951 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1952 } 1953 1954 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1955 lrow = row - rstart; 1956 1957 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1958 if (!v) {pvA = 0; pvB = 0;} 1959 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1960 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1961 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1962 nztot = nzA + nzB; 1963 1964 cmap = mat->garray; 1965 if (v || idx) { 1966 if (nztot) { 1967 /* Sort by increasing column numbers, assuming A and B already sorted */ 1968 PetscInt imark = -1; 1969 if (v) { 1970 *v = v_p = mat->rowvalues; 1971 for (i=0; i<nzB; i++) { 1972 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1973 else break; 1974 } 1975 imark = i; 1976 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1977 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1978 } 1979 if (idx) { 1980 *idx = idx_p = mat->rowindices; 1981 if (imark > -1) { 1982 for (i=0; i<imark; i++) { 1983 idx_p[i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 for (i=0; i<nzB; i++) { 1987 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1988 else break; 1989 } 1990 imark = i; 1991 } 1992 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1993 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1994 } 1995 } else { 1996 if (idx) *idx = 0; 1997 if (v) *v = 0; 1998 } 1999 } 2000 *nz = nztot; 2001 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2002 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 2012 PetscFunctionBegin; 2013 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2014 aij->getrowactive = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatNorm_MPIAIJ" 2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,cstart = mat->cmap->rstart; 2026 PetscReal sum = 0.0; 2027 MatScalar *v; 2028 2029 PetscFunctionBegin; 2030 if (aij->size == 1) { 2031 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2032 } else { 2033 if (type == NORM_FROBENIUS) { 2034 v = amat->a; 2035 for (i=0; i<amat->nz; i++) { 2036 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 2037 } 2038 v = bmat->a; 2039 for (i=0; i<bmat->nz; i++) { 2040 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2041 } 2042 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2043 *norm = PetscSqrtReal(*norm); 2044 } else if (type == NORM_1) { /* max column norm */ 2045 PetscReal *tmp,*tmp2; 2046 PetscInt *jj,*garray = aij->garray; 2047 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2048 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2049 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2050 *norm = 0.0; 2051 v = amat->a; jj = amat->j; 2052 for (j=0; j<amat->nz; j++) { 2053 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2054 } 2055 v = bmat->a; jj = bmat->j; 2056 for (j=0; j<bmat->nz; j++) { 2057 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2058 } 2059 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2060 for (j=0; j<mat->cmap->N; j++) { 2061 if (tmp2[j] > *norm) *norm = tmp2[j]; 2062 } 2063 ierr = PetscFree(tmp);CHKERRQ(ierr); 2064 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2065 } else if (type == NORM_INFINITY) { /* max row norm */ 2066 PetscReal ntemp = 0.0; 2067 for (j=0; j<aij->A->rmap->n; j++) { 2068 v = amat->a + amat->i[j]; 2069 sum = 0.0; 2070 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2071 sum += PetscAbsScalar(*v); v++; 2072 } 2073 v = bmat->a + bmat->i[j]; 2074 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2075 sum += PetscAbsScalar(*v); v++; 2076 } 2077 if (sum > ntemp) ntemp = sum; 2078 } 2079 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2080 } else { 2081 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2082 } 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatTranspose_MPIAIJ" 2089 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2090 { 2091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2092 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2093 PetscErrorCode ierr; 2094 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2095 PetscInt cstart=A->cmap->rstart,ncol; 2096 Mat B; 2097 MatScalar *array; 2098 2099 PetscFunctionBegin; 2100 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2101 2102 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2103 ai = Aloc->i; aj = Aloc->j; 2104 bi = Bloc->i; bj = Bloc->j; 2105 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2106 PetscInt *d_nnz,*g_nnz,*o_nnz; 2107 PetscSFNode *oloc; 2108 PETSC_UNUSED PetscSF sf; 2109 2110 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2111 /* compute d_nnz for preallocation */ 2112 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2113 for (i=0; i<ai[ma]; i++) { 2114 d_nnz[aj[i]] ++; 2115 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2116 } 2117 /* compute local off-diagonal contributions */ 2118 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2119 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2120 /* map those to global */ 2121 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2122 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2123 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2124 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2125 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2126 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2127 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2128 2129 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2130 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2131 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2132 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2133 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2134 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2135 } else { 2136 B = *matout; 2137 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2138 for (i=0; i<ai[ma]; i++) { 2139 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2140 } 2141 } 2142 2143 /* copy over the A part */ 2144 array = Aloc->a; 2145 row = A->rmap->rstart; 2146 for (i=0; i<ma; i++) { 2147 ncol = ai[i+1]-ai[i]; 2148 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2149 row++; array += ncol; aj += ncol; 2150 } 2151 aj = Aloc->j; 2152 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2153 2154 /* copy over the B part */ 2155 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2156 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2157 array = Bloc->a; 2158 row = A->rmap->rstart; 2159 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2160 cols_tmp = cols; 2161 for (i=0; i<mb; i++) { 2162 ncol = bi[i+1]-bi[i]; 2163 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2164 row++; array += ncol; cols_tmp += ncol; 2165 } 2166 ierr = PetscFree(cols);CHKERRQ(ierr); 2167 2168 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2170 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2171 *matout = B; 2172 } else { 2173 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2174 } 2175 PetscFunctionReturn(0); 2176 } 2177 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2180 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2181 { 2182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2183 Mat a = aij->A,b = aij->B; 2184 PetscErrorCode ierr; 2185 PetscInt s1,s2,s3; 2186 2187 PetscFunctionBegin; 2188 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2189 if (rr) { 2190 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2191 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2192 /* Overlap communication with computation. 
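The scatter of rr into lvec is only started here; it is completed after the left scaling of the off-diagonal block and the scaling of the diagonal block, so the communication proceeds while the local work is being done.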
*/ 2193 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2194 } 2195 if (ll) { 2196 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2197 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2198 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2199 } 2200 /* scale the diagonal block */ 2201 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2202 2203 if (rr) { 2204 /* Do a scatter end and then right scale the off-diagonal block */ 2205 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2206 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2207 } 2208 PetscFunctionReturn(0); 2209 } 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2213 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2214 { 2215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2216 PetscErrorCode ierr; 2217 2218 PetscFunctionBegin; 2219 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatEqual_MPIAIJ" 2225 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2226 { 2227 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2228 Mat a,b,c,d; 2229 PetscBool flg; 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 a = matA->A; b = matA->B; 2234 c = matB->A; d = matB->B; 2235 2236 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2237 if (flg) { 2238 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2239 } 2240 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatCopy_MPIAIJ" 2246 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2247 { 2248 PetscErrorCode ierr; 2249 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2250 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2251 2252 PetscFunctionBegin; 2253 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2254 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2255 /* because of the column compression in the off-processor part of the matrix a->B, 2256 the number of columns in a->B and b->B may be different, hence we cannot call 2257 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2258 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2259 then copying the submatrices */ 2260 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2261 } else { 2262 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2263 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatSetUp_MPIAIJ" 2270 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2271 { 2272 PetscErrorCode ierr; 2273 2274 PetscFunctionBegin; 2275 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2281 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2282 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2283 { 2284 PetscInt i,m=Y->rmap->N; 2285 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2286 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2287 const PetscInt *xi = x->i,*yi = y->i; 2288 2289 PetscFunctionBegin; 2290 /* Set the number of nonzeros in the new matrix */ 2291 for (i=0; i<m; i++) { 2292 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2293 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2294 nnz[i] = 0; 2295 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2296 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2297 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2298 nnz[i]++; 2299 } 2300 for (; k<nzy; k++) nnz[i]++; 2301 } 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatAXPY_MPIAIJ" 2307 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2308 { 2309 PetscErrorCode ierr; 2310 PetscInt i; 2311 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2312 PetscBLASInt bnz,one=1; 2313 Mat_SeqAIJ *x,*y; 2314 2315 PetscFunctionBegin; 2316 if (str == SAME_NONZERO_PATTERN) { 2317 PetscScalar alpha = a; 2318 x = (Mat_SeqAIJ *)xx->A->data; 2319 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2320 y = (Mat_SeqAIJ *)yy->A->data; 2321 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2322 x = (Mat_SeqAIJ *)xx->B->data; 2323 y = (Mat_SeqAIJ *)yy->B->data; 2324 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2325 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2326 } else if (str == SUBSET_NONZERO_PATTERN) { 2327 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2328 2329 x = (Mat_SeqAIJ *)xx->B->data; 2330 y = (Mat_SeqAIJ *)yy->B->data; 2331 if (y->xtoy && y->XtoY != xx->B) { 2332 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2333 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2334 } 2335 if (!y->xtoy) { /* get xtoy */ 2336 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2337 y->XtoY = xx->B; 2338 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2339 } 2340 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2341 } else { 2342 Mat B; 2343 PetscInt *nnz_d,*nnz_o; 2344 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2345 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2346 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2347 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2348 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2349 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2350 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2351 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2352 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2353 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2354 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2355 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2356 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2357 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2358 } 2359 PetscFunctionReturn(0); 2360 } 2361 2362 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatConjugate_MPIAIJ" 2366 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2367 { 2368 #if defined(PETSC_USE_COMPLEX) 2369 PetscErrorCode ierr; 2370 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2371 2372 PetscFunctionBegin; 2373 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2374 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2375 #else 2376 PetscFunctionBegin; 2377 #endif 2378 PetscFunctionReturn(0); 2379 } 2380 2381 #undef __FUNCT__ 2382 #define __FUNCT__ "MatRealPart_MPIAIJ" 2383 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2384 { 2385 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2386 PetscErrorCode ierr; 2387 2388 PetscFunctionBegin; 2389 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2390 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2396 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2397 { 2398 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2403 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #if defined(PETSC_HAVE_PBGL) 2408 2409 #include <boost/parallel/mpi/bsp_process_group.hpp> 2410 #include <boost/graph/distributed/ilu_default_graph.hpp> 2411 #include <boost/graph/distributed/ilu_0_block.hpp> 2412 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2413 #include <boost/graph/distributed/petsc/interface.hpp> 2414 #include <boost/multi_array.hpp> 2415 #include <boost/parallel/distributed_property_map->hpp> 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2419 /* 2420 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2421 */ 2422 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2423 { 2424 namespace petsc = boost::distributed::petsc; 2425 2426 namespace graph_dist = boost::graph::distributed; 2427 using boost::graph::distributed::ilu_default::process_group_type; 2428 using boost::graph::ilu_permuted; 2429 2430 PetscBool row_identity, col_identity; 2431 PetscContainer c; 2432 PetscInt m, n, M, N; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2437 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2438 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2439 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2440 2441 process_group_type pg; 2442 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2443 
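/* The level graph built below holds the distributed ILU(0) structure: the PETSc matrix is read into a PBGL graph, ilu_permuted() performs the factorization, and the result is stashed in a PetscContainer composed with the factor matrix under the key "graph" so that MatSolve_MPIAIJ can retrieve it. */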
lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2444 lgraph_type& level_graph = *lgraph_p; 2445 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2446 2447 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2448 ilu_permuted(level_graph); 2449 2450 /* put together the new matrix */ 2451 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2452 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2453 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2454 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2455 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2456 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2457 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2458 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2459 2460 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2461 ierr = PetscContainerSetPointer(c, lgraph_p); 2462 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2463 ierr = PetscContainerDestroy(&c); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2469 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2470 { 2471 PetscFunctionBegin; 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatSolve_MPIAIJ" 2477 /* 2478 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2479 */ 2480 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2481 { 2482 namespace graph_dist = boost::graph::distributed; 2483 2484 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2485 lgraph_type* lgraph_p; 2486 PetscContainer c; 2487 PetscErrorCode ierr; 2488 2489 PetscFunctionBegin; 2490 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2491 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2492 ierr = VecCopy(b, x);CHKERRQ(ierr); 2493 2494 PetscScalar* array_x; 2495 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2496 PetscInt sx; 2497 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2498 2499 PetscScalar* array_b; 2500 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2501 PetscInt sb; 2502 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2503 2504 lgraph_type& level_graph = *lgraph_p; 2505 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2506 2507 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2508 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2509 ref_x(array_x, boost::extents[num_vertices(graph)]); 2510 2511 typedef boost::iterator_property_map<array_ref_type::iterator, 2512 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2513 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2514 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2515 2516 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2517 PetscFunctionReturn(0); 2518 } 2519 #endif 2520 2521 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2522 PetscInt nzlocal,nsends,nrecvs; 2523 PetscMPIInt *send_rank,*recv_rank; 2524 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2525 PetscScalar *sbuf_a,**rbuf_a; 2526 PetscErrorCode (*Destroy)(Mat); 2527 } Mat_Redundant; 2528 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2531 PetscErrorCode 
PetscContainerDestroy_MatRedundant(void *ptr) 2532 { 2533 PetscErrorCode ierr; 2534 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2535 PetscInt i; 2536 2537 PetscFunctionBegin; 2538 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2539 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2541 for (i=0; i<redund->nrecvs; i++) { 2542 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2543 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2544 } 2545 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2546 ierr = PetscFree(redund);CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 #undef __FUNCT__ 2551 #define __FUNCT__ "MatDestroy_MatRedundant" 2552 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2553 { 2554 PetscErrorCode ierr; 2555 PetscContainer container; 2556 Mat_Redundant *redund=PETSC_NULL; 2557 2558 PetscFunctionBegin; 2559 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2560 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2561 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2562 A->ops->destroy = redund->Destroy; 2563 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2564 if (A->ops->destroy) { 2565 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2566 } 2567 PetscFunctionReturn(0); 2568 } 2569 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2572 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2573 { 2574 PetscMPIInt rank,size; 2575 MPI_Comm comm=((PetscObject)mat)->comm; 2576 PetscErrorCode ierr; 2577 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2578 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2579 PetscInt *rowrange=mat->rmap->range; 2580 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2581 Mat A=aij->A,B=aij->B,C=*matredundant; 2582 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2583 PetscScalar *sbuf_a; 2584 PetscInt nzlocal=a->nz+b->nz; 2585 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2586 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2587 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2588 MatScalar *aworkA,*aworkB; 2589 PetscScalar *vals; 2590 PetscMPIInt tag1,tag2,tag3,imdex; 2591 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2592 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2593 MPI_Status recv_status,*send_status; 2594 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2595 PetscInt **rbuf_j=PETSC_NULL; 2596 PetscScalar **rbuf_a=PETSC_NULL; 2597 Mat_Redundant *redund=PETSC_NULL; 2598 PetscContainer container; 2599 2600 PetscFunctionBegin; 2601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2603 2604 if (reuse == MAT_REUSE_MATRIX) { 2605 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2606 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2607 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2608 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2609 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2610 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2611 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2612 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2613 2614 nsends = redund->nsends; 2615 nrecvs = redund->nrecvs; 2616 send_rank = redund->send_rank; 2617 recv_rank = redund->recv_rank; 2618 sbuf_nz = redund->sbuf_nz; 2619 rbuf_nz = redund->rbuf_nz; 2620 sbuf_j = redund->sbuf_j; 2621 sbuf_a = redund->sbuf_a; 2622 rbuf_j = redund->rbuf_j; 2623 rbuf_a = redund->rbuf_a; 2624 } 2625 2626 if (reuse == MAT_INITIAL_MATRIX) { 2627 PetscMPIInt subrank,subsize; 2628 PetscInt nleftover,np_subcomm; 2629 /* get the destination processors' id send_rank, nsends and nrecvs */ 2630 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2631 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2632 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2633 np_subcomm = size/nsubcomm; 2634 nleftover = size - nsubcomm*np_subcomm; 2635 nsends = 0; nrecvs = 0; 2636 for (i=0; i<size; i++) { /* i=rank*/ 2637 if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */ 2638 send_rank[nsends] = i; nsends++; 2639 recv_rank[nrecvs++] = i; 2640 } 2641 } 2642 if (rank >= size - nleftover) {/* this proc is a leftover processor */ 2643 i = size-nleftover-1; 2644 j = 0; 2645 while (j < nsubcomm - nleftover) { 2646 send_rank[nsends++] = i; 2647 i--; j++; 2648 } 2649 } 2650 2651 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2652 for (i=0; i<nleftover; i++) { 2653 recv_rank[nrecvs++] = size-nleftover+i; 2654 } 2655 } 2656 2657 /* allocate sbuf_j, sbuf_a */ 2658 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2659 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2660 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2661 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2662 2663 /* copy mat's local entries into the buffers */ 2664 if (reuse == MAT_INITIAL_MATRIX) { 2665 rownz_max = 0; 2666 rptr = sbuf_j; 2667 cols = sbuf_j + rend-rstart + 1; 2668 vals = sbuf_a; 2669 rptr[0] = 0; 2670 for (i=0; i<rend-rstart; i++) { 2671 row = i + rstart; 2672 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2673 ncols = nzA + nzB; 2674 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2675 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2676 /* load the column indices for this row into cols */ 2677 lwrite = 0; 2678 for (l=0; l<nzB; l++) { 2679 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2680 vals[lwrite] = aworkB[l]; 2681 cols[lwrite++] = ctmp; 2682 } 2683 } 2684 for (l=0; l<nzA; l++) { 2685 vals[lwrite] = aworkA[l]; 2686 cols[lwrite++] = cstart + cworkA[l]; 2687 } 2688 for (l=0; l<nzB; l++) { 2689 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2690 vals[lwrite] = aworkB[l]; 2691 cols[lwrite++] = ctmp; 2692 } 2693 } 2694 vals += ncols; 2695 cols += ncols; 2696 rptr[i+1] = rptr[i] + ncols; 2697 if (rownz_max < ncols) rownz_max = ncols; 2698 } 2699 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2700 } else { /* only copy matrix values into sbuf_a */ 2701 rptr = sbuf_j; 2702 vals = sbuf_a; 2703 rptr[0] = 0; 2704 for (i=0; 
i<rend-rstart; i++) { 2705 row = i + rstart; 2706 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2707 ncols = nzA + nzB; 2708 cworkB = b->j + b->i[i]; 2709 aworkA = a->a + a->i[i]; 2710 aworkB = b->a + b->i[i]; 2711 lwrite = 0; 2712 for (l=0; l<nzB; l++) { 2713 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2714 } 2715 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2716 for (l=0; l<nzB; l++) { 2717 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2718 } 2719 vals += ncols; 2720 rptr[i+1] = rptr[i] + ncols; 2721 } 2722 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2723 2724 /* send nzlocal to others, and recv other's nzlocal */ 2725 /*--------------------------------------------------*/ 2726 if (reuse == MAT_INITIAL_MATRIX) { 2727 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2728 s_waits2 = s_waits3 + nsends; 2729 s_waits1 = s_waits2 + nsends; 2730 r_waits1 = s_waits1 + nsends; 2731 r_waits2 = r_waits1 + nrecvs; 2732 r_waits3 = r_waits2 + nrecvs; 2733 } else { 2734 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2735 r_waits3 = s_waits3 + nsends; 2736 } 2737 2738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2739 if (reuse == MAT_INITIAL_MATRIX) { 2740 /* get new tags to keep the communication clean */ 2741 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2742 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2743 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2744 2745 /* post receives of other's nzlocal */ 2746 for (i=0; i<nrecvs; i++) { 2747 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2748 } 2749 /* send nzlocal to others */ 2750 for (i=0; i<nsends; i++) { 2751 sbuf_nz[i] = nzlocal; 2752 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2753 } 2754 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2755 count = nrecvs; 2756 while (count) { 2757 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2758 recv_rank[imdex] = recv_status.MPI_SOURCE; 2759 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2760 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2761 2762 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2763 rbuf_nz[imdex] += i + 2; 2764 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2765 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2766 count--; 2767 } 2768 /* wait on sends of nzlocal */ 2769 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2770 /* send mat->i,j to others, and recv from other's */ 2771 /*------------------------------------------------*/ 2772 for (i=0; i<nsends; i++) { 2773 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2774 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2775 } 2776 /* wait on receives of mat->i,j */ 2777 /*------------------------------*/ 2778 count = nrecvs; 2779 while (count) { 2780 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2781 if (recv_rank[imdex] != recv_status.MPI_SOURCE) 
SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2782 count--; 2783 } 2784 /* wait on sends of mat->i,j */ 2785 /*---------------------------*/ 2786 if (nsends) { 2787 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2788 } 2789 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2790 2791 /* post receives, send and receive mat->a */ 2792 /*----------------------------------------*/ 2793 for (imdex=0; imdex<nrecvs; imdex++) { 2794 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2795 } 2796 for (i=0; i<nsends; i++) { 2797 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2798 } 2799 count = nrecvs; 2800 while (count) { 2801 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2802 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2803 count--; 2804 } 2805 if (nsends) { 2806 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2807 } 2808 2809 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2810 2811 /* create redundant matrix */ 2812 /*-------------------------*/ 2813 if (reuse == MAT_INITIAL_MATRIX) { 2814 /* compute rownz_max for preallocation */ 2815 for (imdex=0; imdex<nrecvs; imdex++) { 2816 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2817 rptr = rbuf_j[imdex]; 2818 for (i=0; i<j; i++) { 2819 ncols = rptr[i+1] - rptr[i]; 2820 if (rownz_max < ncols) rownz_max = ncols; 2821 } 2822 } 2823 2824 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2825 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2826 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2827 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2829 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2830 } else { 2831 C = *matredundant; 2832 } 2833 2834 /* insert local matrix entries */ 2835 rptr = sbuf_j; 2836 cols = sbuf_j + rend-rstart + 1; 2837 vals = sbuf_a; 2838 for (i=0; i<rend-rstart; i++) { 2839 row = i + rstart; 2840 ncols = rptr[i+1] - rptr[i]; 2841 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 vals += ncols; 2843 cols += ncols; 2844 } 2845 /* insert received matrix entries */ 2846 for (imdex=0; imdex<nrecvs; imdex++) { 2847 rstart = rowrange[recv_rank[imdex]]; 2848 rend = rowrange[recv_rank[imdex]+1]; 2849 rptr = rbuf_j[imdex]; 2850 cols = rbuf_j[imdex] + rend-rstart + 1; 2851 vals = rbuf_a[imdex]; 2852 for (i=0; i<rend-rstart; i++) { 2853 row = i + rstart; 2854 ncols = rptr[i+1] - rptr[i]; 2855 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2856 vals += ncols; 2857 cols += ncols; 2858 } 2859 } 2860 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2861 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2862 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2863 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2864 if (reuse == MAT_INITIAL_MATRIX) { 2865 PetscContainer container; 2866 *matredundant = C; 2867 /* create a supporting struct and attach it to C for reuse */ 2868 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2869 ierr = 
PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2870 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2871 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2872 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2873 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2874 2875 redund->nzlocal = nzlocal; 2876 redund->nsends = nsends; 2877 redund->nrecvs = nrecvs; 2878 redund->send_rank = send_rank; 2879 redund->recv_rank = recv_rank; 2880 redund->sbuf_nz = sbuf_nz; 2881 redund->rbuf_nz = rbuf_nz; 2882 redund->sbuf_j = sbuf_j; 2883 redund->sbuf_a = sbuf_a; 2884 redund->rbuf_j = rbuf_j; 2885 redund->rbuf_a = rbuf_a; 2886 2887 redund->Destroy = C->ops->destroy; 2888 C->ops->destroy = MatDestroy_MatRedundant; 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 #undef __FUNCT__ 2894 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2895 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2896 { 2897 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2898 PetscErrorCode ierr; 2899 PetscInt i,*idxb = 0; 2900 PetscScalar *va,*vb; 2901 Vec vtmp; 2902 2903 PetscFunctionBegin; 2904 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2905 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2906 if (idx) { 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2909 } 2910 } 2911 2912 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2913 if (idx) { 2914 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2915 } 2916 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2917 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2918 2919 for (i=0; i<A->rmap->n; i++) { 2920 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2921 va[i] = vb[i]; 2922 if (idx) idx[i] = a->garray[idxb[i]]; 2923 } 2924 } 2925 2926 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2928 ierr = PetscFree(idxb);CHKERRQ(ierr); 2929 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2930 PetscFunctionReturn(0); 2931 } 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2935 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2936 { 2937 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2938 PetscErrorCode ierr; 2939 PetscInt i,*idxb = 0; 2940 PetscScalar *va,*vb; 2941 Vec vtmp; 2942 2943 PetscFunctionBegin; 2944 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2945 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2946 if (idx) { 2947 for (i=0; i<A->cmap->n; i++) { 2948 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2949 } 2950 } 2951 2952 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2953 if (idx) { 2954 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2955 } 2956 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2957 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2958 2959 for (i=0; i<A->rmap->n; i++) { 2960 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2961 va[i] = vb[i]; 2962 if (idx) idx[i] = a->garray[idxb[i]]; 2963 } 2964 } 2965 2966 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2967 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2968 ierr = PetscFree(idxb);CHKERRQ(ierr); 2969 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2975 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* sequential work vectors, one entry per local row */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ *) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#if defined(PETSC_HAVE_PBGL)
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       MatZeroRowsColumns_MPIAIJ,
/*49*/ MatSetRandom_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
/*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
       0,
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       MatFindZeroDiagonals_MPIAIJ,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_MPIAIJ_MPIAIJ,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ,
/*124*/MatFindNonzeroRows_MPIAIJ,
       MatGetColumnNorms_MPIAIJ,
       MatInvertBlockDiagonal_MPIAIJ,
       0,
       MatGetSubMatricesParallel_MPIAIJ,
/*129*/0,
       MatTransposeMatMult_MPIAIJ_MPIAIJ,
       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
       0,
/*134*/0,
       0,
       0,
       0,
       0
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
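/*
   Usage sketch (illustrative, not part of the library): the two routines above simply
   forward MatStoreValues()/MatRetrieveValues() to the diagonal and off-diagonal blocks.
   A typical application pattern, where A is a hypothetical assembled MPIAIJ matrix whose
   nonzero pattern is reused across iterations, is

      ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(A);CHKERRQ(ierr);
      .... change existing entries of A with MatSetValues() and reassemble ....
      ierr = MatRetrieveValues(A);CHKERRQ(ierr);    restores the stored values
*/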
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->cmap->bs     = matin->cmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
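/*
   Usage sketch (illustrative, not part of the library): MatLoad_MPIAIJ() below is reached
   through MatLoad() once the matrix type is MPIAIJ; "matrix.dat" is a hypothetical binary
   file written earlier with MatView() on a binary viewer.

      Mat         A;
      PetscViewer fd;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,fd);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/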
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for the process with the most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = m;

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, then the final result obtained by concatenating those local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
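       A minimal calling sketch under the assumptions above (m and n are hypothetical
     local row and column counts, and i, j, v the local CSR arrays of this process,
     laid out as in the listing; v is passed for the values argument):

.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve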
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
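   For the example above, each process would make calls of the following form
   (a sketch; m,n are that process' local sizes and d_nnz,o_nnz hold the
   per-process values just listed):

.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,8,8);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve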
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
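     With those (hypothetical) local arrays in hand, each process makes a single call;
     as a sketch, with nrows_owned the local row count, N the global column count of
     the example, and v passed for the values argument a:

.vb
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nrows_owned,PETSC_DECIDE,
                                      PETSC_DETERMINE,N,i,j,v,&mat);CHKERRQ(ierr);
.ve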
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.
   Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
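   Each process calls MatCreateAIJ() collectively with its own local sizes and
   (hypothetical) nnz arrays; as a sketch, proc0 in the example above would call

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve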
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check that sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4453 } 4454 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4455 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4461 /*@ 4462 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4463 matrices from each processor 4464 4465 Collective on MPI_Comm 4466 4467 Input Parameters: 4468 + comm - the communicator the parallel matrix will live on 4469 . inmat - the input sequential matrix (one per process) 4470 . n - number of local columns (or PETSC_DECIDE) 4471 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4472 4473 Output Parameter: 4474 . outmat - the parallel matrix generated 4475 4476 Level: advanced 4477 4478 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4479 4480 @*/ 4481 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4482 { 4483 PetscErrorCode ierr; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4487 if (scall == MAT_INITIAL_MATRIX) { 4488 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4489 } 4490 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4491 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "MatFileSplit" 4497 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4498 { 4499 PetscErrorCode ierr; 4500 PetscMPIInt rank; 4501 PetscInt m,N,i,rstart,nnz; 4502 size_t len; 4503 const PetscInt *indx; 4504 PetscViewer out; 4505 char *name; 4506 Mat B; 4507 const PetscScalar *values; 4508 4509 PetscFunctionBegin; 4510 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4511 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4512 /* Should this be the type of the diagonal block of A?
*/ 4513 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4514 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4515 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4516 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4517 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4518 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4519 for (i=0;i<m;i++) { 4520 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4521 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4522 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4523 } 4524 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 4527 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4528 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4529 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4530 sprintf(name,"%s.%d",outfile,rank); 4531 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4532 ierr = PetscFree(name);CHKERRQ(ierr); 4533 ierr = MatView(B,out);CHKERRQ(ierr); 4534 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4535 ierr = MatDestroy(&B);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4540 #undef __FUNCT__ 4541 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4542 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4543 { 4544 PetscErrorCode ierr; 4545 Mat_Merge_SeqsToMPI *merge; 4546 PetscContainer container; 4547 4548 PetscFunctionBegin; 4549 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4550 if (container) { 4551 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4564 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4565 ierr = PetscFree(merge);CHKERRQ(ierr); 4566 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4567 } 4568 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 #include <../src/mat/utils/freespace.h> 4573 #include <petscbt.h> 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4577 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4578 { 4579 PetscErrorCode ierr; 4580 MPI_Comm comm=((PetscObject)mpimat)->comm; 4581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4582 PetscMPIInt size,rank,taga,*len_s; 4583 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4584 PetscInt proc,m; 4585 PetscInt **buf_ri,**buf_rj; 4586 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4587 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4588 MPI_Request *s_waits,*r_waits; 4589 MPI_Status *status; 4590 MatScalar *aa=a->a; 4591 MatScalar **abuf_r,*ba_i; 4592 Mat_Merge_SeqsToMPI *merge; 4593 PetscContainer container; 4594 4595 PetscFunctionBegin; 4596 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4597 4598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4600 4601 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4602 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4603 4604 bi = merge->bi; 4605 bj = merge->bj; 4606 buf_ri = merge->buf_ri; 4607 buf_rj = merge->buf_rj; 4608 4609 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4610 owners = merge->rowmap->range; 4611 len_s = merge->len_s; 4612 4613 /* send and recv matrix values */ 4614 /*-----------------------------*/ 4615 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4616 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4617 4618 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4619 for (proc=0,k=0; proc<size; proc++) { 4620 if (!len_s[proc]) continue; 4621 i = owners[proc]; 4622 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4623 k++; 4624 } 4625 4626 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4627 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4628 ierr = PetscFree(status);CHKERRQ(ierr); 4629 4630 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4631 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4632 4633 /* insert mat values of mpimat */ 4634 /*----------------------------*/ 4635 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4636 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4637 4638 for (k=0; k<merge->nrecv; k++) { 4639 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4640 nrows = *(buf_ri_k[k]); 4641 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4642 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4643 } 4644 4645 /* set values of ba */ 4646 m = merge->rowmap->n; 4647 for (i=0; i<m; i++) { 4648 arow = owners[rank] + i; 4649 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4650 bnzi = bi[i+1] - bi[i]; 4651 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4652 4653 /* add local non-zero vals of this proc's seqmat into ba */ 4654 anzi = ai[arow+1] - ai[arow]; 4655 aj = a->j + ai[arow]; 4656 aa = a->a + ai[arow]; 4657 nextaj = 0; 4658 for (j=0; nextaj<anzi; j++) { 4659 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4660 ba_i[j] += aa[nextaj++]; 4661 } 4662 } 4663 4664 /* add received vals into ba */ 4665 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4666 /* i-th row */ 4667 if (i == *nextrow[k]) { 4668 anzi = *(nextai[k]+1) - *nextai[k]; 4669 aj = buf_rj[k] + *(nextai[k]); 4670 aa = abuf_r[k] + *(nextai[k]); 4671 nextaj = 0; 4672 for (j=0; nextaj<anzi; j++) { 4673 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4674 ba_i[j] += aa[nextaj++]; 4675 } 4676 } 4677 nextrow[k]++; nextai[k]++; 4678 } 4679 } 4680 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4681 } 4682 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4683 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4684 4685 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4686 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4687 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4688 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4689 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4690 PetscFunctionReturn(0); 4691 } 4692 4693 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4694 4695 #undef __FUNCT__ 4696 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4697 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4698 { 4699 PetscErrorCode ierr; 4700 Mat B_mpi; 4701 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4702 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4703 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4704 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4705 PetscInt len,proc,*dnz,*onz,bs,cbs; 4706 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4707 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4708 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4709 MPI_Status *status; 4710 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4711 PetscBT lnkbt; 4712 Mat_Merge_SeqsToMPI *merge; 4713 PetscContainer container; 4714 4715 PetscFunctionBegin; 4716 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4717 4718 /* make sure it is a PETSc comm */ 4719 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4720 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4721 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4722 4723 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4724 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4725 4726 /* determine row ownership */ 4727 /*---------------------------------------------------------*/ 4728 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4729 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4730 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4731 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4732 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4733 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4734 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4735 4736 m = merge->rowmap->n; 4737 owners = merge->rowmap->range; 4738 4739 /* determine the number of messages to send, their lengths */ 4740 /*---------------------------------------------------------*/ 4741 len_s = merge->len_s; 4742 4743 len = 0; /* length of buf_si[] */ 4744 merge->nsend = 0; 4745 for (proc=0; proc<size; proc++) { 4746 len_si[proc] = 0; 4747 if (proc == rank) { 4748 len_s[proc] = 0; 4749 } else { 4750 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4751 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4752 } 4753 if (len_s[proc]) { 4754 merge->nsend++; 4755 nrows = 0; 4756 for (i=owners[proc]; i<owners[proc+1]; i++) { 4757 if (ai[i+1] > ai[i]) nrows++; 4758 } 4759 len_si[proc] = 2*(nrows+1); 4760 len += len_si[proc]; 4761 } 4762 } 4763 4764 /* determine the number and length of messages to receive for ij-structure */ 4765 /*-------------------------------------------------------------------------*/ 4766 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4767 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4768 4769 /* post the Irecv of j-structure */ 4770 /*-------------------------------*/ 4771 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4772 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4773 4774 /* post the Isend of j-structure */ 4775 /*--------------------------------*/ 4776 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4777 4778 for (proc=0, k=0; proc<size; proc++) { 4779 if (!len_s[proc]) continue; 4780 i = owners[proc]; 4781 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4782 k++; 4783 } 4784 4785 /* receives and sends of j-structure are complete */ 4786 /*------------------------------------------------*/ 4787 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4788 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4789 4790 /* send and recv i-structure */ 4791 /*---------------------------*/ 4792 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4793 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4794 4795 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4796 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4797 for (proc=0,k=0; proc<size; proc++) { 4798 if (!len_s[proc]) continue; 4799 /* form outgoing message for i-structure: 4800 buf_si[0]: nrows to be sent 4801 [1:nrows]: row index (global) 4802 [nrows+1:2*nrows+1]: i-structure index 4803 */ 4804 /*-------------------------------------------*/ 4805 nrows = len_si[proc]/2 - 1; 4806 buf_si_i = buf_si + nrows+1; 4807 buf_si[0] = nrows; 4808 buf_si_i[0] = 0; 4809 nrows = 0; 4810 for (i=owners[proc]; i<owners[proc+1]; i++) { 4811 anzi = ai[i+1] - ai[i]; 4812 if (anzi) { 4813 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4814 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4815 nrows++; 4816 } 4817 } 4818 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4819 k++; 4820 buf_si += len_si[proc]; 4821 } 4822 4823 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4824 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4825 4826 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4827 for (i=0; i<merge->nrecv; i++) { 4828 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4829 } 4830 4831 ierr = PetscFree(len_si);CHKERRQ(ierr); 4832 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4833 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4834 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4835 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4836 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4837 ierr = PetscFree(status);CHKERRQ(ierr); 4838 4839 /* compute a local seq matrix in each processor */ 4840 /*----------------------------------------------*/ 4841 /* allocate bi array and free space for accumulating nonzero column info */ 4842 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4843 bi[0] = 0; 4844 4845 /* create and initialize a linked list */ 4846 nlnk = N+1; 4847 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4848 4849 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4850 len = ai[owners[rank+1]] - ai[owners[rank]]; 4851 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4852 current_space = free_space; 4853 4854 /* determine symbolic info 
for each local row */ 4855 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4856 4857 for (k=0; k<merge->nrecv; k++) { 4858 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4859 nrows = *buf_ri_k[k]; 4860 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4861 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4862 } 4863 4864 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4865 len = 0; 4866 for (i=0;i<m;i++) { 4867 bnzi = 0; 4868 /* add local non-zero cols of this proc's seqmat into lnk */ 4869 arow = owners[rank] + i; 4870 anzi = ai[arow+1] - ai[arow]; 4871 aj = a->j + ai[arow]; 4872 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4873 bnzi += nlnk; 4874 /* add received col data into lnk */ 4875 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4876 if (i == *nextrow[k]) { /* i-th row */ 4877 anzi = *(nextai[k]+1) - *nextai[k]; 4878 aj = buf_rj[k] + *nextai[k]; 4879 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4880 bnzi += nlnk; 4881 nextrow[k]++; nextai[k]++; 4882 } 4883 } 4884 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4885 4886 /* if free space is not available, make more free space */ 4887 if (current_space->local_remaining<bnzi) { 4888 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4889 nspacedouble++; 4890 } 4891 /* copy data into free space, then initialize lnk */ 4892 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4893 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4894 4895 current_space->array += bnzi; 4896 current_space->local_used += bnzi; 4897 current_space->local_remaining -= bnzi; 4898 4899 bi[i+1] = bi[i] + bnzi; 4900 } 4901 4902 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4903 4904 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4905 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4906 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4907 4908 /* create symbolic parallel matrix B_mpi */ 4909 /*---------------------------------------*/ 4910 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4911 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4912 if (n==PETSC_DECIDE) { 4913 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4914 } else { 4915 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4916 } 4917 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4918 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4919 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4920 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4921 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4922 4923 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4924 B_mpi->assembled = PETSC_FALSE; 4925 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4926 merge->bi = bi; 4927 merge->bj = bj; 4928 merge->buf_ri = buf_ri; 4929 merge->buf_rj = buf_rj; 4930 merge->coi = PETSC_NULL; 4931 merge->coj = PETSC_NULL; 4932 merge->owners_co = PETSC_NULL; 4933 4934 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4935 4936 /* attach the supporting struct to B_mpi for reuse */ 4937 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4938 ierr = 
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4939 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4940 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4941 *mpimat = B_mpi; 4942 4943 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4944 PetscFunctionReturn(0); 4945 } 4946 4947 #undef __FUNCT__ 4948 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4949 /*@C 4950 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4951 matrices from each processor 4952 4953 Collective on MPI_Comm 4954 4955 Input Parameters: 4956 + comm - the communicator the parallel matrix will live on 4957 . seqmat - the input sequential matrix (one per process) 4958 . m - number of local rows (or PETSC_DECIDE) 4959 . n - number of local columns (or PETSC_DECIDE) 4960 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4961 4962 Output Parameter: 4963 . mpimat - the parallel matrix generated 4964 4965 Level: advanced 4966 4967 Notes: 4968 The dimensions of the sequential matrix in each processor MUST be the same. 4969 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4970 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4971 @*/ 4972 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4973 { 4974 PetscErrorCode ierr; 4975 PetscMPIInt size; 4976 4977 PetscFunctionBegin; 4978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4979 if (size == 1) { 4980 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4981 if (scall == MAT_INITIAL_MATRIX) { 4982 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4983 } else { 4984 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4985 } 4986 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4987 PetscFunctionReturn(0); 4988 } 4989 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4990 if (scall == MAT_INITIAL_MATRIX) { 4991 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4992 } 4993 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4994 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4995 PetscFunctionReturn(0); 4996 } 4997 4998 #undef __FUNCT__ 4999 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5000 /*@ 5001 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5002 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5003 with MatGetSize() 5004 5005 Not Collective 5006 5007 Input Parameters: 5008 + A - the matrix 5009 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5010 5011 Output Parameter: 5012 .
A_loc - the local sequential matrix generated 5013 5014 Level: developer 5015 5016 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5017 5018 @*/ 5019 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5020 { 5021 PetscErrorCode ierr; 5022 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5023 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5024 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5025 MatScalar *aa=a->a,*ba=b->a,*cam; 5026 PetscScalar *ca; 5027 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5028 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5029 PetscBool match; 5030 5031 PetscFunctionBegin; 5032 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5033 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5034 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5035 if (scall == MAT_INITIAL_MATRIX) { 5036 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5037 ci[0] = 0; 5038 for (i=0; i<am; i++) { 5039 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5040 } 5041 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5042 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5043 k = 0; 5044 for (i=0; i<am; i++) { 5045 ncols_o = bi[i+1] - bi[i]; 5046 ncols_d = ai[i+1] - ai[i]; 5047 /* off-diagonal portion of A */ 5048 for (jo=0; jo<ncols_o; jo++) { 5049 col = cmap[*bj]; 5050 if (col >= cstart) break; 5051 cj[k] = col; bj++; 5052 ca[k++] = *ba++; 5053 } 5054 /* diagonal portion of A */ 5055 for (j=0; j<ncols_d; j++) { 5056 cj[k] = cstart + *aj++; 5057 ca[k++] = *aa++; 5058 } 5059 /* off-diagonal portion of A */ 5060 for (j=jo; j<ncols_o; j++) { 5061 cj[k] = cmap[*bj++]; 5062 ca[k++] = *ba++; 5063 } 5064 } 5065 /* put together the new matrix */ 5066 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5067 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5068 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5069 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5070 mat->free_a = PETSC_TRUE; 5071 mat->free_ij = PETSC_TRUE; 5072 mat->nonew = 0; 5073 } else if (scall == MAT_REUSE_MATRIX) { 5074 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5075 ci = mat->i; cj = mat->j; cam = mat->a; 5076 for (i=0; i<am; i++) { 5077 /* off-diagonal portion of A */ 5078 ncols_o = bi[i+1] - bi[i]; 5079 for (jo=0; jo<ncols_o; jo++) { 5080 col = cmap[*bj]; 5081 if (col >= cstart) break; 5082 *cam++ = *ba++; bj++; 5083 } 5084 /* diagonal portion of A */ 5085 ncols_d = ai[i+1] - ai[i]; 5086 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5087 /* off-diagonal portion of A */ 5088 for (j=jo; j<ncols_o; j++) { 5089 *cam++ = *ba++; bj++; 5090 } 5091 } 5092 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5093 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5094 PetscFunctionReturn(0); 5095 } 5096 5097 #undef __FUNCT__ 5098 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5099 /*@C 5100 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5101 5102 Not Collective 5103 5104 Input Parameters: 5105 + A - the matrix 5106 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5107 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5108 5109 Output Parameter: 5110 . 
A_loc - the local sequential matrix generated 5111 5112 Level: developer 5113 5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5115 5116 @*/ 5117 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5118 { 5119 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5120 PetscErrorCode ierr; 5121 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5122 IS isrowa,iscola; 5123 Mat *aloc; 5124 PetscBool match; 5125 5126 PetscFunctionBegin; 5127 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5128 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5129 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5130 if (!row) { 5131 start = A->rmap->rstart; end = A->rmap->rend; 5132 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5133 } else { 5134 isrowa = *row; 5135 } 5136 if (!col) { 5137 start = A->cmap->rstart; 5138 cmap = a->garray; 5139 nzA = a->A->cmap->n; 5140 nzB = a->B->cmap->n; 5141 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5142 ncols = 0; 5143 for (i=0; i<nzB; i++) { 5144 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5145 else break; 5146 } 5147 imark = i; 5148 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5149 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5150 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5151 } else { 5152 iscola = *col; 5153 } 5154 if (scall != MAT_INITIAL_MATRIX) { 5155 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5156 aloc[0] = *A_loc; 5157 } 5158 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5159 *A_loc = aloc[0]; 5160 ierr = PetscFree(aloc);CHKERRQ(ierr); 5161 if (!row) { 5162 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5163 } 5164 if (!col) { 5165 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5166 } 5167 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 #undef __FUNCT__ 5172 #define __FUNCT__ "MatGetBrowsOfAcols" 5173 /*@C 5174 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5175 5176 Collective on Mat 5177 5178 Input Parameters: 5179 + A,B - the matrices in mpiaij format 5180 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5181 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5182 5183 Output Parameter: 5184 + rowb, colb - index sets of rows and columns of B to extract 5185 - B_seq - the sequential matrix generated 5186 5187 Level: developer 5188 5189 @*/ 5190 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5191 { 5192 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5193 PetscErrorCode ierr; 5194 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5195 IS isrowb,iscolb; 5196 Mat *bseq=PETSC_NULL; 5197 5198 PetscFunctionBegin; 5199 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5200 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5201 } 5202 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5203 5204 if (scall == MAT_INITIAL_MATRIX) { 5205 start = A->cmap->rstart; 5206 cmap = a->garray; 5207 nzA = a->A->cmap->n; 5208 nzB = a->B->cmap->n; 5209 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5210 ncols = 0; 5211 for (i=0; i<nzB; i++) { /* row < local row index */ 5212 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5213 else break; 5214 } 5215 imark = i; 5216 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5217 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5218 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5219 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5220 } else { 5221 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5222 isrowb = *rowb; iscolb = *colb; 5223 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5224 bseq[0] = *B_seq; 5225 } 5226 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5227 *B_seq = bseq[0]; 5228 ierr = PetscFree(bseq);CHKERRQ(ierr); 5229 if (!rowb) { 5230 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5231 } else { 5232 *rowb = isrowb; 5233 } 5234 if (!colb) { 5235 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5236 } else { 5237 *colb = iscolb; 5238 } 5239 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5240 PetscFunctionReturn(0); 5241 } 5242 5243 #undef __FUNCT__ 5244 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5245 /* 5246 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5247 of the OFF-DIAGONAL portion of local A 5248 5249 Collective on Mat 5250 5251 Input Parameters: 5252 + A,B - the matrices in mpiaij format 5253 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5254 5255 Output Parameter: 5256 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5257 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5258 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5259 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5260 5261 Level: developer 5262 5263 */ 5264 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5265 { 5266 VecScatter_MPI_General *gen_to,*gen_from; 5267 PetscErrorCode ierr; 5268 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5269 Mat_SeqAIJ *b_oth; 5270 VecScatter ctx=a->Mvctx; 5271 MPI_Comm comm=((PetscObject)ctx)->comm; 5272 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5273 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5274 PetscScalar *rvalues,*svalues; 5275 MatScalar *b_otha,*bufa,*bufA; 5276 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5277 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5278 MPI_Status *sstatus,rstatus; 5279 PetscMPIInt jj; 5280 PetscInt *cols,sbs,rbs; 5281 PetscScalar *vals; 5282 5283 PetscFunctionBegin; 5284 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5285 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5286 } 5287 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5288 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5289 5290 gen_to = (VecScatter_MPI_General*)ctx->todata; 5291 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5292 rvalues = gen_from->values; /* holds the length of receiving row */ 5293 svalues = gen_to->values; /* holds the length of sending row */ 5294 nrecvs = gen_from->n; 5295 nsends = gen_to->n; 5296 5297 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5298 srow = gen_to->indices; /* local row index to be sent */ 5299 sstarts = gen_to->starts; 5300 sprocs = gen_to->procs; 5301 sstatus = gen_to->sstatus; 5302 sbs = gen_to->bs; 5303 rstarts = gen_from->starts; 5304 rprocs = gen_from->procs; 5305 rbs = gen_from->bs; 5306 5307 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5308 if (scall == MAT_INITIAL_MATRIX) { 5309 /* i-array */ 5310 /*---------*/ 5311 /* post receives */ 5312 for (i=0; i<nrecvs; i++) { 5313 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5314 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5315 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* pack the outgoing message */ 5319 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5320 sstartsj[0] = 0; rstartsj[0] = 0; 5321 len = 0; /* total length of j or a array to be sent */ 5322 k = 0; 5323 for (i=0; i<nsends; i++) { 5324 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5325 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5326 for (j=0; j<nrows; j++) { 5327 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5328 for (l=0; l<sbs; l++) { 5329 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5330 rowlen[j*sbs+l] = ncols; 5331 len += ncols; 5332 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5333 } 5334 k++; 5335 } 5336 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5337 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5338 } 5339 
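/* Note: sstartsj[] (filled above) and rstartsj[] (filled below from the received row lengths) hold the per-message offsets into the outgoing bufj/bufa and incoming b_othj/b_otha arrays; when the caller supplies startsj_s, startsj_r and bufa_ptr these offsets (and bufa) are returned so that a MAT_REUSE_MATRIX call can skip the i- and j-structure exchange and resend only the numerical values */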
/* recvs and sends of i-array are completed */ 5340 i = nrecvs; 5341 while (i--) { 5342 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5343 } 5344 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5345 5346 /* allocate buffers for sending j and a arrays */ 5347 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5348 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5349 5350 /* create i-array of B_oth */ 5351 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5352 b_othi[0] = 0; 5353 len = 0; /* total length of j or a array to be received */ 5354 k = 0; 5355 for (i=0; i<nrecvs; i++) { 5356 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5357 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5358 for (j=0; j<nrows; j++) { 5359 b_othi[k+1] = b_othi[k] + rowlen[j]; 5360 len += rowlen[j]; k++; 5361 } 5362 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5363 } 5364 5365 /* allocate space for j and a arrrays of B_oth */ 5366 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5367 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5368 5369 /* j-array */ 5370 /*---------*/ 5371 /* post receives of j-array */ 5372 for (i=0; i<nrecvs; i++) { 5373 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5374 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5375 } 5376 5377 /* pack the outgoing message j-array */ 5378 k = 0; 5379 for (i=0; i<nsends; i++) { 5380 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5381 bufJ = bufj+sstartsj[i]; 5382 for (j=0; j<nrows; j++) { 5383 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5384 for (ll=0; ll<sbs; ll++) { 5385 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5386 for (l=0; l<ncols; l++) { 5387 *bufJ++ = cols[l]; 5388 } 5389 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5390 } 5391 } 5392 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* recvs and sends of j-array are completed */ 5396 i = nrecvs; 5397 while (i--) { 5398 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5399 } 5400 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5401 } else if (scall == MAT_REUSE_MATRIX) { 5402 sstartsj = *startsj_s; 5403 rstartsj = *startsj_r; 5404 bufa = *bufa_ptr; 5405 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5406 b_otha = b_oth->a; 5407 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5408 5409 /* a-array */ 5410 /*---------*/ 5411 /* post receives of a-array */ 5412 for (i=0; i<nrecvs; i++) { 5413 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5414 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5415 } 5416 5417 /* pack the outgoing message a-array */ 5418 k = 0; 5419 for (i=0; i<nsends; i++) { 5420 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5421 bufA = bufa+sstartsj[i]; 5422 for (j=0; j<nrows; j++) { 5423 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5424 for (ll=0; ll<sbs; ll++) { 5425 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5426 for (l=0; l<ncols; l++) { 5427 *bufA++ = vals[l]; 5428 } 5429 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5430 } 5431 } 5432 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5433 } 5434 /* recvs and sends of a-array are completed */ 5435 i = nrecvs; 5436 while (i--) { 5437 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5438 } 5439 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5440 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5441 5442 if (scall == MAT_INITIAL_MATRIX) { 5443 /* put together the new matrix */ 5444 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5445 5446 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5447 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5448 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5449 b_oth->free_a = PETSC_TRUE; 5450 b_oth->free_ij = PETSC_TRUE; 5451 b_oth->nonew = 0; 5452 5453 ierr = PetscFree(bufj);CHKERRQ(ierr); 5454 if (!startsj_s || !bufa_ptr) { 5455 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5456 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5457 } else { 5458 *startsj_s = sstartsj; 5459 *startsj_r = rstartsj; 5460 *bufa_ptr = bufa; 5461 } 5462 } 5463 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5464 PetscFunctionReturn(0); 5465 } 5466 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "MatGetCommunicationStructs" 5469 /*@C 5470 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5471 5472 Not Collective 5473 5474 Input Parameters: 5475 . A - The matrix in mpiaij format 5476 5477 Output Parameter: 5478 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5479 . 
colmap - A map from global column index to local index into lvec 5480 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5481 5482 Level: developer 5483 5484 @*/ 5485 #if defined (PETSC_USE_CTABLE) 5486 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5487 #else 5488 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5489 #endif 5490 { 5491 Mat_MPIAIJ *a; 5492 5493 PetscFunctionBegin; 5494 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5495 PetscValidPointer(lvec, 2); 5496 PetscValidPointer(colmap, 3); 5497 PetscValidPointer(multScatter, 4); 5498 a = (Mat_MPIAIJ *) A->data; 5499 if (lvec) *lvec = a->lvec; 5500 if (colmap) *colmap = a->colmap; 5501 if (multScatter) *multScatter = a->Mvctx; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 EXTERN_C_BEGIN 5506 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5507 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5508 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5509 EXTERN_C_END 5510 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5513 /* 5514 Computes (B'*A')' since computing B*A directly is untenable 5515 5516 n p p 5517 ( ) ( ) ( ) 5518 m ( A ) * n ( B ) = m ( C ) 5519 ( ) ( ) ( ) 5520 5521 */ 5522 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5523 { 5524 PetscErrorCode ierr; 5525 Mat At,Bt,Ct; 5526 5527 PetscFunctionBegin; 5528 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5529 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5530 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5531 ierr = MatDestroy(&At);CHKERRQ(ierr); 5532 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5533 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5534 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5535 PetscFunctionReturn(0); 5536 } 5537 5538 #undef __FUNCT__ 5539 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5540 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5541 { 5542 PetscErrorCode ierr; 5543 PetscInt m=A->rmap->n,n=B->cmap->n; 5544 Mat Cmat; 5545 5546 PetscFunctionBegin; 5547 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5548 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5549 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5550 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5551 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5552 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5553 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5554 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5555 5556 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5557 *C = Cmat; 5558 PetscFunctionReturn(0); 5559 } 5560 5561 /* ----------------------------------------------------------------*/ 5562 #undef __FUNCT__ 5563 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5564 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5565 { 5566 PetscErrorCode ierr; 5567 5568 PetscFunctionBegin; 5569 if (scall == MAT_INITIAL_MATRIX) { 5570 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5571 } 5572 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5573 PetscFunctionReturn(0); 5574 } 5575 5576 EXTERN_C_BEGIN 5577 #if defined(PETSC_HAVE_MUMPS) 5578 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5579 #endif 5580 #if defined(PETSC_HAVE_PASTIX) 5581 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5582 #endif 5583 #if defined(PETSC_HAVE_SUPERLU_DIST) 5584 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5585 #endif 5586 #if defined(PETSC_HAVE_CLIQUE) 5587 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5588 #endif 5589 EXTERN_C_END 5590 5591 /*MC 5592 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5593 5594 Options Database Keys: 5595 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5596 5597 Level: beginner 5598 5599 .seealso: MatCreateAIJ() 5600 M*/ 5601 5602 EXTERN_C_BEGIN 5603 #undef __FUNCT__ 5604 #define __FUNCT__ "MatCreate_MPIAIJ" 5605 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5606 { 5607 Mat_MPIAIJ *b; 5608 PetscErrorCode ierr; 5609 PetscMPIInt size; 5610 5611 PetscFunctionBegin; 5612 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5613 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5614 B->data = (void*)b; 5615 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5616 B->assembled = PETSC_FALSE; 5617 B->insertmode = NOT_SET_VALUES; 5618 b->size = size; 5619 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5620 5621 /* build cache for off array entries formed */ 5622 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5623 b->donotstash = PETSC_FALSE; 5624 b->colmap = 0; 5625 b->garray = 0; 5626 b->roworiented = PETSC_TRUE; 5627 5628 /* stuff used for matrix vector multiply */ 5629 b->lvec = PETSC_NULL; 5630 b->Mvctx = PETSC_NULL; 5631 5632 /* stuff for MatGetRow() */ 5633 b->rowindices = 0; 5634 b->rowvalues = 0; 5635 b->getrowactive = PETSC_FALSE; 5636 5637 /* flexible pointer used in CUSP/CUSPARSE classes */ 5638 b->spptr = PETSC_NULL; 5639 5640 #if defined(PETSC_HAVE_MUMPS) 5641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5642 "MatGetFactor_aij_mumps", 5643 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5644 #endif 5645 #if defined(PETSC_HAVE_PASTIX) 5646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5647 "MatGetFactor_mpiaij_pastix", 5648 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5649 #endif 5650 #if defined(PETSC_HAVE_SUPERLU_DIST) 5651 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5652 "MatGetFactor_mpiaij_superlu_dist", 5653 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5654 #endif 5655 #if defined(PETSC_HAVE_CLIQUE) 5656 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C", 5657 "MatGetFactor_aij_clique", 5658 MatGetFactor_aij_clique);CHKERRQ(ierr); 5659 #endif 5660 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5661 "MatStoreValues_MPIAIJ", 5662 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5664 "MatRetrieveValues_MPIAIJ", 5665 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5667 "MatGetDiagonalBlock_MPIAIJ", 5668 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5669 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5670 "MatIsTranspose_MPIAIJ", 5671 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5673 "MatMPIAIJSetPreallocation_MPIAIJ", 5674 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5676 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5677 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5679 "MatDiagonalScaleLocal_MPIAIJ", 5680 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5682 "MatConvert_MPIAIJ_MPIAIJPERM", 5683 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5685 "MatConvert_MPIAIJ_MPIAIJCRL", 5686 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5688 "MatConvert_MPIAIJ_MPISBAIJ", 5689 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5691 "MatMatMult_MPIDense_MPIAIJ", 5692 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5694 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5695 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5697 "MatMatMultNumeric_MPIDense_MPIAIJ", 5698 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5699 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5700 PetscFunctionReturn(0); 5701 } 5702 EXTERN_C_END 5703 5704 #undef __FUNCT__ 5705 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5706 /*@ 5707 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5708 and "off-diagonal" parts of the matrix in CSR format. 5709 5710 Collective on MPI_Comm 5711 5712 Input Parameters: 5713 + comm - MPI communicator 5714 . m - number of local rows (Cannot be PETSC_DECIDE) 5715 . n - This value should be the same as the local size used in creating the 5716 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5717 it calculated if N is given). For square matrices n is almost always m. 5718 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5719 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5720 . i - row indices for "diagonal" portion of matrix 5721 . j - column indices for "diagonal" portion of matrix 5722 . a - matrix values for "diagonal" portion of matrix 5723 . oi - row indices for "off-diagonal" portion of matrix 5724 . oj - column indices for "off-diagonal" portion of matrix 5725 - oa - matrix values for "off-diagonal" portion of matrix 5726 5727 Output Parameter: 5728 . mat - the matrix 5729 5730 Level: advanced 5731 5732 Notes: 5733 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5734 must free the arrays once the matrix has been destroyed and not before. 5735 5736 The i and j indices are 0 based 5737 5738 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5739 5740 This sets local rows and cannot be used to set off-processor values.
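   Example usage (a minimal sketch): the 2 x 2 matrix with entries 1,2,3,4 laid out over two processes,
   one row and one column per process; mat, ierr and rank (from MPI_Comm_rank()) are assumed to be declared,
   the off-diagonal column index in oj[] is a global column index, and as noted above the arrays must stay
   allocated until the matrix is destroyed
.vb
      PetscInt    i[2]  = {0,1}, j[1]  = {0};
      PetscInt    oi[2] = {0,1}, oj[1];
      PetscScalar a[1], oa[1];

      if (!rank) { a[0] = 1.0; oj[0] = 1; oa[0] = 2.0; }
      else       { a[0] = 4.0; oj[0] = 0; oa[0] = 3.0; }
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve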
5741 5742 You cannot later use MatSetValues() to change values in this matrix. 5743 5744 .keywords: matrix, aij, compressed row, sparse, parallel 5745 5746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5747 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5748 @*/ 5749 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5750 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5751 { 5752 PetscErrorCode ierr; 5753 Mat_MPIAIJ *maij; 5754 5755 PetscFunctionBegin; 5756 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5757 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5758 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5759 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5760 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5761 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5762 maij = (Mat_MPIAIJ*) (*mat)->data; 5763 maij->donotstash = PETSC_TRUE; 5764 (*mat)->preallocated = PETSC_TRUE; 5765 5766 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5767 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5768 5769 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5770 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5771 5772 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5775 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5776 5777 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 PetscFunctionReturn(0); 5780 } 5781 5782 /* 5783 Special version for direct calls from Fortran 5784 */ 5785 #include <petsc-private/fortranimpl.h> 5786 5787 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5788 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5789 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5790 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5791 #endif 5792 5793 /* Change these macros so can be used in void function */ 5794 #undef CHKERRQ 5795 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5796 #undef SETERRQ2 5797 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5798 #undef SETERRQ3 5799 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5800 #undef SETERRQ 5801 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5802 5803 EXTERN_C_BEGIN 5804 #undef __FUNCT__ 5805 #define __FUNCT__ "matsetvaluesmpiaij_" 5806 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5807 { 5808 Mat mat = *mmat; 5809 PetscInt m = *mm, n = *mn; 5810 InsertMode addv = *maddv; 5811 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5812 PetscScalar value; 5813 PetscErrorCode ierr; 5814 5815 MatCheckPreallocated(mat,1); 5816 if (mat->insertmode == NOT_SET_VALUES) { 5817 mat->insertmode = addv; 5818 } 5819 #if defined(PETSC_USE_DEBUG) 5820 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5821 #endif 5822 { 5823 PetscInt i,j,rstart = 
mat->rmap->rstart,rend = mat->rmap->rend; 5824 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5825 PetscBool roworiented = aij->roworiented; 5826 5827 /* Some Variables required in the macro */ 5828 Mat A = aij->A; 5829 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5830 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5831 MatScalar *aa = a->a; 5832 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5833 Mat B = aij->B; 5834 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5835 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5836 MatScalar *ba = b->a; 5837 5838 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5839 PetscInt nonew = a->nonew; 5840 MatScalar *ap1,*ap2; 5841 5842 PetscFunctionBegin; 5843 for (i=0; i<m; i++) { 5844 if (im[i] < 0) continue; 5845 #if defined(PETSC_USE_DEBUG) 5846 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5847 #endif 5848 if (im[i] >= rstart && im[i] < rend) { 5849 row = im[i] - rstart; 5850 lastcol1 = -1; 5851 rp1 = aj + ai[row]; 5852 ap1 = aa + ai[row]; 5853 rmax1 = aimax[row]; 5854 nrow1 = ailen[row]; 5855 low1 = 0; 5856 high1 = nrow1; 5857 lastcol2 = -1; 5858 rp2 = bj + bi[row]; 5859 ap2 = ba + bi[row]; 5860 rmax2 = bimax[row]; 5861 nrow2 = bilen[row]; 5862 low2 = 0; 5863 high2 = nrow2; 5864 5865 for (j=0; j<n; j++) { 5866 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5867 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5868 if (in[j] >= cstart && in[j] < cend) { 5869 col = in[j] - cstart; 5870 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5871 } else if (in[j] < 0) continue; 5872 #if defined(PETSC_USE_DEBUG) 5873 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5874 #endif 5875 else { 5876 if (mat->was_assembled) { 5877 if (!aij->colmap) { 5878 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5879 } 5880 #if defined (PETSC_USE_CTABLE) 5881 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5882 col--; 5883 #else 5884 col = aij->colmap[in[j]] - 1; 5885 #endif 5886 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5887 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5888 col = in[j]; 5889 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5890 B = aij->B; 5891 b = (Mat_SeqAIJ*)B->data; 5892 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5893 rp2 = bj + bi[row]; 5894 ap2 = ba + bi[row]; 5895 rmax2 = bimax[row]; 5896 nrow2 = bilen[row]; 5897 low2 = 0; 5898 high2 = nrow2; 5899 bm = aij->B->rmap->n; 5900 ba = b->a; 5901 } 5902 } else col = in[j]; 5903 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5904 } 5905 } 5906 } else { 5907 if (!aij->donotstash) { 5908 if (roworiented) { 5909 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5910 } else { 5911 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5912 } 5913 } 5914 } 5915 }} 5916 PetscFunctionReturnVoid(); 5917 } 5918 EXTERN_C_END 5919 5920
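/*
   A minimal usage sketch for MatCreateAIJ() defined above: the global size 100, the per-row nonzero
   estimates 5 (diagonal block) and 2 (off-diagonal block), and the application-filled ncols/cols/vals
   used inside the loop are placeholders.

      Mat      A;
      PetscInt rstart,rend,row;

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/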