#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
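
/*
   Illustrative sketch (not part of the original source): the "call both preallocation
   routines" pattern recommended by the MATAIJ manual page above. The function name and
   the per-row nonzero counts are hypothetical; only the call matching the actual runtime
   type takes effect, the other is a no-op. Kept out of compilation on purpose.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  /* safe to call both: only the variant matching the constructed type acts */
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif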

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
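
/*
   Illustrative sketch (assumption: caller code, not part of this file): reaching the
   routine above through the public MatGetColumnNorms() interface. Note that norms must
   have room for all N global columns on every process, since the Allreduce above leaves
   the full result everywhere.
*/
#if 0
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr); /* every process gets all N norms */
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif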

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
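
/*
   Illustrative sketch of the core technique the two macros above implement (assumption:
   demonstration code, not used by the library): a bounded binary search over the sorted
   column indices of one row, then a linear scan of the small remaining window, then an
   in-place shift to insert a new (column,value) pair. The macros layer PETSc-specific
   reallocation (MatSeqXAIJReallocateAIJ) and error handling on top of this loop. The
   sketch assumes the arrays already have room for one more entry.
*/
#if 0
static void ExampleInsertSorted(int *cols,double *vals,int *nrow,int col,double value)
{
  int low = 0,high = *nrow,t,i;

  while (high - low > 5) {        /* binary search until the window is small */
    t = (low + high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {      /* linear scan of the remaining window */
    if (cols[i] > col) break;
    if (cols[i] == col) { vals[i] += value; return; }  /* ADD_VALUES semantics */
  }
  for (t=*nrow-1; t>=i; t--) {    /* shift later entries up to make room at i */
    cols[t+1] = cols[t];
    vals[t+1] = vals[t];
  }
  cols[i] = col;
  vals[i] = value;
  (*nrow)++;
}
#endif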

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
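
/*
   Illustrative sketch (assumption: caller code, not part of this file) of the public
   entry point that dispatches to MatSetValues_MPIAIJ() above: locally owned entries go
   straight into the A/B blocks, off-process entries sit in the stash until assembly.
   The indices used here are hypothetical.
*/
#if 0
static PetscErrorCode ExampleSetValues(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       row = 0,col = 0;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  /* stashed off-process entries are exchanged during assembly */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif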

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
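
/*
   The Allreduce above relies on the InsertMode values being bit-disjoint: INSERT_VALUES
   and ADD_VALUES OR together to a value equal to neither, so a single MPI_BOR reduction
   detects ranks that disagree. A minimal sketch of that check in isolation (assumption:
   demonstration code with hypothetical names):
*/
#if 0
static PetscErrorCode ExampleCheckInsertMode(MPI_Comm comm,InsertMode mine)
{
  PetscErrorCode ierr;
  InsertMode     global;

  PetscFunctionBegin;
  ierr = MPI_Allreduce(&mine,&global,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  /* mixed modes OR to ADD_VALUES|INSERT_VALUES, which no single rank ever holds */
  if (global == (ADD_VALUES|INSERT_VALUES)) SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  PetscFunctionReturn(0);
}
#endif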

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart = A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     doing so would allocate new memory entry by entry, triggering many
     mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
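
/*
   Illustrative sketch (assumption: caller code with hypothetical index data): applying a
   Dirichlet-style boundary condition through the public MatZeroRows() interface, which
   dispatches to MatZeroRows_MPIAIJ() above. Passing x and b also fixes the right-hand
   side so that b[row] = diag*x[row] for each zeroed row.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A,Vec x,Vec b)
{
  PetscErrorCode ierr;
  PetscInt       rows[] = {0}; /* hypothetical global row to zero; need not be local */

  PetscFunctionBegin;
  ierr = MatZeroRows(A,1,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif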

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
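
/*
   The mult routines above share one communication pattern: start the scatter of the
   needed off-process entries of xx into lvec, multiply by the local diagonal block
   while the messages are in flight, finish the scatter, then apply the off-diagonal
   block. A minimal sketch of that overlap in isolation (assumption: demonstration
   code with a hypothetical name):
*/
#if 0
static PetscErrorCode ExampleOverlappedMult(Mat_MPIAIJ *a,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);            /* local work hides communication */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy);CHKERRQ(ierr); /* off-diagonal contribution */
  PetscFunctionReturn(0);
}
#endif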

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
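
/*
   Illustrative sketch (assumption: caller code with a hypothetical file name) of
   triggering the binary writer above: MatView() on a PETSCVIEWERBINARY viewer for a
   parallel MPIAIJ matrix ends up in MatView_MPIAIJ_Binary().
*/
#if 0
static PetscErrorCode ExampleSaveBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif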
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1494 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1495 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1496 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1497 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1500 PetscInt inodecount,inodelimit,*inodes; 1501 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1502 if (inodes) { 1503 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1504 } else { 1505 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1509 PetscFunctionReturn(0); 1510 } 1511 } else if (isbinary) { 1512 if (size == 1) { 1513 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1515 } else { 1516 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } else if (isdraw) { 1520 PetscDraw draw; 1521 PetscBool isnull; 1522 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1523 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1524 } 1525 1526 if (size == 1) { 1527 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1528 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1529 } else { 1530 /* assemble the entire matrix onto first processor. 
*/ 1531 Mat A; 1532 Mat_SeqAIJ *Aloc; 1533 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1534 MatScalar *a; 1535 1536 if (mat->rmap->N > 1024) { 1537 PetscBool flg = PETSC_FALSE; 1538 1539 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1540 if (!flg) { 1541 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1542 } 1543 } 1544 1545 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1550 } 1551 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1552 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1553 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1554 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1555 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1556 1557 /* copy over the A part */ 1558 Aloc = (Mat_SeqAIJ*)aij->A->data; 1559 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1560 row = mat->rmap->rstart; 1561 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1562 for (i=0; i<m; i++) { 1563 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1564 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1565 } 1566 aj = Aloc->j; 1567 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1568 1569 /* copy over the B part */ 1570 Aloc = (Mat_SeqAIJ*)aij->B->data; 1571 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1572 row = mat->rmap->rstart; 1573 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1574 ct = cols; 1575 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1576 for (i=0; i<m; i++) { 1577 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1578 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1579 } 1580 ierr = PetscFree(ct);CHKERRQ(ierr); 1581 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1583 /* 1584 Everyone has to call to draw the matrix since the graphics waits are 1585 synchronized across all processors that share the PetscDraw object 1586 */ 1587 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1588 if (!rank) { 1589 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1590 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1591 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1592 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1593 } 1594 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1595 ierr = MatDestroy(&A);CHKERRQ(ierr); 1596 } 1597 PetscFunctionReturn(0); 1598 } 1599 1600 #undef __FUNCT__ 1601 #define __FUNCT__ "MatView_MPIAIJ" 1602 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1603 { 1604 PetscErrorCode ierr; 1605 PetscBool iascii,isdraw,issocket,isbinary; 1606 1607 PetscFunctionBegin; 1608 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1609 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1610 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1612 if (iascii || isdraw || isbinary || issocket) { 1613 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1614 } else { 1615 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatSOR_MPIAIJ" 1622 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1623 { 1624 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1625 PetscErrorCode ierr; 1626 Vec bb1 = 0; 1627 PetscBool hasop; 1628 1629 PetscFunctionBegin; 1630 if (flag == SOR_APPLY_UPPER) { 1631 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1636 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1637 } 1638 1639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1640 if (flag & SOR_ZERO_INITIAL_GUESS) { 1641 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1642 its--; 1643 } 1644 1645 while (its--) { 1646 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 1649 /* update rhs: bb1 = bb - B*x */ 1650 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1651 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1655 } 1656 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1657 if (flag & SOR_ZERO_INITIAL_GUESS) { 1658 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1659 its--; 1660 } 1661 while (its--) { 1662 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 1665 /* update rhs: bb1 = bb - B*x */ 1666 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1667 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1668 1669 /* local sweep */ 1670 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1671 } 1672 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1673 if (flag & SOR_ZERO_INITIAL_GUESS) { 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1675 its--; 1676 } 1677 while (its--) { 1678 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1679 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 1681 /* update rhs: bb1 = bb - B*x */ 1682 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1683 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1684 1685 /* local sweep */ 1686 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1687 } 1688 } else if (flag & SOR_EISENSTAT) { 1689 Vec xx1; 1690 1691 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1692 ierr = 
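/* Eisenstat trick: the local backward sweep below produces a half-iterate in xx; the rhs is then rebuilt as bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec (D applied via MatMultDiagonalBlock() when available) and a single local forward sweep on bb1 completes the symmetric application */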
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1693 1694 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1695 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 if (!mat->diag) { 1697 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1698 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1699 } 1700 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1701 if (hasop) { 1702 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1703 } else { 1704 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1705 } 1706 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1707 1708 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1709 1710 /* local sweep */ 1711 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1712 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1713 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1714 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1715 1716 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatPermute_MPIAIJ" 1722 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1723 { 1724 Mat aA,aB,Aperm; 1725 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1726 PetscScalar *aa,*ba; 1727 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1728 PetscSF rowsf,sf; 1729 IS parcolp = PETSC_NULL; 1730 PetscBool done; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1735 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1736 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1737 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1738 1739 /* Invert row permutation to find out where my rows should go */ 1740 ierr = PetscSFCreate(((PetscObject)A)->comm,&rowsf);CHKERRQ(ierr); 1741 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,PETSC_NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1742 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1743 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1744 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr); 1745 1746 /* Invert column permutation to find out where my columns should go */ 1747 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1748 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,PETSC_NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1749 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1750 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1751 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr); 1752 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1753 1754 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1755 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1756 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1757 1758 /* Find out where my gcols should go */ 1759 ierr = MatGetSize(aB,PETSC_NULL,&ng);CHKERRQ(ierr); 1760 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1761 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 1762 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,PETSC_NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1763 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1764 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1765 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1766 1767 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1768 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1769 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1770 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1771 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1772 for (i=0; i<m; i++) { 1773 PetscInt row = rdest[i],rowner; 1774 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1775 for (j=ai[i]; j<ai[i+1]; j++) { 1776 PetscInt cowner,col = cdest[aj[j]]; 1777 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1778 if (rowner == cowner) dnnz[i]++; 1779 else onnz[i]++; 1780 } 1781 for (j=bi[i]; j<bi[i+1]; j++) { 1782 PetscInt cowner,col = gcdest[bj[j]]; 1783 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1784 if (rowner == cowner) dnnz[i]++; 1785 else onnz[i]++; 1786 } 1787 } 1788 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1789 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1791 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1792 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1793 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1794 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1795 1796 ierr = MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1797 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1798 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1799 for (i=0; i<m; i++) { 1800 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1801 PetscInt rowlen; 1802 rowlen = ai[i+1] - ai[i]; 1803 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1804 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1805 rowlen = bi[i+1] - bi[i]; 1806 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1807 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1808 } 1809 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1810 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1811 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1812 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1813 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1814 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1815 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1816 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1817 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1818 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1819 *B = Aperm; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1825 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1826 { 1827 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1828 Mat A = mat->A,B = mat->B; 1829 PetscErrorCode ierr; 1830 PetscReal 
isend[5],irecv[5]; 1831 1832 PetscFunctionBegin; 1833 info->block_size = 1.0; 1834 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1835 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1836 isend[3] = info->memory; isend[4] = info->mallocs; 1837 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1838 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1839 isend[3] += info->memory; isend[4] += info->mallocs; 1840 if (flag == MAT_LOCAL) { 1841 info->nz_used = isend[0]; 1842 info->nz_allocated = isend[1]; 1843 info->nz_unneeded = isend[2]; 1844 info->memory = isend[3]; 1845 info->mallocs = isend[4]; 1846 } else if (flag == MAT_GLOBAL_MAX) { 1847 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1848 info->nz_used = irecv[0]; 1849 info->nz_allocated = irecv[1]; 1850 info->nz_unneeded = irecv[2]; 1851 info->memory = irecv[3]; 1852 info->mallocs = irecv[4]; 1853 } else if (flag == MAT_GLOBAL_SUM) { 1854 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1855 info->nz_used = irecv[0]; 1856 info->nz_allocated = irecv[1]; 1857 info->nz_unneeded = irecv[2]; 1858 info->memory = irecv[3]; 1859 info->mallocs = irecv[4]; 1860 } 1861 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1862 info->fill_ratio_needed = 0; 1863 info->factor_mallocs = 0; 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatSetOption_MPIAIJ" 1869 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1870 { 1871 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1872 PetscErrorCode ierr; 1873 1874 PetscFunctionBegin; 1875 switch (op) { 1876 case MAT_NEW_NONZERO_LOCATIONS: 1877 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1878 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1879 case MAT_KEEP_NONZERO_PATTERN: 1880 case MAT_NEW_NONZERO_LOCATION_ERR: 1881 case MAT_USE_INODES: 1882 case MAT_IGNORE_ZERO_ENTRIES: 1883 MatCheckPreallocated(A,1); 1884 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1885 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1886 break; 1887 case MAT_ROW_ORIENTED: 1888 a->roworiented = flg; 1889 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1890 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1891 break; 1892 case MAT_NEW_DIAGONALS: 1893 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1894 break; 1895 case MAT_IGNORE_OFF_PROC_ENTRIES: 1896 a->donotstash = flg; 1897 break; 1898 case MAT_SPD: 1899 A->spd_set = PETSC_TRUE; 1900 A->spd = flg; 1901 if (flg) { 1902 A->symmetric = PETSC_TRUE; 1903 A->structurally_symmetric = PETSC_TRUE; 1904 A->symmetric_set = PETSC_TRUE; 1905 A->structurally_symmetric_set = PETSC_TRUE; 1906 } 1907 break; 1908 case MAT_SYMMETRIC: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 case MAT_STRUCTURALLY_SYMMETRIC: 1912 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1913 break; 1914 case MAT_HERMITIAN: 1915 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1916 break; 1917 case MAT_SYMMETRY_ETERNAL: 1918 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1919 break; 1920 default: 1921 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 #undef __FUNCT__ 1927 #define __FUNCT__ "MatGetRow_MPIAIJ" 1928 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1929 { 1930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1931 PetscScalar 
*vworkA,*vworkB,**pvA,**pvB,*v_p; 1932 PetscErrorCode ierr; 1933 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1934 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1935 PetscInt *cmap,*idx_p; 1936 1937 PetscFunctionBegin; 1938 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1939 mat->getrowactive = PETSC_TRUE; 1940 1941 if (!mat->rowvalues && (idx || v)) { 1942 /* 1943 allocate enough space to hold information from the longest row. 1944 */ 1945 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1946 PetscInt max = 1,tmp; 1947 for (i=0; i<matin->rmap->n; i++) { 1948 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1949 if (max < tmp) { max = tmp; } 1950 } 1951 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1952 } 1953 1954 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1955 lrow = row - rstart; 1956 1957 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1958 if (!v) {pvA = 0; pvB = 0;} 1959 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1960 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1961 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1962 nztot = nzA + nzB; 1963 1964 cmap = mat->garray; 1965 if (v || idx) { 1966 if (nztot) { 1967 /* Sort by increasing column numbers, assuming A and B already sorted */ 1968 PetscInt imark = -1; 1969 if (v) { 1970 *v = v_p = mat->rowvalues; 1971 for (i=0; i<nzB; i++) { 1972 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1973 else break; 1974 } 1975 imark = i; 1976 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1977 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1978 } 1979 if (idx) { 1980 *idx = idx_p = mat->rowindices; 1981 if (imark > -1) { 1982 for (i=0; i<imark; i++) { 1983 idx_p[i] = cmap[cworkB[i]]; 1984 } 1985 } else { 1986 for (i=0; i<nzB; i++) { 1987 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1988 else break; 1989 } 1990 imark = i; 1991 } 1992 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1993 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1994 } 1995 } else { 1996 if (idx) *idx = 0; 1997 if (v) *v = 0; 1998 } 1999 } 2000 *nz = nztot; 2001 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 2002 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 2008 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 2012 PetscFunctionBegin; 2013 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2014 aij->getrowactive = PETSC_FALSE; 2015 PetscFunctionReturn(0); 2016 } 2017 2018 #undef __FUNCT__ 2019 #define __FUNCT__ "MatNorm_MPIAIJ" 2020 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2021 { 2022 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2023 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,cstart = mat->cmap->rstart; 2026 PetscReal sum = 0.0; 2027 MatScalar *v; 2028 2029 PetscFunctionBegin; 2030 if (aij->size == 1) { 2031 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2032 } else { 2033 if (type == NORM_FROBENIUS) { 2034 
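/* ||mat||_F = sqrt(sum_ij |m_ij|^2): accumulate the squared moduli of the diagonal (A) and off-diagonal (B) block entries locally, Allreduce the partial sums, then take the square root */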
v = amat->a; 2035 for (i=0; i<amat->nz; i++) { 2036 #if defined(PETSC_USE_COMPLEX) 2037 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2038 #else 2039 sum += (*v)*(*v); v++; 2040 #endif 2041 } 2042 v = bmat->a; 2043 for (i=0; i<bmat->nz; i++) { 2044 #if defined(PETSC_USE_COMPLEX) 2045 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2046 #else 2047 sum += (*v)*(*v); v++; 2048 #endif 2049 } 2050 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2051 *norm = PetscSqrtReal(*norm); 2052 } else if (type == NORM_1) { /* max column norm */ 2053 PetscReal *tmp,*tmp2; 2054 PetscInt *jj,*garray = aij->garray; 2055 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2056 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2057 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2058 *norm = 0.0; 2059 v = amat->a; jj = amat->j; 2060 for (j=0; j<amat->nz; j++) { 2061 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 2062 } 2063 v = bmat->a; jj = bmat->j; 2064 for (j=0; j<bmat->nz; j++) { 2065 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2066 } 2067 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2068 for (j=0; j<mat->cmap->N; j++) { 2069 if (tmp2[j] > *norm) *norm = tmp2[j]; 2070 } 2071 ierr = PetscFree(tmp);CHKERRQ(ierr); 2072 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2073 } else if (type == NORM_INFINITY) { /* max row norm */ 2074 PetscReal ntemp = 0.0; 2075 for (j=0; j<aij->A->rmap->n; j++) { 2076 v = amat->a + amat->i[j]; 2077 sum = 0.0; 2078 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2079 sum += PetscAbsScalar(*v); v++; 2080 } 2081 v = bmat->a + bmat->i[j]; 2082 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2083 sum += PetscAbsScalar(*v); v++; 2084 } 2085 if (sum > ntemp) ntemp = sum; 2086 } 2087 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2088 } else { 2089 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2090 } 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatTranspose_MPIAIJ" 2097 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2098 { 2099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2100 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2101 PetscErrorCode ierr; 2102 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2103 PetscInt cstart=A->cmap->rstart,ncol; 2104 Mat B; 2105 MatScalar *array; 2106 2107 PetscFunctionBegin; 2108 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2109 2110 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2111 ai = Aloc->i; aj = Aloc->j; 2112 bi = Bloc->i; bj = Bloc->j; 2113 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2114 PetscInt *d_nnz,*g_nnz,*o_nnz; 2115 PetscSFNode *oloc; 2116 PETSC_UNUSED PetscSF sf; 2117 2118 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2119 /* compute d_nnz for preallocation */ 2120 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2121 for (i=0; i<ai[ma]; i++) { 2122 d_nnz[aj[i]] ++; 2123 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2124 } 2125 #if defined(OMPI_MAJOR_VERSION) /* Open MPI Bug. 
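The one-sided PetscSF reduce in the #else branch below is unreliable under Open MPI, so here o_nnz is merely seeded from the d_nnz counts and MAT_NEW_NONZERO_ALLOCATION_ERR is disabled further down to tolerate the inexact preallocation; see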
https://bitbucket.org/petsc/petsc-dev/issue/9/implement-petscsf-without-one-sided */ 2126 ierr = PetscMemcpy(o_nnz,d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2127 #else 2128 /* compute local off-diagonal contributions */ 2129 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2130 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2131 /* map those to global */ 2132 ierr = PetscSFCreate(((PetscObject)A)->comm,&sf);CHKERRQ(ierr); 2133 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,PETSC_NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2134 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2135 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2136 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2137 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2138 #endif 2139 2140 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2141 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2142 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2143 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2145 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2146 #if defined(OMPI_MAJOR_VERSION) 2147 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2148 #endif 2149 } else { 2150 B = *matout; 2151 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2152 for (i=0; i<ai[ma]; i++){ 2153 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2154 } 2155 } 2156 2157 /* copy over the A part */ 2158 array = Aloc->a; 2159 row = A->rmap->rstart; 2160 for (i=0; i<ma; i++) { 2161 ncol = ai[i+1]-ai[i]; 2162 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2163 row++; array += ncol; aj += ncol; 2164 } 2165 aj = Aloc->j; 2166 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */ 2167 2168 /* copy over the B part */ 2169 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2170 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2171 array = Bloc->a; 2172 row = A->rmap->rstart; 2173 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2174 cols_tmp = cols; 2175 for (i=0; i<mb; i++) { 2176 ncol = bi[i+1]-bi[i]; 2177 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2178 row++; array += ncol; cols_tmp += ncol; 2179 } 2180 ierr = PetscFree(cols);CHKERRQ(ierr); 2181 2182 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2185 *matout = B; 2186 } else { 2187 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2188 } 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2194 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2195 { 2196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2197 Mat a = aij->A,b = aij->B; 2198 PetscErrorCode ierr; 2199 PetscInt s1,s2,s3; 2200 2201 PetscFunctionBegin; 2202 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2203 if (rr) { 2204 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2205 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2206 /* Overlap communication with computation. 
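The forward scatter of rr into lvec is started here and completed only after the left scaling of the off-diagonal block and the scaling of the diagonal block, at the point where the remote entries of rr are actually needed.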
*/ 2207 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2208 } 2209 if (ll) { 2210 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2211 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2212 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2213 } 2214 /* scale the diagonal block */ 2215 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2216 2217 if (rr) { 2218 /* Do a scatter end and then right scale the off-diagonal block */ 2219 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2220 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2221 } 2222 2223 PetscFunctionReturn(0); 2224 } 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2228 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2229 { 2230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatEqual_MPIAIJ" 2240 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2241 { 2242 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2243 Mat a,b,c,d; 2244 PetscBool flg; 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 a = matA->A; b = matA->B; 2249 c = matB->A; d = matB->B; 2250 2251 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2252 if (flg) { 2253 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2254 } 2255 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatCopy_MPIAIJ" 2261 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2262 { 2263 PetscErrorCode ierr; 2264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2265 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2266 2267 PetscFunctionBegin; 2268 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2269 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2270 /* because of the column compression in the off-processor part of the matrix a->B, 2271 the number of columns in a->B and b->B may be different, hence we cannot call 2272 the MatCopy() directly on the two parts. 
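(The off-process block a->B keeps only its nonempty global columns, compressed through garray, so even for matrices with the same parallel layout the local column numbering of the two B blocks can differ.)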
If need be, we can provide a more 2273 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2274 then copying the submatrices */ 2275 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2276 } else { 2277 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2278 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2279 } 2280 PetscFunctionReturn(0); 2281 } 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatSetUp_MPIAIJ" 2285 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2286 { 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2296 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2297 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2298 { 2299 PetscInt i,m=Y->rmap->N; 2300 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2301 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2302 const PetscInt *xi = x->i,*yi = y->i; 2303 2304 PetscFunctionBegin; 2305 /* Set the number of nonzeros in the new matrix */ 2306 for (i=0; i<m; i++) { 2307 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2308 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2309 nnz[i] = 0; 2310 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2311 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2312 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2313 nnz[i]++; 2314 } 2315 for (; k<nzy; k++) nnz[i]++; 2316 } 2317 PetscFunctionReturn(0); 2318 } 2319 2320 #undef __FUNCT__ 2321 #define __FUNCT__ "MatAXPY_MPIAIJ" 2322 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2323 { 2324 PetscErrorCode ierr; 2325 PetscInt i; 2326 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2327 PetscBLASInt bnz,one=1; 2328 Mat_SeqAIJ *x,*y; 2329 2330 PetscFunctionBegin; 2331 if (str == SAME_NONZERO_PATTERN) { 2332 PetscScalar alpha = a; 2333 x = (Mat_SeqAIJ *)xx->A->data; 2334 y = (Mat_SeqAIJ *)yy->A->data; 2335 bnz = PetscBLASIntCast(x->nz); 2336 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2337 x = (Mat_SeqAIJ *)xx->B->data; 2338 y = (Mat_SeqAIJ *)yy->B->data; 2339 bnz = PetscBLASIntCast(x->nz); 2340 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2341 } else if (str == SUBSET_NONZERO_PATTERN) { 2342 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2343 2344 x = (Mat_SeqAIJ *)xx->B->data; 2345 y = (Mat_SeqAIJ *)yy->B->data; 2346 if (y->xtoy && y->XtoY != xx->B) { 2347 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2348 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2349 } 2350 if (!y->xtoy) { /* get xtoy */ 2351 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2352 y->XtoY = xx->B; 2353 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2354 } 2355 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2356 } else { 2357 Mat B; 2358 PetscInt *nnz_d,*nnz_o; 2359 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2360 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2361 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2362 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2363 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2364 ierr = 
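/* different pattern: count the merged (union) column pattern of Y and X row by row, using the garray local-to-global maps for the off-diagonal blocks, preallocate a fresh matrix B accordingly, run the basic AXPY into B, and let B replace Y via MatHeaderReplace() */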
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2365 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2366 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2367 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2368 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2369 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2370 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2371 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2372 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2373 } 2374 PetscFunctionReturn(0); 2375 } 2376 2377 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2378 2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatConjugate_MPIAIJ" 2381 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2382 { 2383 #if defined(PETSC_USE_COMPLEX) 2384 PetscErrorCode ierr; 2385 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2386 2387 PetscFunctionBegin; 2388 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2389 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2390 #else 2391 PetscFunctionBegin; 2392 #endif 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatRealPart_MPIAIJ" 2398 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2399 { 2400 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2405 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2411 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2412 { 2413 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2418 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #ifdef PETSC_HAVE_PBGL 2423 2424 #include <boost/parallel/mpi/bsp_process_group.hpp> 2425 #include <boost/graph/distributed/ilu_default_graph.hpp> 2426 #include <boost/graph/distributed/ilu_0_block.hpp> 2427 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2428 #include <boost/graph/distributed/petsc/interface.hpp> 2429 #include <boost/multi_array.hpp> 2430 #include <boost/parallel/distributed_property_map.hpp> 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2434 /* 2435 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2436 */ 2437 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2438 { 2439 namespace petsc = boost::distributed::petsc; 2440 2441 namespace graph_dist = boost::graph::distributed; 2442 using boost::graph::distributed::ilu_default::process_group_type; 2443 using boost::graph::ilu_permuted; 2444 2445 PetscBool row_identity, col_identity; 2446 PetscContainer c; 2447 PetscInt m, n, M, N; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2452 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2453 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2454 if (!row_identity || !col_identity) { 2455 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2456 } 2457 2458 process_group_type pg; 2459 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2460 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2461 lgraph_type& level_graph = *lgraph_p; 2462 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2463 2464 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2465 ilu_permuted(level_graph); 2466 2467 /* put together the new matrix */ 2468 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2469 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2470 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2471 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2472 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2473 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2474 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2475 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 2477 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr); 2478 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2479 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2480 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2486 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2487 { 2488 PetscFunctionBegin; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatSolve_MPIAIJ" 2494 /* 2495 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2496 */ 2497 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2498 { 2499 namespace graph_dist = boost::graph::distributed; 2500 2501 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2502 lgraph_type* lgraph_p; 2503 PetscContainer c; 2504 PetscErrorCode ierr; 2505 2506 PetscFunctionBegin; 2507 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2508 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2509 ierr = VecCopy(b, x);CHKERRQ(ierr); 2510 2511 PetscScalar* array_x; 2512 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2513 PetscInt sx; 2514 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2515 2516 PetscScalar* array_b; 2517 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2518 PetscInt sb; 2519 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2520 2521 lgraph_type& level_graph = *lgraph_p; 2522 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2523 2524 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2525 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2526 ref_x(array_x, boost::extents[num_vertices(graph)]); 2527 2528 typedef boost::iterator_property_map<array_ref_type::iterator, 2529 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2530 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2531 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2532 2533 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2534 ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 #endif 2538 2539 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2540 PetscInt nzlocal,nsends,nrecvs; 2541 PetscMPIInt *send_rank,*recv_rank; 2542 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2543 PetscScalar *sbuf_a,**rbuf_a; 2544 PetscErrorCode (*Destroy)(Mat); 2545 } Mat_Redundant; 2546 2547 #undef __FUNCT__ 2548 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2549 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2550 { 2551 PetscErrorCode ierr; 2552 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2553 PetscInt i; 2554 2555 PetscFunctionBegin; 2556 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2557 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2558 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2559 for (i=0; i<redund->nrecvs; i++){ 2560 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2561 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2562 } 2563 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2564 ierr = PetscFree(redund);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 #undef __FUNCT__ 2569 #define __FUNCT__ "MatDestroy_MatRedundant" 2570 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2571 { 2572 PetscErrorCode ierr; 2573 PetscContainer container; 2574 Mat_Redundant *redund=PETSC_NULL; 2575 2576 PetscFunctionBegin; 2577 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2578 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist"); 2579 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2580 A->ops->destroy = redund->Destroy; 2581 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2582 if (A->ops->destroy) { 2583 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2584 } 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2590 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2591 { 2592 PetscMPIInt rank,size; 2593 MPI_Comm comm=((PetscObject)mat)->comm; 2594 PetscErrorCode ierr; 2595 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2596 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2597 PetscInt *rowrange=mat->rmap->range; 2598 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2599 Mat A=aij->A,B=aij->B,C=*matredundant; 2600 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2601 PetscScalar *sbuf_a; 2602 PetscInt nzlocal=a->nz+b->nz; 2603 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2604 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2605 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2606 MatScalar *aworkA,*aworkB; 2607 PetscScalar *vals; 2608 PetscMPIInt tag1,tag2,tag3,imdex; 2609 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2610 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2611 MPI_Status recv_status,*send_status; 2612 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2613 PetscInt **rbuf_j=PETSC_NULL; 2614 PetscScalar **rbuf_a=PETSC_NULL; 2615 Mat_Redundant *redund=PETSC_NULL; 2616 PetscContainer container; 2617 2618 PetscFunctionBegin; 2619 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2621 2622 if (reuse == MAT_REUSE_MATRIX) { 2623 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2624 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2625 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2626 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2627 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2628 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2629 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2630 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2631 2632 nsends = redund->nsends; 2633 nrecvs = redund->nrecvs; 2634 send_rank = redund->send_rank; 2635 recv_rank = redund->recv_rank; 2636 sbuf_nz = redund->sbuf_nz; 2637 rbuf_nz = redund->rbuf_nz; 2638 sbuf_j = redund->sbuf_j; 2639 sbuf_a = redund->sbuf_a; 2640 rbuf_j = redund->rbuf_j; 2641 rbuf_a = redund->rbuf_a; 2642 } 2643 2644 if (reuse == MAT_INITIAL_MATRIX){ 2645 PetscMPIInt subrank,subsize; 2646 PetscInt nleftover,np_subcomm; 2647 /* get the destination processors' id send_rank, nsends and nrecvs */ 2648 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2649 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2650 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2651 np_subcomm = size/nsubcomm; 2652 nleftover = size - nsubcomm*np_subcomm; 2653 nsends = 0; nrecvs = 0; 2654 for (i=0; i<size; i++){ /* i=rank*/ 2655 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2656 send_rank[nsends] = i; nsends++; 2657 recv_rank[nrecvs++] = i; 2658 } 2659 } 2660 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2661 i = size-nleftover-1; 2662 j = 0; 2663 while (j < nsubcomm - nleftover){ 2664 send_rank[nsends++] = i; 2665 i--; j++; 2666 } 2667 } 2668 2669 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2670 for (i=0; i<nleftover; i++){ 2671 recv_rank[nrecvs++] = size-nleftover+i; 2672 } 2673 } 2674 2675 /* allocate sbuf_j, sbuf_a */ 2676 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2677 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2678 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2679 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2680 2681 /* copy mat's local entries into the buffers */ 2682 if (reuse == MAT_INITIAL_MATRIX){ 2683 rownz_max = 0; 2684 rptr = sbuf_j; 2685 cols = sbuf_j + rend-rstart + 1; 2686 vals = sbuf_a; 2687 rptr[0] = 0; 2688 for (i=0; i<rend-rstart; i++){ 2689 row = i + rstart; 2690 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2691 ncols = nzA + nzB; 2692 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2693 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2694 /* load the column indices for this row into cols */ 2695 lwrite = 0; 2696 for (l=0; l<nzB; l++) { 2697 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2698 vals[lwrite] = aworkB[l]; 2699 cols[lwrite++] = ctmp; 2700 } 2701 } 2702 for (l=0; l<nzA; l++){ 2703 vals[lwrite] = aworkA[l]; 2704 cols[lwrite++] = cstart + cworkA[l]; 2705 } 2706 for (l=0; l<nzB; l++) { 2707 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2708 vals[lwrite] = aworkB[l]; 2709 cols[lwrite++] = ctmp; 2710 } 2711 } 2712 vals += ncols; 2713 cols += ncols; 2714 rptr[i+1] = rptr[i] + ncols; 2715 if (rownz_max < ncols) rownz_max = ncols; 2716 } 2717 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2718 } else { /* only copy matrix values into sbuf_a */ 2719 rptr = sbuf_j; 2720 vals = sbuf_a; 2721 rptr[0] = 0; 2722 for (i=0; i<rend-rstart; i++){ 
2723 row = i + rstart; 2724 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2725 ncols = nzA + nzB; 2726 cworkB = b->j + b->i[i]; 2727 aworkA = a->a + a->i[i]; 2728 aworkB = b->a + b->i[i]; 2729 lwrite = 0; 2730 for (l=0; l<nzB; l++) { 2731 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2732 } 2733 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2734 for (l=0; l<nzB; l++) { 2735 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2736 } 2737 vals += ncols; 2738 rptr[i+1] = rptr[i] + ncols; 2739 } 2740 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2741 2742 /* send nzlocal to others, and recv other's nzlocal */ 2743 /*--------------------------------------------------*/ 2744 if (reuse == MAT_INITIAL_MATRIX){ 2745 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2746 s_waits2 = s_waits3 + nsends; 2747 s_waits1 = s_waits2 + nsends; 2748 r_waits1 = s_waits1 + nsends; 2749 r_waits2 = r_waits1 + nrecvs; 2750 r_waits3 = r_waits2 + nrecvs; 2751 } else { 2752 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2753 r_waits3 = s_waits3 + nsends; 2754 } 2755 2756 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2757 if (reuse == MAT_INITIAL_MATRIX){ 2758 /* get new tags to keep the communication clean */ 2759 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2760 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2761 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2762 2763 /* post receives of other's nzlocal */ 2764 for (i=0; i<nrecvs; i++){ 2765 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2766 } 2767 /* send nzlocal to others */ 2768 for (i=0; i<nsends; i++){ 2769 sbuf_nz[i] = nzlocal; 2770 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2771 } 2772 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2773 count = nrecvs; 2774 while (count) { 2775 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2776 recv_rank[imdex] = recv_status.MPI_SOURCE; 2777 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2778 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2779 2780 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2781 rbuf_nz[imdex] += i + 2; 2782 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2783 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2784 count--; 2785 } 2786 /* wait on sends of nzlocal */ 2787 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2788 /* send mat->i,j to others, and recv from other's */ 2789 /*------------------------------------------------*/ 2790 for (i=0; i<nsends; i++){ 2791 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2792 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2793 } 2794 /* wait on receives of mat->i,j */ 2795 /*------------------------------*/ 2796 count = nrecvs; 2797 while (count) { 2798 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2799 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != 
MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2800 count--; 2801 } 2802 /* wait on sends of mat->i,j */ 2803 /*---------------------------*/ 2804 if (nsends) { 2805 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2806 } 2807 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2808 2809 /* post receives, send and receive mat->a */ 2810 /*----------------------------------------*/ 2811 for (imdex=0; imdex<nrecvs; imdex++) { 2812 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2813 } 2814 for (i=0; i<nsends; i++){ 2815 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2816 } 2817 count = nrecvs; 2818 while (count) { 2819 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2820 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2821 count--; 2822 } 2823 if (nsends) { 2824 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2825 } 2826 2827 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2828 2829 /* create redundant matrix */ 2830 /*-------------------------*/ 2831 if (reuse == MAT_INITIAL_MATRIX){ 2832 /* compute rownz_max for preallocation */ 2833 for (imdex=0; imdex<nrecvs; imdex++){ 2834 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2835 rptr = rbuf_j[imdex]; 2836 for (i=0; i<j; i++){ 2837 ncols = rptr[i+1] - rptr[i]; 2838 if (rownz_max < ncols) rownz_max = ncols; 2839 } 2840 } 2841 2842 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2843 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2844 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2845 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2846 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2847 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2848 } else { 2849 C = *matredundant; 2850 } 2851 2852 /* insert local matrix entries */ 2853 rptr = sbuf_j; 2854 cols = sbuf_j + rend-rstart + 1; 2855 vals = sbuf_a; 2856 for (i=0; i<rend-rstart; i++){ 2857 row = i + rstart; 2858 ncols = rptr[i+1] - rptr[i]; 2859 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2860 vals += ncols; 2861 cols += ncols; 2862 } 2863 /* insert received matrix entries */ 2864 for (imdex=0; imdex<nrecvs; imdex++){ 2865 rstart = rowrange[recv_rank[imdex]]; 2866 rend = rowrange[recv_rank[imdex]+1]; 2867 rptr = rbuf_j[imdex]; 2868 cols = rbuf_j[imdex] + rend-rstart + 1; 2869 vals = rbuf_a[imdex]; 2870 for (i=0; i<rend-rstart; i++){ 2871 row = i + rstart; 2872 ncols = rptr[i+1] - rptr[i]; 2873 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 vals += ncols; 2875 cols += ncols; 2876 } 2877 } 2878 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2879 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2881 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2882 if (reuse == MAT_INITIAL_MATRIX) { 2883 PetscContainer container; 2884 *matredundant = C; 2885 /* create a supporting struct and attach it to C for reuse */ 2886 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2887 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2888 ierr 
= PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2889 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2890 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2891 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2892 2893 redund->nzlocal = nzlocal; 2894 redund->nsends = nsends; 2895 redund->nrecvs = nrecvs; 2896 redund->send_rank = send_rank; 2897 redund->recv_rank = recv_rank; 2898 redund->sbuf_nz = sbuf_nz; 2899 redund->rbuf_nz = rbuf_nz; 2900 redund->sbuf_j = sbuf_j; 2901 redund->sbuf_a = sbuf_a; 2902 redund->rbuf_j = rbuf_j; 2903 redund->rbuf_a = rbuf_a; 2904 2905 redund->Destroy = C->ops->destroy; 2906 C->ops->destroy = MatDestroy_MatRedundant; 2907 } 2908 PetscFunctionReturn(0); 2909 } 2910 2911 #undef __FUNCT__ 2912 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2913 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2914 { 2915 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2916 PetscErrorCode ierr; 2917 PetscInt i,*idxb = 0; 2918 PetscScalar *va,*vb; 2919 Vec vtmp; 2920 2921 PetscFunctionBegin; 2922 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2923 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2924 if (idx) { 2925 for (i=0; i<A->rmap->n; i++) { 2926 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2927 } 2928 } 2929 2930 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2931 if (idx) { 2932 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2933 } 2934 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2935 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2936 2937 for (i=0; i<A->rmap->n; i++){ 2938 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2939 va[i] = vb[i]; 2940 if (idx) idx[i] = a->garray[idxb[i]]; 2941 } 2942 } 2943 2944 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2945 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2946 ierr = PetscFree(idxb);CHKERRQ(ierr); 2947 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 #undef __FUNCT__ 2952 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2953 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2954 { 2955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2956 PetscErrorCode ierr; 2957 PetscInt i,*idxb = 0; 2958 PetscScalar *va,*vb; 2959 Vec vtmp; 2960 2961 PetscFunctionBegin; 2962 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2963 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2964 if (idx) { 2965 for (i=0; i<A->rmap->n; i++) { 2966 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2967 } 2968 } 2969 2970 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2971 if (idx) { 2972 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2973 } 2974 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2975 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2976 2977 for (i=0; i<A->rmap->n; i++){ 2978 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2979 va[i] = vb[i]; 2980 if (idx) idx[i] = a->garray[idxb[i]]; 2981 } 2982 } 2983 2984 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2985 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2986 ierr = PetscFree(idxb);CHKERRQ(ierr); 2987 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2993 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2994 { 2995 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2996 PetscInt n = A->rmap->n; 2997 PetscInt cstart = 
A->cmap->rstart; 2998 PetscInt *cmap = mat->garray; 2999 PetscInt *diagIdx, *offdiagIdx; 3000 Vec diagV, offdiagV; 3001 PetscScalar *a, *diagA, *offdiagA; 3002 PetscInt r; 3003 PetscErrorCode ierr; 3004 3005 PetscFunctionBegin; 3006 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3007 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3008 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3009 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3010 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3011 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3012 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3013 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3014 for (r = 0; r < n; ++r) { 3015 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3016 a[r] = diagA[r]; 3017 idx[r] = cstart + diagIdx[r]; 3018 } else { 3019 a[r] = offdiagA[r]; 3020 idx[r] = cmap[offdiagIdx[r]]; 3021 } 3022 } 3023 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3024 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3025 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3026 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3027 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3028 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3029 PetscFunctionReturn(0); 3030 } 3031 3032 #undef __FUNCT__ 3033 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3034 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3035 { 3036 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 3037 PetscInt n = A->rmap->n; 3038 PetscInt cstart = A->cmap->rstart; 3039 PetscInt *cmap = mat->garray; 3040 PetscInt *diagIdx, *offdiagIdx; 3041 Vec diagV, offdiagV; 3042 PetscScalar *a, *diagA, *offdiagA; 3043 PetscInt r; 3044 PetscErrorCode ierr; 3045 3046 PetscFunctionBegin; 3047 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3048 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3049 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3050 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3051 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3052 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3053 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3054 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3055 for (r = 0; r < n; ++r) { 3056 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3057 a[r] = diagA[r]; 3058 idx[r] = cstart + diagIdx[r]; 3059 } else { 3060 a[r] = offdiagA[r]; 3061 idx[r] = cmap[offdiagIdx[r]]; 3062 } 3063 } 3064 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3065 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3066 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3067 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3068 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3069 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3075 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3076 { 3077 PetscErrorCode ierr; 3078 Mat *dummy; 3079 3080 PetscFunctionBegin; 3081 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3082 *newmat = *dummy; 3083 ierr = PetscFree(dummy);CHKERRQ(ierr); 3084 PetscFunctionReturn(0); 3085 } 3086 3087 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3088 3089 #undef __FUNCT__ 3090 #define
__FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3091 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3092 { 3093 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3094 PetscErrorCode ierr; 3095 3096 PetscFunctionBegin; 3097 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3098 PetscFunctionReturn(0); 3099 } 3100 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3103 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3104 { 3105 PetscErrorCode ierr; 3106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3107 3108 PetscFunctionBegin; 3109 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3110 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3111 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* -------------------------------------------------------------------*/ 3117 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3118 MatGetRow_MPIAIJ, 3119 MatRestoreRow_MPIAIJ, 3120 MatMult_MPIAIJ, 3121 /* 4*/ MatMultAdd_MPIAIJ, 3122 MatMultTranspose_MPIAIJ, 3123 MatMultTransposeAdd_MPIAIJ, 3124 #ifdef PETSC_HAVE_PBGL 3125 MatSolve_MPIAIJ, 3126 #else 3127 0, 3128 #endif 3129 0, 3130 0, 3131 /*10*/ 0, 3132 0, 3133 0, 3134 MatSOR_MPIAIJ, 3135 MatTranspose_MPIAIJ, 3136 /*15*/ MatGetInfo_MPIAIJ, 3137 MatEqual_MPIAIJ, 3138 MatGetDiagonal_MPIAIJ, 3139 MatDiagonalScale_MPIAIJ, 3140 MatNorm_MPIAIJ, 3141 /*20*/ MatAssemblyBegin_MPIAIJ, 3142 MatAssemblyEnd_MPIAIJ, 3143 MatSetOption_MPIAIJ, 3144 MatZeroEntries_MPIAIJ, 3145 /*24*/ MatZeroRows_MPIAIJ, 3146 0, 3147 #ifdef PETSC_HAVE_PBGL 3148 0, 3149 #else 3150 0, 3151 #endif 3152 0, 3153 0, 3154 /*29*/ MatSetUp_MPIAIJ, 3155 #ifdef PETSC_HAVE_PBGL 3156 0, 3157 #else 3158 0, 3159 #endif 3160 0, 3161 0, 3162 0, 3163 /*34*/ MatDuplicate_MPIAIJ, 3164 0, 3165 0, 3166 0, 3167 0, 3168 /*39*/ MatAXPY_MPIAIJ, 3169 MatGetSubMatrices_MPIAIJ, 3170 MatIncreaseOverlap_MPIAIJ, 3171 MatGetValues_MPIAIJ, 3172 MatCopy_MPIAIJ, 3173 /*44*/ MatGetRowMax_MPIAIJ, 3174 MatScale_MPIAIJ, 3175 0, 3176 0, 3177 MatZeroRowsColumns_MPIAIJ, 3178 /*49*/ MatSetRandom_MPIAIJ, 3179 0, 3180 0, 3181 0, 3182 0, 3183 /*54*/ MatFDColoringCreate_MPIAIJ, 3184 0, 3185 MatSetUnfactored_MPIAIJ, 3186 MatPermute_MPIAIJ, 3187 0, 3188 /*59*/ MatGetSubMatrix_MPIAIJ, 3189 MatDestroy_MPIAIJ, 3190 MatView_MPIAIJ, 3191 0, 3192 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3193 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3194 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3195 0, 3196 0, 3197 0, 3198 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3199 MatGetRowMinAbs_MPIAIJ, 3200 0, 3201 MatSetColoring_MPIAIJ, 3202 0, 3203 MatSetValuesAdifor_MPIAIJ, 3204 /*75*/ MatFDColoringApply_AIJ, 3205 0, 3206 0, 3207 0, 3208 MatFindZeroDiagonals_MPIAIJ, 3209 /*80*/ 0, 3210 0, 3211 0, 3212 /*83*/ MatLoad_MPIAIJ, 3213 0, 3214 0, 3215 0, 3216 0, 3217 0, 3218 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3219 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3220 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3221 MatPtAP_MPIAIJ_MPIAIJ, 3222 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3223 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3224 0, 3225 0, 3226 0, 3227 0, 3228 /*99*/ 0, 3229 0, 3230 0, 3231 MatConjugate_MPIAIJ, 3232 0, 3233 /*104*/MatSetValuesRow_MPIAIJ, 3234 MatRealPart_MPIAIJ, 3235 MatImaginaryPart_MPIAIJ, 3236 0, 3237 0, 3238 /*109*/0, 3239 MatGetRedundantMatrix_MPIAIJ, 3240 MatGetRowMin_MPIAIJ, 3241 0, 3242 0, 3243 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3244 0, 3245 0, 3246 0, 3247 0, 3248 /*119*/0, 3249 0, 3250 0, 3251 0, 3252 
MatGetMultiProcBlock_MPIAIJ, 3253 /*124*/MatFindNonzeroRows_MPIAIJ, 3254 MatGetColumnNorms_MPIAIJ, 3255 MatInvertBlockDiagonal_MPIAIJ, 3256 0, 3257 MatGetSubMatricesParallel_MPIAIJ, 3258 /*129*/0, 3259 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3260 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3261 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3262 0, 3263 /*134*/0, 3264 0, 3265 0, 3266 0, 3267 0 3268 }; 3269 3270 /* ----------------------------------------------------------------------------------------*/ 3271 3272 EXTERN_C_BEGIN 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3275 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3276 { 3277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3278 PetscErrorCode ierr; 3279 3280 PetscFunctionBegin; 3281 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3282 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 EXTERN_C_END 3286 3287 EXTERN_C_BEGIN 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3290 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3291 { 3292 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3293 PetscErrorCode ierr; 3294 3295 PetscFunctionBegin; 3296 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3297 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 EXTERN_C_END 3301 3302 EXTERN_C_BEGIN 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3305 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3306 { 3307 Mat_MPIAIJ *b; 3308 PetscErrorCode ierr; 3309 PetscInt i; 3310 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3311 3312 PetscFunctionBegin; 3313 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3314 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3315 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3316 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3317 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3318 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3319 3320 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3321 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3322 if (d_nnz) { 3323 for (i=0; i<B->rmap->n; i++) { 3324 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3325 } 3326 } 3327 if (o_nnz) { 3328 for (i=0; i<B->rmap->n; i++) { 3329 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3330 } 3331 } 3332 b = (Mat_MPIAIJ*)B->data; 3333 3334 if (!B->preallocated) { 3335 /* Explicitly create 2 MATSEQAIJ matrices. 
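b->A is the "diagonal" block (local rows by local columns); b->B is the
       "off-diagonal" block and is created here with the full global column
       width N (its column space is later compressed to just the referenced
       ghost columns, recorded in garray).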
*/ 3336 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3337 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3338 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3339 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3340 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3341 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3342 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3343 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3344 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3345 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3346 } 3347 3348 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3349 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3350 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3351 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3352 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3353 B->preallocated = PETSC_TRUE; 3354 PetscFunctionReturn(0); 3355 } 3356 EXTERN_C_END 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3360 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3361 { 3362 Mat mat; 3363 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3364 PetscErrorCode ierr; 3365 3366 PetscFunctionBegin; 3367 *newmat = 0; 3368 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3369 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3370 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3371 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3372 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3373 a = (Mat_MPIAIJ*)mat->data; 3374 3375 mat->factortype = matin->factortype; 3376 mat->rmap->bs = matin->rmap->bs; 3377 mat->cmap->bs = matin->cmap->bs; 3378 mat->assembled = PETSC_TRUE; 3379 mat->insertmode = NOT_SET_VALUES; 3380 mat->preallocated = PETSC_TRUE; 3381 3382 a->size = oldmat->size; 3383 a->rank = oldmat->rank; 3384 a->donotstash = oldmat->donotstash; 3385 a->roworiented = oldmat->roworiented; 3386 a->rowindices = 0; 3387 a->rowvalues = 0; 3388 a->getrowactive = PETSC_FALSE; 3389 3390 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3391 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3392 3393 if (oldmat->colmap) { 3394 #if defined (PETSC_USE_CTABLE) 3395 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3396 #else 3397 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3398 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3399 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3400 #endif 3401 } else a->colmap = 0; 3402 if (oldmat->garray) { 3403 PetscInt len; 3404 len = oldmat->B->cmap->n; 3405 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3406 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3407 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3408 } else a->garray = 0; 3409 3410 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3411 ierr = 
PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3412 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3413 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3414 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3415 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3416 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3417 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3418 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3419 *newmat = mat; 3420 PetscFunctionReturn(0); 3421 } 3422 3423 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatLoad_MPIAIJ" 3427 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3428 { 3429 PetscScalar *vals,*svals; 3430 MPI_Comm comm = ((PetscObject)viewer)->comm; 3431 PetscErrorCode ierr; 3432 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3433 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3434 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3435 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3436 PetscInt cend,cstart,n,*rowners,sizesset=1; 3437 int fd; 3438 PetscInt bs = 1; 3439 3440 PetscFunctionBegin; 3441 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3442 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3443 if (!rank) { 3444 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3445 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3446 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3447 } 3448 3449 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3450 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3451 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3452 3453 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3454 3455 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3456 M = header[1]; N = header[2]; 3457 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3458 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3459 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3460 3461 /* If global sizes are set, check if they are consistent with that given in the file */ 3462 if (sizesset) { 3463 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3464 } 3465 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3466 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3467 3468 /* determine ownership of all (block) rows */ 3469 if ( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3470 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3471 else m = newMat->rmap->n; /* Set by user */ 3472 3473 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3474 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3475 3476 /* First process needs enough room for process with most rows */ 3477 if (!rank) { 3478 mmax = rowners[1]; 3479 for (i=2; i<=size; i++) { 3480 
mmax = PetscMax(mmax, rowners[i]); 3481 } 3482 } else mmax = m; 3483 3484 rowners[0] = 0; 3485 for (i=2; i<=size; i++) { 3486 rowners[i] += rowners[i-1]; 3487 } 3488 rstart = rowners[rank]; 3489 rend = rowners[rank+1]; 3490 3491 /* distribute row lengths to all processors */ 3492 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3493 if (!rank) { 3494 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3495 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3496 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3497 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3498 for (j=0; j<m; j++) { 3499 procsnz[0] += ourlens[j]; 3500 } 3501 for (i=1; i<size; i++) { 3502 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3503 /* calculate the number of nonzeros on each processor */ 3504 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3505 procsnz[i] += rowlengths[j]; 3506 } 3507 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3510 } else { 3511 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3512 } 3513 3514 if (!rank) { 3515 /* determine max buffer needed and allocate it */ 3516 maxnz = 0; 3517 for (i=0; i<size; i++) { 3518 maxnz = PetscMax(maxnz,procsnz[i]); 3519 } 3520 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3521 3522 /* read in my part of the matrix column indices */ 3523 nz = procsnz[0]; 3524 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3525 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3526 3527 /* read in everyone else's and ship off */ 3528 for (i=1; i<size; i++) { 3529 nz = procsnz[i]; 3530 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3531 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3532 } 3533 ierr = PetscFree(cols);CHKERRQ(ierr); 3534 } else { 3535 /* determine buffer space needed for message */ 3536 nz = 0; 3537 for (i=0; i<m; i++) { 3538 nz += ourlens[i]; 3539 } 3540 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3541 3542 /* receive message of column indices */ 3543 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3544 } 3545 3546 /* determine column ownership if matrix is not square */ 3547 if (N != M) { 3548 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3549 else n = newMat->cmap->n; 3550 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3551 cstart = cend - n; 3552 } else { 3553 cstart = rstart; 3554 cend = rend; 3555 n = cend - cstart; 3556 } 3557 3558 /* loop over local rows, determining number of off diagonal entries */ 3559 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3560 jj = 0; 3561 for (i=0; i<m; i++) { 3562 for (j=0; j<ourlens[i]; j++) { 3563 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3564 jj++; 3565 } 3566 } 3567 3568 for (i=0; i<m; i++) { 3569 ourlens[i] -= offlens[i]; 3570 } 3571 if (!sizesset) { 3572 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3573 } 3574 3575 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3576 3577 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3578 3579 for (i=0; i<m; i++) { 3580 ourlens[i] += offlens[i]; 3581 } 3582 3583 if (!rank) { 3584 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3585 3586 /* read in my part of the matrix numerical values */
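/* rank 0 reads its own block of values first; the vals buffer was sized with
      maxnz (the largest per-process nonzero count) so the loop further below can
      reuse it to read and ship every other process's block with MPIULong_Send() */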
3587 nz = procsnz[0]; 3588 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3589 3590 /* insert into matrix */ 3591 jj = rstart; 3592 smycols = mycols; 3593 svals = vals; 3594 for (i=0; i<m; i++) { 3595 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3596 smycols += ourlens[i]; 3597 svals += ourlens[i]; 3598 jj++; 3599 } 3600 3601 /* read in the other processors' parts and ship them out */ 3602 for (i=1; i<size; i++) { 3603 nz = procsnz[i]; 3604 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3605 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3606 } 3607 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3608 } else { 3609 /* receive numeric values */ 3610 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3611 3612 /* receive message of values */ 3613 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3614 3615 /* insert into matrix */ 3616 jj = rstart; 3617 smycols = mycols; 3618 svals = vals; 3619 for (i=0; i<m; i++) { 3620 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3621 smycols += ourlens[i]; 3622 svals += ourlens[i]; 3623 jj++; 3624 } 3625 } 3626 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3627 ierr = PetscFree(vals);CHKERRQ(ierr); 3628 ierr = PetscFree(mycols);CHKERRQ(ierr); 3629 ierr = PetscFree(rowners);CHKERRQ(ierr); 3630 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3631 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3632 3633 PetscFunctionReturn(0); 3634 } 3635 3636 #undef __FUNCT__ 3637 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3638 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3639 { 3640 PetscErrorCode ierr; 3641 IS iscol_local; 3642 PetscInt csize; 3643 3644 PetscFunctionBegin; 3645 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3646 if (call == MAT_REUSE_MATRIX) { 3647 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3648 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3649 } else { 3650 PetscInt cbs; 3651 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3652 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3653 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3654 } 3655 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3656 if (call == MAT_INITIAL_MATRIX) { 3657 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3658 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3659 } 3660 PetscFunctionReturn(0); 3661 } 3662 3663 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3664 #undef __FUNCT__ 3665 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3666 /* 3667 Not great since it makes two copies of the submatrix: first a SeqAIJ 3668 on each process, then the end result obtained by concatenating the local matrices. 3669 Writing it directly would be much like MatGetSubMatrices_MPIAIJ(). 3670 3671 Note: This requires a sequential iscol containing all the column indices.
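(MatGetSubMatrix_MPIAIJ() above builds such an iscol with ISAllGather()
    before calling this routine.)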
3672 */ 3673 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3674 { 3675 PetscErrorCode ierr; 3676 PetscMPIInt rank,size; 3677 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3678 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3679 PetscBool allcolumns, colflag; 3680 Mat M,Mreuse; 3681 MatScalar *vwork,*aa; 3682 MPI_Comm comm = ((PetscObject)mat)->comm; 3683 Mat_SeqAIJ *aij; 3684 3685 3686 PetscFunctionBegin; 3687 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3688 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3689 3690 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3691 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3692 if (colflag && ncol == mat->cmap->N){ 3693 allcolumns = PETSC_TRUE; 3694 } else { 3695 allcolumns = PETSC_FALSE; 3696 } 3697 if (call == MAT_REUSE_MATRIX) { 3698 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3699 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3700 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3701 } else { 3702 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3703 } 3704 3705 /* 3706 m - number of local rows 3707 n - number of columns (same on all processors) 3708 rstart - first row in new global matrix generated 3709 */ 3710 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3711 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3712 if (call == MAT_INITIAL_MATRIX) { 3713 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3714 ii = aij->i; 3715 jj = aij->j; 3716 3717 /* 3718 Determine the number of non-zeros in the diagonal and off-diagonal 3719 portions of the matrix in order to do correct preallocation 3720 */ 3721 3722 /* first get start and end of "diagonal" columns */ 3723 if (csize == PETSC_DECIDE) { 3724 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3725 if (mglobal == n) { /* square matrix */ 3726 nlocal = m; 3727 } else { 3728 nlocal = n/size + ((n % size) > rank); 3729 } 3730 } else { 3731 nlocal = csize; 3732 } 3733 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3734 rstart = rend - nlocal; 3735 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3736 3737 /* next, compute all the lengths */ 3738 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3739 olens = dlens + m; 3740 for (i=0; i<m; i++) { 3741 jend = ii[i+1] - ii[i]; 3742 olen = 0; 3743 dlen = 0; 3744 for (j=0; j<jend; j++) { 3745 if (*jj < rstart || *jj >= rend) olen++; 3746 else dlen++; 3747 jj++; 3748 } 3749 olens[i] = olen; 3750 dlens[i] = dlen; 3751 } 3752 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3753 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3754 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3755 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3756 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3757 ierr = PetscFree(dlens);CHKERRQ(ierr); 3758 } else { 3759 PetscInt ml,nl; 3760 3761 M = *newmat; 3762 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3763 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3764 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3765 /* 3766 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly, 3767 rather than the slower MatSetValues(). 3768 */ 3769 M->was_assembled = PETSC_TRUE; 3770 M->assembled = PETSC_FALSE; 3771 } 3772 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3773 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3774 ii = aij->i; 3775 jj = aij->j; 3776 aa = aij->a; 3777 for (i=0; i<m; i++) { 3778 row = rstart + i; 3779 nz = ii[i+1] - ii[i]; 3780 cwork = jj; jj += nz; 3781 vwork = aa; aa += nz; 3782 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3783 } 3784 3785 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3786 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 *newmat = M; 3788 3789 /* save submatrix used in processor for next request */ 3790 if (call == MAT_INITIAL_MATRIX) { 3791 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3792 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3793 } 3794 3795 PetscFunctionReturn(0); 3796 } 3797 3798 EXTERN_C_BEGIN 3799 #undef __FUNCT__ 3800 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3801 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3802 { 3803 PetscInt m,cstart, cend,j,nnz,i,d; 3804 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3805 const PetscInt *JJ; 3806 PetscScalar *values; 3807 PetscErrorCode ierr; 3808 3809 PetscFunctionBegin; 3810 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3811 3812 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3813 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3814 m = B->rmap->n; 3815 cstart = B->cmap->rstart; 3816 cend = B->cmap->rend; 3817 rstart = B->rmap->rstart; 3818 3819 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3820 3821 #if defined(PETSC_USE_DEBUG) 3822 for (i=0; i<m; i++) { 3823 nnz = Ii[i+1]- Ii[i]; 3824 JJ = J + Ii[i]; 3825 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3826 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i); 3827 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3828 } 3829 #endif 3830 3831 for (i=0; i<m; i++) { 3832 nnz = Ii[i+1]- Ii[i]; 3833 JJ = J + Ii[i]; 3834 nnz_max = PetscMax(nnz_max,nnz); 3835 d = 0; 3836 for (j=0; j<nnz; j++) { 3837 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3838 } 3839 d_nnz[i] = d; 3840 o_nnz[i] = nnz - d; 3841 } 3842 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3843 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3844 3845 if (v) values = (PetscScalar*)v; 3846 else { 3847 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3848 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3849 } 3850 3851 for (i=0; i<m; i++) { 3852 ii = i + rstart; 3853 nnz = Ii[i+1]- Ii[i]; 3854 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
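/* if v was not supplied, values points at a single zero-filled buffer of
       length nnz_max, so every row inserts zeros and only the nonzero pattern
       is established */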
3855 } 3856 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3857 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3858 3859 if (!v) { 3860 ierr = PetscFree(values);CHKERRQ(ierr); 3861 } 3862 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3863 PetscFunctionReturn(0); 3864 } 3865 EXTERN_C_END 3866 3867 #undef __FUNCT__ 3868 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3869 /*@ 3870 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3871 (the default parallel PETSc format). 3872 3873 Collective on MPI_Comm 3874 3875 Input Parameters: 3876 + B - the matrix 3877 . i - the indices into j for the start of each local row (starts with zero) 3878 . j - the column indices for each local row (starts with zero) 3879 - v - optional values in the matrix 3880 3881 Level: developer 3882 3883 Notes: 3884 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3885 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3886 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3887 3888 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3889 3890 The format used for the sparse matrix input is equivalent to a 3891 row-major ordering, i.e., for the following matrix, the expected input data is 3892 as shown: 3893 3894 1 0 0 3895 2 0 3 P0 3896 ------- 3897 4 5 6 P1 3898 3899 Process0 [P0]: rows_owned=[0,1] 3900 i = {0,1,3} [size = nrow+1 = 2+1] 3901 j = {0,0,2} [size = nz = 3] 3902 v = {1,2,3} [size = nz = 3] 3903 3904 Process1 [P1]: rows_owned=[2] 3905 i = {0,3} [size = nrow+1 = 1+1] 3906 j = {0,1,2} [size = nz = 3] 3907 v = {4,5,6} [size = nz = 3] 3908 3909 .keywords: matrix, aij, compressed row, sparse, parallel 3910 3911 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3912 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3913 @*/ 3914 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3915 { 3916 PetscErrorCode ierr; 3917 3918 PetscFunctionBegin; 3919 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3920 PetscFunctionReturn(0); 3921 } 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3925 /*@C 3926 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3927 (the default parallel PETSc format). For good matrix assembly performance 3928 the user should preallocate the matrix storage by setting the parameters 3929 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3930 performance can be increased by more than a factor of 50. 3931 3932 Collective on MPI_Comm 3933 3934 Input Parameters: 3935 + A - the matrix 3936 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3937 (same value is used for all local rows) 3938 . d_nnz - array containing the number of nonzeros in the various rows of the 3939 DIAGONAL portion of the local submatrix (possibly different for each row) 3940 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3941 The size of this array is equal to the number of local rows, i.e., 'm'.
3942 For matrices that will be factored, you must leave room for (and set) 3943 the diagonal entry even if it is zero. 3944 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3945 submatrix (same value is used for all local rows). 3946 - o_nnz - array containing the number of nonzeros in the various rows of the 3947 OFF-DIAGONAL portion of the local submatrix (possibly different for 3948 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3949 structure. The size of this array is equal to the number 3950 of local rows, i.e., 'm'. 3951 3952 If the *_nnz parameter is given then the *_nz parameter is ignored. 3953 3954 The AIJ format (also called the Yale sparse matrix format or 3955 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3956 storage. The stored row and column indices begin with zero. 3957 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3958 3959 The parallel matrix is partitioned such that the first m0 rows belong to 3960 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3961 to process 2 etc., where m0,m1,m2,... are the input parameter 'm'. 3962 3963 The DIAGONAL portion of the local submatrix of a processor can be defined 3964 as the submatrix which is obtained by extracting the part corresponding to 3965 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3966 first row that belongs to the processor, r2 is the last row belonging to 3967 this processor, and c1-c2 is the range of indices of the local part of a 3968 vector suitable for applying the matrix to. This is an mxn matrix. In the 3969 common case of a square matrix, the row and column ranges are the same and 3970 the DIAGONAL part is also square. The remaining portion of the local 3971 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3972 3973 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3974 3975 You can call MatGetInfo() to get information on how effective the preallocation was; 3976 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3977 You can also run with the option -info and look for messages with the string 3978 malloc in them to see if additional memory allocation was needed. 3979 3980 Example usage: 3981 3982 Consider the following 8x8 matrix with 34 non-zero values, that is 3983 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3984 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3985 as follows: 3986 3987 .vb 3988 1 2 0 | 0 3 0 | 0 4 3989 Proc0 0 5 6 | 7 0 0 | 8 0 3990 9 0 10 | 11 0 0 | 12 0 3991 ------------------------------------- 3992 13 0 14 | 15 16 17 | 0 0 3993 Proc1 0 18 0 | 19 20 21 | 0 0 3994 0 0 0 | 22 23 0 | 24 0 3995 ------------------------------------- 3996 Proc2 25 26 27 | 0 0 28 | 29 0 3997 30 0 0 | 31 32 33 | 0 34 3998 .ve 3999 4000 This can be represented as a collection of submatrices as: 4001 4002 .vb 4003 A B C 4004 D E F 4005 G H I 4006 .ve 4007 4008 Where the submatrices A,B,C are owned by proc0, D,E,F are 4009 owned by proc1, G,H,I are owned by proc2. 4010 4011 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4012 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4013 The 'M','N' parameters are 8,8, and have the same values on all procs. 4014 4015 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4016 submatrices [A], [E], [I] respectively.
The OFF-DIAGONAL submatrices 4017 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4018 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4019 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4020 matrix, and [DF] as another SeqAIJ matrix. 4021 4022 When d_nz, o_nz parameters are specified, d_nz storage elements are 4023 allocated for every row of the local diagonal submatrix, and o_nz 4024 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4025 One way to choose d_nz and o_nz is to use the max nonzeros per local 4026 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4027 In this case, the values of d_nz,o_nz are: 4028 .vb 4029 proc0 : d_nz = 2, o_nz = 2 4030 proc1 : d_nz = 3, o_nz = 2 4031 proc2 : d_nz = 1, o_nz = 4 4032 .ve 4033 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4034 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4035 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4036 34 values. 4037 4038 When d_nnz, o_nnz parameters are specified, the storage is specified 4039 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4040 In the above case the values for d_nnz,o_nnz are: 4041 .vb 4042 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4043 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4044 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4045 .ve 4046 Here the space allocated is the sum of all the above values, i.e., 34, and 4047 hence pre-allocation is perfect. 4048 4049 Level: intermediate 4050 4051 .keywords: matrix, aij, compressed row, sparse, parallel 4052 4053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4054 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4055 @*/ 4056 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4057 { 4058 PetscErrorCode ierr; 4059 4060 PetscFunctionBegin; 4061 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4062 PetscValidType(B,1); 4063 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4064 PetscFunctionReturn(0); 4065 } 4066 4067 #undef __FUNCT__ 4068 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4069 /*@ 4070 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4071 in standard CSR format. 4072 4073 Collective on MPI_Comm 4074 4075 Input Parameters: 4076 + comm - MPI communicator 4077 . m - number of local rows (Cannot be PETSC_DECIDE) 4078 . n - This value should be the same as the local size used in creating the 4079 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4080 calculated if N is given) For square matrices n is almost always m. 4081 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4082 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4083 . i - row indices 4084 . j - column indices 4085 - a - matrix values 4086 4087 Output Parameter: 4088 . mat - the matrix 4089 4090 Level: intermediate 4091 4092 Notes: 4093 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4094 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4095 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
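       A minimal calling sketch, e.g. for process 0 of two using the example
       layout shown further below (the communicator and variable names here
       are illustrative only):
.vb
       PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};
       Mat         A;
       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve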
4096 4097 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4098 4099 The format used for the sparse matrix input is equivalent to a 4100 row-major ordering, i.e., for the following matrix, the expected input data is 4101 as shown: 4102 4103 1 0 0 4104 2 0 3 P0 4105 ------- 4106 4 5 6 P1 4107 4108 Process0 [P0]: rows_owned=[0,1] 4109 i = {0,1,3} [size = nrow+1 = 2+1] 4110 j = {0,0,2} [size = nz = 3] 4111 v = {1,2,3} [size = nz = 3] 4112 4113 Process1 [P1]: rows_owned=[2] 4114 i = {0,3} [size = nrow+1 = 1+1] 4115 j = {0,1,2} [size = nz = 3] 4116 v = {4,5,6} [size = nz = 3] 4117 4118 .keywords: matrix, aij, compressed row, sparse, parallel 4119 4120 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4121 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4122 @*/ 4123 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4124 { 4125 PetscErrorCode ierr; 4126 4127 PetscFunctionBegin; 4128 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4129 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4130 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4131 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4132 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4133 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4134 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4135 PetscFunctionReturn(0); 4136 } 4137 4138 #undef __FUNCT__ 4139 #define __FUNCT__ "MatCreateAIJ" 4140 /*@C 4141 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4142 (the default parallel PETSc format). For good matrix assembly performance 4143 the user should preallocate the matrix storage by setting the parameters 4144 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4145 performance can be increased by more than a factor of 50. 4146 4147 Collective on MPI_Comm 4148 4149 Input Parameters: 4150 + comm - MPI communicator 4151 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4152 This value should be the same as the local size used in creating the 4153 y vector for the matrix-vector product y = Ax. 4154 . n - This value should be the same as the local size used in creating the 4155 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4156 calculated if N is given) For square matrices n is almost always m. 4157 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4158 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4159 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4160 (same value is used for all local rows) 4161 . d_nnz - array containing the number of nonzeros in the various rows of the 4162 DIAGONAL portion of the local submatrix (possibly different for each row) 4163 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4164 The size of this array is equal to the number of local rows, i.e., 'm'. 4165 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4166 submatrix (same value is used for all local rows).
4167 - o_nnz - array containing the number of nonzeros in the various rows of the 4168 OFF-DIAGONAL portion of the local submatrix (possibly different for 4169 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4170 structure. The size of this array is equal to the number 4171 of local rows, i.e., 'm'. 4172 4173 Output Parameter: 4174 . A - the matrix 4175 4176 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4177 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4178 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4179 4180 Notes: 4181 If the *_nnz parameter is given then the *_nz parameter is ignored. 4182 4183 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4184 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4185 storage requirements for this matrix. 4186 4187 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4188 processor then it must be used on all processors that share the object for 4189 that argument. 4190 4191 The user MUST specify either the local or global matrix dimensions 4192 (possibly both). 4193 4194 The parallel matrix is partitioned across processors such that the 4195 first m0 rows belong to process 0, the next m1 rows belong to 4196 process 1, the next m2 rows belong to process 2 etc., where 4197 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4198 values corresponding to an [m x N] submatrix. 4199 4200 The columns are logically partitioned with the n0 columns belonging 4201 to 0th partition, the next n1 columns belonging to the next 4202 partition etc., where n0,n1,n2,... are the input parameter 'n'. 4203 4204 The DIAGONAL portion of the local submatrix on any given processor 4205 is the submatrix corresponding to the rows and columns m,n 4206 corresponding to the given processor, i.e., the diagonal matrix on 4207 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4208 etc. The remaining portion of the local submatrix [m x (N-n)] 4209 constitutes the OFF-DIAGONAL portion. The example below better 4210 illustrates this concept. 4211 4212 For a square global matrix we define each processor's diagonal portion 4213 to be its local rows and the corresponding columns (a square submatrix); 4214 each processor's off-diagonal portion encompasses the remainder of the 4215 local matrix (a rectangular submatrix). 4216 4217 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4218 4219 When calling this routine with a single process communicator, a matrix of 4220 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4221 type of communicator, use the construction mechanism: 4222 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4223 4224 By default, this format uses inodes (identical nodes) when possible. 4225 We search for consecutive rows with the same nonzero structure, thereby 4226 reusing matrix information to achieve increased efficiency. 4227 4228 Options Database Keys: 4229 + -mat_no_inode - Do not use inodes 4230 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4231 - -mat_aij_oneindex - Internally use indexing starting at 1 4232 rather than 0. Note that when calling MatSetValues(), 4233 the user still MUST index entries starting at 0! 4234 4235 4236 Example usage: 4237 4238 Consider the following 8x8 matrix with 34 non-zero values, that is 4239 assembled across 3 processors.
Let's assume that proc0 owns 3 rows, 4240 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4241 as follows: 4242 4243 .vb 4244 1 2 0 | 0 3 0 | 0 4 4245 Proc0 0 5 6 | 7 0 0 | 8 0 4246 9 0 10 | 11 0 0 | 12 0 4247 ------------------------------------- 4248 13 0 14 | 15 16 17 | 0 0 4249 Proc1 0 18 0 | 19 20 21 | 0 0 4250 0 0 0 | 22 23 0 | 24 0 4251 ------------------------------------- 4252 Proc2 25 26 27 | 0 0 28 | 29 0 4253 30 0 0 | 31 32 33 | 0 34 4254 .ve 4255 4256 This can be represented as a collection of submatrices as: 4257 4258 .vb 4259 A B C 4260 D E F 4261 G H I 4262 .ve 4263 4264 Where the submatrices A,B,C are owned by proc0, D,E,F are 4265 owned by proc1, G,H,I are owned by proc2. 4266 4267 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4268 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4269 The 'M','N' parameters are 8,8, and have the same values on all procs. 4270 4271 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4272 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4273 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4274 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4275 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 4276 matrix, and [DF] as another SeqAIJ matrix. 4277 4278 When d_nz, o_nz parameters are specified, d_nz storage elements are 4279 allocated for every row of the local diagonal submatrix, and o_nz 4280 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4281 One way to choose d_nz and o_nz is to use the max nonzeros per local 4282 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4283 In this case, the values of d_nz,o_nz are: 4284 .vb 4285 proc0 : d_nz = 2, o_nz = 2 4286 proc1 : d_nz = 3, o_nz = 2 4287 proc2 : d_nz = 1, o_nz = 4 4288 .ve 4289 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4290 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4291 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4292 34 values. 4293 4294 When d_nnz, o_nnz parameters are specified, the storage is specified 4295 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4296 In the above case the values for d_nnz,o_nnz are: 4297 .vb 4298 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4299 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4300 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4301 .ve 4302 Here the space allocated is the sum of all the above values, i.e., 34, and 4303 hence pre-allocation is perfect.
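       For instance, proc1 could create its part of this example matrix with
       the d_nnz/o_nnz values above (a minimal sketch; every process makes the
       collective call with its own local arrays, and error checking is omitted):
.vb
       PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
       Mat      A;
       MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve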
4304 4305 Level: intermediate 4306 4307 .keywords: matrix, aij, compressed row, sparse, parallel 4308 4309 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4310 MPIAIJ, MatCreateMPIAIJWithArrays() 4311 @*/ 4312 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4313 { 4314 PetscErrorCode ierr; 4315 PetscMPIInt size; 4316 4317 PetscFunctionBegin; 4318 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4319 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4321 if (size > 1) { 4322 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4323 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4324 } else { 4325 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4326 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4327 } 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4333 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4334 { 4335 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4336 4337 PetscFunctionBegin; 4338 *Ad = a->A; 4339 *Ao = a->B; 4340 *colmap = a->garray; 4341 PetscFunctionReturn(0); 4342 } 4343 4344 #undef __FUNCT__ 4345 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4346 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4347 { 4348 PetscErrorCode ierr; 4349 PetscInt i; 4350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4351 4352 PetscFunctionBegin; 4353 if (coloring->ctype == IS_COLORING_GLOBAL) { 4354 ISColoringValue *allcolors,*colors; 4355 ISColoring ocoloring; 4356 4357 /* set coloring for diagonal portion */ 4358 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4359 4360 /* set coloring for off-diagonal portion */ 4361 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4362 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4363 for (i=0; i<a->B->cmap->n; i++) { 4364 colors[i] = allcolors[a->garray[i]]; 4365 } 4366 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4367 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4368 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4369 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4370 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4371 ISColoringValue *colors; 4372 PetscInt *larray; 4373 ISColoring ocoloring; 4374 4375 /* set coloring for diagonal portion */ 4376 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4377 for (i=0; i<a->A->cmap->n; i++) { 4378 larray[i] = i + A->cmap->rstart; 4379 } 4380 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4381 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4382 for (i=0; i<a->A->cmap->n; i++) { 4383 colors[i] = coloring->colors[larray[i]]; 4384 } 4385 ierr = PetscFree(larray);CHKERRQ(ierr); 4386 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4387 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4388 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4389 4390 /* set coloring for off-diagonal portion */ 4391 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4392 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4393 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4394 for (i=0; i<a->B->cmap->n; i++) { 4395 colors[i] = coloring->colors[larray[i]]; 4396 } 4397 ierr = PetscFree(larray);CHKERRQ(ierr); 4398 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4399 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4400 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4401 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4402 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4408 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4409 { 4410 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4411 PetscErrorCode ierr; 4412 4413 PetscFunctionBegin; 4414 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4415 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4416 PetscFunctionReturn(0); 4417 } 4418 4419 #undef __FUNCT__ 4420 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4421 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4422 { 4423 PetscErrorCode ierr; 4424 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4425 PetscInt *indx; 4426 4427 PetscFunctionBegin; 4428 /* This routine will ONLY return MPIAIJ type matrix */ 4429 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4430 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4431 if (n == PETSC_DECIDE){ 4432 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4433 } 4434 /* Check sum(n) = N */ 4435 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4436 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4437 4438 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4439 rstart -= m; 4440 4441 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4442 for (i=0;i<m;i++) { 4443 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4444 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4445 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4446 } 4447 4448 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4449 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4450 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4451 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4452 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4453 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4454 PetscFunctionReturn(0); 4455 } 4456 4457 #undef __FUNCT__ 4458 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4459 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4460 { 4461 PetscErrorCode ierr; 4462 PetscInt m,N,i,rstart,nnz,Ii; 4463 PetscInt *indx; 4464 PetscScalar *values; 4465 4466 PetscFunctionBegin; 4467 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4468 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4469 for (i=0;i<m;i++) { 4470 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4471 Ii = i + rstart; 4472 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4473 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large parallel PETSc matrix by concatenating the sequential
                 matrices from each process

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix on EACH process MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
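/*
   A hedged usage sketch for MatCreateMPIAIJConcatenateSeqAIJ() above: each
   process supplies its own SeqAIJ piece and the pieces are stacked by rows.
   The local matrix "seq" below is a placeholder the caller is assumed to
   have assembled already:

     Mat seq,par;

     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... change numerical values of seq, keeping its nonzero pattern ...
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
     ierr = MatDestroy(&par);CHKERRQ(ierr);
*/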
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>
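/*
   MatDestroy_MPIAIJ_SeqsToMPI() above, and MatCreateMPIAIJSumSeqAIJSymbolic()
   below, communicate through a PetscContainer hung off the matrix so that
   the Mat_Merge_SeqsToMPI work structure survives between the symbolic and
   numeric phases.  A hedged sketch of that attach/query pattern on its own,
   where "mydata" and the name "MyDataName" are placeholders:

     PetscContainer container;
     void           *ptr;

     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,mydata);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)A,"MyDataName",(PetscObject)container);CHKERRQ(ierr);
     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

     ierr = PetscObjectQuery((PetscObject)A,"MyDataName",(PetscObject*)&container);CHKERRQ(ierr);
     if (container) {ierr = PetscContainerGetPointer(container,&ptr);CHKERRQ(ierr);}

   The composed object keeps its own reference, so the local container handle
   can be destroyed right after composing it.
*/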
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm = ((PetscObject)mpimat)->comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N = mpimat->cmap->N,i,j,*owners,*ai = a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa = a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*-----------------------------*/
  ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];      /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local nonzero values of this process's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received values into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M = seqmat->rmap->n,N = seqmat->cmap->n,i,*owners,*ai = a->i,*aj = a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble = 0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space = PETSC_NULL,current_space = PETSC_NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*-------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of the k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix on each process */
  /*--------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;
  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local nonzero cols of this process's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each process

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each process MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
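/*
   A hedged usage sketch for MatCreateMPIAIJSumSeqAIJ() above: every process
   holds a SeqAIJ matrix of the same global dimensions, and the parallel
   result is the entrywise sum of all of them.  "seq" is a placeholder for a
   matrix the caller is assumed to have assembled:

     Mat seq,sum;

     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sum);CHKERRQ(ierr);
     ... new values in seq, same nonzero pattern ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&sum);CHKERRQ(ierr);
     ierr = MatDestroy(&sum);CHKERRQ(ierr);
*/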
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a = (Mat_SeqAIJ*)(mpimat->A)->data,*b = (Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai = a->i,*aj = a->j,*bi = b->i,*bj = b->j,*cmap = mpimat->garray;
  MatScalar      *aa = a->a,*ba = b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am = A->rmap->n,i,j,k,cstart = A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
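/*
   A hedged usage sketch for MatMPIAIJGetLocalMat() above; A is assumed to be
   an assembled MPIAIJ matrix:

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the mlocal x N sequential matrix A_loc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call refreshes the numerical values after A changes
   without reallocating the sequential matrix.
*/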
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
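/*
   A hedged usage sketch for MatMPIAIJGetLocalMatCondensed() above; passing
   PETSC_NULL for row and col lets the routine build the index sets itself:

     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc holds the local rows of A restricted to its nonzero columns ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/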
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq = PETSC_NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
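/*
   A hedged usage sketch for MatGetBrowsOfAcols() above, as it might appear
   in a matrix-product kernel: rowb/colb are returned on the first call so a
   later numeric pass can reuse them:

     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... numeric phase: same pattern, new values ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/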
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx = a->Mvctx;
  MPI_Comm               comm = ((PetscObject)ctx)->comm;
  PetscMPIInt            *rprocs,*sprocs,tag = ((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn = a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k   = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
          rowlen[j*sbs+l] = ncols;
          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k   = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

              n           p           p
            (   )       (   )       (   )
          m ( A )  *  n ( B )  =  m ( C )
            (   )       (   )       (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  *C = Cmat;
  PetscFunctionReturn(0);
}
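/*
   A hedged usage sketch of the public entry point that reaches the
   MPIDense*MPIAIJ kernels above; A is assumed to be MPIDENSE and B MPIAIJ
   with compatible sizes, so the dispatch lands in the routines defined in
   this file:

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/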
/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_CLIQUE)
extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
  ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = PETSC_NULL;

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                           "MatGetFactor_aij_mumps",
                                           MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                           "MatGetFactor_mpiaij_pastix",
                                           MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                           "MatGetFactor_mpiaij_superlu_dist",
                                           MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
                                           "MatGetFactor_aij_clique",
                                           MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                           "MatConvert_MPIAIJ_MPIAIJPERM",
                                           MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                           "MatConvert_MPIAIJ_MPIAIJCRL",
                                           MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                           "MatConvert_MPIAIJ_MPISBAIJ",
                                           MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                           "MatMatMult_MPIDense_MPIAIJ",
                                           MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                           "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                           MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                           "MatMatMultNumeric_MPIDense_MPIAIJ",
                                           MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                              PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
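/*
   A hedged usage sketch for MatCreateMPIAIJWithSplitArrays() above.  The CSR
   arrays are illustrative placeholders for one owned row per process, with
   an empty off-diagonal block; the off-diagonal j indices are taken to be
   global column indices, since the routine builds the off-diagonal block
   with the full global column width:

     Mat         A;
     PetscInt    ii[2] = {0,1}, jj[1] = {0};
     PetscScalar aa[1] = {2.0};
     PetscInt    oi[2] = {0,0}, oj[1] = {0};
     PetscScalar oa[1] = {0.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   Per the Notes above, the caller owns ii,jj,aa,oi,oj,oa and may free them
   only after A is destroyed.
*/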
/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined (PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else {
        if (!aij->donotstash) {
          if (roworiented) {
            ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          } else {
            ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END