#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
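/*
   Example: the man pages above recommend calling both preallocation routines so
   the same code runs with one process or many.  A minimal sketch (the sizes and
   per-row nonzero estimates are illustrative assumptions, and a PetscErrorCode
   ierr is assumed in scope):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/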
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = PETSC_NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(((PetscObject)M)->comm,nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
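/*
   Example: the routine above is reached through the public interface.  A minimal
   sketch computing all global column norms of an assembled Mat A (variable names
   are illustrative; the norms array must hold one entry per global column):

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/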
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
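/*
   Usage sketch for the routine above (illustrative only): gmat is assumed to be
   the MATSEQAIJ matrix on process 0, and mlocal a caller-chosen local row count
   whose sum over all processes equals the global size.

     Mat      dist;
     PetscInt mlocal = 10;  /+illustrative; each process's share of the rows+/
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   (The "/+ +/" above stands in for a nested comment, which C does not allow.)
*/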
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
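/*
   Lookup sketch for the map built above (the same pattern appears verbatim in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below).  gcol is a global
   column index and lcol the resulting local off-diagonal index, with -1 meaning
   "not present"; both variants reserve 0 for "absent", hence the +1/-1 shifts:

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}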
"MatSetValuesRow_MPIAIJ" 443 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 444 { 445 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 446 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 447 PetscErrorCode ierr; 448 PetscInt l,*garray = mat->garray,diag; 449 450 PetscFunctionBegin; 451 /* code only works for square matrices A */ 452 453 /* find size of row to the left of the diagonal part */ 454 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 455 row = row - diag; 456 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 457 if (garray[b->j[b->i[row]+l]] > diag) break; 458 } 459 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 460 461 /* diagonal part */ 462 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 463 464 /* right of diagonal part */ 465 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatSetValues_MPIAIJ" 471 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 472 { 473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 474 PetscScalar value; 475 PetscErrorCode ierr; 476 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 477 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 478 PetscBool roworiented = aij->roworiented; 479 480 /* Some Variables required in the macro */ 481 Mat A = aij->A; 482 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 483 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 484 MatScalar *aa = a->a; 485 PetscBool ignorezeroentries = a->ignorezeroentries; 486 Mat B = aij->B; 487 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 488 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 489 MatScalar *ba = b->a; 490 491 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 492 PetscInt nonew; 493 MatScalar *ap1,*ap2; 494 495 PetscFunctionBegin; 496 if (v) PetscValidScalarPointer(v,6); 497 for (i=0; i<m; i++) { 498 if (im[i] < 0) continue; 499 #if defined(PETSC_USE_DEBUG) 500 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 501 #endif 502 if (im[i] >= rstart && im[i] < rend) { 503 row = im[i] - rstart; 504 lastcol1 = -1; 505 rp1 = aj + ai[row]; 506 ap1 = aa + ai[row]; 507 rmax1 = aimax[row]; 508 nrow1 = ailen[row]; 509 low1 = 0; 510 high1 = nrow1; 511 lastcol2 = -1; 512 rp2 = bj + bi[row]; 513 ap2 = ba + bi[row]; 514 rmax2 = bimax[row]; 515 nrow2 = bilen[row]; 516 low2 = 0; 517 high2 = nrow2; 518 519 for (j=0; j<n; j++) { 520 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 521 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 522 if (in[j] >= cstart && in[j] < cend){ 523 col = in[j] - cstart; 524 nonew = a->nonew; 525 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 526 } else if (in[j] < 0) continue; 527 #if defined(PETSC_USE_DEBUG) 528 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 529 #endif 530 else { 531 if (mat->was_assembled) { 532 if (!aij->colmap) { 533 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 534 } 535 #if defined (PETSC_USE_CTABLE) 536 
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
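/*
   Example: entries are set with global indices regardless of ownership; anything
   destined for another process is stashed and shipped at assembly time.  A
   minimal sketch (indices and values are illustrative; Mat A and PetscErrorCode
   ierr are assumed in scope):

     PetscInt    row = 0,cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/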
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
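/*
   Note: because the stash scatter is started here and only completed in
   MatAssemblyEnd_MPIAIJ(), a caller can overlap unrelated computation with the
   communication.  A minimal sketch:

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ... unrelated local computation here, while stash messages are in flight ...
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/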
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length */
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and slow things down with many mallocs.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
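/*
   Example: callers reach the routine above through MatZeroRows(); rows are given
   with global indices and may be owned by other processes.  An illustrative
   sketch that zeros two global rows, puts 1.0 on their diagonals, and fixes the
   right hand side b from a solution vector x (all names assumed in scope):

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/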
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j    = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
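/*
   Example: as with MatZeroRows(), the public entry point for the routine above
   is MatZeroRowsColumns(), and rows are global indices.  An illustrative sketch
   that zeros both the listed rows and columns, keeping a unit diagonal:

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRowsColumns(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/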
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
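/*
   Example: checking whether Bmat equals Amat transposed, to a tolerance, through
   the public interface (the matrices and the tolerance are illustrative):

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
*/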
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest any process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1489 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1490 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1491 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1492 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1495 PetscInt inodecount,inodelimit,*inodes; 1496 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1497 if (inodes) { 1498 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1499 } else { 1500 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1504 PetscFunctionReturn(0); 1505 } 1506 } else if (isbinary) { 1507 if (size == 1) { 1508 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1509 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1510 } else { 1511 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } else if (isdraw) { 1515 PetscDraw draw; 1516 PetscBool isnull; 1517 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1518 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1519 } 1520 1521 if (size == 1) { 1522 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1523 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1524 } else { 1525 /* assemble the entire matrix onto first processor. 
    */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr =
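  /* Viewer dispatch: ASCII, draw, socket and binary viewers are all handled by
     MatView_MPIAIJ_ASCIIorDraworSocket(); any other viewer type is an error. The
     common case from user code is simply (a sketch):

       ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  */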
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1605 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1606 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1607 if (iascii || isdraw || isbinary || issocket) { 1608 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1609 } else { 1610 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatSOR_MPIAIJ" 1617 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1618 { 1619 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1620 PetscErrorCode ierr; 1621 Vec bb1 = 0; 1622 PetscBool hasop; 1623 1624 PetscFunctionBegin; 1625 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1626 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1627 } 1628 1629 if (flag == SOR_APPLY_UPPER) { 1630 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 1634 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 1640 while (its--) { 1641 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 1644 /* update rhs: bb1 = bb - B*x */ 1645 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1646 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1647 1648 /* local sweep */ 1649 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1650 } 1651 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1652 if (flag & SOR_ZERO_INITIAL_GUESS) { 1653 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1654 its--; 1655 } 1656 while (its--) { 1657 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 1660 /* update rhs: bb1 = bb - B*x */ 1661 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1662 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1663 1664 /* local sweep */ 1665 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1666 } 1667 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1668 if (flag & SOR_ZERO_INITIAL_GUESS) { 1669 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1670 its--; 1671 } 1672 while (its--) { 1673 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1675 1676 /* update rhs: bb1 = bb - B*x */ 1677 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1678 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1679 1680 /* local sweep */ 1681 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1682 } 1683 } else if (flag & SOR_EISENSTAT) { 1684 Vec xx1; 1685 1686 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1687 ierr = 
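  /* Eisenstat's trick: a half (backward) sweep below produces the starting iterate,
     the diagonally scaled contribution is formed while the halo values of xx travel,
     and a final half (forward) sweep is applied to the updated right-hand side bb1.
     A caller reaches this branch with, e.g. (a sketch; omega/fshift values are
     illustrative only):

       ierr = MatSOR(A,b,1.0,SOR_EISENSTAT,0.0,1,1,x);CHKERRQ(ierr);
  */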
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1688 1689 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1690 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1691 if (!mat->diag) { 1692 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1693 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1694 } 1695 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1696 if (hasop) { 1697 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1698 } else { 1699 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1700 } 1701 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1702 1703 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1704 1705 /* local sweep */ 1706 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1707 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1708 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1709 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1710 1711 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatPermute_MPIAIJ" 1717 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1718 { 1719 MPI_Comm comm; 1720 PetscInt first,local_rowsize,local_colsize; 1721 const PetscInt *rows; 1722 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1727 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1728 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1729 /* collect the global row permutation and invert it */ 1730 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1731 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1732 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1733 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1734 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1735 /* get the local target indices */ 1736 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1737 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1738 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1739 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1740 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1741 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1742 /* the column permutation is so much easier; 1743 make a local version of 'colp' and invert it */ 1744 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1745 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1746 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1747 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1748 /* now we just get the submatrix */ 1749 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1750 /* clean up */ 1751 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1752 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1758 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1759 { 1760 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1761 Mat A = mat->A,B = 
mat->B; 1762 PetscErrorCode ierr; 1763 PetscReal isend[5],irecv[5]; 1764 1765 PetscFunctionBegin; 1766 info->block_size = 1.0; 1767 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1768 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1769 isend[3] = info->memory; isend[4] = info->mallocs; 1770 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1771 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1772 isend[3] += info->memory; isend[4] += info->mallocs; 1773 if (flag == MAT_LOCAL) { 1774 info->nz_used = isend[0]; 1775 info->nz_allocated = isend[1]; 1776 info->nz_unneeded = isend[2]; 1777 info->memory = isend[3]; 1778 info->mallocs = isend[4]; 1779 } else if (flag == MAT_GLOBAL_MAX) { 1780 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1781 info->nz_used = irecv[0]; 1782 info->nz_allocated = irecv[1]; 1783 info->nz_unneeded = irecv[2]; 1784 info->memory = irecv[3]; 1785 info->mallocs = irecv[4]; 1786 } else if (flag == MAT_GLOBAL_SUM) { 1787 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1788 info->nz_used = irecv[0]; 1789 info->nz_allocated = irecv[1]; 1790 info->nz_unneeded = irecv[2]; 1791 info->memory = irecv[3]; 1792 info->mallocs = irecv[4]; 1793 } 1794 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1795 info->fill_ratio_needed = 0; 1796 info->factor_mallocs = 0; 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatSetOption_MPIAIJ" 1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1803 { 1804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 switch (op) { 1809 case MAT_NEW_NONZERO_LOCATIONS: 1810 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1811 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1812 case MAT_KEEP_NONZERO_PATTERN: 1813 case MAT_NEW_NONZERO_LOCATION_ERR: 1814 case MAT_USE_INODES: 1815 case MAT_IGNORE_ZERO_ENTRIES: 1816 MatCheckPreallocated(A,1); 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_ROW_ORIENTED: 1821 a->roworiented = flg; 1822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_NEW_DIAGONALS: 1826 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1827 break; 1828 case MAT_IGNORE_OFF_PROC_ENTRIES: 1829 a->donotstash = flg; 1830 break; 1831 case MAT_SPD: 1832 A->spd_set = PETSC_TRUE; 1833 A->spd = flg; 1834 if (flg) { 1835 A->symmetric = PETSC_TRUE; 1836 A->structurally_symmetric = PETSC_TRUE; 1837 A->symmetric_set = PETSC_TRUE; 1838 A->structurally_symmetric_set = PETSC_TRUE; 1839 } 1840 break; 1841 case MAT_SYMMETRIC: 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 case MAT_STRUCTURALLY_SYMMETRIC: 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 case MAT_HERMITIAN: 1848 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1849 break; 1850 case MAT_SYMMETRY_ETERNAL: 1851 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1852 break; 1853 default: 1854 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1855 } 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #undef __FUNCT__ 1860 #define __FUNCT__ "MatGetRow_MPIAIJ" 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && (idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) { max = tmp; } 1883 } 1884 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1942 { 1943 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1944 1945 PetscFunctionBegin; 1946 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1947 aij->getrowactive = PETSC_FALSE; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatNorm_MPIAIJ" 1953 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1954 { 1955 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1956 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1957 PetscErrorCode ierr; 1958 PetscInt i,j,cstart = mat->cmap->rstart; 1959 PetscReal sum = 0.0; 1960 MatScalar *v; 1961 1962 PetscFunctionBegin; 1963 if (aij->size == 1) { 1964 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1965 } 
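  /* In parallel the norms are assembled from the two local blocks: Frobenius sums
     |a_ij|^2 over both A and B and reduces with MPIU_SUM; the 1-norm accumulates
     per-global-column absolute sums and takes the largest; the infinity norm takes
     the largest local row sum and reduces with MPIU_MAX. Typical usage (sketch):

       PetscReal nrm;
       ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
  */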
else { 1966 if (type == NORM_FROBENIUS) { 1967 v = amat->a; 1968 for (i=0; i<amat->nz; i++) { 1969 #if defined(PETSC_USE_COMPLEX) 1970 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1971 #else 1972 sum += (*v)*(*v); v++; 1973 #endif 1974 } 1975 v = bmat->a; 1976 for (i=0; i<bmat->nz; i++) { 1977 #if defined(PETSC_USE_COMPLEX) 1978 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1979 #else 1980 sum += (*v)*(*v); v++; 1981 #endif 1982 } 1983 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1984 *norm = PetscSqrtReal(*norm); 1985 } else if (type == NORM_1) { /* max column norm */ 1986 PetscReal *tmp,*tmp2; 1987 PetscInt *jj,*garray = aij->garray; 1988 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1989 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1990 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1991 *norm = 0.0; 1992 v = amat->a; jj = amat->j; 1993 for (j=0; j<amat->nz; j++) { 1994 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1995 } 1996 v = bmat->a; jj = bmat->j; 1997 for (j=0; j<bmat->nz; j++) { 1998 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1999 } 2000 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2001 for (j=0; j<mat->cmap->N; j++) { 2002 if (tmp2[j] > *norm) *norm = tmp2[j]; 2003 } 2004 ierr = PetscFree(tmp);CHKERRQ(ierr); 2005 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2006 } else if (type == NORM_INFINITY) { /* max row norm */ 2007 PetscReal ntemp = 0.0; 2008 for (j=0; j<aij->A->rmap->n; j++) { 2009 v = amat->a + amat->i[j]; 2010 sum = 0.0; 2011 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2012 sum += PetscAbsScalar(*v); v++; 2013 } 2014 v = bmat->a + bmat->i[j]; 2015 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2016 sum += PetscAbsScalar(*v); v++; 2017 } 2018 if (sum > ntemp) ntemp = sum; 2019 } 2020 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2021 } else { 2022 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2023 } 2024 } 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #undef __FUNCT__ 2029 #define __FUNCT__ "MatTranspose_MPIAIJ" 2030 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2031 { 2032 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2033 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2034 PetscErrorCode ierr; 2035 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2036 PetscInt cstart=A->cmap->rstart,ncol; 2037 Mat B; 2038 MatScalar *array; 2039 2040 PetscFunctionBegin; 2041 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2042 2043 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2044 ai = Aloc->i; aj = Aloc->j; 2045 bi = Bloc->i; bj = Bloc->j; 2046 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2047 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2048 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2049 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2050 for (i=0; i<ai[ma]; i++){ 2051 d_nnz[aj[i]] ++; 2052 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2053 } 2054 2055 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2056 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2057 ierr = 
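  /* Preallocation for the transpose: d_nnz counts, per local column of A, the entries
     of the local diagonal block; the same array is reused as the off-diagonal estimate
     to avoid a round of communication, so MAT_NEW_NONZERO_ALLOCATION_ERR is disabled
     below. Typical usage (sketch):

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  */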
MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2058 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2059 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2061 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2062 } else { 2063 B = *matout; 2064 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2065 for (i=0; i<ai[ma]; i++){ 2066 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2067 } 2068 } 2069 2070 /* copy over the A part */ 2071 array = Aloc->a; 2072 row = A->rmap->rstart; 2073 for (i=0; i<ma; i++) { 2074 ncol = ai[i+1]-ai[i]; 2075 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2076 row++; array += ncol; aj += ncol; 2077 } 2078 aj = Aloc->j; 2079 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2080 2081 /* copy over the B part */ 2082 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2083 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2084 array = Bloc->a; 2085 row = A->rmap->rstart; 2086 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2087 cols_tmp = cols; 2088 for (i=0; i<mb; i++) { 2089 ncol = bi[i+1]-bi[i]; 2090 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2091 row++; array += ncol; cols_tmp += ncol; 2092 } 2093 ierr = PetscFree(cols);CHKERRQ(ierr); 2094 2095 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2098 *matout = B; 2099 } else { 2100 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2107 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2108 { 2109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2110 Mat a = aij->A,b = aij->B; 2111 PetscErrorCode ierr; 2112 PetscInt s1,s2,s3; 2113 2114 PetscFunctionBegin; 2115 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2116 if (rr) { 2117 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2118 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2119 /* Overlap communication with computation. 
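        The scatter of rr into lvec is only begun here; the left scaling and the purely
        local diagonal block are processed while the messages are in flight, and the
        scatter is completed just before the off-diagonal block is right-scaled.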
*/ 2120 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 } 2122 if (ll) { 2123 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2124 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2125 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2126 } 2127 /* scale the diagonal block */ 2128 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2129 2130 if (rr) { 2131 /* Do a scatter end and then right scale the off-diagonal block */ 2132 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2134 } 2135 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatEqual_MPIAIJ" 2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2154 { 2155 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2156 Mat a,b,c,d; 2157 PetscBool flg; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 a = matA->A; b = matA->B; 2162 c = matB->A; d = matB->B; 2163 2164 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2165 if (flg) { 2166 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2167 } 2168 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2169 PetscFunctionReturn(0); 2170 } 2171 2172 #undef __FUNCT__ 2173 #define __FUNCT__ "MatCopy_MPIAIJ" 2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2175 { 2176 PetscErrorCode ierr; 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2179 2180 PetscFunctionBegin; 2181 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2182 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2183 /* because of the column compression in the off-processor part of the matrix a->B, 2184 the number of columns in a->B and b->B may be different, hence we cannot call 2185 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2186 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2187 then copying the submatrices */ 2188 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2189 } else { 2190 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2191 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatSetUp_MPIAIJ" 2198 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2199 { 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 #undef __FUNCT__ 2208 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2209 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2210 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2211 { 2212 PetscInt i,m=Y->rmap->N; 2213 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2214 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2215 const PetscInt *xi = x->i,*yi = y->i; 2216 2217 PetscFunctionBegin; 2218 /* Set the number of nonzeros in the new matrix */ 2219 for(i=0; i<m; i++) { 2220 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2221 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2222 nnz[i] = 0; 2223 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2224 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2225 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2226 nnz[i]++; 2227 } 2228 for (; k<nzy; k++) nnz[i]++; 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatAXPY_MPIAIJ" 2235 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 PetscInt i; 2239 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2240 PetscBLASInt bnz,one=1; 2241 Mat_SeqAIJ *x,*y; 2242 2243 PetscFunctionBegin; 2244 if (str == SAME_NONZERO_PATTERN) { 2245 PetscScalar alpha = a; 2246 x = (Mat_SeqAIJ *)xx->A->data; 2247 y = (Mat_SeqAIJ *)yy->A->data; 2248 bnz = PetscBLASIntCast(x->nz); 2249 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2250 x = (Mat_SeqAIJ *)xx->B->data; 2251 y = (Mat_SeqAIJ *)yy->B->data; 2252 bnz = PetscBLASIntCast(x->nz); 2253 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2254 } else if (str == SUBSET_NONZERO_PATTERN) { 2255 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2256 2257 x = (Mat_SeqAIJ *)xx->B->data; 2258 y = (Mat_SeqAIJ *)yy->B->data; 2259 if (y->xtoy && y->XtoY != xx->B) { 2260 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2261 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2262 } 2263 if (!y->xtoy) { /* get xtoy */ 2264 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2265 y->XtoY = xx->B; 2266 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2267 } 2268 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2269 } else { 2270 Mat B; 2271 PetscInt *nnz_d,*nnz_o; 2272 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2273 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2274 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2275 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2276 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2277 ierr = 
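  /* DIFFERENT_NONZERO_PATTERN: build a new matrix B preallocated for the union of the
     two patterns (merged row by row in MatAXPYGetPreallocation_*), compute Y + a*X
     into it, and then replace Y's header so the caller's handle survives. Typical
     usage (sketch):

       ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  */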
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_PBGL

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
  }

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2374 lgraph_type& level_graph = *lgraph_p; 2375 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2376 2377 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2378 ilu_permuted(level_graph); 2379 2380 /* put together the new matrix */ 2381 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2382 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2383 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2384 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2385 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2386 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2387 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 2390 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2391 ierr = PetscContainerSetPointer(c, lgraph_p); 2392 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2393 ierr = PetscContainerDestroy(&c); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 #undef __FUNCT__ 2398 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2399 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2400 { 2401 PetscFunctionBegin; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatSolve_MPIAIJ" 2407 /* 2408 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2409 */ 2410 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2411 { 2412 namespace graph_dist = boost::graph::distributed; 2413 2414 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2415 lgraph_type* lgraph_p; 2416 PetscContainer c; 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2421 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2422 ierr = VecCopy(b, x);CHKERRQ(ierr); 2423 2424 PetscScalar* array_x; 2425 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2426 PetscInt sx; 2427 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2428 2429 PetscScalar* array_b; 2430 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2431 PetscInt sb; 2432 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2433 2434 lgraph_type& level_graph = *lgraph_p; 2435 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2436 2437 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2438 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2439 ref_x(array_x, boost::extents[num_vertices(graph)]); 2440 2441 typedef boost::iterator_property_map<array_ref_type::iterator, 2442 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2443 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2444 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2445 2446 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2447 2448 PetscFunctionReturn(0); 2449 } 2450 #endif 2451 2452 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2453 PetscInt nzlocal,nsends,nrecvs; 2454 PetscMPIInt *send_rank,*recv_rank; 2455 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2456 PetscScalar *sbuf_a,**rbuf_a; 2457 PetscErrorCode (*Destroy)(Mat); 2458 } Mat_Redundant; 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2462 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2463 { 2464 PetscErrorCode ierr; 2465 Mat_Redundant 
*redund=(Mat_Redundant*)ptr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
  ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
  for (i=0; i<redund->nrecvs; i++){
    ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
    ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
  ierr = PetscFree(redund);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  PetscContainer container;
  Mat_Redundant  *redund=PETSC_NULL;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
  ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
  A->ops->destroy = redund->Destroy;
  ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
  if (A->ops->destroy) {
    ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
    ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank;
    recv_rank = redund->recv_rank;
    sbuf_nz   = redund->sbuf_nz;
    rbuf_nz   = redund->rbuf_nz;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt subrank,subsize;
    PetscInt    nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++){ /* i = rank */
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){ /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i +
rstart; 2637 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2638 ncols = nzA + nzB; 2639 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2640 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2641 lwrite = 0; 2642 for (l=0; l<nzB; l++) { 2643 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2644 } 2645 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2646 for (l=0; l<nzB; l++) { 2647 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2648 } 2649 vals += ncols; 2650 rptr[i+1] = rptr[i] + ncols; 2651 } 2652 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2653 2654 /* send nzlocal to others, and recv other's nzlocal */ 2655 /*--------------------------------------------------*/ 2656 if (reuse == MAT_INITIAL_MATRIX){ 2657 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2658 s_waits2 = s_waits3 + nsends; 2659 s_waits1 = s_waits2 + nsends; 2660 r_waits1 = s_waits1 + nsends; 2661 r_waits2 = r_waits1 + nrecvs; 2662 r_waits3 = r_waits2 + nrecvs; 2663 } else { 2664 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2665 r_waits3 = s_waits3 + nsends; 2666 } 2667 2668 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2669 if (reuse == MAT_INITIAL_MATRIX){ 2670 /* get new tags to keep the communication clean */ 2671 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2672 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2673 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2674 2675 /* post receives of other's nzlocal */ 2676 for (i=0; i<nrecvs; i++){ 2677 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2678 } 2679 /* send nzlocal to others */ 2680 for (i=0; i<nsends; i++){ 2681 sbuf_nz[i] = nzlocal; 2682 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2683 } 2684 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2685 count = nrecvs; 2686 while (count) { 2687 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2688 recv_rank[imdex] = recv_status.MPI_SOURCE; 2689 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2690 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2691 2692 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2693 rbuf_nz[imdex] += i + 2; 2694 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2695 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2696 count--; 2697 } 2698 /* wait on sends of nzlocal */ 2699 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2700 /* send mat->i,j to others, and recv from other's */ 2701 /*------------------------------------------------*/ 2702 for (i=0; i<nsends; i++){ 2703 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2704 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2705 } 2706 /* wait on receives of mat->i,j */ 2707 /*------------------------------*/ 2708 count = nrecvs; 2709 while (count) { 2710 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2711 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2712 count--; 2713 } 2714 /* wait on sends of mat->i,j */ 2715 /*---------------------------*/ 2716 if (nsends) { 2717 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2718 } 2719 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2720 2721 /* post receives, send and receive mat->a */ 2722 /*----------------------------------------*/ 2723 for (imdex=0; imdex<nrecvs; imdex++) { 2724 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2725 } 2726 for (i=0; i<nsends; i++){ 2727 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2728 } 2729 count = nrecvs; 2730 while (count) { 2731 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2732 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2733 count--; 2734 } 2735 if (nsends) { 2736 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2737 } 2738 2739 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2740 2741 /* create redundant matrix */ 2742 /*-------------------------*/ 2743 if (reuse == MAT_INITIAL_MATRIX){ 2744 /* compute rownz_max for preallocation */ 2745 for (imdex=0; imdex<nrecvs; imdex++){ 2746 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2747 rptr = rbuf_j[imdex]; 2748 for (i=0; i<j; i++){ 2749 ncols = rptr[i+1] - rptr[i]; 2750 if (rownz_max < ncols) rownz_max = ncols; 2751 } 2752 } 2753 2754 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2755 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2756 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2757 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2758 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2759 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2760 } else { 2761 C = *matredundant; 2762 } 2763 2764 /* insert local matrix entries */ 2765 rptr = sbuf_j; 2766 cols = sbuf_j + rend-rstart + 1; 2767 vals = sbuf_a; 2768 for (i=0; i<rend-rstart; i++){ 2769 row = i + rstart; 2770 ncols = rptr[i+1] - rptr[i]; 2771 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2772 vals += ncols; 2773 cols += ncols; 2774 } 2775 /* insert received matrix entries */ 2776 for (imdex=0; imdex<nrecvs; imdex++){ 2777 rstart = rowrange[recv_rank[imdex]]; 2778 rend = rowrange[recv_rank[imdex]+1]; 2779 rptr = rbuf_j[imdex]; 2780 cols = rbuf_j[imdex] + rend-rstart + 1; 2781 vals = rbuf_a[imdex]; 2782 for (i=0; i<rend-rstart; i++){ 2783 row = i + rstart; 2784 ncols = rptr[i+1] - rptr[i]; 2785 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2786 vals += ncols; 2787 cols += ncols; 2788 } 2789 } 2790 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2793 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2794 if (reuse == MAT_INITIAL_MATRIX) { 2795 PetscContainer container; 2796 *matredundant = C; 2797 /* create a supporting struct and attach it to C for reuse */ 2798 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2799 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
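  /* The send/receive buffers are stashed in a PetscContainer composed with C, and
     C->ops->destroy is redirected (below) to MatDestroy_MatRedundant() so the buffers
     are freed together with the matrix. On later calls the same buffers are recovered,
     so only the numerical values need to be re-communicated, e.g. (a sketch using this
     function's own arguments):

       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  */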
  ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;

    redund->Destroy = C->ops->destroy;
    C->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* was A->cmap->n; v has only rmap->n local entries */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
  PetscInt   n = A->rmap->n;
  PetscInt   cstart =
A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  /* was ((PetscObject)A)->comm: VECSEQ work vectors must live on a single-process communicator */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for(r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
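/* Only the diagonal block contributes to the (point-)block diagonal, so the
   sequential kernel on a->A below does all the work; this is what point-block Jacobi
   (PCPBJACOBI) relies on. Typical usage (sketch):

     const PetscScalar *vals;
     ierr = MatInvertBlockDiagonal(A,&vals);CHKERRQ(ierr);
*/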
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3003 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3004 { 3005 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3006 PetscErrorCode ierr; 3007 3008 PetscFunctionBegin; 3009 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 3014 /* -------------------------------------------------------------------*/ 3015 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3016 MatGetRow_MPIAIJ, 3017 MatRestoreRow_MPIAIJ, 3018 MatMult_MPIAIJ, 3019 /* 4*/ MatMultAdd_MPIAIJ, 3020 MatMultTranspose_MPIAIJ, 3021 MatMultTransposeAdd_MPIAIJ, 3022 #ifdef PETSC_HAVE_PBGL 3023 MatSolve_MPIAIJ, 3024 #else 3025 0, 3026 #endif 3027 0, 3028 0, 3029 /*10*/ 0, 3030 0, 3031 0, 3032 MatSOR_MPIAIJ, 3033 MatTranspose_MPIAIJ, 3034 /*15*/ MatGetInfo_MPIAIJ, 3035 MatEqual_MPIAIJ, 3036 MatGetDiagonal_MPIAIJ, 3037 MatDiagonalScale_MPIAIJ, 3038 MatNorm_MPIAIJ, 3039 /*20*/ MatAssemblyBegin_MPIAIJ, 3040 MatAssemblyEnd_MPIAIJ, 3041 MatSetOption_MPIAIJ, 3042 MatZeroEntries_MPIAIJ, 3043 /*24*/ MatZeroRows_MPIAIJ, 3044 0, 3045 #ifdef PETSC_HAVE_PBGL 3046 0, 3047 #else 3048 0, 3049 #endif 3050 0, 3051 0, 3052 /*29*/ MatSetUp_MPIAIJ, 3053 #ifdef PETSC_HAVE_PBGL 3054 0, 3055 #else 3056 0, 3057 #endif 3058 0, 3059 0, 3060 0, 3061 /*34*/ MatDuplicate_MPIAIJ, 3062 0, 3063 0, 3064 0, 3065 0, 3066 /*39*/ MatAXPY_MPIAIJ, 3067 MatGetSubMatrices_MPIAIJ, 3068 MatIncreaseOverlap_MPIAIJ, 3069 MatGetValues_MPIAIJ, 3070 MatCopy_MPIAIJ, 3071 /*44*/ MatGetRowMax_MPIAIJ, 3072 MatScale_MPIAIJ, 3073 0, 3074 0, 3075 MatZeroRowsColumns_MPIAIJ, 3076 /*49*/ 0, 3077 0, 3078 0, 3079 0, 3080 0, 3081 /*54*/ MatFDColoringCreate_MPIAIJ, 3082 0, 3083 MatSetUnfactored_MPIAIJ, 3084 0, /* MatPermute_MPIAIJ, impl currently broken */ 3085 0, 3086 /*59*/ MatGetSubMatrix_MPIAIJ, 3087 MatDestroy_MPIAIJ, 3088 MatView_MPIAIJ, 3089 0, 3090 0, 3091 /*64*/ 0, 3092 0, 3093 0, 3094 0, 3095 0, 3096 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3097 MatGetRowMinAbs_MPIAIJ, 3098 0, 3099 MatSetColoring_MPIAIJ, 3100 #if defined(PETSC_HAVE_ADIC) 3101 MatSetValuesAdic_MPIAIJ, 3102 #else 3103 0, 3104 #endif 3105 MatSetValuesAdifor_MPIAIJ, 3106 /*75*/ MatFDColoringApply_AIJ, 3107 0, 3108 0, 3109 0, 3110 MatFindZeroDiagonals_MPIAIJ, 3111 /*80*/ 0, 3112 0, 3113 0, 3114 /*83*/ MatLoad_MPIAIJ, 3115 0, 3116 0, 3117 0, 3118 0, 3119 0, 3120 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3121 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3122 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3123 MatPtAP_Basic, 3124 MatPtAPSymbolic_MPIAIJ, 3125 /*94*/ MatPtAPNumeric_MPIAIJ, 3126 0, 3127 0, 3128 0, 3129 0, 3130 /*99*/ 0, 3131 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3132 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3133 MatConjugate_MPIAIJ, 3134 0, 3135 /*104*/MatSetValuesRow_MPIAIJ, 3136 MatRealPart_MPIAIJ, 3137 MatImaginaryPart_MPIAIJ, 3138 0, 3139 0, 3140 /*109*/0, 3141 MatGetRedundantMatrix_MPIAIJ, 3142 MatGetRowMin_MPIAIJ, 3143 0, 3144 0, 3145 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 /*119*/0, 3151 0, 3152 0, 3153 0, 3154 MatGetMultiProcBlock_MPIAIJ, 3155 /*124*/MatFindNonzeroRows_MPIAIJ, 3156 MatGetColumnNorms_MPIAIJ, 3157 MatInvertBlockDiagonal_MPIAIJ, 3158 0, 3159 MatGetSubMatricesParallel_MPIAIJ, 3160 /*129*/0, 3161 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3162 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3163 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3164 0, 3165 /*134*/0, 3166 0, 3167 0, 3168 0, 3169 0 3170 }; 3171 3172 /* 
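   The table above is the MATMPIAIJ virtual function table: every generic
   Mat operation is dispatched through one of these slots (the slot indices
   appear in the comments). As an illustrative sketch, a call such as

      ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);

   reduces, after argument checking, to an indirect call through the table,
   roughly (*A->ops->getrowmaxabs)(A,v,idx). A zero entry means the
   operation is not implemented for this matrix type.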
----------------------------------------------------------------------------------------*/ 3173 3174 EXTERN_C_BEGIN 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3177 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3178 { 3179 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3180 PetscErrorCode ierr; 3181 3182 PetscFunctionBegin; 3183 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3184 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3185 PetscFunctionReturn(0); 3186 } 3187 EXTERN_C_END 3188 3189 EXTERN_C_BEGIN 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3192 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3193 { 3194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3199 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3200 PetscFunctionReturn(0); 3201 } 3202 EXTERN_C_END 3203 3204 EXTERN_C_BEGIN 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3207 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3208 { 3209 Mat_MPIAIJ *b; 3210 PetscErrorCode ierr; 3211 PetscInt i; 3212 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3213 3214 PetscFunctionBegin; 3215 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3216 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3217 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3218 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3219 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3220 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3221 3222 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3223 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3224 if (d_nnz) { 3225 for (i=0; i<B->rmap->n; i++) { 3226 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3227 } 3228 } 3229 if (o_nnz) { 3230 for (i=0; i<B->rmap->n; i++) { 3231 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3232 } 3233 } 3234 b = (Mat_MPIAIJ*)B->data; 3235 3236 if (!B->preallocated) { 3237 /* Explicitly create 2 MATSEQAIJ matrices. */ 3238 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3239 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3240 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3241 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3242 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3243 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3244 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3246 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3247 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3248 } 3249 3250 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3251 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3252 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
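      For illustration (hypothetical call; B is an MPIAIJ matrix being set up),
      a user who supplies no real preallocation information, e.g.

         ierr = MatMPIAIJSetPreallocation(B,PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr);

      gets the defaults chosen above (5 nonzeros per diagonal-block row, 2 per
      off-diagonal-block row) and, because d_realalloc/o_realalloc remain false,
      MAT_NEW_NONZERO_ALLOCATION_ERR is turned off below, so assembly still
      succeeds, only more slowly.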
*/ 3253 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3254 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3255 B->preallocated = PETSC_TRUE; 3256 PetscFunctionReturn(0); 3257 } 3258 EXTERN_C_END 3259 3260 #undef __FUNCT__ 3261 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3262 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3263 { 3264 Mat mat; 3265 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3266 PetscErrorCode ierr; 3267 3268 PetscFunctionBegin; 3269 *newmat = 0; 3270 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3271 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3272 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3273 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3274 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3275 a = (Mat_MPIAIJ*)mat->data; 3276 3277 mat->factortype = matin->factortype; 3278 mat->rmap->bs = matin->rmap->bs; 3279 mat->cmap->bs = matin->cmap->bs; 3280 mat->assembled = PETSC_TRUE; 3281 mat->insertmode = NOT_SET_VALUES; 3282 mat->preallocated = PETSC_TRUE; 3283 3284 a->size = oldmat->size; 3285 a->rank = oldmat->rank; 3286 a->donotstash = oldmat->donotstash; 3287 a->roworiented = oldmat->roworiented; 3288 a->rowindices = 0; 3289 a->rowvalues = 0; 3290 a->getrowactive = PETSC_FALSE; 3291 3292 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3293 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3294 3295 if (oldmat->colmap) { 3296 #if defined (PETSC_USE_CTABLE) 3297 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3298 #else 3299 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3300 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3302 #endif 3303 } else a->colmap = 0; 3304 if (oldmat->garray) { 3305 PetscInt len; 3306 len = oldmat->B->cmap->n; 3307 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3308 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3309 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3310 } else a->garray = 0; 3311 3312 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3314 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3316 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3318 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3319 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3320 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3321 *newmat = mat; 3322 PetscFunctionReturn(0); 3323 } 3324 3325 3326 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatLoad_MPIAIJ" 3329 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3330 { 3331 PetscScalar *vals,*svals; 3332 MPI_Comm comm = ((PetscObject)viewer)->comm; 3333 PetscErrorCode ierr; 3334 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3335 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3336 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols;
3337  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3338  PetscInt       cend,cstart,n,*rowners,sizesset=1;
3339  int            fd;
3340  PetscInt       bs = 1;
3341
3342  PetscFunctionBegin;
3343  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3344  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3345  if (!rank) {
3346    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3347    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3348    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3349  }
3350
3351  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3352  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3353  ierr = PetscOptionsEnd();CHKERRQ(ierr);
3354
3355  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3356
3357  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3358  M = header[1]; N = header[2];
3359  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3360  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3361  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3362
3363  /* If global sizes are set, check that they are consistent with those given in the file */
3364  if (sizesset) {
3365    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3366  }
3367  if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3368  if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3369
3370  /* determine ownership of all (block) rows */
3371  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3372  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3373  else m = newMat->rmap->n; /* Set by user */
3374
3375  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3376  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3377
3378  /* First process needs enough room for process with most rows */
3379  if (!rank) {
3380    mmax = rowners[1];
3381    for (i=2; i<=size; i++) {
3382      mmax = PetscMax(mmax,rowners[i]);
3383    }
3384  } else mmax = m;
3385
3386  rowners[0] = 0;
3387  for (i=2; i<=size; i++) {
3388    rowners[i] += rowners[i-1];
3389  }
3390  rstart = rowners[rank];
3391  rend   = rowners[rank+1];
3392
3393  /* distribute row lengths to all processors */
3394  ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3395  if (!rank) {
3396    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3397    ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3398    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3399    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3400    for (j=0; j<m; j++) {
3401      procsnz[0] += ourlens[j];
3402    }
3403    for (i=1; i<size; i++) {
3404      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3405      /* calculate the number of nonzeros on each processor */
3406      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3407        procsnz[i] += rowlengths[j];
3408      }
3409      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3410    }
3411    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412  } else {
3413    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3414  }
3415
3416  if (!rank) {
3417    /* determine max buffer needed and allocate it */
3418    maxnz = 0;
3419    for (i=0; i<size; i++) {
3420      maxnz = PetscMax(maxnz,procsnz[i]);
3421    }
3422    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3423
3424    /* read in my part of the matrix column indices */
3425    nz   = procsnz[0];
3426    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3427    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3428
3429    /* read in everyone else's part and ship it off */
3430    for (i=1; i<size; i++) {
3431      nz   = procsnz[i];
3432      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3433      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3434    }
3435    ierr = PetscFree(cols);CHKERRQ(ierr);
3436  } else {
3437    /* determine buffer space needed for message */
3438    nz = 0;
3439    for (i=0; i<m; i++) {
3440      nz += ourlens[i];
3441    }
3442    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3443
3444    /* receive message of column indices */
3445    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446  }
3447
3448  /* determine column ownership if matrix is not square */
3449  if (N != M) {
3450    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3451    else n = newMat->cmap->n;
3452    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3453    cstart = cend - n;
3454  } else {
3455    cstart = rstart;
3456    cend   = rend;
3457    n      = cend - cstart;
3458  }
3459
3460  /* loop over local rows, determining number of off-diagonal entries */
3461  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3462  jj   = 0;
3463  for (i=0; i<m; i++) {
3464    for (j=0; j<ourlens[i]; j++) {
3465      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3466      jj++;
3467    }
3468  }
3469
3470  for (i=0; i<m; i++) {
3471    ourlens[i] -= offlens[i];
3472  }
3473  if (!sizesset) {
3474    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3475  }
3476
3477  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3478
3479  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3480
3481  for (i=0; i<m; i++) {
3482    ourlens[i] += offlens[i];
3483  }
3484
3485  if (!rank) {
3486    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3487
3488    /* read in my part of the matrix numerical values */
3489    nz   = procsnz[0];
3490    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491
3492    /* insert into matrix */
3493    jj      = rstart;
3494    smycols = mycols;
3495    svals   = vals;
3496    for (i=0; i<m; i++) {
3497      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3498      smycols += ourlens[i];
3499      svals   += ourlens[i];
3500      jj++;
3501    }
3502
3503    /* read in other processors' parts and ship them out */
3504    for (i=1; i<size; i++) {
3505      nz   = procsnz[i];
3506      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3508    }
3509    ierr = PetscFree(procsnz);CHKERRQ(ierr);
3510  } else {
3511    /* receive numeric values */
3512    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3513
3514    /* receive message of values */
3515    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3516
3517    /* insert into matrix */
3518    jj      = rstart;
3519    smycols = mycols;
3520    svals   = vals;
3521    for (i=0; i<m; i++) {
3522      ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3523      smycols += ourlens[i];
3524      svals   += ourlens[i];
3525      jj++;
3526    }
3527  }
3528  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3529  ierr = PetscFree(vals);CHKERRQ(ierr);
3530  ierr = PetscFree(mycols);CHKERRQ(ierr);
3531  ierr = PetscFree(rowners);CHKERRQ(ierr);
3532  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534
3535  PetscFunctionReturn(0);
3536 }
3537
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3540 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3541 {
3542  PetscErrorCode ierr;
3543  IS             iscol_local;
3544  PetscInt       csize;
3545
3546  PetscFunctionBegin;
3547  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3548  if (call == MAT_REUSE_MATRIX) {
3549    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3550    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3551  } else {
3552    PetscInt cbs;
3553    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3554    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3555    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3556  }
3557  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3558  if (call == MAT_INITIAL_MATRIX) {
3559    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3560    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3561  }
3562  PetscFunctionReturn(0);
3563 }
3564
3565 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3568 /*
3569    Not great since it makes two copies of the submatrix: first a SeqAIJ
3570    submatrix on each process, and then the final result assembled by
3571    concatenating those local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3572
3573    Note: This requires a sequential iscol with all indices.
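   A typical call chain (sketch, mirroring MatGetSubMatrix_MPIAIJ() above):

      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

   so every process presents the full set of requested columns while isrow
   remains distributed.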
3574 */ 3575 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3576 { 3577 PetscErrorCode ierr; 3578 PetscMPIInt rank,size; 3579 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3580 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3581 PetscBool allcolumns, colflag; 3582 Mat M,Mreuse; 3583 MatScalar *vwork,*aa; 3584 MPI_Comm comm = ((PetscObject)mat)->comm; 3585 Mat_SeqAIJ *aij; 3586 3587 3588 PetscFunctionBegin; 3589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3591 3592 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3593 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3594 if (colflag && ncol == mat->cmap->N){ 3595 allcolumns = PETSC_TRUE; 3596 } else { 3597 allcolumns = PETSC_FALSE; 3598 } 3599 if (call == MAT_REUSE_MATRIX) { 3600 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3601 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3602 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3603 } else { 3604 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3605 } 3606 3607 /* 3608 m - number of local rows 3609 n - number of columns (same on all processors) 3610 rstart - first row in new global matrix generated 3611 */ 3612 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3613 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3614 if (call == MAT_INITIAL_MATRIX) { 3615 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3616 ii = aij->i; 3617 jj = aij->j; 3618 3619 /* 3620 Determine the number of non-zeros in the diagonal and off-diagonal 3621 portions of the matrix in order to do correct preallocation 3622 */ 3623 3624 /* first get start and end of "diagonal" columns */ 3625 if (csize == PETSC_DECIDE) { 3626 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3627 if (mglobal == n) { /* square matrix */ 3628 nlocal = m; 3629 } else { 3630 nlocal = n/size + ((n % size) > rank); 3631 } 3632 } else { 3633 nlocal = csize; 3634 } 3635 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3636 rstart = rend - nlocal; 3637 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3638 3639 /* next, compute all the lengths */ 3640 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3641 olens = dlens + m; 3642 for (i=0; i<m; i++) { 3643 jend = ii[i+1] - ii[i]; 3644 olen = 0; 3645 dlen = 0; 3646 for (j=0; j<jend; j++) { 3647 if (*jj < rstart || *jj >= rend) olen++; 3648 else dlen++; 3649 jj++; 3650 } 3651 olens[i] = olen; 3652 dlens[i] = dlen; 3653 } 3654 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3655 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3656 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3657 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3658 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3659 ierr = PetscFree(dlens);CHKERRQ(ierr); 3660 } else { 3661 PetscInt ml,nl; 3662 3663 M = *newmat; 3664 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3665 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3666 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3667 /* 3668 The next two lines are needed so 
we may call MatSetValues_MPIAIJ() below directly,
3669      rather than the slower MatSetValues().
3670    */
3671    M->was_assembled = PETSC_TRUE;
3672    M->assembled     = PETSC_FALSE;
3673  }
3674  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3675  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3676  ii   = aij->i;
3677  jj   = aij->j;
3678  aa   = aij->a;
3679  for (i=0; i<m; i++) {
3680    row = rstart + i;
3681    nz  = ii[i+1] - ii[i];
3682    cwork = jj; jj += nz;
3683    vwork = aa; aa += nz;
3684    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3685  }
3686
3687  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689  *newmat = M;
3690
3691  /* save submatrix used in processor for next request */
3692  if (call == MAT_INITIAL_MATRIX) {
3693    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3694    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3695  }
3696
3697  PetscFunctionReturn(0);
3698 }
3699
3700 EXTERN_C_BEGIN
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3703 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3704 {
3705  PetscInt       m,cstart,cend,j,nnz,i,d;
3706  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3707  const PetscInt *JJ;
3708  PetscScalar    *values;
3709  PetscErrorCode ierr;
3710
3711  PetscFunctionBegin;
3712  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3713
3714  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3715  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3716  m      = B->rmap->n;
3717  cstart = B->cmap->rstart;
3718  cend   = B->cmap->rend;
3719  rstart = B->rmap->rstart;
3720
3721  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3722
3723 #if defined(PETSC_USE_DEBUG)
3724  for (i=0; i<m; i++) {
3725    nnz = Ii[i+1] - Ii[i];
3726    JJ  = J + Ii[i];
3727    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3728    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3729    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3730  }
3731 #endif
3732
3733  for (i=0; i<m; i++) {
3734    nnz     = Ii[i+1] - Ii[i];
3735    JJ      = J + Ii[i];
3736    nnz_max = PetscMax(nnz_max,nnz);
3737    d       = 0;
3738    for (j=0; j<nnz; j++) {
3739      if (cstart <= JJ[j] && JJ[j] < cend) d++;
3740    }
3741    d_nnz[i] = d;
3742    o_nnz[i] = nnz - d;
3743  }
3744  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3745  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3746
3747  if (v) values = (PetscScalar*)v;
3748  else {
3749    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3750    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3751  }
3752
3753  for (i=0; i<m; i++) {
3754    ii   = i + rstart;
3755    nnz  = Ii[i+1] - Ii[i];
3756    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3757  }
3758  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3759  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760
3761  if (!v) {
3762    ierr = PetscFree(values);CHKERRQ(ierr);
3763  }
3764  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3765  PetscFunctionReturn(0);
3766 }
3767 EXTERN_C_END
3768
3769 #undef __FUNCT__
3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3771 /*@
3772    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3773    (the default parallel PETSc format).
3774
3775    Collective on MPI_Comm
3776
3777    Input Parameters:
3778 +  B - the matrix
3779 .  i - the indices into j for the start of each local row (starts with zero)
3780 .  j - the column indices for each local row (starts with zero)
3781 -  v - optional values in the matrix
3782
3783    Level: developer
3784
3785    Notes:
3786    The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3787    thus you CANNOT change the matrix entries by changing the values of v[] after you have
3788    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3789
3790    The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3791
3792    The format which is used for the sparse matrix input is equivalent to a
3793    row-major ordering, i.e., for the following matrix, the input data expected is
3794    as shown:
3795
3796         1 0 0
3797         2 0 3     P0
3798        -------
3799         4 5 6     P1
3800
3801      Process0 [P0]: rows_owned=[0,1]
3802         i =  {0,1,3}  [size = nrow+1 = 2+1]
3803         j =  {0,0,2}  [size = nz = 3]
3804         v =  {1,2,3}  [size = nz = 3]
3805
3806      Process1 [P1]: rows_owned=[2]
3807         i =  {0,3}    [size = nrow+1 = 1+1]
3808         j =  {0,1,2}  [size = nz = 3]
3809         v =  {4,5,6}  [size = nz = 3]
3810
3811 .keywords: matrix, aij, compressed row, sparse, parallel
3812
3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3814           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3815 @*/
3816 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3817 {
3818  PetscErrorCode ierr;
3819
3820  PetscFunctionBegin;
3821  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3822  PetscFunctionReturn(0);
3823 }
3824
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3827 /*@C
3828    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3829    (the default parallel PETSc format). For good matrix assembly performance
3830    the user should preallocate the matrix storage by setting the parameters
3831    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3832    performance can be increased by more than a factor of 50.
3833
3834    Collective on MPI_Comm
3835
3836    Input Parameters:
3837 +  B - the matrix
3838 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3839           (same value is used for all local rows)
3840 .  d_nnz - array containing the number of nonzeros in the various rows of the
3841            DIAGONAL portion of the local submatrix (possibly different for each row)
3842            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3843            The size of this array is equal to the number of local rows, i.e., 'm'.
3844            For matrices that will be factored, you must leave room for (and set)
3845            the diagonal entry even if it is zero.
3846 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3847           submatrix (same value is used for all local rows).
3848 -  o_nnz - array containing the number of nonzeros in the various rows of the
3849            OFF-DIAGONAL portion of the local submatrix (possibly different for
3850            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3851            structure. The size of this array is equal to the number
3852            of local rows, i.e., 'm'.
3853
3854    If the *_nnz parameter is given then the *_nz parameter is ignored.
3855
3856    The AIJ format (also called the Yale sparse matrix format or
3857    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3858    storage. The stored row and column indices begin with zero.
3859    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3860
3861    The parallel matrix is partitioned such that the first m0 rows belong to
3862    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3863    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3864
3865    The DIAGONAL portion of the local submatrix of a processor can be defined
3866    as the submatrix which is obtained by extracting the part corresponding to
3867    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3868    first row that belongs to the processor, r2 is the last row belonging to
3869    this processor, and c1-c2 is the range of indices of the local part of a
3870    vector suitable for applying the matrix to. This is an mxn matrix. In the
3871    common case of a square matrix, the row and column ranges are the same and
3872    the DIAGONAL part is also square. The remaining portion of the local
3873    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3874
3875    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3876
3877    You can call MatGetInfo() to get information on how effective the preallocation was;
3878    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3879    You can also run with the option -info and look for messages with the string
3880    malloc in them to see if additional memory allocation was needed.
3881
3882    Example usage:
3883
3884    Consider the following 8x8 matrix with 34 non-zero values, that is
3885    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3886    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3887    as follows:
3888
3889 .vb
3890             1  2  0  |  0  3  0  |  0  4
3891     Proc0   0  5  6  |  7  0  0  |  8  0
3892             9  0 10  | 11  0  0  | 12  0
3893     -------------------------------------
3894            13  0 14  | 15 16 17  |  0  0
3895     Proc1   0 18  0  | 19 20 21  |  0  0
3896             0  0  0  | 22 23  0  | 24  0
3897     -------------------------------------
3898     Proc2  25 26 27  |  0  0 28  | 29  0
3899            30  0  0  | 31 32 33  |  0 34
3900 .ve
3901
3902    This can be represented as a collection of submatrices as:
3903
3904 .vb
3905       A B C
3906       D E F
3907       G H I
3908 .ve
3909
3910    Where the submatrices A,B,C are owned by proc0, D,E,F are
3911    owned by proc1, G,H,I are owned by proc2.
3912
3913    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3914    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915    The 'M','N' parameters are 8,8, and have the same values on all procs.
3916
3917    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3918    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3919    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3920    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3921    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3922    matrix, and [DF] as another SeqAIJ matrix.
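   (An illustrative sketch, not part of the original text: cols[], row, and
   nnz below are hypothetical application variables. One way to compute d_nnz
   and o_nnz is to classify each global column index of a local row against
   the matrix's column ownership range:
.vb
      PetscInt cstart,cend,j;
      ierr = MatGetOwnershipRangeColumn(B,&cstart,&cend);CHKERRQ(ierr);
      for (j=0; j<nnz; j++) {
        if (cols[j] >= cstart && cols[j] < cend) d_nnz[row]++;
        else                                     o_nnz[row]++;
      }
.ve
   This mirrors the counting loop used by MatMPIAIJSetPreallocationCSR().)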
3923
3924    When d_nz, o_nz parameters are specified, d_nz storage elements are
3925    allocated for every row of the local diagonal submatrix, and o_nz
3926    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3927    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3928    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3929    In this case, the values of d_nz,o_nz are:
3930 .vb
3931      proc0 : dnz = 2, o_nz = 2
3932      proc1 : dnz = 3, o_nz = 2
3933      proc2 : dnz = 1, o_nz = 4
3934 .ve
3935    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3936    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3937    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3938    34 values.
3939
3940    When d_nnz, o_nnz parameters are specified, the storage is specified
3941    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3942    In the above case the values for d_nnz,o_nnz are:
3943 .vb
3944      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3945      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3946      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3947 .ve
3948    Here the space allocated is the sum of all the above values, i.e., 34, and
3949    hence pre-allocation is perfect.
3950
3951    Level: intermediate
3952
3953 .keywords: matrix, aij, compressed row, sparse, parallel
3954
3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3956           MPIAIJ, MatGetInfo()
3957 @*/
3958 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3959 {
3960  PetscErrorCode ierr;
3961
3962  PetscFunctionBegin;
3963  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3964  PetscValidType(B,1);
3965  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3966  PetscFunctionReturn(0);
3967 }
3968
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3971 /*@
3972    MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3973    CSR format the local rows.
3974
3975    Collective on MPI_Comm
3976
3977    Input Parameters:
3978 +  comm - MPI communicator
3979 .  m - number of local rows (Cannot be PETSC_DECIDE)
3980 .  n - This value should be the same as the local size used in creating the
3981        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3982        calculated if N is given) For square matrices n is almost always m.
3983 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3984 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3985 .  i - row indices
3986 .  j - column indices
3987 -  a - matrix values
3988
3989    Output Parameter:
3990 .  mat - the matrix
3991
3992    Level: intermediate
3993
3994    Notes:
3995    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3996    thus you CANNOT change the matrix entries by changing the values of a[] after you have
3997    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
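   For illustration (a sketch; i, j and a here are the P0 arrays of the
   example shown below, so P0 owns m=2 rows of a matrix with N=3 global
   columns):
.vb
      Mat mat;
      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);CHKERRQ(ierr);
.ve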
3998
3999    The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4000
4001    The format which is used for the sparse matrix input is equivalent to a
4002    row-major ordering, i.e., for the following matrix, the input data expected is
4003    as shown:
4004
4005         1 0 0
4006         2 0 3     P0
4007        -------
4008         4 5 6     P1
4009
4010      Process0 [P0]: rows_owned=[0,1]
4011         i =  {0,1,3}  [size = nrow+1 = 2+1]
4012         j =  {0,0,2}  [size = nz = 3]
4013         v =  {1,2,3}  [size = nz = 3]
4014
4015      Process1 [P1]: rows_owned=[2]
4016         i =  {0,3}    [size = nrow+1 = 1+1]
4017         j =  {0,1,2}  [size = nz = 3]
4018         v =  {4,5,6}  [size = nz = 3]
4019
4020 .keywords: matrix, aij, compressed row, sparse, parallel
4021
4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4023           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4024 @*/
4025 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4026 {
4027  PetscErrorCode ierr;
4028
4029  PetscFunctionBegin;
4030  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4031  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4032  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4033  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4034  /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */
4035  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4036  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4037  PetscFunctionReturn(0);
4038 }
4039
4040 #undef __FUNCT__
4041 #define __FUNCT__ "MatCreateAIJ"
4042 /*@C
4043    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4044    (the default parallel PETSc format). For good matrix assembly performance
4045    the user should preallocate the matrix storage by setting the parameters
4046    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4047    performance can be increased by more than a factor of 50.
4048
4049    Collective on MPI_Comm
4050
4051    Input Parameters:
4052 +  comm - MPI communicator
4053 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4054        This value should be the same as the local size used in creating the
4055        y vector for the matrix-vector product y = Ax.
4056 .  n - This value should be the same as the local size used in creating the
4057        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4058        calculated if N is given) For square matrices n is almost always m.
4059 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4060 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4061 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4062           (same value is used for all local rows)
4063 .  d_nnz - array containing the number of nonzeros in the various rows of the
4064            DIAGONAL portion of the local submatrix (possibly different for each row)
4065            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4066            The size of this array is equal to the number of local rows, i.e., 'm'.
4067 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4068           submatrix (same value is used for all local rows).
4069 -  o_nnz - array containing the number of nonzeros in the various rows of the
4070            OFF-DIAGONAL portion of the local submatrix (possibly different for
4071            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4072            structure. The size of this array is equal to the number
4073            of local rows, i.e., 'm'.
4074
4075    Output Parameter:
4076 .  A - the matrix
4077
4078    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4079    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4080    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4081
4082    Notes:
4083    If the *_nnz parameter is given then the *_nz parameter is ignored.
4084
4085    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4086    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4087    storage requirements for this matrix.
4088
4089    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4090    processor then it must be used on all processors that share the object for
4091    that argument.
4092
4093    The user MUST specify either the local or global matrix dimensions
4094    (possibly both).
4095
4096    The parallel matrix is partitioned across processors such that the
4097    first m0 rows belong to process 0, the next m1 rows belong to
4098    process 1, the next m2 rows belong to process 2, etc., where
4099    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4100    values corresponding to [m x N] submatrix.
4101
4102    The columns are logically partitioned with the n0 columns belonging
4103    to 0th partition, the next n1 columns belonging to the next
4104    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4105
4106    The DIAGONAL portion of the local submatrix on any given processor
4107    is the submatrix corresponding to the rows and columns m,n
4108    corresponding to the given processor, i.e., the diagonal matrix on
4109    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4110    etc. The remaining portion of the local submatrix [m x (N-n)]
4111    constitutes the OFF-DIAGONAL portion. The example below better
4112    illustrates this concept.
4113
4114    For a square global matrix we define each processor's diagonal portion
4115    to be its local rows and the corresponding columns (a square submatrix);
4116    each processor's off-diagonal portion encompasses the remainder of the
4117    local matrix (a rectangular submatrix).
4118
4119    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4120
4121    When calling this routine with a single process communicator, a matrix of
4122    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4123    type of communicator, use the construction mechanism:
4124      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4125
4126    By default, this format uses inodes (identical nodes) when possible.
4127    We search for consecutive rows with the same nonzero structure, thereby
4128    reusing matrix information to achieve increased efficiency.
4129
4130    Options Database Keys:
4131 +  -mat_no_inode - Do not use inodes
4132 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4133 -  -mat_aij_oneindex - Internally use indexing starting at 1
4134         rather than 0. Note that when calling MatSetValues(),
4135         the user still MUST index entries starting at 0!
4136
4137
4138    Example usage:
4139
4140    Consider the following 8x8 matrix with 34 non-zero values, that is
4141    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4142    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4143    as follows:
4144
4145 .vb
4146             1  2  0  |  0  3  0  |  0  4
4147     Proc0   0  5  6  |  7  0  0  |  8  0
4148             9  0 10  | 11  0  0  | 12  0
4149     -------------------------------------
4150            13  0 14  | 15 16 17  |  0  0
4151     Proc1   0 18  0  | 19 20 21  |  0  0
4152             0  0  0  | 22 23  0  | 24  0
4153     -------------------------------------
4154     Proc2  25 26 27  |  0  0 28  | 29  0
4155            30  0  0  | 31 32 33  |  0 34
4156 .ve
4157
4158    This can be represented as a collection of submatrices as:
4159
4160 .vb
4161       A B C
4162       D E F
4163       G H I
4164 .ve
4165
4166    Where the submatrices A,B,C are owned by proc0, D,E,F are
4167    owned by proc1, G,H,I are owned by proc2.
4168
4169    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4170    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'M','N' parameters are 8,8, and have the same values on all procs.
4172
4173    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4174    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4175    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4176    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4177    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4178    matrix, and [DF] as another SeqAIJ matrix.
4179
4180    When d_nz, o_nz parameters are specified, d_nz storage elements are
4181    allocated for every row of the local diagonal submatrix, and o_nz
4182    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4183    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4184    local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4185    In this case, the values of d_nz,o_nz are:
4186 .vb
4187      proc0 : dnz = 2, o_nz = 2
4188      proc1 : dnz = 3, o_nz = 2
4189      proc2 : dnz = 1, o_nz = 4
4190 .ve
4191    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4192    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4193    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4194    34 values.
4195
4196    When d_nnz, o_nnz parameters are specified, the storage is specified
4197    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4198    In the above case the values for d_nnz,o_nnz are:
4199 .vb
4200      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4201      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4202      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4203 .ve
4204    Here the space allocated is the sum of all the above values, i.e., 34, and
4205    hence pre-allocation is perfect.
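   As a sketch of the recommended MatCreate()/MatSetType()/preallocation
   paradigm mentioned above (m, n, d_nnz, and o_nnz are taken per process
   from the tables; 8,8 are the global sizes of this example):
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve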
4206 4207 Level: intermediate 4208 4209 .keywords: matrix, aij, compressed row, sparse, parallel 4210 4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4212 MPIAIJ, MatCreateMPIAIJWithArrays() 4213 @*/ 4214 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4215 { 4216 PetscErrorCode ierr; 4217 PetscMPIInt size; 4218 4219 PetscFunctionBegin; 4220 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4221 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4222 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4223 if (size > 1) { 4224 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4225 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4226 } else { 4227 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4228 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4229 } 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #undef __FUNCT__ 4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4235 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4236 { 4237 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4238 4239 PetscFunctionBegin; 4240 *Ad = a->A; 4241 *Ao = a->B; 4242 *colmap = a->garray; 4243 PetscFunctionReturn(0); 4244 } 4245 4246 #undef __FUNCT__ 4247 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4249 { 4250 PetscErrorCode ierr; 4251 PetscInt i; 4252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4253 4254 PetscFunctionBegin; 4255 if (coloring->ctype == IS_COLORING_GLOBAL) { 4256 ISColoringValue *allcolors,*colors; 4257 ISColoring ocoloring; 4258 4259 /* set coloring for diagonal portion */ 4260 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4261 4262 /* set coloring for off-diagonal portion */ 4263 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4264 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4265 for (i=0; i<a->B->cmap->n; i++) { 4266 colors[i] = allcolors[a->garray[i]]; 4267 } 4268 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4269 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4270 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4271 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4272 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4273 ISColoringValue *colors; 4274 PetscInt *larray; 4275 ISColoring ocoloring; 4276 4277 /* set coloring for diagonal portion */ 4278 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4279 for (i=0; i<a->A->cmap->n; i++) { 4280 larray[i] = i + A->cmap->rstart; 4281 } 4282 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4283 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4284 for (i=0; i<a->A->cmap->n; i++) { 4285 colors[i] = coloring->colors[larray[i]]; 4286 } 4287 ierr = PetscFree(larray);CHKERRQ(ierr); 4288 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4289 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4290 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4291 4292 /* set coloring for off-diagonal portion */ 4293 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4294 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4295 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4296 for (i=0; i<a->B->cmap->n; i++) { 4297 colors[i] = coloring->colors[larray[i]]; 4298 } 4299 ierr = PetscFree(larray);CHKERRQ(ierr); 4300 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4301 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4302 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4303 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4304 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #if defined(PETSC_HAVE_ADIC) 4309 #undef __FUNCT__ 4310 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4311 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4312 { 4313 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4314 PetscErrorCode ierr; 4315 4316 PetscFunctionBegin; 4317 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4318 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4319 PetscFunctionReturn(0); 4320 } 4321 #endif 4322 4323 #undef __FUNCT__ 4324 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4325 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4326 { 4327 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4328 PetscErrorCode ierr; 4329 4330 PetscFunctionBegin; 4331 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4332 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4333 PetscFunctionReturn(0); 4334 } 4335 4336 #undef __FUNCT__ 4337 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4338 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4339 { 4340 PetscErrorCode ierr; 4341 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4342 PetscInt *indx; 4343 4344 PetscFunctionBegin; 4345 /* This routine will ONLY return MPIAIJ type matrix */ 4346 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4347 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4348 if (n == PETSC_DECIDE){ 4349 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4350 } 4351 /* Check sum(n) = N */ 4352 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4353 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4354 4355 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4356 rstart -= m; 4357 4358 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4359 for (i=0;i<m;i++) { 4360 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4361 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4362 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4363 } 4364 4365 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4366 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4367 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4368 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4369 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4370 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4371 PetscFunctionReturn(0); 4372 } 4373 4374 #undef __FUNCT__ 4375 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4376 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4377 { 4378 PetscErrorCode ierr; 4379 PetscInt 
m,N,i,rstart,nnz,Ii;
4380  PetscInt       *indx;
4381  PetscScalar    *values;
4382
4383  PetscFunctionBegin;
4384  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4385  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4386  for (i=0;i<m;i++) {
4387    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4388    Ii   = i + rstart;
4389    ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4390    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4391  }
4392  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394  PetscFunctionReturn(0);
4395 }
4396
4397 #undef __FUNCT__
4398 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4399 /*@
4400    MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4401    matrices from each processor
4402
4403    Collective on MPI_Comm
4404
4405    Input Parameters:
4406 +  comm - the communicator the parallel matrix will live on
4407 .  inmat - the input sequential matrix
4408 .  n - number of local columns (or PETSC_DECIDE)
4409 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4410
4411    Output Parameter:
4412 .  outmat - the parallel matrix generated
4413
4414    Level: advanced
4415
4416    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4417
4418 @*/
4419 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4420 {
4421  PetscErrorCode ierr;
4422
4423  PetscFunctionBegin;
4424  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4425  if (scall == MAT_INITIAL_MATRIX){
4426    ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4427  }
4428  ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4429  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4430  PetscFunctionReturn(0);
4431 }
4432
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatFileSplit"
4435 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4436 {
4437  PetscErrorCode    ierr;
4438  PetscMPIInt       rank;
4439  PetscInt          m,N,i,rstart,nnz;
4440  size_t            len;
4441  const PetscInt    *indx;
4442  PetscViewer       out;
4443  char              *name;
4444  Mat               B;
4445  const PetscScalar *values;
4446
4447  PetscFunctionBegin;
4448  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4449  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4450  /* Should this be the type of the diagonal block of A?
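     One possible (untested) answer: query the local blocks with
     MatMPIAIJGetSeqAIJ(), defined earlier in this file, and reuse the
     diagonal block's type when A really is an MPIAIJ matrix, e.g.

        Mat      Ad,Ao;
        PetscInt *garray;
        ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
        ierr = MatSetType(B,((PetscObject)Ad)->type_name);CHKERRQ(ierr);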
*/ 4451 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4452 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4453 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4454 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4455 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4456 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4457 for (i=0;i<m;i++) { 4458 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4459 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4460 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4461 } 4462 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4464 4465 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4466 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4467 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4468 sprintf(name,"%s.%d",outfile,rank); 4469 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4470 ierr = PetscFree(name); 4471 ierr = MatView(B,out);CHKERRQ(ierr); 4472 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4473 ierr = MatDestroy(&B);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4478 #undef __FUNCT__ 4479 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4480 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4481 { 4482 PetscErrorCode ierr; 4483 Mat_Merge_SeqsToMPI *merge; 4484 PetscContainer container; 4485 4486 PetscFunctionBegin; 4487 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4488 if (container) { 4489 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4502 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4503 ierr = PetscFree(merge);CHKERRQ(ierr); 4504 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4505 } 4506 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 #include <../src/mat/utils/freespace.h> 4511 #include <petscbt.h> 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4516 { 4517 PetscErrorCode ierr; 4518 MPI_Comm comm=((PetscObject)mpimat)->comm; 4519 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4520 PetscMPIInt size,rank,taga,*len_s; 4521 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4522 PetscInt proc,m; 4523 PetscInt **buf_ri,**buf_rj; 4524 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4525 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4526 MPI_Request *s_waits,*r_waits; 4527 MPI_Status *status; 4528 MatScalar *aa=a->a; 4529 MatScalar **abuf_r,*ba_i; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = 
4534 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4535
4536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4537 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4538
4539 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4540 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4541
4542 bi = merge->bi;
4543 bj = merge->bj;
4544 buf_ri = merge->buf_ri;
4545 buf_rj = merge->buf_rj;
4546
4547 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4548 owners = merge->rowmap->range;
4549 len_s = merge->len_s;
4550
4551 /* send and recv matrix values */
4552 /*-----------------------------*/
4553 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4554 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4555
4556 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4557 for (proc=0,k=0; proc<size; proc++){
4558 if (!len_s[proc]) continue;
4559 i = owners[proc];
4560 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4561 k++;
4562 }
4563
4564 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4565 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4566 ierr = PetscFree(status);CHKERRQ(ierr);
4567
4568 ierr = PetscFree(s_waits);CHKERRQ(ierr);
4569 ierr = PetscFree(r_waits);CHKERRQ(ierr);
4570
4571 /* insert mat values of mpimat */
4572 /*----------------------------*/
4573 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4574 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4575
4576 for (k=0; k<merge->nrecv; k++){
4577 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4578 nrows = *(buf_ri_k[k]);
4579 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4580 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4581 }
4582
4583 /* set values of ba */
4584 m = merge->rowmap->n;
4585 for (i=0; i<m; i++) {
4586 arow = owners[rank] + i;
4587 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4588 bnzi = bi[i+1] - bi[i];
4589 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4590
4591 /* add local non-zero vals of this proc's seqmat into ba */
4592 anzi = ai[arow+1] - ai[arow];
4593 aj = a->j + ai[arow];
4594 aa = a->a + ai[arow];
4595 nextaj = 0;
4596 for (j=0; nextaj<anzi; j++){
4597 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4598 ba_i[j] += aa[nextaj++];
4599 }
4600 }
4601
4602 /* add received vals into ba */
4603 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4604 /* i-th row */
4605 if (i == *nextrow[k]) {
4606 anzi = *(nextai[k]+1) - *nextai[k];
4607 aj = buf_rj[k] + *(nextai[k]);
4608 aa = abuf_r[k] + *(nextai[k]);
4609 nextaj = 0;
4610 for (j=0; nextaj<anzi; j++){
4611 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4612 ba_i[j] += aa[nextaj++];
4613 }
4614 }
4615 nextrow[k]++; nextai[k]++;
4616 }
4617 }
4618 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4619 }
4620 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622
4623 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4624 ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4625 ierr = PetscFree(ba_i);CHKERRQ(ierr);
4626 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4627 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4628 PetscFunctionReturn(0);
4629 }
4630
4631 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4632
4633 #undef __FUNCT__
4634 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4635 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4636 {
4637 PetscErrorCode ierr;
4638 Mat B_mpi;
4639 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4640 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4641 PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4642 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4643 PetscInt len,proc,*dnz,*onz,bs,cbs;
4644 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4645 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4646 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4647 MPI_Status *status;
4648 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
4649 PetscBT lnkbt;
4650 Mat_Merge_SeqsToMPI *merge;
4651 PetscContainer container;
4652
4653 PetscFunctionBegin;
4654 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4655
4656 /* make sure it is a PETSc comm */
4657 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4659 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4660
4661 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4662 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4663
4664 /* determine row ownership */
4665 /*---------------------------------------------------------*/
4666 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4667 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4668 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4669 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4670 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4671 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4672 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4673
4674 m = merge->rowmap->n;
4675 M = merge->rowmap->N;
4676 owners = merge->rowmap->range;
4677
4678 /* determine the number of messages to send, their lengths */
4679 /*---------------------------------------------------------*/
4680 len_s = merge->len_s;
4681
4682 len = 0; /* length of buf_si[] */
4683 merge->nsend = 0;
4684 for (proc=0; proc<size; proc++){
4685 len_si[proc] = 0;
4686 if (proc == rank){
4687 len_s[proc] = 0;
4688 } else {
4689 len_si[proc] = owners[proc+1] - owners[proc] + 1;
4690 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4691 }
4692 if (len_s[proc]) {
4693 merge->nsend++;
4694 nrows = 0;
4695 for (i=owners[proc]; i<owners[proc+1]; i++){
4696 if (ai[i+1] > ai[i]) nrows++;
4697 }
4698 len_si[proc] = 2*(nrows+1);
4699 len += len_si[proc];
4700 }
4701 }
4702
4703 /* determine the number and length of messages to receive for ij-structure */
4704 /*-------------------------------------------------------------------------*/
4705 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4706 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4707
4708 /* post the Irecv of j-structure */
4709 /*-------------------------------*/
4710 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4711 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4712
4713 /* post the Isend of j-structure */
4714 /*--------------------------------*/
4715 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4716
4717 for (proc=0, k=0; proc<size; proc++){
4718 if (!len_s[proc]) continue;
4719 i = owners[proc];
4720 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4721 k++;
4722 }
4723
4724 /* receives and sends of j-structure are complete */
4725 /*------------------------------------------------*/
4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4728
4729 /* send and recv i-structure */
4730 /*---------------------------*/
4731 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4732 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4733
4734 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4735 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4736 for (proc=0,k=0; proc<size; proc++){
4737 if (!len_s[proc]) continue;
4738 /* form outgoing message for i-structure:
4739 buf_si[0]: nrows to be sent
4740 [1:nrows]: row index (local to the destination's ownership range)
4741 [nrows+1:2*nrows+1]: i-structure index
4742 */
4743 /*-------------------------------------------*/
4744 nrows = len_si[proc]/2 - 1;
4745 buf_si_i = buf_si + nrows+1;
4746 buf_si[0] = nrows;
4747 buf_si_i[0] = 0;
4748 nrows = 0;
4749 for (i=owners[proc]; i<owners[proc+1]; i++){
4750 anzi = ai[i+1] - ai[i];
4751 if (anzi) {
4752 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4753 buf_si[nrows+1] = i-owners[proc]; /* local row index */
4754 nrows++;
4755 }
4756 }
4757 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4758 k++;
4759 buf_si += len_si[proc];
4760 }
4761
4762 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4763 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4764
4765 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4766 for (i=0; i<merge->nrecv; i++){
4767 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4768 }
4769
4770 ierr = PetscFree(len_si);CHKERRQ(ierr);
4771 ierr = PetscFree(len_ri);CHKERRQ(ierr);
4772 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4773 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4774 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4775 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4776 ierr = PetscFree(status);CHKERRQ(ierr);
4777
4778 /* compute a local seq matrix in each processor */
4779 /*----------------------------------------------*/
4780 /* allocate bi array and free space for accumulating nonzero column info */
4781 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4782 bi[0] = 0;
4783
4784 /* create and initialize a linked list */
4785 nlnk = N+1;
4786 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787
4788 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4789 len = 0;
4790 len = ai[owners[rank+1]] - ai[owners[rank]];
4791 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4792 current_space = free_space;
4793
4794 /* determine symbolic info for each local row */
4795 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4796
4797 for (k=0; k<merge->nrecv; k++){
4798 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4799 nrows = *buf_ri_k[k];
4800 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4801 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */
4802 }
4803
4804 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4805 len = 0;
4806 for (i=0;i<m;i++) {
4807 bnzi = 0;
4808 /* add local non-zero cols of this proc's seqmat into lnk */
4809 arow = owners[rank] + i;
4810 anzi = ai[arow+1] - ai[arow];
4811 aj = a->j + ai[arow];
4812 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813 bnzi += nlnk;
4814 /* add received col data into lnk */
4815 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4816 if (i == *nextrow[k]) { /* i-th row */
4817 anzi = *(nextai[k]+1) - *nextai[k];
4818 aj = buf_rj[k] + *nextai[k];
4819 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4820 bnzi += nlnk;
4821 nextrow[k]++; nextai[k]++;
4822 }
4823 }
4824 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4825
4826 /* if free space is not available, make more free space */
4827 if (current_space->local_remaining<bnzi) {
4828 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4829 nspacedouble++;
4830 }
4831 /* copy data into free space, then initialize lnk */
4832 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4833 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4834
4835 current_space->array += bnzi;
4836 current_space->local_used += bnzi;
4837 current_space->local_remaining -= bnzi;
4838
4839 bi[i+1] = bi[i] + bnzi;
4840 }
4841
4842 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4843
4844 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4845 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4846 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4847
4848 /* create symbolic parallel matrix B_mpi */
4849 /*---------------------------------------*/
4850 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4851 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4852 if (n==PETSC_DECIDE) {
4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4854 } else {
4855 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4856 }
4857 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4858 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4859 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4860 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4861 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4862
4863 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4864 B_mpi->assembled = PETSC_FALSE;
4865 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4866 merge->bi = bi;
4867 merge->bj = bj;
4868 merge->buf_ri = buf_ri;
4869 merge->buf_rj = buf_rj;
4870 merge->coi = PETSC_NULL;
4871 merge->coj = PETSC_NULL;
4872 merge->owners_co = PETSC_NULL;
4873
4874 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4875
4876 /* attach the supporting struct to B_mpi for reuse */
4877 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4878 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4879 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4880 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4881 *mpimat = B_mpi;
4882
4883 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4884 PetscFunctionReturn(0);
4885 }
4886
4887 #undef __FUNCT__
4888 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4889 /*@C
4890 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4891 matrices from each processor
4892
4893 Collective on MPI_Comm
4894
4895 Input Parameters:
4896 + comm - the communicator the parallel matrix will live on
4897 . seqmat - the input sequential matrix (one per process)
4898 . m - number of local rows (or PETSC_DECIDE)
4899 . n - number of local columns (or PETSC_DECIDE)
4900 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4901
4902 Output Parameter:
4903 . mpimat - the parallel matrix generated
4904
4905 Level: advanced
4906
4907 Notes:
4908 The dimensions of the sequential matrix MUST be the same on each processor.
4909 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4910 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4911 @*/
4912 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4913 {
4914 PetscErrorCode ierr;
4915 PetscMPIInt size;
4916
4917 PetscFunctionBegin;
4918 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4919 if (size == 1){
4920 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921 if (scall == MAT_INITIAL_MATRIX){
4922 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4923 } else {
4924 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4925 }
4926 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4927 PetscFunctionReturn(0);
4928 }
4929 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930 if (scall == MAT_INITIAL_MATRIX){
4931 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4932 }
4933 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4934 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935 PetscFunctionReturn(0);
4936 }
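/*
   A minimal usage sketch (names hypothetical): every process supplies a
   SeqAIJ matrix of identical global size M x N holding its contributions;
   entries contributed by several processes are summed in the parallel result.

     Mat seqcontrib,mpisum;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,5,PETSC_NULL,&seqcontrib);CHKERRQ(ierr);
     ... add this process's contributions and assemble ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqcontrib,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpisum);CHKERRQ(ierr);
*/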
4937
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4940 /*@
4941 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4942 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4943 with MatGetSize()
4944
4945 Not Collective
4946
4947 Input Parameters:
4948 + A - the matrix
4949 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4950
4951 Output Parameter:
4952 . A_loc - the local sequential matrix generated
4953
4954 Level: developer
4955
4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4957
4958 @*/
4959 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4960 {
4961 PetscErrorCode ierr;
4962 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4963 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4964 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4965 MatScalar *aa=a->a,*ba=b->a,*cam;
4966 PetscScalar *ca;
4967 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4969 PetscBool match;
4970
4971 PetscFunctionBegin;
4972 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4973 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4974 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4975 if (scall == MAT_INITIAL_MATRIX){
4976 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4977 ci[0] = 0;
4978 for (i=0; i<am; i++){
4979 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4980 }
4981 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4982 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4983 k = 0;
4984 for (i=0; i<am; i++) {
4985 ncols_o = bi[i+1] - bi[i];
4986 ncols_d = ai[i+1] - ai[i];
4987 /* off-diagonal portion of A */
4988 for (jo=0; jo<ncols_o; jo++) {
4989 col = cmap[*bj];
4990 if (col >= cstart) break;
4991 cj[k] = col; bj++;
4992 ca[k++] = *ba++;
4993 }
4994 /* diagonal portion of A */
4995 for (j=0; j<ncols_d; j++) {
4996 cj[k] = cstart + *aj++;
4997 ca[k++] = *aa++;
4998 }
4999 /* off-diagonal portion of A */
5000 for (j=jo; j<ncols_o; j++) {
5001 cj[k] = cmap[*bj++];
5002 ca[k++] = *ba++;
5003 }
5004 }
5005 /* put together the new matrix */
5006 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5007 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5008 /* Since these are PETSc arrays, change flags to free them as necessary. */
5009 mat = (Mat_SeqAIJ*)(*A_loc)->data;
5010 mat->free_a = PETSC_TRUE;
5011 mat->free_ij = PETSC_TRUE;
5012 mat->nonew = 0;
5013 } else if (scall == MAT_REUSE_MATRIX){
5014 mat=(Mat_SeqAIJ*)(*A_loc)->data;
5015 ci = mat->i; cj = mat->j; cam = mat->a;
5016 for (i=0; i<am; i++) {
5017 /* off-diagonal portion of A */
5018 ncols_o = bi[i+1] - bi[i];
5019 for (jo=0; jo<ncols_o; jo++) {
5020 col = cmap[*bj];
5021 if (col >= cstart) break;
5022 *cam++ = *ba++; bj++;
5023 }
5024 /* diagonal portion of A */
5025 ncols_d = ai[i+1] - ai[i];
5026 for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5027 /* off-diagonal portion of A */
5028 for (j=jo; j<ncols_o; j++) {
5029 *cam++ = *ba++; bj++;
5030 }
5031 }
5032 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5033 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5034 PetscFunctionReturn(0);
5035 }
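/*
   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the local rows of A through A_loc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   /* refresh the values after A changes */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/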
5036
5037 #undef __FUNCT__
5038 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5039 /*@C
5040 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5041
5042 Not Collective
5043
5044 Input Parameters:
5045 + A - the matrix
5046 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5047 - row, col - index sets of rows and columns to extract (or PETSC_NULL)
5048
5049 Output Parameter:
5050 . A_loc - the local sequential matrix generated
5051
5052 Level: developer
5053
5054 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5055
5056 @*/
5057 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5058 {
5059 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5060 PetscErrorCode ierr;
5061 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5062 IS isrowa,iscola;
5063 Mat *aloc;
5064 PetscBool match;
5065
5066 PetscFunctionBegin;
5067 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5068 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5069 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5070 if (!row){
5071 start = A->rmap->rstart; end = A->rmap->rend;
5072 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5073 } else {
5074 isrowa = *row;
5075 }
5076 if (!col){
5077 start = A->cmap->rstart;
5078 cmap = a->garray;
5079 nzA = a->A->cmap->n;
5080 nzB = a->B->cmap->n;
5081 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5082 ncols = 0;
5083 for (i=0; i<nzB; i++) {
5084 if (cmap[i] < start) idx[ncols++] = cmap[i];
5085 else break;
5086 }
5087 imark = i;
5088 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5089 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5090 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5091 } else {
5092 iscola = *col;
5093 }
5094 if (scall != MAT_INITIAL_MATRIX){
5095 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5096 aloc[0] = *A_loc;
5097 }
5098 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5099 *A_loc = aloc[0];
5100 ierr = PetscFree(aloc);CHKERRQ(ierr);
5101 if (!row){
5102 ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5103 }
5104 if (!col){
5105 ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5106 }
5107 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5108 PetscFunctionReturn(0);
5109 }
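/*
   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix;
   passing PETSC_NULL for row and col selects all local rows and the
   nonzero columns automatically):

     Mat A_cond;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_cond);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
*/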
5110
5111 #undef __FUNCT__
5112 #define __FUNCT__ "MatGetBrowsOfAcols"
5113 /*@C
5114 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5115
5116 Collective on Mat
5117
5118 Input Parameters:
5119 + A,B - the matrices in mpiaij format
5120 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5121 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5122
5123 Output Parameter:
5124 + rowb, colb - index sets of rows and columns of B to extract
5125 - B_seq - the sequential matrix generated
5126
5127 Level: developer
5128
5129 @*/
5130 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5131 {
5132 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5133 PetscErrorCode ierr;
5134 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5135 IS isrowb,iscolb;
5136 Mat *bseq=PETSC_NULL;
5137
5138 PetscFunctionBegin;
5139 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5140 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5141 }
5142 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5143
5144 if (scall == MAT_INITIAL_MATRIX){
5145 start = A->cmap->rstart;
5146 cmap = a->garray;
5147 nzA = a->A->cmap->n;
5148 nzB = a->B->cmap->n;
5149 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5150 ncols = 0;
5151 for (i=0; i<nzB; i++) { /* row < local row index */
5152 if (cmap[i] < start) idx[ncols++] = cmap[i];
5153 else break;
5154 }
5155 imark = i;
5156 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5157 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5158 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5159 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5160 } else {
5161 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5162 isrowb = *rowb; iscolb = *colb;
5163 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5164 bseq[0] = *B_seq;
5165 }
5166 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5167 *B_seq = bseq[0];
5168 ierr = PetscFree(bseq);CHKERRQ(ierr);
5169 if (!rowb){
5170 ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5171 } else {
5172 *rowb = isrowb;
5173 }
5174 if (!colb){
5175 ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5176 } else {
5177 *colb = iscolb;
5178 }
5179 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5180 PetscFunctionReturn(0);
5181 }
5182
5183 #undef __FUNCT__
5184 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5185 /*
5186 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5187 of the OFF-DIAGONAL portion of local A
5188
5189 Collective on Mat
5190
5191 Input Parameters:
5192 + A,B - the matrices in mpiaij format
5193 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5194
5195 Output Parameter:
5196 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5197 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5198 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5199 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5200
5201 Level: developer
5202
5203 */
5204 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5205 {
5206 VecScatter_MPI_General *gen_to,*gen_from;
5207 PetscErrorCode ierr;
5208 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5209 Mat_SeqAIJ *b_oth;
5210 VecScatter ctx=a->Mvctx;
5211 MPI_Comm comm=((PetscObject)ctx)->comm;
5212 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5213 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5214 PetscScalar *rvalues,*svalues;
5215 MatScalar *b_otha,*bufa,*bufA;
5216 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5217 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5218 MPI_Status *sstatus,rstatus;
5219 PetscMPIInt jj;
5220 PetscInt *cols,sbs,rbs;
5221 PetscScalar *vals;
5222
5223 PetscFunctionBegin;
5224 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5225 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5226 }
5227 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5228 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5229
5230 gen_to = (VecScatter_MPI_General*)ctx->todata;
5231 gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5232 rvalues = gen_from->values; /* holds the length of receiving row */
5233 svalues = gen_to->values; /* holds the length of sending row */
5234 nrecvs = gen_from->n;
5235 nsends = gen_to->n;
5236
5237 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5238 srow = gen_to->indices; /* local row index to be sent */
5239 sstarts = gen_to->starts;
5240 sprocs = gen_to->procs;
5241 sstatus = gen_to->sstatus;
5242 sbs = gen_to->bs;
5243 rstarts = gen_from->starts;
5244 rprocs = gen_from->procs;
5245 rbs = gen_from->bs;
5246
5247 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5248 if (scall == MAT_INITIAL_MATRIX){
5249 /* i-array */
5250 /*---------*/
5251 /* post receives */
5252 for (i=0; i<nrecvs; i++){
5253 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5254 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5255 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5256 }
5257
5258 /* pack the outgoing message */
5259 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5260 sstartsj[0] = 0; rstartsj[0] = 0;
5261 len = 0; /* total length of j or a array to be sent */
5262 k = 0;
5263 for (i=0; i<nsends; i++){
5264 rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5265 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5266 for (j=0; j<nrows; j++) {
5267 row = srow[k] + B->rmap->range[rank]; /* global row idx */
5268 for (l=0; l<sbs; l++){
5269 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5270 rowlen[j*sbs+l] = ncols;
5271 len += ncols;
5272 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5273 }
5274 k++;
5275 }
5276 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5277 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5278 }
5279 /* recvs and sends of i-array are completed */
5280 i = nrecvs;
5281 while (i--) {
5282 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5283 }
5284 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5285
5286 /* allocate buffers for sending j and a arrays */
5287 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5288 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5289
5290 /* create i-array of B_oth */
5291 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5292 b_othi[0] = 0;
5293 len = 0; /* total length of j or a array to be received */
5294 k = 0;
5295 for (i=0; i<nrecvs; i++){
5296 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5297 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5298 for (j=0; j<nrows; j++) {
5299 b_othi[k+1] = b_othi[k] + rowlen[j];
5300 len += rowlen[j]; k++;
5301 }
5302 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5303 }
5304
5305 /* allocate space for j and a arrays of B_oth */
5306 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5307 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5308
5309 /* j-array */
5310 /*---------*/
5311 /* post receives of j-array */
5312 for (i=0; i<nrecvs; i++){
5313 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5314 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5315 }
5316
5317 /* pack the outgoing message j-array */
5318 k = 0;
5319 for (i=0; i<nsends; i++){
5320 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5321 bufJ = bufj+sstartsj[i];
5322 for (j=0; j<nrows; j++) {
5323 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5324 for (ll=0; ll<sbs; ll++){
5325 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5326 for (l=0; l<ncols; l++){
5327 *bufJ++ = cols[l];
5328 }
5329 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5330 }
5331 }
5332 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5333 }
5334
5335 /* recvs and sends of j-array are completed */
5336 i = nrecvs;
5337 while (i--) {
5338 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5339 }
5340 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5341 } else if (scall == MAT_REUSE_MATRIX){
5342 sstartsj = *startsj_s;
5343 rstartsj = *startsj_r;
5344 bufa = *bufa_ptr;
5345 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5346 b_otha = b_oth->a;
5347 } else {
5348 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5349 }
5350
5351 /* a-array */
5352 /*---------*/
5353 /* post receives of a-array */
5354 for (i=0; i<nrecvs; i++){
5355 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5356 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5357 }
5358
5359 /* pack the outgoing message a-array */
5360 k = 0;
5361 for (i=0; i<nsends; i++){
5362 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5363 bufA = bufa+sstartsj[i];
5364 for (j=0; j<nrows; j++) {
5365 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5366 for (ll=0; ll<sbs; ll++){
5367 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5368 for (l=0; l<ncols; l++){
5369 *bufA++ = vals[l];
5370 }
5371 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5372 }
5373 }
5374 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5375 }
5376 /* recvs and sends of a-array are completed */
5377 i = nrecvs;
5378 while (i--) {
5379 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5380 }
5381 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5382 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5383
5384 if (scall == MAT_INITIAL_MATRIX){
5385 /* put together the new matrix */
5386 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5387
5388 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5389 /* Since these are PETSc arrays, change flags to free them as necessary. */
5390 b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
5391 b_oth->free_a = PETSC_TRUE;
5392 b_oth->free_ij = PETSC_TRUE;
5393 b_oth->nonew = 0;
5394
5395 ierr = PetscFree(bufj);CHKERRQ(ierr);
5396 if (!startsj_s || !bufa_ptr){
5397 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5398 ierr = PetscFree(bufa);CHKERRQ(ierr);
5399 } else {
5400 *startsj_s = sstartsj;
5401 *startsj_r = rstartsj;
5402 *bufa_ptr = bufa;
5403 }
5404 }
5405 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5406 PetscFunctionReturn(0);
5407 }
5408
5409 #undef __FUNCT__
5410 #define __FUNCT__ "MatGetCommunicationStructs"
5411 /*@C
5412 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5413
5414 Not Collective
5415
5416 Input Parameters:
5417 . A - The matrix in mpiaij format
5418
5419 Output Parameter:
5420 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5421 . colmap - A map from global column index to local index into lvec
5422 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5423
5424 Level: developer
5425
5426 @*/
5427 #if defined (PETSC_USE_CTABLE)
5428 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5429 #else
5430 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5431 #endif
5432 {
5433 Mat_MPIAIJ *a;
5434
5435 PetscFunctionBegin;
5436 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5437 PetscValidPointer(lvec, 2);
5438 PetscValidPointer(colmap, 3);
5439 PetscValidPointer(multScatter, 4);
5440 a = (Mat_MPIAIJ *) A->data;
5441 if (lvec) *lvec = a->lvec;
5442 if (colmap) *colmap = a->colmap;
5443 if (multScatter) *multScatter = a->Mvctx;
5444 PetscFunctionReturn(0);
5445 }
5446
5447 EXTERN_C_BEGIN
5448 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5449 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5450 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5451 EXTERN_C_END
5452
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5455 /*
5456 Computes (B'*A')' since computing B*A directly is untenable
5457
5458         n          p           p
5459      (   )      (   )       (   )
5460    m ( A )  * n ( B )  =  m ( C )
5461      (   )      (   )       (   )
5462
5463 */
5464 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5465 {
5466 PetscErrorCode ierr;
5467 Mat At,Bt,Ct;
5468
5469 PetscFunctionBegin;
5470 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5471 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5472 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5473 ierr = MatDestroy(&At);CHKERRQ(ierr);
5474 ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5475 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5476 ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5477 PetscFunctionReturn(0);
5478 }
5479
5480 #undef __FUNCT__
5481 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5482 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5483 {
5484 PetscErrorCode ierr;
5485 PetscInt m=A->rmap->n,n=B->cmap->n;
5486 Mat Cmat;
5487
5488 PetscFunctionBegin;
5489 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5490 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5491 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5492 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5493 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5494 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5495 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5496 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5497 *C = Cmat;
5498 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5499 PetscFunctionReturn(0);
5500 }
5501
5502 /* ----------------------------------------------------------------*/
5503 #undef __FUNCT__
5504 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5505 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5506 {
5507 PetscErrorCode ierr;
5508
5509 PetscFunctionBegin;
5510 if (scall == MAT_INITIAL_MATRIX){
5511 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5512 }
5513 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5514 PetscFunctionReturn(0);
5515 }
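/*
   Because MatMatMult_MPIDense_MPIAIJ() is composed on every MPIAIJ matrix
   under the name "MatMatMult_mpidense_mpiaij_C" in MatCreate_MPIAIJ() below,
   users reach it through the generic interface; a sketch with hypothetical
   matrices A_dense (MPIDENSE) and B_aij (MPIAIJ):

     Mat C;
     ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/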
5516
5517 EXTERN_C_BEGIN
5518 #if defined(PETSC_HAVE_MUMPS)
5519 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_PASTIX)
5522 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_SUPERLU_DIST)
5525 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_SPOOLES)
5528 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5529 #endif
5530 #if defined(PETSC_HAVE_CLIQUE)
5531 extern PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5532 #endif
5533 EXTERN_C_END
5534
5535 /*MC
5536 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5537
5538 Options Database Keys:
5539 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5540
5541 Level: beginner
5542
5543 .seealso: MatCreateAIJ()
5544 M*/
5545
5546 EXTERN_C_BEGIN
5547 #undef __FUNCT__
5548 #define __FUNCT__ "MatCreate_MPIAIJ"
5549 PetscErrorCode MatCreate_MPIAIJ(Mat B)
5550 {
5551 Mat_MPIAIJ *b;
5552 PetscErrorCode ierr;
5553 PetscMPIInt size;
5554
5555 PetscFunctionBegin;
5556 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5557 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5558 B->data = (void*)b;
5559 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5560 B->assembled = PETSC_FALSE;
5561 B->insertmode = NOT_SET_VALUES;
5562 b->size = size;
5563 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5564
5565 /* build cache for off array entries formed */
5566 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5567 b->donotstash = PETSC_FALSE;
5568 b->colmap = 0;
5569 b->garray = 0;
5570 b->roworiented = PETSC_TRUE;
5571
5572 /* stuff used for matrix vector multiply */
5573 b->lvec = PETSC_NULL;
5574 b->Mvctx = PETSC_NULL;
5575
5576 /* stuff for MatGetRow() */
5577 b->rowindices = 0;
5578 b->rowvalues = 0;
5579 b->getrowactive = PETSC_FALSE;
5580
5581 /* flexible pointer used in CUSP/CUSPARSE classes */
5582 b->spptr = PETSC_NULL;
5583 #if defined(PETSC_HAVE_SPOOLES)
5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5585 "MatGetFactor_mpiaij_spooles",
5586 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_MUMPS)
5589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5590 "MatGetFactor_aij_mumps",
5591 MatGetFactor_aij_mumps);CHKERRQ(ierr);
5592 #endif
5593 #if defined(PETSC_HAVE_PASTIX)
5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5595 "MatGetFactor_mpiaij_pastix",
5596 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5597 #endif
5598 #if defined(PETSC_HAVE_SUPERLU_DIST)
5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5600 "MatGetFactor_mpiaij_superlu_dist",
5601 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_CLIQUE)
5604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_clique_C",
5605 "MatGetFactor_aij_clique",
5606 MatGetFactor_aij_clique);CHKERRQ(ierr);
5607 #endif
5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5609 "MatStoreValues_MPIAIJ",
5610 MatStoreValues_MPIAIJ);CHKERRQ(ierr);
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5612 "MatRetrieveValues_MPIAIJ", 5613 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5615 "MatGetDiagonalBlock_MPIAIJ", 5616 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5618 "MatIsTranspose_MPIAIJ", 5619 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5621 "MatMPIAIJSetPreallocation_MPIAIJ", 5622 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5624 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5625 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5627 "MatDiagonalScaleLocal_MPIAIJ", 5628 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5630 "MatConvert_MPIAIJ_MPIAIJPERM", 5631 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5633 "MatConvert_MPIAIJ_MPIAIJCRL", 5634 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5636 "MatConvert_MPIAIJ_MPISBAIJ", 5637 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5639 "MatMatMult_MPIDense_MPIAIJ", 5640 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5642 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5643 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5645 "MatMatMultNumeric_MPIDense_MPIAIJ", 5646 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5648 PetscFunctionReturn(0); 5649 } 5650 EXTERN_C_END 5651 5652 #undef __FUNCT__ 5653 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5654 /*@ 5655 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5656 and "off-diagonal" part of the matrix in CSR format. 5657 5658 Collective on MPI_Comm 5659 5660 Input Parameters: 5661 + comm - MPI communicator 5662 . m - number of local rows (Cannot be PETSC_DECIDE) 5663 . n - This value should be the same as the local size used in creating the 5664 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5665 calculated if N is given) For square matrices n is almost always m. 5666 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5667 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5668 . i - row indices for "diagonal" portion of matrix 5669 . j - column indices 5670 . a - matrix values 5671 . oi - row indices for "off-diagonal" portion of matrix 5672 . oj - column indices 5673 - oa - matrix values 5674 5675 Output Parameter: 5676 . mat - the matrix 5677 5678 Level: advanced 5679 5680 Notes: 5681 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
5682 The user must free the arrays once the matrix has been destroyed and not before.
5683
5684 The i and j indices are 0 based
5685
5686 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5687
5688 This sets local rows and cannot be used to set off-processor values.
5689
5690 You cannot later use MatSetValues() to change values in this matrix.
5691
5692 .keywords: matrix, aij, compressed row, sparse, parallel
5693
5694 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5695 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5696 @*/
5697 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5698 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5699 {
5700 PetscErrorCode ierr;
5701 Mat_MPIAIJ *maij;
5702
5703 PetscFunctionBegin;
5704 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5705 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5706 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5707 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5708 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5709 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5710 maij = (Mat_MPIAIJ*) (*mat)->data;
5711 maij->donotstash = PETSC_TRUE;
5712 (*mat)->preallocated = PETSC_TRUE;
5713
5714 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5715 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5716
5717 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5718 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5719
5720 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5721 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5722 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5723 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5724
5725 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5726 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5727 PetscFunctionReturn(0);
5728 }
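/*
   A minimal usage sketch (the CSR arrays are hypothetical; as the code above
   suggests, i/j/a describe the "diagonal" block using local column indices
   0..n-1, while oi/oj/oa describe the "off-diagonal" block using global
   column indices):

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; all six arrays must remain valid until after MatDestroy(&A) ...
*/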
5729
5730 /*
5731 Special version for direct calls from Fortran
5732 */
5733 #include <petsc-private/fortranimpl.h>
5734
5735 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5736 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5737 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5738 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5739 #endif
5740
5741 /* Change these macros so they can be used in a void function */
5742 #undef CHKERRQ
5743 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5744 #undef SETERRQ2
5745 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5746 #undef SETERRQ3
5747 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5748 #undef SETERRQ
5749 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5750
5751 EXTERN_C_BEGIN
5752 #undef __FUNCT__
5753 #define __FUNCT__ "matsetvaluesmpiaij_"
5754 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5755 {
5756 Mat mat = *mmat;
5757 PetscInt m = *mm, n = *mn;
5758 InsertMode addv = *maddv;
5759 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5760 PetscScalar value;
5761 PetscErrorCode ierr;
5762
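/* The body below duplicates MatSetValues_MPIAIJ(); because this wrapper returns void so Fortran can call it directly, errors abort through the CHKERRABORT-based macros redefined above instead of returning an error code. */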
5763 MatCheckPreallocated(mat,1);
5764 if (mat->insertmode == NOT_SET_VALUES) {
5765 mat->insertmode = addv;
5766 }
5767 #if defined(PETSC_USE_DEBUG)
5768 else if (mat->insertmode != addv) {
5769 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5770 }
5771 #endif
5772 {
5773 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5774 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5775 PetscBool roworiented = aij->roworiented;
5776
5777 /* Some Variables required in the macro */
5778 Mat A = aij->A;
5779 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5780 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5781 MatScalar *aa = a->a;
5782 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5783 Mat B = aij->B;
5784 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5785 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5786 MatScalar *ba = b->a;
5787
5788 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5789 PetscInt nonew = a->nonew;
5790 MatScalar *ap1,*ap2;
5791
5792 PetscFunctionBegin;
5793 for (i=0; i<m; i++) {
5794 if (im[i] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5797 #endif
5798 if (im[i] >= rstart && im[i] < rend) {
5799 row = im[i] - rstart;
5800 lastcol1 = -1;
5801 rp1 = aj + ai[row];
5802 ap1 = aa + ai[row];
5803 rmax1 = aimax[row];
5804 nrow1 = ailen[row];
5805 low1 = 0;
5806 high1 = nrow1;
5807 lastcol2 = -1;
5808 rp2 = bj + bi[row];
5809 ap2 = ba + bi[row];
5810 rmax2 = bimax[row];
5811 nrow2 = bilen[row];
5812 low2 = 0;
5813 high2 = nrow2;
5814
5815 for (j=0; j<n; j++) {
5816 if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5817 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5818 if (in[j] >= cstart && in[j] < cend){
5819 col = in[j] - cstart;
5820 MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5821 } else if (in[j] < 0) continue;
5822 #if defined(PETSC_USE_DEBUG)
5823 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5824 #endif
5825 else {
5826 if (mat->was_assembled) {
5827 if (!aij->colmap) {
5828 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5829 }
5830 #if defined (PETSC_USE_CTABLE)
5831 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5832 col--;
5833 #else
5834 col = aij->colmap[in[j]] - 1;
5835 #endif
5836 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5837 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5838 col = in[j];
5839 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5840 B = aij->B;
5841 b = (Mat_SeqAIJ*)B->data;
5842 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5843 rp2 = bj + bi[row];
5844 ap2 = ba + bi[row];
5845 rmax2 = bimax[row];
5846 nrow2 = bilen[row];
5847 low2 = 0;
5848 high2 = nrow2;
5849 bm = aij->B->rmap->n;
5850 ba = b->a;
5851 }
5852 } else col = in[j];
5853 MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5854 }
5855 }
5856 } else {
5857 if (!aij->donotstash) {
5858 if (roworiented) {
5859 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5860 } else {
5861 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5862 }
5863 }
5864 }
5865 }}
5866 PetscFunctionReturnVoid();
5867 }
5868 EXTERN_C_END
5869
5870