1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto 
ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr 
= ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
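      /* Note on the copy loop below: within the contiguous chunk of values received from process 0, the entries of
         each local row appear in global column order, i.e. first the off-diagonal entries left of the diagonal block,
         then the diagonal-block entries, then the off-diagonal entries right of the diagonal block.  ld[i] records how
         many off-diagonal entries of local row i lie left of the diagonal block, so each iteration copies the tail of
         the previous row's off-diagonal part together with the current row's leading off-diagonal part into B, and
         then the current row's diagonal-block part into A. */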
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
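        /* Each column index is routed either to the diagonal block (locally owned columns, i.e. [cstart,cend)) or to
           the off-diagonal block.  Off-diagonal columns must be translated to local column indices of B through the
           colmap; if a new off-diagonal column shows up after the matrix has already been assembled (and B allows new
           nonzeros), the matrix is first disassembled so that B can be rebuilt with the new column. */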
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
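    Illustrative example (not part of the original source): on a process owning columns cstart = 0 through cend-1 = 1,
    a row whose sorted global column indices are {0, 1, 3} contributes local columns {0, 1} to the diagonal part
    (aj, giving ailen = 2 for that row) and the global column {3} to the off-diagonal part (bj, giving bilen = 1).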
639 */
640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
641 {
642   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
643   Mat A = aij->A; /* diagonal part of the matrix */
644   Mat B = aij->B; /* off-diagonal part of the matrix */
645   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
646   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
647   PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
648   PetscInt *ailen = a->ilen,*aj = a->j;
649   PetscInt *bilen = b->ilen,*bj = b->j;
650   PetscInt am = aij->A->rmap->n,j;
651   PetscInt diag_so_far = 0,dnz;
652   PetscInt offd_so_far = 0,onz;
653 
654   PetscFunctionBegin;
655   /* Iterate over all rows of the matrix */
656   for (j=0; j<am; j++) {
657     dnz = onz = 0;
658     /* Iterate over all non-zero columns of the current row */
659     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
660       /* If column is in the diagonal */
661       if (mat_j[col] >= cstart && mat_j[col] < cend) {
662         aj[diag_so_far++] = mat_j[col] - cstart;
663         dnz++;
664       } else { /* off-diagonal entries */
665         bj[offd_so_far++] = mat_j[col];
666         onz++;
667       }
668     }
669     ailen[j] = dnz;
670     bilen[j] = onz;
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
677     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
678     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
679     Also, mat->was_assembled has to be PETSC_FALSE; otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
680     would not be valid and the more general MatSetValues_MPIAIJ() has to be used.
681 */
682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
683 {
684   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
685   Mat A = aij->A; /* diagonal part of the matrix */
686   Mat B = aij->B; /* off-diagonal part of the matrix */
687   Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
688   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
689   Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
690   PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
691   PetscInt *ailen = a->ilen,*aj = a->j;
692   PetscInt *bilen = b->ilen,*bj = b->j;
693   PetscInt am = aij->A->rmap->n,j;
694   PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point.
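       That is, they hold the preallocated row offsets of the diagonal and off-diagonal parts; each row is written
       starting at these offsets even when some of the preallocated slots belong to entries that only arrive later
       from other processes.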
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
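  /* other_disassembled is the logical AND (computed with MPI_PROD in the Allreduce below) of mat->was_assembled over
     all processes: it is PETSC_TRUE only when no process has disassembled its off-diagonal part, so a process that is
     still assembled can detect that it must disassemble as well before reassembly. */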
PetscBool other_disassembled;
789   PetscScalar *val;
790 
791   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
792 
793   PetscFunctionBegin;
794   if (!aij->donotstash && !mat->nooffprocentries) {
795     while (1) {
796       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
797       if (!flg) break;
798 
799       for (i=0; i<n; ) {
800         /* Now identify the consecutive vals belonging to the same row */
801         for (j=i,rstart=row[j]; j<n; j++) {
802           if (row[j] != rstart) break;
803         }
804         if (j < n) ncols = j-i;
805         else ncols = n-i;
806         /* Now assemble all these values with a single function call */
807         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
808 
809         i = j;
810       }
811     }
812     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
813   }
814   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
815   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
816 
817   /* determine if any processor has disassembled; if so, we must
818      also disassemble ourselves, in order that we may reassemble. */
819   /*
820      if the nonzero structure of submatrix B cannot change then we know that
821      no processor disassembled, thus we can skip this stuff
822   */
823   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
824     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
825     if (mat->was_assembled && !other_disassembled) {
826       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
827     }
828   }
829   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
830     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
831   }
832   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
833   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
835 
836   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
837 
838   aij->rowvalues = 0;
839 
840   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
841   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
842 
843   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
844   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
845     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
846     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
852 {
853   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
858   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
863 {
864   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
865   PetscInt *lrows;
866   PetscInt r, len;
867   PetscBool cong;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
873   /* fix right hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar *bb;
877 
878     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
879     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
880     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
881     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
882     ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscBool lf; 1105 PetscMPIInt size; 1106 1107 PetscFunctionBegin; 1108 /* Easy test: symmetric diagonal block */ 1109 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1110 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1111 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1112 if (!*f) PetscFunctionReturn(0); 1113 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1115 if (size == 1) PetscFunctionReturn(0); 1116 1117 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
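       Each process extracts A(me,notme), its own rows restricted to the columns it does not own, and B(notme,me),
       the corresponding block of the other matrix, and checks that these two sequential submatrices are transposes
       of one another; together with the diagonal-block test above this covers every entry of the matrix.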
*/ 1118 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1121 for (i=0; i<first; i++) notme[i] = i; 1122 for (i=last; i<M; i++) notme[i-last+first] = i; 1123 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1124 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1125 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1126 Aoff = Aoffs[0]; 1127 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1128 Boff = Boffs[0]; 1129 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1130 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1131 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1132 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1133 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1134 ierr = PetscFree(notme);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* send it on its way */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 /* receive remote parts */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 This only works correctly for square matrices where the subblock A->A is the 1166 diagonal block 1167 */ 1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1169 { 1170 PetscErrorCode ierr; 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 1173 PetscFunctionBegin; 1174 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1175 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1181 { 1182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1187 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 #if defined(PETSC_USE_LOG) 1198 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1199 #endif 1200 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1202 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1203 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_CTABLE) 1205 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1206 #else 1207 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1208 #endif 1209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1210 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1212 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1213 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1214 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1215 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_ELEMENTAL) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1242 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1243 PetscErrorCode ierr; 1244 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1245 int fd; 1246 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1247 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1248 PetscScalar *column_values; 1249 PetscInt message_count,flowcontrolcount; 1250 FILE *file; 1251 1252 PetscFunctionBegin; 1253 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1255 nz = A->nz + B->nz; 1256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1257 if (!rank) { 1258 header[0] = MAT_FILE_CLASSID; 1259 header[1] = mat->rmap->N; 1260 header[2] = mat->cmap->N; 1261 1262 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 /* get largest number of rows any processor has */ 1265 rlen = mat->rmap->n; 1266 range = mat->rmap->range; 1267 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1268 } else { 1269 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 rlen = mat->rmap->n; 1271 } 1272 1273 /* 
load up the local row counts */
1274   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1275   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1276 
1277   /* store the row lengths to the file */
1278   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1279   if (!rank) {
1280     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1283       rlen = range[i+1] - range[i];
1284       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1290     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1291     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1292   }
1293   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1294 
1295   /* load up the local column indices */
1296   nzmax = nz; /* process 0 needs as much space as the largest amount of space any process needs */
1297   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1298   ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1299   cnt = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ((col = garray[B->j[j]]) > cstart) break;
1303       column_indices[cnt++] = col;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1306     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1307   }
1308   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1309 
1310   /* store the column indices to the file */
1311   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1312   if (!rank) {
1313     MPI_Status status;
1314     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1315     for (i=1; i<size; i++) {
1316       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1317       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1318       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1319       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1320       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1323   } else {
1324     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1325     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1326     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1327     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1328   }
1329   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1330 
1331   /* load up the local column values */
1332   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1333   cnt = 0;
1334   for (i=0; i<mat->rmap->n; i++) {
1335     for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1336 if (garray[B->j[j]] > cstart) break; 1337 column_values[cnt++] = B->a[j]; 1338 } 1339 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1340 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1341 } 1342 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1343 1344 /* store the column values to the file */ 1345 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1346 if (!rank) { 1347 MPI_Status status; 1348 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1349 for (i=1; i<size; i++) { 1350 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1351 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1352 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1353 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1360 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(column_values);CHKERRQ(ierr); 1364 1365 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1366 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #include <petscdraw.h> 1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 PetscErrorCode ierr; 1375 PetscMPIInt rank = aij->rank,size = aij->size; 1376 PetscBool isdraw,iascii,isbinary; 1377 PetscViewer sviewer; 1378 PetscViewerFormat format; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 if (iascii) { 1385 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1386 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1387 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1388 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1389 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 for (i=0; i<(PetscInt)size; i++) { 1391 nmax = PetscMax(nmax,nz[i]); 1392 nmin = PetscMin(nmin,nz[i]); 1393 navg += nz[i]; 1394 } 1395 ierr = PetscFree(nz);CHKERRQ(ierr); 1396 navg = navg/size; 1397 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1401 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1402 MatInfo info; 1403 
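      /* 'inodes' is used only as a yes/no flag below: MatInodeGetInodeSizes() returns the I-node size array of the
         diagonal block, and that pointer is NULL when the I-node routines are not in use. */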
PetscBool inodes; 1404 1405 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1406 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1407 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1409 if (!inodes) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1414 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1415 } 1416 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1423 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1426 PetscInt inodecount,inodelimit,*inodes; 1427 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1428 if (inodes) { 1429 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1430 } else { 1431 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1435 PetscFunctionReturn(0); 1436 } 1437 } else if (isbinary) { 1438 if (size == 1) { 1439 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1440 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (isdraw) { 1446 PetscDraw draw; 1447 PetscBool isnull; 1448 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1449 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1450 if (isnull) PetscFunctionReturn(0); 1451 } 1452 1453 { 1454 /* assemble the entire matrix onto first processor. 
*/ 1455 Mat A; 1456 Mat_SeqAIJ *Aloc; 1457 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1458 MatScalar *a; 1459 1460 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1461 if (!rank) { 1462 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1465 } 1466 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1467 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1468 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1469 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqAIJ*)aij->A->data; 1474 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; 1480 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1481 } 1482 aj = Aloc->j; 1483 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1484 1485 /* copy over the B part */ 1486 Aloc = (Mat_SeqAIJ*)aij->B->data; 1487 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1488 row = mat->rmap->rstart; 1489 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1490 ct = cols; 1491 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1492 for (i=0; i<m; i++) { 1493 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1494 row++; 1495 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1496 } 1497 ierr = PetscFree(ct);CHKERRQ(ierr); 1498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 /* 1501 Everyone has to call to draw the matrix since the graphics waits are 1502 synchronized across all processors that share the PetscDraw object 1503 */ 1504 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1507 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1508 } 1509 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1517 { 1518 PetscErrorCode ierr; 1519 PetscBool iascii,isdraw,issocket,isbinary; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1524 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1526 if (iascii || isdraw || isbinary || issocket) { 1527 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1533 { 1534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1535 PetscErrorCode ierr; 1536 Vec bb1 = 0; 1537 PetscBool hasop; 1538 
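  /* Note on the sweeps below: there is no true parallel SOR for MPIAIJ (the final else branch errors
     with "Parallel SOR not supported").  Each local sweep scatters the current solution into mat->lvec
     (the ghost values), forms bb1 = bb - B*lvec so that the off-process coupling moves to the right-hand
     side, and then applies the sequential SOR of the diagonal block mat->A to that modified right-hand
     side.  This is what PCSOR typically ends up calling, e.g. via -pc_type sor. */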
1539 PetscFunctionBegin; 1540 if (flag == SOR_APPLY_UPPER) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1547 } 1548 1549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_EISENSTAT) { 1599 Vec xx1; 1600 1601 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1603 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 if (!mat->diag) { 1607 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1608 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1609 } 1610 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1611 if (hasop) { 1612 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1613 } else { 1614 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1615 } 1616 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1617 1618 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1622 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1623 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1624 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1625 1626 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1627 1628 matin->factorerrortype = mat->A->factorerrortype; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1633 { 1634 Mat aA,aB,Aperm; 1635 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1636 PetscScalar *aa,*ba; 1637 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1638 PetscSF rowsf,sf; 1639 IS parcolp = NULL; 1640 PetscBool done; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1645 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1648 1649 /* Invert row permutation to find out where my rows should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1654 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 1657 /* Invert column permutation to find out where my columns should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1661 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1662 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1665 1666 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1667 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1668 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1669 1670 /* Find out where my gcols should go */ 1671 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1681 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 PetscInt row = rdest[i],rowner; 1685 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1686 for (j=ai[i]; j<ai[i+1]; j++) { 1687 PetscInt cowner,col = cdest[aj[j]]; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 for (j=bi[i]; j<bi[i+1]; j++) { 1693 PetscInt cowner,col = gcdest[bj[j]]; 1694 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 } 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1701 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1702 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1704 1705 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1706 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1707 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1708 for (i=0; i<m; i++) { 1709 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1710 PetscInt j0,rowlen; 1711 rowlen = ai[i+1] - ai[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 rowlen = bi[i+1] - bi[i]; 1717 for (j0=j=0; j<rowlen; j0=j) { 1718 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1719 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 } 1722 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1725 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1726 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1727 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1728 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1729 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1730 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1731 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1732 *B = Aperm; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1743 if (ghosts) *ghosts = aij->garray; 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1759 isend[3] = info->memory; isend[4] = info->mallocs; 1760 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = 
isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } 1788 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1789 info->fill_ratio_needed = 0; 1790 info->factor_mallocs = 0; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 MatCheckPreallocated(A,1); 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 MatCheckPreallocated(A,1); 1814 a->roworiented = flg; 1815 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 case MAT_SPD: 1826 A->spd_set = PETSC_TRUE; 1827 A->spd = flg; 1828 if (flg) { 1829 A->symmetric = PETSC_TRUE; 1830 A->structurally_symmetric = PETSC_TRUE; 1831 A->symmetric_set = PETSC_TRUE; 1832 A->structurally_symmetric_set = PETSC_TRUE; 1833 } 1834 break; 1835 case MAT_SYMMETRIC: 1836 MatCheckPreallocated(A,1); 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_STRUCTURALLY_SYMMETRIC: 1840 MatCheckPreallocated(A,1); 1841 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1842 break; 1843 case MAT_HERMITIAN: 1844 MatCheckPreallocated(A,1); 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 case MAT_SYMMETRY_ETERNAL: 1848 MatCheckPreallocated(A,1); 1849 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1850 break; 1851 case MAT_SUBMAT_SINGLEIS: 1852 A->submat_singleis = flg; 1853 break; 1854 case MAT_STRUCTURE_ONLY: 1855 /* The option is handled directly by MatSetOption() */ 1856 break; 1857 default: 1858 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1864 { 1865 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1866 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1867 PetscErrorCode ierr; 1868 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1869 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1870 PetscInt *cmap,*idx_p; 1871 1872 PetscFunctionBegin; 1873 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1874 mat->getrowactive = PETSC_TRUE; 1875 1876 if (!mat->rowvalues && 
(idx || v)) { 1877 /* 1878 allocate enough space to hold information from the longest row. 1879 */ 1880 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1881 PetscInt max = 1,tmp; 1882 for (i=0; i<matin->rmap->n; i++) { 1883 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1884 if (max < tmp) max = tmp; 1885 } 1886 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1887 } 1888 1889 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1890 lrow = row - rstart; 1891 1892 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1893 if (!v) {pvA = 0; pvB = 0;} 1894 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1895 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1896 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1897 nztot = nzA + nzB; 1898 1899 cmap = mat->garray; 1900 if (v || idx) { 1901 if (nztot) { 1902 /* Sort by increasing column numbers, assuming A and B already sorted */ 1903 PetscInt imark = -1; 1904 if (v) { 1905 *v = v_p = mat->rowvalues; 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1908 else break; 1909 } 1910 imark = i; 1911 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1912 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1913 } 1914 if (idx) { 1915 *idx = idx_p = mat->rowindices; 1916 if (imark > -1) { 1917 for (i=0; i<imark; i++) { 1918 idx_p[i] = cmap[cworkB[i]]; 1919 } 1920 } else { 1921 for (i=0; i<nzB; i++) { 1922 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1923 else break; 1924 } 1925 imark = i; 1926 } 1927 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1928 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1929 } 1930 } else { 1931 if (idx) *idx = 0; 1932 if (v) *v = 0; 1933 } 1934 } 1935 *nz = nztot; 1936 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1937 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1942 { 1943 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1944 1945 PetscFunctionBegin; 1946 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1947 aij->getrowactive = PETSC_FALSE; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1952 { 1953 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1954 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1955 PetscErrorCode ierr; 1956 PetscInt i,j,cstart = mat->cmap->rstart; 1957 PetscReal sum = 0.0; 1958 MatScalar *v; 1959 1960 PetscFunctionBegin; 1961 if (aij->size == 1) { 1962 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1963 } else { 1964 if (type == NORM_FROBENIUS) { 1965 v = amat->a; 1966 for (i=0; i<amat->nz; i++) { 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 } 1969 v = bmat->a; 1970 for (i=0; i<bmat->nz; i++) { 1971 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1972 } 1973 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1974 *norm = PetscSqrtReal(*norm); 1975 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1976 } else if (type == NORM_1) { /* max column norm */ 1977 PetscReal *tmp,*tmp2; 1978 PetscInt *jj,*garray = aij->garray; 1979 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1980 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1981 *norm = 0.0; 1982 v = amat->a; jj = amat->j; 1983 for (j=0; j<amat->nz; j++) { 1984 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1985 } 1986 v = bmat->a; jj = bmat->j; 1987 for (j=0; j<bmat->nz; j++) { 1988 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1989 } 1990 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1991 for (j=0; j<mat->cmap->N; j++) { 1992 if (tmp2[j] > *norm) *norm = tmp2[j]; 1993 } 1994 ierr = PetscFree(tmp);CHKERRQ(ierr); 1995 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1996 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1997 } else if (type == NORM_INFINITY) { /* max row norm */ 1998 PetscReal ntemp = 0.0; 1999 for (j=0; j<aij->A->rmap->n; j++) { 2000 v = amat->a + amat->i[j]; 2001 sum = 0.0; 2002 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 v = bmat->a + bmat->i[j]; 2006 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 if (sum > ntemp) ntemp = sum; 2010 } 2011 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2012 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2013 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2019 { 2020 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2021 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2022 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2023 PetscErrorCode ierr; 2024 Mat B,A_diag,*B_diag; 2025 MatScalar *array; 2026 2027 PetscFunctionBegin; 2028 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2029 ai = Aloc->i; aj = Aloc->j; 2030 bi = Bloc->i; bj = Bloc->j; 2031 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2032 PetscInt *d_nnz,*g_nnz,*o_nnz; 2033 PetscSFNode *oloc; 2034 PETSC_UNUSED PetscSF sf; 2035 2036 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2037 /* compute d_nnz for preallocation */ 2038 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++) { 2040 d_nnz[aj[i]]++; 2041 } 2042 /* compute local off-diagonal contributions */ 2043 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2044 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2045 /* map those to global */ 2046 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2047 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2048 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2049 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2051 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2052 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2053 2054 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2055 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2056 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2057 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2058 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2059 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2060 } else { 2061 B = *matout; 2062 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2063 } 2064 2065 b = (Mat_MPIAIJ*)B->data; 2066 A_diag = a->A; 2067 B_diag = &b->A; 2068 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2069 A_diag_ncol = A_diag->cmap->N; 2070 B_diag_ilen = sub_B_diag->ilen; 2071 B_diag_i = sub_B_diag->i; 2072 2073 /* Set ilen for diagonal of B */ 2074 for (i=0; i<A_diag_ncol; i++) { 2075 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2076 } 2077 2078 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2079 very quickly (=without using MatSetValues), because all writes are local. */ 2080 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2081 2082 /* copy over the B part */ 2083 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2084 array = Bloc->a; 2085 row = A->rmap->rstart; 2086 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2087 cols_tmp = cols; 2088 for (i=0; i<mb; i++) { 2089 ncol = bi[i+1]-bi[i]; 2090 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2091 row++; 2092 array += ncol; cols_tmp += ncol; 2093 } 2094 ierr = PetscFree(cols);CHKERRQ(ierr); 2095 2096 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2098 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2099 *matout = B; 2100 } else { 2101 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2102 } 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2107 { 2108 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2109 Mat a = aij->A,b = aij->B; 2110 PetscErrorCode ierr; 2111 PetscInt s1,s2,s3; 2112 2113 PetscFunctionBegin; 2114 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2115 if (rr) { 2116 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2117 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2118 /* Overlap communication with computation. 
*/ 2119 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 } 2121 if (ll) { 2122 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2123 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2124 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2125 } 2126 /* scale the diagonal block */ 2127 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2128 2129 if (rr) { 2130 /* Do a scatter end and then right scale the off-diagonal block */ 2131 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2148 { 2149 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2150 Mat a,b,c,d; 2151 PetscBool flg; 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 a = matA->A; b = matA->B; 2156 c = matB->A; d = matB->B; 2157 2158 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2159 if (flg) { 2160 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2161 } 2162 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2167 { 2168 PetscErrorCode ierr; 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2171 2172 PetscFunctionBegin; 2173 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2174 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2175 /* because of the column compression in the off-processor part of the matrix a->B, 2176 the number of columns in a->B and b->B may be different, hence we cannot call 2177 the MatCopy() directly on the two parts. If need be, we can provide a more 2178 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2179 then copying the submatrices */ 2180 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2181 } else { 2182 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2183 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2184 } 2185 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2190 { 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 /* 2199 Computes the number of nonzeros per row needed for preallocation when X and Y 2200 have different nonzero structure. 
2201 */ 2202 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2203 { 2204 PetscInt i,j,k,nzx,nzy; 2205 2206 PetscFunctionBegin; 2207 /* Set the number of nonzeros in the new matrix */ 2208 for (i=0; i<m; i++) { 2209 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2210 nzx = xi[i+1] - xi[i]; 2211 nzy = yi[i+1] - yi[i]; 2212 nnz[i] = 0; 2213 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2214 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2215 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2216 nnz[i]++; 2217 } 2218 for (; k<nzy; k++) nnz[i]++; 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 2223 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2224 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2225 { 2226 PetscErrorCode ierr; 2227 PetscInt m = Y->rmap->N; 2228 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2229 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2230 2231 PetscFunctionBegin; 2232 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2240 PetscBLASInt bnz,one=1; 2241 Mat_SeqAIJ *x,*y; 2242 2243 PetscFunctionBegin; 2244 if (str == SAME_NONZERO_PATTERN) { 2245 PetscScalar alpha = a; 2246 x = (Mat_SeqAIJ*)xx->A->data; 2247 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2248 y = (Mat_SeqAIJ*)yy->A->data; 2249 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2250 x = (Mat_SeqAIJ*)xx->B->data; 2251 y = (Mat_SeqAIJ*)yy->B->data; 2252 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2253 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2254 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2255 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2256 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2257 } else { 2258 Mat B; 2259 PetscInt *nnz_d,*nnz_o; 2260 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2261 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2262 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2263 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2264 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2265 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2266 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2267 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2268 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2269 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2270 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2271 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2272 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2273 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2274 } 2275 PetscFunctionReturn(0); 2276 } 2277 2278 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2279 2280 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2281 { 2282 #if defined(PETSC_USE_COMPLEX) 2283 PetscErrorCode ierr; 2284 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2285 2286 PetscFunctionBegin; 2287 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2288 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2289 #else 2290 PetscFunctionBegin; 2291 #endif 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2302 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 2311 PetscFunctionBegin; 2312 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2313 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2318 { 2319 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2320 PetscErrorCode ierr; 2321 PetscInt i,*idxb = 0; 2322 PetscScalar *va,*vb; 2323 Vec vtmp; 2324 2325 PetscFunctionBegin; 2326 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2327 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2328 if (idx) { 2329 for (i=0; i<A->rmap->n; i++) { 2330 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2331 } 2332 } 2333 2334 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2335 if (idx) { 2336 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2337 } 2338 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2339 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2340 2341 for (i=0; i<A->rmap->n; i++) { 2342 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2343 va[i] = vb[i]; 2344 if (idx) idx[i] = a->garray[idxb[i]]; 2345 } 2346 } 2347 2348 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2349 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2350 ierr = PetscFree(idxb);CHKERRQ(ierr); 2351 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2356 { 2357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2358 PetscErrorCode ierr; 2359 PetscInt i,*idxb = 0; 2360 PetscScalar *va,*vb; 2361 Vec vtmp; 2362 2363 PetscFunctionBegin; 2364 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2365 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2366 if (idx) { 2367 for (i=0; i<A->rmap->n; i++) { 2368 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2369 } 2370 } 2371 2372 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2373 if (idx) { 2374 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2375 } 2376 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2377 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2378 2379 for (i=0; i<A->rmap->n; i++) { 2380 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2381 va[i] = vb[i]; 2382 if (idx) idx[i] = a->garray[idxb[i]]; 2383 } 2384 } 2385 2386 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2387 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2388 ierr = PetscFree(idxb);CHKERRQ(ierr); 2389 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2396 PetscInt n = A->rmap->n; 2397 PetscInt cstart = A->cmap->rstart; 2398 PetscInt *cmap = mat->garray; 2399 PetscInt *diagIdx, *offdiagIdx; 2400 Vec diagV, offdiagV; 2401 PetscScalar *a, *diagA, *offdiagA; 2402 PetscInt r; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406
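  /* The row minima of the diagonal block (mat->A) and of the off-diagonal block (mat->B) are computed
     separately into local work vectors and then merged row by row; indices that come from the
     off-diagonal block are mapped back to global column numbering through mat->garray (aliased as cmap above). */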
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2407 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2408 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2409 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2410 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2411 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2412 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2413 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2414 for (r = 0; r < n; ++r) { 2415 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 idx[r] = cstart + diagIdx[r]; 2418 } else { 2419 a[r] = offdiagA[r]; 2420 idx[r] = cmap[offdiagIdx[r]]; 2421 } 2422 } 2423 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2424 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2426 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2427 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2428 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for (r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2472 { 2473 PetscErrorCode ierr; 2474 Mat *dummy; 2475 2476 PetscFunctionBegin; 2477 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2478 *newmat = *dummy; 2479 ierr = PetscFree(dummy);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2484 { 2485 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2490 A->factorerrortype = a->A->factorerrortype; 2491 PetscFunctionReturn(0); 2492 } 2493 2494 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2495 { 2496 PetscErrorCode ierr; 2497 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2498 2499 PetscFunctionBegin; 2500
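  /* Fill both local blocks (aij->A and aij->B) with random entries, then re-assemble so the
     parallel matrix is left in a consistent assembled state. */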
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2501 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2502 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2503 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2508 { 2509 PetscFunctionBegin; 2510 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2511 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2512 PetscFunctionReturn(0); 2513 } 2514 2515 /*@ 2516 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2517 2518 Collective on Mat 2519 2520 Input Parameters: 2521 + A - the matrix 2522 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2523 2524 Level: advanced 2525 2526 @*/ 2527 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2528 { 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2537 { 2538 PetscErrorCode ierr; 2539 PetscBool sc = PETSC_FALSE,flg; 2540 2541 PetscFunctionBegin; 2542 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2543 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2544 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2545 if (flg) { 2546 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2547 } 2548 ierr = PetscOptionsTail();CHKERRQ(ierr); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2553 { 2554 PetscErrorCode ierr; 2555 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2556 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2557 2558 PetscFunctionBegin; 2559 if (!Y->preallocated) { 2560 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2561 } else if (!aij->nz) { 2562 PetscInt nonew = aij->nonew; 2563 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2564 aij->nonew = nonew; 2565 } 2566 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2571 { 2572 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2573 PetscErrorCode ierr; 2574 2575 PetscFunctionBegin; 2576 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2577 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2578 if (d) { 2579 PetscInt rstart; 2580 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2581 *d += rstart; 2582 2583 } 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /* -------------------------------------------------------------------*/ 2598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2599 MatGetRow_MPIAIJ, 2600 
MatRestoreRow_MPIAIJ, 2601 MatMult_MPIAIJ, 2602 /* 4*/ MatMultAdd_MPIAIJ, 2603 MatMultTranspose_MPIAIJ, 2604 MatMultTransposeAdd_MPIAIJ, 2605 0, 2606 0, 2607 0, 2608 /*10*/ 0, 2609 0, 2610 0, 2611 MatSOR_MPIAIJ, 2612 MatTranspose_MPIAIJ, 2613 /*15*/ MatGetInfo_MPIAIJ, 2614 MatEqual_MPIAIJ, 2615 MatGetDiagonal_MPIAIJ, 2616 MatDiagonalScale_MPIAIJ, 2617 MatNorm_MPIAIJ, 2618 /*20*/ MatAssemblyBegin_MPIAIJ, 2619 MatAssemblyEnd_MPIAIJ, 2620 MatSetOption_MPIAIJ, 2621 MatZeroEntries_MPIAIJ, 2622 /*24*/ MatZeroRows_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*29*/ MatSetUp_MPIAIJ, 2628 0, 2629 0, 2630 MatGetDiagonalBlock_MPIAIJ, 2631 0, 2632 /*34*/ MatDuplicate_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*39*/ MatAXPY_MPIAIJ, 2638 MatCreateSubMatrices_MPIAIJ, 2639 MatIncreaseOverlap_MPIAIJ, 2640 MatGetValues_MPIAIJ, 2641 MatCopy_MPIAIJ, 2642 /*44*/ MatGetRowMax_MPIAIJ, 2643 MatScale_MPIAIJ, 2644 MatShift_MPIAIJ, 2645 MatDiagonalSet_MPIAIJ, 2646 MatZeroRowsColumns_MPIAIJ, 2647 /*49*/ MatSetRandom_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /*54*/ MatFDColoringCreate_MPIXAIJ, 2653 0, 2654 MatSetUnfactored_MPIAIJ, 2655 MatPermute_MPIAIJ, 2656 0, 2657 /*59*/ MatCreateSubMatrix_MPIAIJ, 2658 MatDestroy_MPIAIJ, 2659 MatView_MPIAIJ, 2660 0, 2661 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2662 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2663 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2668 MatGetRowMinAbs_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*75*/ MatFDColoringApply_AIJ, 2674 MatSetFromOptions_MPIAIJ, 2675 0, 2676 0, 2677 MatFindZeroDiagonals_MPIAIJ, 2678 /*80*/ 0, 2679 0, 2680 0, 2681 /*83*/ MatLoad_MPIAIJ, 2682 MatIsSymmetric_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2688 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2689 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2690 MatPtAP_MPIAIJ_MPIAIJ, 2691 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2692 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /*99*/ 0, 2698 0, 2699 0, 2700 MatConjugate_MPIAIJ, 2701 0, 2702 /*104*/MatSetValuesRow_MPIAIJ, 2703 MatRealPart_MPIAIJ, 2704 MatImaginaryPart_MPIAIJ, 2705 0, 2706 0, 2707 /*109*/0, 2708 0, 2709 MatGetRowMin_MPIAIJ, 2710 0, 2711 MatMissingDiagonal_MPIAIJ, 2712 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2713 0, 2714 MatGetGhosts_MPIAIJ, 2715 0, 2716 0, 2717 /*119*/0, 2718 0, 2719 0, 2720 0, 2721 MatGetMultiProcBlock_MPIAIJ, 2722 /*124*/MatFindNonzeroRows_MPIAIJ, 2723 MatGetColumnNorms_MPIAIJ, 2724 MatInvertBlockDiagonal_MPIAIJ, 2725 MatInvertVariableBlockDiagonal_MPIAIJ, 2726 MatCreateSubMatricesMPI_MPIAIJ, 2727 /*129*/0, 2728 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2729 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2730 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2731 0, 2732 /*134*/0, 2733 0, 2734 MatRARt_MPIAIJ_MPIAIJ, 2735 0, 2736 0, 2737 /*139*/MatSetBlockSizes_MPIAIJ, 2738 0, 2739 0, 2740 MatFDColoringSetUp_MPIXAIJ, 2741 MatFindOffBlockDiagonalEntries_MPIAIJ, 2742 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2743 }; 2744 2745 /* ----------------------------------------------------------------------------------------*/ 2746 2747 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2748 { 2749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2754 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2759 { 2760 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2761 PetscErrorCode ierr; 2762 2763 PetscFunctionBegin; 2764 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2765 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2770 { 2771 Mat_MPIAIJ *b; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 #if defined(PETSC_USE_CTABLE) 2780 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2781 #else 2782 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2783 #endif 2784 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2785 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2786 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2787 2788 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2789 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2790 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2791 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2792 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2795 2796 if (!B->preallocated) { 2797 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2798 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2799 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2800 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2801 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2802 } 2803 2804 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2805 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2806 B->preallocated = PETSC_TRUE; 2807 B->was_assembled = PETSC_FALSE; 2808 B->assembled = PETSC_FALSE;; 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2813 { 2814 Mat_MPIAIJ *b; 2815 PetscErrorCode ierr; 2816 2817 PetscFunctionBegin; 2818 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 b = (Mat_MPIAIJ*)B->data; 2822 2823 #if defined(PETSC_USE_CTABLE) 2824 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2825 #else 2826 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2827 #endif 2828 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2829 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2830 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2831 2832 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2833 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2834 B->preallocated = PETSC_TRUE; 2835 B->was_assembled = PETSC_FALSE; 2836 B->assembled = PETSC_FALSE; 2837 PetscFunctionReturn(0); 2838 } 2839 2840 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2841 { 2842 Mat mat; 2843 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2844 PetscErrorCode ierr; 2845 2846 PetscFunctionBegin; 2847 *newmat = 0; 2848 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2849 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2850 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2851 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 
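  /* Copy the function table and the basic flags from the original matrix; the row/column layouts are
     shared by reference below, while colmap, garray, lvec, Mvctx and the A/B blocks are copied or
     duplicated so that the new matrix is immediately usable. */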
2852 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2853 a = (Mat_MPIAIJ*)mat->data; 2854 2855 mat->factortype = matin->factortype; 2856 mat->assembled = PETSC_TRUE; 2857 mat->insertmode = NOT_SET_VALUES; 2858 mat->preallocated = PETSC_TRUE; 2859 2860 a->size = oldmat->size; 2861 a->rank = oldmat->rank; 2862 a->donotstash = oldmat->donotstash; 2863 a->roworiented = oldmat->roworiented; 2864 a->rowindices = 0; 2865 a->rowvalues = 0; 2866 a->getrowactive = PETSC_FALSE; 2867 2868 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2869 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2870 2871 if (oldmat->colmap) { 2872 #if defined(PETSC_USE_CTABLE) 2873 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2874 #else 2875 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2876 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2877 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2878 #endif 2879 } else a->colmap = 0; 2880 if (oldmat->garray) { 2881 PetscInt len; 2882 len = oldmat->B->cmap->n; 2883 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2885 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2886 } else a->garray = 0; 2887 2888 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2890 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2891 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2892 2893 if (oldmat->Mvctx_mpi1) { 2894 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2895 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2896 } 2897 2898 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2900 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2902 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2903 *newmat = mat; 2904 PetscFunctionReturn(0); 2905 } 2906 2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2908 { 2909 PetscScalar *vals,*svals; 2910 MPI_Comm comm; 2911 PetscErrorCode ierr; 2912 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2913 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2914 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2915 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2916 PetscInt cend,cstart,n,*rowners; 2917 int fd; 2918 PetscInt bs = newMat->rmap->bs; 2919 2920 PetscFunctionBegin; 2921 /* force binary viewer to load .info file if it has not yet done so */ 2922 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2923 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2924 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2925 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2926 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2927 if (!rank) { 2928 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2929 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2930 if 
(header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2931 } 2932 2933 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2934 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2935 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2936 if (bs < 0) bs = 1; 2937 2938 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2939 M = header[1]; N = header[2]; 2940 2941 /* If global sizes are set, check if they are consistent with that given in the file */ 2942 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2943 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2944 2945 /* determine ownership of all (block) rows */ 2946 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2947 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2948 else m = newMat->rmap->n; /* Set by user */ 2949 2950 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2951 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2952 2953 /* First process needs enough room for process with most rows */ 2954 if (!rank) { 2955 mmax = rowners[1]; 2956 for (i=2; i<=size; i++) { 2957 mmax = PetscMax(mmax, rowners[i]); 2958 } 2959 } else mmax = -1; /* unused, but compilers complain */ 2960 2961 rowners[0] = 0; 2962 for (i=2; i<=size; i++) { 2963 rowners[i] += rowners[i-1]; 2964 } 2965 rstart = rowners[rank]; 2966 rend = rowners[rank+1]; 2967 2968 /* distribute row lengths to all processors */ 2969 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2970 if (!rank) { 2971 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2972 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2973 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2974 for (j=0; j<m; j++) { 2975 procsnz[0] += ourlens[j]; 2976 } 2977 for (i=1; i<size; i++) { 2978 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2979 /* calculate the number of nonzeros on each processor */ 2980 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2981 procsnz[i] += rowlengths[j]; 2982 } 2983 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2984 } 2985 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2986 } else { 2987 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2988 } 2989 2990 if (!rank) { 2991 /* determine max buffer needed and allocate it */ 2992 maxnz = 0; 2993 for (i=0; i<size; i++) { 2994 maxnz = PetscMax(maxnz,procsnz[i]); 2995 } 2996 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2997 2998 /* read in my part of the matrix column indices */ 2999 nz = procsnz[0]; 3000 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3001 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3002 3003 /* read in every one elses and ship off */ 3004 for (i=1; i<size; i++) { 3005 nz = procsnz[i]; 3006 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3007 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3008 } 3009 ierr = 
PetscFree(cols);CHKERRQ(ierr); 3010 } else { 3011 /* determine buffer space needed for message */ 3012 nz = 0; 3013 for (i=0; i<m; i++) { 3014 nz += ourlens[i]; 3015 } 3016 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3017 3018 /* receive message of column indices*/ 3019 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3020 } 3021 3022 /* determine column ownership if matrix is not square */ 3023 if (N != M) { 3024 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3025 else n = newMat->cmap->n; 3026 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3027 cstart = cend - n; 3028 } else { 3029 cstart = rstart; 3030 cend = rend; 3031 n = cend - cstart; 3032 } 3033 3034 /* loop over local rows, determining number of off diagonal entries */ 3035 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3036 jj = 0; 3037 for (i=0; i<m; i++) { 3038 for (j=0; j<ourlens[i]; j++) { 3039 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3040 jj++; 3041 } 3042 } 3043 3044 for (i=0; i<m; i++) { 3045 ourlens[i] -= offlens[i]; 3046 } 3047 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3048 3049 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3050 3051 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3052 3053 for (i=0; i<m; i++) { 3054 ourlens[i] += offlens[i]; 3055 } 3056 3057 if (!rank) { 3058 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3059 3060 /* read in my part of the matrix numerical values */ 3061 nz = procsnz[0]; 3062 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3063 3064 /* insert into matrix */ 3065 jj = rstart; 3066 smycols = mycols; 3067 svals = vals; 3068 for (i=0; i<m; i++) { 3069 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3070 smycols += ourlens[i]; 3071 svals += ourlens[i]; 3072 jj++; 3073 } 3074 3075 /* read in other processors and ship out */ 3076 for (i=1; i<size; i++) { 3077 nz = procsnz[i]; 3078 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3079 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3080 } 3081 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3082 } else { 3083 /* receive numeric values */ 3084 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3085 3086 /* receive message of values*/ 3087 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3088 3089 /* insert into matrix */ 3090 jj = rstart; 3091 smycols = mycols; 3092 svals = vals; 3093 for (i=0; i<m; i++) { 3094 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3095 smycols += ourlens[i]; 3096 svals += ourlens[i]; 3097 jj++; 3098 } 3099 } 3100 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3101 ierr = PetscFree(vals);CHKERRQ(ierr); 3102 ierr = PetscFree(mycols);CHKERRQ(ierr); 3103 ierr = PetscFree(rowners);CHKERRQ(ierr); 3104 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3106 PetscFunctionReturn(0); 3107 } 3108 3109 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3110 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3111 { 3112 PetscErrorCode ierr; 3113 IS iscol_local; 3114 PetscBool isstride; 3115 PetscMPIInt lisstride=0,gisstride; 3116 3117 PetscFunctionBegin; 3118 /* check if we are grabbing all columns*/ 3119 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3120 3121 if (isstride) { 3122 PetscInt start,len,mstart,mlen; 3123 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3124 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3125 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3126 if (mstart == start && mlen-mstart == len) lisstride = 1; 3127 } 3128 3129 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3130 if (gisstride) { 3131 PetscInt N; 3132 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3133 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3134 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3135 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3136 } else { 3137 PetscInt cbs; 3138 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3139 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3140 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3141 } 3142 3143 *isseq = iscol_local; 3144 PetscFunctionReturn(0); 3145 } 3146 3147 /* 3148 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3149 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3150 3151 Input Parameters: 3152 mat - matrix 3153 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3154 i.e., mat->rstart <= isrow[i] < mat->rend 3155 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3156 i.e., mat->cstart <= iscol[i] < mat->cend 3157 Output Parameter: 3158 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3159 iscol_o - sequential column index set for retrieving mat->B 3160 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3161 */ 3162 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3163 { 3164 PetscErrorCode ierr; 3165 Vec x,cmap; 3166 const PetscInt *is_idx; 3167 PetscScalar *xarray,*cmaparray; 3168 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3169 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3170 Mat B=a->B; 3171 Vec lvec=a->lvec,lcmap; 3172 PetscInt i,cstart,cend,Bn=B->cmap->N; 3173 MPI_Comm comm; 3174 VecScatter Mvctx=a->Mvctx; 3175 3176 PetscFunctionBegin; 3177 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3178 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3179 3180 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3181 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3182 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3183 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3184 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3185 3186 /* Get start indices */ 3187 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3188 isstart -= ncols; 3189 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3190 3191 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3192 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3193 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3194 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3195 for (i=0; i<ncols; i++) { 3196 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3197 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3198 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3199 } 3200 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3201 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3202 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3203 3204 /* Get iscol_d */ 3205 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3206 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3207 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3208 3209 /* Get isrow_d */ 3210 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3211 rstart = mat->rmap->rstart; 3212 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3213 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3214 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3215 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3216 3217 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3218 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3219 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3220 3221 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3222 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3223 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3224 3225 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3226 3227 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3228 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3229 3230 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3231 /* off-process column indices */ 3232 count = 0; 3233 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3234 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3235 3236 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3237 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3238 for (i=0; i<Bn; i++) { 3239 if (PetscRealPart(xarray[i]) > -1.0) { 3240 idx[count] = i; /* local column index in off-diagonal part B */ 3241 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3242 count++; 3243 } 3244 } 3245 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3246 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3247 3248 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3249 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3250 3251 ierr = PetscFree(idx);CHKERRQ(ierr); 3252 *garray = cmap1; 3253 3254 ierr = VecDestroy(&x);CHKERRQ(ierr); 3255 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3256 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 3260 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3261 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3262 { 3263 PetscErrorCode ierr; 3264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3265 Mat M = NULL; 3266 MPI_Comm comm; 3267 IS iscol_d,isrow_d,iscol_o; 3268 Mat Asub = NULL,Bsub = NULL; 3269 PetscInt n; 3270 3271 PetscFunctionBegin; 3272 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3273 3274 if (call == MAT_REUSE_MATRIX) { 3275 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3276 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3277 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3278 3279 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3280 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3281 3282 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3283 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3284 3285 /* Update diagonal and off-diagonal portions of submat */ 3286 asub = (Mat_MPIAIJ*)(*submat)->data; 3287 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3288 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3289 if (n) { 3290 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3291 } 3292 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 3295 } else { /* call == MAT_INITIAL_MATRIX) */ 3296 const PetscInt *garray; 3297 PetscInt BsubN; 3298 3299 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3300 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3301 3302 /* Create local submatrices Asub and Bsub */ 3303 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3304 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3305 3306 /* Create submatrix M */ 3307 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3308 3309 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3310 asub = (Mat_MPIAIJ*)M->data; 3311 3312 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3313 n = asub->B->cmap->N; 3314 if (BsubN > n) { 3315 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3316 const PetscInt *idx; 3317 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3318 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3319 3320 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3321 j = 0; 3322 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3323 for (i=0; i<n; i++) { 3324 if (j >= BsubN) break; 3325 while (subgarray[i] > garray[j]) j++; 3326 3327 if (subgarray[i] == garray[j]) { 3328 idx_new[i] = idx[j++]; 3329 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3330 } 3331 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3332 3333 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3334 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3335 3336 } else if (BsubN < n) { 3337 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3338 } 3339 3340 ierr = PetscFree(garray);CHKERRQ(ierr); 3341 *submat = M; 3342 3343 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3344 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3345 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3346 3347 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3348 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3349 3350 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3351 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3352 } 3353 PetscFunctionReturn(0); 3354 } 3355 3356 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3357 { 3358 PetscErrorCode ierr; 3359 IS iscol_local=NULL,isrow_d; 3360 PetscInt csize; 3361 PetscInt n,i,j,start,end; 3362 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3363 MPI_Comm comm; 3364 3365 PetscFunctionBegin; 3366 /* If isrow has same processor distribution as mat, 3367 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3368 if (call == MAT_REUSE_MATRIX) { 3369 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3370 if (isrow_d) { 3371 sameRowDist = PETSC_TRUE; 3372 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3373 } else { 3374 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3375 if (iscol_local) { 3376 sameRowDist = PETSC_TRUE; 3377 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3378 } 3379 } 3380 } else { 3381 /* Check if isrow has same processor distribution as mat */ 3382 sameDist[0] 
= PETSC_FALSE; 3383 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3384 if (!n) { 3385 sameDist[0] = PETSC_TRUE; 3386 } else { 3387 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3388 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3389 if (i >= start && j < end) { 3390 sameDist[0] = PETSC_TRUE; 3391 } 3392 } 3393 3394 /* Check if iscol has same processor distribution as mat */ 3395 sameDist[1] = PETSC_FALSE; 3396 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3397 if (!n) { 3398 sameDist[1] = PETSC_TRUE; 3399 } else { 3400 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3401 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3402 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3403 } 3404 3405 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3406 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3407 sameRowDist = tsameDist[0]; 3408 } 3409 3410 if (sameRowDist) { 3411 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3412 /* isrow and iscol have same processor distribution as mat */ 3413 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3414 PetscFunctionReturn(0); 3415 } else { /* sameRowDist */ 3416 /* isrow has same processor distribution as mat */ 3417 if (call == MAT_INITIAL_MATRIX) { 3418 PetscBool sorted; 3419 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3420 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3421 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3422 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3423 3424 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3425 if (sorted) { 3426 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3427 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3428 PetscFunctionReturn(0); 3429 } 3430 } else { /* call == MAT_REUSE_MATRIX */ 3431 IS iscol_sub; 3432 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3433 if (iscol_sub) { 3434 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 } 3438 } 3439 } 3440 3441 /* General case: iscol -> iscol_local which has global size of iscol */ 3442 if (call == MAT_REUSE_MATRIX) { 3443 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3444 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3445 } else { 3446 if (!iscol_local) { 3447 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3448 } 3449 } 3450 3451 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3452 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3453 3454 if (call == MAT_INITIAL_MATRIX) { 3455 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3456 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3457 } 3458 PetscFunctionReturn(0); 3459 } 3460 3461 /*@C 3462 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3463 and "off-diagonal" part of the matrix in CSR format. 3464 3465 Collective on MPI_Comm 3466 3467 Input Parameters: 3468 + comm - MPI communicator 3469 . 
A - "diagonal" portion of matrix 3470 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3471 - garray - global index of B columns 3472 3473 Output Parameter: 3474 . mat - the matrix, with input A as its local diagonal matrix 3475 Level: advanced 3476 3477 Notes: 3478 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3479 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3480 3481 .seealso: MatCreateMPIAIJWithSplitArrays() 3482 @*/ 3483 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3484 { 3485 PetscErrorCode ierr; 3486 Mat_MPIAIJ *maij; 3487 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3488 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3489 PetscScalar *oa=b->a; 3490 Mat Bnew; 3491 PetscInt m,n,N; 3492 3493 PetscFunctionBegin; 3494 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3495 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3496 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3497 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3498 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3499 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3500 3501 /* Get global columns of mat */ 3502 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3503 3504 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3505 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3506 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3507 maij = (Mat_MPIAIJ*)(*mat)->data; 3508 3509 (*mat)->preallocated = PETSC_TRUE; 3510 3511 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3512 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3513 3514 /* Set A as diagonal portion of *mat */ 3515 maij->A = A; 3516 3517 nz = oi[m]; 3518 for (i=0; i<nz; i++) { 3519 col = oj[i]; 3520 oj[i] = garray[col]; 3521 } 3522 3523 /* Set Bnew as off-diagonal portion of *mat */ 3524 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3525 bnew = (Mat_SeqAIJ*)Bnew->data; 3526 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3527 maij->B = Bnew; 3528 3529 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3530 3531 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3532 b->free_a = PETSC_FALSE; 3533 b->free_ij = PETSC_FALSE; 3534 ierr = MatDestroy(&B);CHKERRQ(ierr); 3535 3536 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3537 bnew->free_a = PETSC_TRUE; 3538 bnew->free_ij = PETSC_TRUE; 3539 3540 /* condense columns of maij->B */ 3541 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3542 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3543 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3544 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3545 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3546 PetscFunctionReturn(0); 3547 } 3548 3549 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3550 
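/*
   Illustrative sketch (not a tested example from the library): one way MatCreateMPIAIJWithSeqAIJ()
   above might be called.  Ad (the local "diagonal" SeqAIJ block), Ao (the local "off-diagonal"
   SeqAIJ block with compressed columns) and the global column map garray[] are assumed to have
   been assembled already, for instance in the style of MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   above:

      Mat C;
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);
      ...                                    use C; Ad now lives inside C and Ao has been destroyed
      ierr = MatDestroy(&C);CHKERRQ(ierr);   destroys Ad together with C
*/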
3551 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3552 { 3553 PetscErrorCode ierr; 3554 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3555 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3556 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3557 Mat M,Msub,B=a->B; 3558 MatScalar *aa; 3559 Mat_SeqAIJ *aij; 3560 PetscInt *garray = a->garray,*colsub,Ncols; 3561 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3562 IS iscol_sub,iscmap; 3563 const PetscInt *is_idx,*cmap; 3564 PetscBool allcolumns=PETSC_FALSE; 3565 MPI_Comm comm; 3566 3567 PetscFunctionBegin; 3568 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3569 3570 if (call == MAT_REUSE_MATRIX) { 3571 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3572 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3573 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3574 3575 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3576 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3577 3578 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3579 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3580 3581 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3582 3583 } else { /* call == MAT_INITIAL_MATRIX) */ 3584 PetscBool flg; 3585 3586 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3587 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3588 3589 /* (1) iscol -> nonscalable iscol_local */ 3590 /* Check for special case: each processor gets entire matrix columns */ 3591 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3592 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3593 if (allcolumns) { 3594 iscol_sub = iscol_local; 3595 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3596 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3597 3598 } else { 3599 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3600 PetscInt *idx,*cmap1,k; 3601 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3602 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3603 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3604 count = 0; 3605 k = 0; 3606 for (i=0; i<Ncols; i++) { 3607 j = is_idx[i]; 3608 if (j >= cstart && j < cend) { 3609 /* diagonal part of mat */ 3610 idx[count] = j; 3611 cmap1[count++] = i; /* column index in submat */ 3612 } else if (Bn) { 3613 /* off-diagonal part of mat */ 3614 if (j == garray[k]) { 3615 idx[count] = j; 3616 cmap1[count++] = i; /* column index in submat */ 3617 } else if (j > garray[k]) { 3618 while (j > garray[k] && k < Bn-1) k++; 3619 if (j == garray[k]) { 3620 idx[count] = j; 3621 cmap1[count++] = i; /* column index in submat */ 3622 } 3623 } 3624 } 3625 } 3626 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3627 3628 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3629 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3630 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3631 3632 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3633 } 3634 3635 /* (3) Create sequential Msub */ 3636 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3637 } 3638 3639 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3640 aij = (Mat_SeqAIJ*)(Msub)->data; 3641 ii = aij->i; 3642 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3643 3644 /* 3645 m - number of local rows 3646 Ncols - number of columns (same on all processors) 3647 rstart - first row in new global matrix generated 3648 */ 3649 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3650 3651 if (call == MAT_INITIAL_MATRIX) { 3652 /* (4) Create parallel newmat */ 3653 PetscMPIInt rank,size; 3654 PetscInt csize; 3655 3656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3658 3659 /* 3660 Determine the number of non-zeros in the diagonal and off-diagonal 3661 portions of the matrix in order to do correct preallocation 3662 */ 3663 3664 /* first get start and end of "diagonal" columns */ 3665 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3666 if (csize == PETSC_DECIDE) { 3667 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3668 if (mglobal == Ncols) { /* square matrix */ 3669 nlocal = m; 3670 } else { 3671 nlocal = Ncols/size + ((Ncols % size) > rank); 3672 } 3673 } else { 3674 nlocal = csize; 3675 } 3676 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3677 rstart = rend - nlocal; 3678 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3679 3680 /* next, compute all the lengths */ 3681 jj = aij->j; 3682 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3683 olens = dlens + m; 3684 for (i=0; i<m; i++) { 3685 jend = ii[i+1] - ii[i]; 3686 olen = 0; 3687 dlen = 0; 3688 for (j=0; j<jend; j++) { 3689 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3690 else dlen++; 3691 jj++; 3692 } 3693 olens[i] = olen; 3694 dlens[i] = dlen; 3695 } 3696 3697 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3698 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3699 3700 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3701 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
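    /* dlens[] and olens[] computed above count, for each local row of Msub, how many of its
       columns map (through cmap[]) into this process's diagonal column block [rstart,rend) of
       the new parallel matrix and how many fall outside it, so the MatMPIAIJSetPreallocation()
       call below allocates exactly the entries that are inserted in step (5) */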
3702 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3703 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3704 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3705 ierr = PetscFree(dlens);CHKERRQ(ierr); 3706 3707 } else { /* call == MAT_REUSE_MATRIX */ 3708 M = *newmat; 3709 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3710 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3711 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3712 /* 3713 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3714 rather than the slower MatSetValues(). 3715 */ 3716 M->was_assembled = PETSC_TRUE; 3717 M->assembled = PETSC_FALSE; 3718 } 3719 3720 /* (5) Set values of Msub to *newmat */ 3721 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3722 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3723 3724 jj = aij->j; 3725 aa = aij->a; 3726 for (i=0; i<m; i++) { 3727 row = rstart + i; 3728 nz = ii[i+1] - ii[i]; 3729 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3730 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3731 jj += nz; aa += nz; 3732 } 3733 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3734 3735 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3737 3738 ierr = PetscFree(colsub);CHKERRQ(ierr); 3739 3740 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3741 if (call == MAT_INITIAL_MATRIX) { 3742 *newmat = M; 3743 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3744 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3745 3746 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3747 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3748 3749 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3750 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3751 3752 if (iscol_local) { 3753 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3754 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3755 } 3756 } 3757 PetscFunctionReturn(0); 3758 } 3759 3760 /* 3761 Not great since it makes two copies of the submatrix, first an SeqAIJ 3762 in local and then by concatenating the local matrices the end result. 3763 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3764 3765 Note: This requires a sequential iscol with all indices. 
3766 */ 3767 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3768 { 3769 PetscErrorCode ierr; 3770 PetscMPIInt rank,size; 3771 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3772 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3773 Mat M,Mreuse; 3774 MatScalar *aa,*vwork; 3775 MPI_Comm comm; 3776 Mat_SeqAIJ *aij; 3777 PetscBool colflag,allcolumns=PETSC_FALSE; 3778 3779 PetscFunctionBegin; 3780 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3781 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3782 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3783 3784 /* Check for special case: each processor gets entire matrix columns */ 3785 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3786 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3787 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3788 3789 if (call == MAT_REUSE_MATRIX) { 3790 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3791 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3792 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3793 } else { 3794 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3795 } 3796 3797 /* 3798 m - number of local rows 3799 n - number of columns (same on all processors) 3800 rstart - first row in new global matrix generated 3801 */ 3802 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3803 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3804 if (call == MAT_INITIAL_MATRIX) { 3805 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3806 ii = aij->i; 3807 jj = aij->j; 3808 3809 /* 3810 Determine the number of non-zeros in the diagonal and off-diagonal 3811 portions of the matrix in order to do correct preallocation 3812 */ 3813 3814 /* first get start and end of "diagonal" columns */ 3815 if (csize == PETSC_DECIDE) { 3816 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3817 if (mglobal == n) { /* square matrix */ 3818 nlocal = m; 3819 } else { 3820 nlocal = n/size + ((n % size) > rank); 3821 } 3822 } else { 3823 nlocal = csize; 3824 } 3825 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3826 rstart = rend - nlocal; 3827 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3828 3829 /* next, compute all the lengths */ 3830 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3831 olens = dlens + m; 3832 for (i=0; i<m; i++) { 3833 jend = ii[i+1] - ii[i]; 3834 olen = 0; 3835 dlen = 0; 3836 for (j=0; j<jend; j++) { 3837 if (*jj < rstart || *jj >= rend) olen++; 3838 else dlen++; 3839 jj++; 3840 } 3841 olens[i] = olen; 3842 dlens[i] = dlen; 3843 } 3844 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3845 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3846 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3847 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3848 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3849 ierr = PetscFree(dlens);CHKERRQ(ierr); 3850 } else { 3851 PetscInt ml,nl; 3852 3853 M = *newmat; 3854 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3855 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3856 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3857 /* 3858 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3859 rather than the slower MatSetValues(). 3860 */ 3861 M->was_assembled = PETSC_TRUE; 3862 M->assembled = PETSC_FALSE; 3863 } 3864 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3865 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3866 ii = aij->i; 3867 jj = aij->j; 3868 aa = aij->a; 3869 for (i=0; i<m; i++) { 3870 row = rstart + i; 3871 nz = ii[i+1] - ii[i]; 3872 cwork = jj; jj += nz; 3873 vwork = aa; aa += nz; 3874 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3875 } 3876 3877 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 *newmat = M; 3880 3881 /* save submatrix used in processor for next request */ 3882 if (call == MAT_INITIAL_MATRIX) { 3883 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3884 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3885 } 3886 PetscFunctionReturn(0); 3887 } 3888 3889 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3890 { 3891 PetscInt m,cstart, cend,j,nnz,i,d; 3892 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3893 const PetscInt *JJ; 3894 PetscScalar *values; 3895 PetscErrorCode ierr; 3896 PetscBool nooffprocentries; 3897 3898 PetscFunctionBegin; 3899 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3900 3901 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3902 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3903 m = B->rmap->n; 3904 cstart = B->cmap->rstart; 3905 cend = B->cmap->rend; 3906 rstart = B->rmap->rstart; 3907 3908 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3909 3910 #if defined(PETSC_USE_DEBUG) 3911 for (i=0; i<m && Ii; i++) { 3912 nnz = Ii[i+1]- Ii[i]; 3913 JJ = J + Ii[i]; 3914 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3915 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3916 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3917 } 3918 #endif 3919 3920 for (i=0; i<m && Ii; i++) { 3921 nnz = Ii[i+1]- Ii[i]; 3922 JJ = J + Ii[i]; 3923 nnz_max = PetscMax(nnz_max,nnz); 3924 d = 0; 3925 for (j=0; j<nnz; j++) { 3926 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3927 } 3928 d_nnz[i] = d; 3929 o_nnz[i] = nnz - d; 3930 } 3931 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3932 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3933 3934 if (v) values = (PetscScalar*)v; 3935 else { 3936 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3937 } 3938 3939 for (i=0; i<m && Ii; i++) { 3940 ii = i + rstart; 3941 nnz = Ii[i+1]- Ii[i]; 3942 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3943 } 3944 nooffprocentries = B->nooffprocentries; 3945 B->nooffprocentries = PETSC_TRUE; 3946 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3947 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3948 B->nooffprocentries = nooffprocentries; 3949 3950 if (!v) { 3951 ierr = PetscFree(values);CHKERRQ(ierr); 3952 } 3953 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3954 PetscFunctionReturn(0); 3955 } 3956 3957 /*@ 3958 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3959 (the default parallel PETSc format). 3960 3961 Collective on MPI_Comm 3962 3963 Input Parameters: 3964 + B - the matrix 3965 . i - the indices into j for the start of each local row (starts with zero) 3966 . j - the column indices for each local row (starts with zero) 3967 - v - optional values in the matrix 3968 3969 Level: developer 3970 3971 Notes: 3972 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3973 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3974 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3975 3976 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3977 3978 The format which is used for the sparse matrix input, is equivalent to a 3979 row-major ordering.. i.e for the following matrix, the input data expected is 3980 as shown 3981 3982 $ 1 0 0 3983 $ 2 0 3 P0 3984 $ ------- 3985 $ 4 5 6 P1 3986 $ 3987 $ Process0 [P0]: rows_owned=[0,1] 3988 $ i = {0,1,3} [size = nrow+1 = 2+1] 3989 $ j = {0,0,2} [size = 3] 3990 $ v = {1,2,3} [size = 3] 3991 $ 3992 $ Process1 [P1]: rows_owned=[2] 3993 $ i = {0,3} [size = nrow+1 = 1+1] 3994 $ j = {0,1,2} [size = 3] 3995 $ v = {4,5,6} [size = 3] 3996 3997 .keywords: matrix, aij, compressed row, sparse, parallel 3998 3999 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4000 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4001 @*/ 4002 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4003 { 4004 PetscErrorCode ierr; 4005 4006 PetscFunctionBegin; 4007 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4008 PetscFunctionReturn(0); 4009 } 4010 4011 /*@C 4012 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4013 (the default parallel PETSc format). For good matrix assembly performance 4014 the user should preallocate the matrix storage by setting the parameters 4015 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4016 performance can be increased by more than a factor of 50. 4017 4018 Collective on MPI_Comm 4019 4020 Input Parameters: 4021 + B - the matrix 4022 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4023 (same value is used for all local rows) 4024 . d_nnz - array containing the number of nonzeros in the various rows of the 4025 DIAGONAL portion of the local submatrix (possibly different for each row) 4026 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4027 The size of this array is equal to the number of local rows, i.e 'm'. 
4028 For matrices that will be factored, you must leave room for (and set) 4029 the diagonal entry even if it is zero. 4030 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4031 submatrix (same value is used for all local rows). 4032 - o_nnz - array containing the number of nonzeros in the various rows of the 4033 OFF-DIAGONAL portion of the local submatrix (possibly different for 4034 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4035 structure. The size of this array is equal to the number 4036 of local rows, i.e 'm'. 4037 4038 If the *_nnz parameter is given then the *_nz parameter is ignored 4039 4040 The AIJ format (also called the Yale sparse matrix format or 4041 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4042 storage. The stored row and column indices begin with zero. 4043 See Users-Manual: ch_mat for details. 4044 4045 The parallel matrix is partitioned such that the first m0 rows belong to 4046 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4047 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4048 4049 The DIAGONAL portion of the local submatrix of a processor can be defined 4050 as the submatrix which is obtained by extraction the part corresponding to 4051 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4052 first row that belongs to the processor, r2 is the last row belonging to 4053 the this processor, and c1-c2 is range of indices of the local part of a 4054 vector suitable for applying the matrix to. This is an mxn matrix. In the 4055 common case of a square matrix, the row and column ranges are the same and 4056 the DIAGONAL part is also square. The remaining portion of the local 4057 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4058 4059 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4060 4061 You can call MatGetInfo() to get information on how effective the preallocation was; 4062 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4063 You can also run with the option -info and look for messages with the string 4064 malloc in them to see if additional memory allocation was needed. 4065 4066 Example usage: 4067 4068 Consider the following 8x8 matrix with 34 non-zero values, that is 4069 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4070 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4071 as follows: 4072 4073 .vb 4074 1 2 0 | 0 3 0 | 0 4 4075 Proc0 0 5 6 | 7 0 0 | 8 0 4076 9 0 10 | 11 0 0 | 12 0 4077 ------------------------------------- 4078 13 0 14 | 15 16 17 | 0 0 4079 Proc1 0 18 0 | 19 20 21 | 0 0 4080 0 0 0 | 22 23 0 | 24 0 4081 ------------------------------------- 4082 Proc2 25 26 27 | 0 0 28 | 29 0 4083 30 0 0 | 31 32 33 | 0 34 4084 .ve 4085 4086 This can be represented as a collection of submatrices as: 4087 4088 .vb 4089 A B C 4090 D E F 4091 G H I 4092 .ve 4093 4094 Where the submatrices A,B,C are owned by proc0, D,E,F are 4095 owned by proc1, G,H,I are owned by proc2. 4096 4097 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4098 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4099 The 'M','N' parameters are 8,8, and have the same values on all procs. 4100 4101 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4102 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4103 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4104 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4105 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4106 matrix, and [DF] as another SeqAIJ matrix.
4107
4108 When d_nz, o_nz parameters are specified, d_nz storage elements are
4109 allocated for every row of the local diagonal submatrix, and o_nz
4110 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4111 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4112 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4113 In this case, the values of d_nz,o_nz are:
4114 .vb
4115 proc0 : d_nz = 2, o_nz = 2
4116 proc1 : d_nz = 3, o_nz = 2
4117 proc2 : d_nz = 1, o_nz = 4
4118 .ve
4119 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4120 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4121 for proc2, i.e. we are using 12+15+10=37 storage locations to store
4122 34 values.
4123
4124 When d_nnz, o_nnz parameters are specified, the storage is specified
4125 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4126 In the above case the values for d_nnz,o_nnz are:
4127 .vb
4128 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4129 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4130 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4131 .ve
4132 Here the space allocated is the sum of all the above values, i.e. 34, and
4133 hence pre-allocation is perfect.
4134
4135 Level: intermediate
4136
4137 .keywords: matrix, aij, compressed row, sparse, parallel
4138
4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4140 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4141 @*/
4142 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4143 {
4144   PetscErrorCode ierr;
4145
4146   PetscFunctionBegin;
4147   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4148   PetscValidType(B,1);
4149   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4150   PetscFunctionReturn(0);
4151 }
4152
4153 /*@
4154 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4155 in standard CSR format.
4156
4157 Collective on MPI_Comm
4158
4159 Input Parameters:
4160 + comm - MPI communicator
4161 . m - number of local rows (cannot be PETSC_DECIDE)
4162 . n - This value should be the same as the local size used in creating the
4163 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4164 calculated if N is given) For square matrices n is almost always m.
4165 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4166 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4167 . i - row indices
4168 . j - column indices
4169 - a - matrix values
4170
4171 Output Parameter:
4172 . mat - the matrix
4173
4174 Level: intermediate
4175
4176 Notes:
4177 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4178 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4179 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4180
4181 The i and j indices are 0 based, and the i indices are offsets into the local j array.
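       For instance, with the layout shown below, process 0 could create its share of the matrix
       with the collective call (process 1 makes the matching call with its own i, j and v):

$       PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$       PetscScalar v[] = {1.0,2.0,3.0};
$       Mat         A;
$       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);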
4182 4183 The format which is used for the sparse matrix input, is equivalent to a 4184 row-major ordering.. i.e for the following matrix, the input data expected is 4185 as shown 4186 4187 $ 1 0 0 4188 $ 2 0 3 P0 4189 $ ------- 4190 $ 4 5 6 P1 4191 $ 4192 $ Process0 [P0]: rows_owned=[0,1] 4193 $ i = {0,1,3} [size = nrow+1 = 2+1] 4194 $ j = {0,0,2} [size = 3] 4195 $ v = {1,2,3} [size = 3] 4196 $ 4197 $ Process1 [P1]: rows_owned=[2] 4198 $ i = {0,3} [size = nrow+1 = 1+1] 4199 $ j = {0,1,2} [size = 3] 4200 $ v = {4,5,6} [size = 3] 4201 4202 .keywords: matrix, aij, compressed row, sparse, parallel 4203 4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4205 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4206 @*/ 4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4208 { 4209 PetscErrorCode ierr; 4210 4211 PetscFunctionBegin; 4212 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4213 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4214 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4215 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4216 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4217 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4218 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4219 PetscFunctionReturn(0); 4220 } 4221 4222 /*@C 4223 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4224 (the default parallel PETSc format). For good matrix assembly performance 4225 the user should preallocate the matrix storage by setting the parameters 4226 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4227 performance can be increased by more than a factor of 50. 4228 4229 Collective on MPI_Comm 4230 4231 Input Parameters: 4232 + comm - MPI communicator 4233 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4234 This value should be the same as the local size used in creating the 4235 y vector for the matrix-vector product y = Ax. 4236 . n - This value should be the same as the local size used in creating the 4237 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4238 calculated if N is given) For square matrices n is almost always m. 4239 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4240 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4241 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4242 (same value is used for all local rows) 4243 . d_nnz - array containing the number of nonzeros in the various rows of the 4244 DIAGONAL portion of the local submatrix (possibly different for each row) 4245 or NULL, if d_nz is used to specify the nonzero structure. 4246 The size of this array is equal to the number of local rows, i.e 'm'. 4247 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4248 submatrix (same value is used for all local rows). 4249 - o_nnz - array containing the number of nonzeros in the various rows of the 4250 OFF-DIAGONAL portion of the local submatrix (possibly different for 4251 each row) or NULL, if o_nz is used to specify the nonzero 4252 structure. 
The size of this array is equal to the number
4253 of local rows, i.e. 'm'.
4254
4255 Output Parameter:
4256 . A - the matrix
4257
4258 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4259 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4260 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4261
4262 Notes:
4263 If the *_nnz parameter is given then the *_nz parameter is ignored
4264
4265 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4266 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4267 storage requirements for this matrix.
4268
4269 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4270 processor then it must be used on all processors that share the object for
4271 that argument.
4272
4273 The user MUST specify either the local or global matrix dimensions
4274 (possibly both).
4275
4276 The parallel matrix is partitioned across processors such that the
4277 first m0 rows belong to process 0, the next m1 rows belong to
4278 process 1, the next m2 rows belong to process 2 etc., where
4279 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4280 values corresponding to an [m x N] submatrix.
4281
4282 The columns are logically partitioned with the n0 columns belonging
4283 to the 0th partition, the next n1 columns belonging to the next
4284 partition etc., where n0,n1,n2,... are the input parameter 'n'.
4285
4286 The DIAGONAL portion of the local submatrix on any given processor
4287 is the submatrix corresponding to the rows and columns m,n
4288 corresponding to the given processor, i.e. the diagonal matrix on
4289 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4290 etc. The remaining portion of the local submatrix [m x (N-n)]
4291 constitutes the OFF-DIAGONAL portion. The example below better
4292 illustrates this concept.
4293
4294 For a square global matrix we define each processor's diagonal portion
4295 to be its local rows and the corresponding columns (a square submatrix);
4296 each processor's off-diagonal portion encompasses the remainder of the
4297 local matrix (a rectangular submatrix).
4298
4299 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4300
4301 When calling this routine with a single process communicator, a matrix of
4302 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4303 type of communicator, use the construction mechanism
4304 .vb
4305 MatCreate(...,&A);
4306 MatSetType(A,MATMPIAIJ);
4307 MatSetSizes(A, m,n,M,N);
4308 MatMPIAIJSetPreallocation(A,...);
4309 .ve
4310
4313 By default, this format uses inodes (identical nodes) when possible.
4314 We search for consecutive rows with the same nonzero structure, thereby
4315 reusing matrix information to achieve increased efficiency.
4316
4317 Options Database Keys:
4318 + -mat_no_inode - Do not use inodes
4319 - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4320
4323 Example usage:
4324
4325 Consider the following 8x8 matrix with 34 nonzero values, that is
4326 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4327 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34,
   and hence the preallocation is perfect.
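   For illustration only, a minimal call for proc0 of the example above might look
   as follows (a sketch: error checking is abbreviated, the MatSetValues() loop that
   fills the locally owned rows is omitted, and 'comm' and 'A' are placeholder names)

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... insert the locally owned rows with MatSetValues() ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve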
4391 4392 Level: intermediate 4393 4394 .keywords: matrix, aij, compressed row, sparse, parallel 4395 4396 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4397 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4398 @*/ 4399 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4400 { 4401 PetscErrorCode ierr; 4402 PetscMPIInt size; 4403 4404 PetscFunctionBegin; 4405 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4406 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4407 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4408 if (size > 1) { 4409 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4410 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4411 } else { 4412 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4413 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4414 } 4415 PetscFunctionReturn(0); 4416 } 4417 4418 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4419 { 4420 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4421 PetscBool flg; 4422 PetscErrorCode ierr; 4423 4424 PetscFunctionBegin; 4425 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4426 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4427 if (Ad) *Ad = a->A; 4428 if (Ao) *Ao = a->B; 4429 if (colmap) *colmap = a->garray; 4430 PetscFunctionReturn(0); 4431 } 4432 4433 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4434 { 4435 PetscErrorCode ierr; 4436 PetscInt m,N,i,rstart,nnz,Ii; 4437 PetscInt *indx; 4438 PetscScalar *values; 4439 4440 PetscFunctionBegin; 4441 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4442 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4443 PetscInt *dnz,*onz,sum,bs,cbs; 4444 4445 if (n == PETSC_DECIDE) { 4446 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4447 } 4448 /* Check sum(n) = N */ 4449 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4450 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4451 4452 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4453 rstart -= m; 4454 4455 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4456 for (i=0; i<m; i++) { 4457 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4458 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4459 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4460 } 4461 4462 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4463 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4464 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4465 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4466 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4467 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4468 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4469 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4470 } 4471 4472 /* numeric phase */ 4473 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4474 for (i=0; i<m; i++) { 4475 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4476 Ii = i + rstart; 4477 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4478 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4479 } 4480 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4481 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4486 { 4487 PetscErrorCode ierr; 4488 PetscMPIInt rank; 4489 PetscInt m,N,i,rstart,nnz; 4490 size_t len; 4491 const PetscInt *indx; 4492 PetscViewer out; 4493 char *name; 4494 Mat B; 4495 const PetscScalar *values; 4496 4497 PetscFunctionBegin; 4498 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4499 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4500 /* Should this be the type of the diagonal block of A? */ 4501 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4502 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4503 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4504 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4505 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4506 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4507 for (i=0; i<m; i++) { 4508 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4509 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4510 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4511 } 4512 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4513 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4514 4515 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4516 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4517 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4518 sprintf(name,"%s.%d",outfile,rank); 4519 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4520 ierr = PetscFree(name);CHKERRQ(ierr); 4521 ierr = MatView(B,out);CHKERRQ(ierr); 4522 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4523 ierr = MatDestroy(&B);CHKERRQ(ierr); 4524 PetscFunctionReturn(0); 4525 } 4526 4527 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4528 { 4529 PetscErrorCode ierr; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4535 if (container) { 4536 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4539 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4540 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4541 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4542 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4549 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4550 ierr = PetscFree(merge);CHKERRQ(ierr); 4551 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4552 } 4553 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 #include <../src/mat/utils/freespace.h> 4558 #include <petscbt.h> 4559 4560 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4561 { 4562 PetscErrorCode ierr; 4563 MPI_Comm 
comm; 4564 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4565 PetscMPIInt size,rank,taga,*len_s; 4566 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4567 PetscInt proc,m; 4568 PetscInt **buf_ri,**buf_rj; 4569 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4570 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4571 MPI_Request *s_waits,*r_waits; 4572 MPI_Status *status; 4573 MatScalar *aa=a->a; 4574 MatScalar **abuf_r,*ba_i; 4575 Mat_Merge_SeqsToMPI *merge; 4576 PetscContainer container; 4577 4578 PetscFunctionBegin; 4579 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4580 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4581 4582 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4583 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4584 4585 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4586 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4587 4588 bi = merge->bi; 4589 bj = merge->bj; 4590 buf_ri = merge->buf_ri; 4591 buf_rj = merge->buf_rj; 4592 4593 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4594 owners = merge->rowmap->range; 4595 len_s = merge->len_s; 4596 4597 /* send and recv matrix values */ 4598 /*-----------------------------*/ 4599 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4600 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4601 4602 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4603 for (proc=0,k=0; proc<size; proc++) { 4604 if (!len_s[proc]) continue; 4605 i = owners[proc]; 4606 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4607 k++; 4608 } 4609 4610 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4611 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4612 ierr = PetscFree(status);CHKERRQ(ierr); 4613 4614 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4615 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4616 4617 /* insert mat values of mpimat */ 4618 /*----------------------------*/ 4619 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4620 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4621 4622 for (k=0; k<merge->nrecv; k++) { 4623 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4624 nrows = *(buf_ri_k[k]); 4625 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4626 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4627 } 4628 4629 /* set values of ba */ 4630 m = merge->rowmap->n; 4631 for (i=0; i<m; i++) { 4632 arow = owners[rank] + i; 4633 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4634 bnzi = bi[i+1] - bi[i]; 4635 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4636 4637 /* add local non-zero vals of this proc's seqmat into ba */ 4638 anzi = ai[arow+1] - ai[arow]; 4639 aj = a->j + ai[arow]; 4640 aa = a->a + ai[arow]; 4641 nextaj = 0; 4642 for (j=0; nextaj<anzi; j++) { 4643 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4644 ba_i[j] += aa[nextaj++]; 4645 } 4646 } 4647 4648 /* add received vals into ba */ 4649 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4650 /* i-th row */ 4651 if (i == *nextrow[k]) { 4652 anzi = *(nextai[k]+1) - *nextai[k]; 4653 aj = buf_rj[k] + *(nextai[k]); 4654 aa = abuf_r[k] + *(nextai[k]); 4655 nextaj = 0; 4656 for 
(j=0; nextaj<anzi; j++) { 4657 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4658 ba_i[j] += aa[nextaj++]; 4659 } 4660 } 4661 nextrow[k]++; nextai[k]++; 4662 } 4663 } 4664 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4665 } 4666 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4667 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4668 4669 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4670 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4671 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4672 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4673 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4674 PetscFunctionReturn(0); 4675 } 4676 4677 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4678 { 4679 PetscErrorCode ierr; 4680 Mat B_mpi; 4681 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4682 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4683 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4684 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4685 PetscInt len,proc,*dnz,*onz,bs,cbs; 4686 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4687 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4688 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4689 MPI_Status *status; 4690 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4691 PetscBT lnkbt; 4692 Mat_Merge_SeqsToMPI *merge; 4693 PetscContainer container; 4694 4695 PetscFunctionBegin; 4696 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4697 4698 /* make sure it is a PETSc comm */ 4699 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4700 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4701 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4702 4703 ierr = PetscNew(&merge);CHKERRQ(ierr); 4704 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4705 4706 /* determine row ownership */ 4707 /*---------------------------------------------------------*/ 4708 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4709 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4710 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4711 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4712 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4713 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4714 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4715 4716 m = merge->rowmap->n; 4717 owners = merge->rowmap->range; 4718 4719 /* determine the number of messages to send, their lengths */ 4720 /*---------------------------------------------------------*/ 4721 len_s = merge->len_s; 4722 4723 len = 0; /* length of buf_si[] */ 4724 merge->nsend = 0; 4725 for (proc=0; proc<size; proc++) { 4726 len_si[proc] = 0; 4727 if (proc == rank) { 4728 len_s[proc] = 0; 4729 } else { 4730 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4731 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4732 } 4733 if (len_s[proc]) { 4734 merge->nsend++; 4735 nrows = 0; 4736 for (i=owners[proc]; i<owners[proc+1]; i++) { 4737 if (ai[i+1] > ai[i]) nrows++; 4738 } 4739 len_si[proc] = 2*(nrows+1); 4740 len += len_si[proc]; 4741 } 4742 } 4743 4744 /* determine the number and length of messages to receive for ij-structure */ 4745 /*-------------------------------------------------------------------------*/ 4746 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
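  /* merge->nrecv now holds the number of processes that will send i/j structure to this process;
     the next call gathers the lengths of those incoming messages */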
4747 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4748 4749 /* post the Irecv of j-structure */ 4750 /*-------------------------------*/ 4751 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4752 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4753 4754 /* post the Isend of j-structure */ 4755 /*--------------------------------*/ 4756 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4757 4758 for (proc=0, k=0; proc<size; proc++) { 4759 if (!len_s[proc]) continue; 4760 i = owners[proc]; 4761 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4762 k++; 4763 } 4764 4765 /* receives and sends of j-structure are complete */ 4766 /*------------------------------------------------*/ 4767 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4768 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4769 4770 /* send and recv i-structure */ 4771 /*---------------------------*/ 4772 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4773 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4774 4775 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4776 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4777 for (proc=0,k=0; proc<size; proc++) { 4778 if (!len_s[proc]) continue; 4779 /* form outgoing message for i-structure: 4780 buf_si[0]: nrows to be sent 4781 [1:nrows]: row index (global) 4782 [nrows+1:2*nrows+1]: i-structure index 4783 */ 4784 /*-------------------------------------------*/ 4785 nrows = len_si[proc]/2 - 1; 4786 buf_si_i = buf_si + nrows+1; 4787 buf_si[0] = nrows; 4788 buf_si_i[0] = 0; 4789 nrows = 0; 4790 for (i=owners[proc]; i<owners[proc+1]; i++) { 4791 anzi = ai[i+1] - ai[i]; 4792 if (anzi) { 4793 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4794 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4795 nrows++; 4796 } 4797 } 4798 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4799 k++; 4800 buf_si += len_si[proc]; 4801 } 4802 4803 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4804 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4805 4806 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4807 for (i=0; i<merge->nrecv; i++) { 4808 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4809 } 4810 4811 ierr = PetscFree(len_si);CHKERRQ(ierr); 4812 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4813 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4814 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4815 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4816 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4817 ierr = PetscFree(status);CHKERRQ(ierr); 4818 4819 /* compute a local seq matrix in each processor */ 4820 /*----------------------------------------------*/ 4821 /* allocate bi array and free space for accumulating nonzero column info */ 4822 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4823 bi[0] = 0; 4824 4825 /* create and initialize a linked list */ 4826 nlnk = N+1; 4827 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4828 4829 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4830 len = 
ai[owners[rank+1]] - ai[owners[rank]]; 4831 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4832 4833 current_space = free_space; 4834 4835 /* determine symbolic info for each local row */ 4836 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4837 4838 for (k=0; k<merge->nrecv; k++) { 4839 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4840 nrows = *buf_ri_k[k]; 4841 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4842 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4843 } 4844 4845 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4846 len = 0; 4847 for (i=0; i<m; i++) { 4848 bnzi = 0; 4849 /* add local non-zero cols of this proc's seqmat into lnk */ 4850 arow = owners[rank] + i; 4851 anzi = ai[arow+1] - ai[arow]; 4852 aj = a->j + ai[arow]; 4853 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4854 bnzi += nlnk; 4855 /* add received col data into lnk */ 4856 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4857 if (i == *nextrow[k]) { /* i-th row */ 4858 anzi = *(nextai[k]+1) - *nextai[k]; 4859 aj = buf_rj[k] + *nextai[k]; 4860 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4861 bnzi += nlnk; 4862 nextrow[k]++; nextai[k]++; 4863 } 4864 } 4865 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4866 4867 /* if free space is not available, make more free space */ 4868 if (current_space->local_remaining<bnzi) { 4869 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4870 nspacedouble++; 4871 } 4872 /* copy data into free space, then initialize lnk */ 4873 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4874 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4875 4876 current_space->array += bnzi; 4877 current_space->local_used += bnzi; 4878 current_space->local_remaining -= bnzi; 4879 4880 bi[i+1] = bi[i] + bnzi; 4881 } 4882 4883 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4884 4885 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4886 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4887 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4888 4889 /* create symbolic parallel matrix B_mpi */ 4890 /*---------------------------------------*/ 4891 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4892 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4893 if (n==PETSC_DECIDE) { 4894 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4895 } else { 4896 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4897 } 4898 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4899 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4900 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4901 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4902 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4903 4904 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4905 B_mpi->assembled = PETSC_FALSE; 4906 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4907 merge->bi = bi; 4908 merge->bj = bj; 4909 merge->buf_ri = buf_ri; 4910 merge->buf_rj = buf_rj; 4911 merge->coi = NULL; 4912 merge->coj = NULL; 4913 merge->owners_co = NULL; 4914 4915 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4916 4917 
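  /* release the reference to the inner PETSc communicator obtained from PetscCommDuplicate() above */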
/* attach the supporting struct to B_mpi for reuse */ 4918 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4919 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4920 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4921 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4922 *mpimat = B_mpi; 4923 4924 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 4928 /*@C 4929 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4930 matrices from each processor 4931 4932 Collective on MPI_Comm 4933 4934 Input Parameters: 4935 + comm - the communicators the parallel matrix will live on 4936 . seqmat - the input sequential matrices 4937 . m - number of local rows (or PETSC_DECIDE) 4938 . n - number of local columns (or PETSC_DECIDE) 4939 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4940 4941 Output Parameter: 4942 . mpimat - the parallel matrix generated 4943 4944 Level: advanced 4945 4946 Notes: 4947 The dimensions of the sequential matrix in each processor MUST be the same. 4948 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4949 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4950 @*/ 4951 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4952 { 4953 PetscErrorCode ierr; 4954 PetscMPIInt size; 4955 4956 PetscFunctionBegin; 4957 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4958 if (size == 1) { 4959 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4960 if (scall == MAT_INITIAL_MATRIX) { 4961 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4962 } else { 4963 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4964 } 4965 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4966 PetscFunctionReturn(0); 4967 } 4968 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4969 if (scall == MAT_INITIAL_MATRIX) { 4970 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4971 } 4972 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4973 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4974 PetscFunctionReturn(0); 4975 } 4976 4977 /*@ 4978 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4979 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4980 with MatGetSize() 4981 4982 Not Collective 4983 4984 Input Parameters: 4985 + A - the matrix 4986 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4987 4988 Output Parameter: 4989 . 
A_loc - the local sequential matrix generated 4990 4991 Level: developer 4992 4993 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4994 4995 @*/ 4996 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4997 { 4998 PetscErrorCode ierr; 4999 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5000 Mat_SeqAIJ *mat,*a,*b; 5001 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5002 MatScalar *aa,*ba,*cam; 5003 PetscScalar *ca; 5004 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5005 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5006 PetscBool match; 5007 MPI_Comm comm; 5008 PetscMPIInt size; 5009 5010 PetscFunctionBegin; 5011 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5012 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5013 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5014 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5015 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5016 5017 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5018 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5019 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5020 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5021 aa = a->a; ba = b->a; 5022 if (scall == MAT_INITIAL_MATRIX) { 5023 if (size == 1) { 5024 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 5028 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5029 ci[0] = 0; 5030 for (i=0; i<am; i++) { 5031 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5032 } 5033 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5034 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5035 k = 0; 5036 for (i=0; i<am; i++) { 5037 ncols_o = bi[i+1] - bi[i]; 5038 ncols_d = ai[i+1] - ai[i]; 5039 /* off-diagonal portion of A */ 5040 for (jo=0; jo<ncols_o; jo++) { 5041 col = cmap[*bj]; 5042 if (col >= cstart) break; 5043 cj[k] = col; bj++; 5044 ca[k++] = *ba++; 5045 } 5046 /* diagonal portion of A */ 5047 for (j=0; j<ncols_d; j++) { 5048 cj[k] = cstart + *aj++; 5049 ca[k++] = *aa++; 5050 } 5051 /* off-diagonal portion of A */ 5052 for (j=jo; j<ncols_o; j++) { 5053 cj[k] = cmap[*bj++]; 5054 ca[k++] = *ba++; 5055 } 5056 } 5057 /* put together the new matrix */ 5058 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5059 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5060 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5061 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5062 mat->free_a = PETSC_TRUE; 5063 mat->free_ij = PETSC_TRUE; 5064 mat->nonew = 0; 5065 } else if (scall == MAT_REUSE_MATRIX) { 5066 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5067 ci = mat->i; cj = mat->j; cam = mat->a; 5068 for (i=0; i<am; i++) { 5069 /* off-diagonal portion of A */ 5070 ncols_o = bi[i+1] - bi[i]; 5071 for (jo=0; jo<ncols_o; jo++) { 5072 col = cmap[*bj]; 5073 if (col >= cstart) break; 5074 *cam++ = *ba++; bj++; 5075 } 5076 /* diagonal portion of A */ 5077 ncols_d = ai[i+1] - ai[i]; 5078 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5079 /* off-diagonal portion of A */ 5080 for (j=jo; j<ncols_o; j++) { 5081 *cam++ = *ba++; bj++; 5082 } 5083 } 5084 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5085 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5086 PetscFunctionReturn(0); 5087 } 5088 5089 /*@C 5090 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5091 5092 Not Collective 5093 5094 Input Parameters: 5095 + A - the matrix 5096 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5097 - row, col - index sets of rows and columns to extract (or NULL) 5098 5099 Output Parameter: 5100 . A_loc - the local sequential matrix generated 5101 5102 Level: developer 5103 5104 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5105 5106 @*/ 5107 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5108 { 5109 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5110 PetscErrorCode ierr; 5111 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5112 IS isrowa,iscola; 5113 Mat *aloc; 5114 PetscBool match; 5115 5116 PetscFunctionBegin; 5117 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5118 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5119 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5120 if (!row) { 5121 start = A->rmap->rstart; end = A->rmap->rend; 5122 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5123 } else { 5124 isrowa = *row; 5125 } 5126 if (!col) { 5127 start = A->cmap->rstart; 5128 cmap = a->garray; 5129 nzA = a->A->cmap->n; 5130 nzB = a->B->cmap->n; 5131 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5132 ncols = 0; 5133 for (i=0; i<nzB; i++) { 5134 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5135 else break; 5136 } 5137 imark = i; 5138 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5139 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5140 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5141 } else { 5142 iscola = *col; 5143 } 5144 if (scall != MAT_INITIAL_MATRIX) { 5145 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5146 aloc[0] = *A_loc; 5147 } 5148 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5149 if (!col) { /* attach global id of condensed columns */ 5150 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5151 } 5152 *A_loc = aloc[0]; 5153 ierr = PetscFree(aloc);CHKERRQ(ierr); 5154 if (!row) { 5155 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5156 } 5157 if (!col) { 5158 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5159 } 5160 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5161 PetscFunctionReturn(0); 5162 } 5163 5164 /*@C 5165 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5166 5167 Collective on Mat 5168 5169 Input Parameters: 5170 + A,B - the matrices in mpiaij format 5171 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5172 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5173 5174 Output Parameter: 5175 + rowb, colb - index sets of rows and columns of B to extract 5176 - B_seq - the sequential matrix generated 5177 5178 Level: developer 5179 5180 @*/ 5181 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5182 { 5183 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5184 PetscErrorCode ierr; 5185 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5186 IS isrowb,iscolb; 5187 Mat *bseq=NULL; 5188 5189 PetscFunctionBegin; 5190 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5191 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5192 } 5193 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5194 5195 if (scall == MAT_INITIAL_MATRIX) { 5196 start = A->cmap->rstart; 5197 cmap = a->garray; 5198 nzA = a->A->cmap->n; 5199 nzB = a->B->cmap->n; 5200 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5201 ncols = 0; 5202 for (i=0; i<nzB; i++) { /* row < local row index */ 5203 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5204 else break; 5205 } 5206 imark = i; 5207 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5208 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5209 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5210 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5211 } else { 5212 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5213 isrowb = *rowb; iscolb = *colb; 5214 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5215 bseq[0] = *B_seq; 5216 } 5217 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5218 *B_seq = bseq[0]; 5219 ierr = PetscFree(bseq);CHKERRQ(ierr); 5220 if (!rowb) { 5221 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5222 } else { 5223 *rowb = isrowb; 5224 } 5225 if (!colb) { 5226 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5227 } else { 5228 *colb = iscolb; 5229 } 5230 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5231 PetscFunctionReturn(0); 5232 } 5233 5234 #include <petsc/private/vecscatterimpl.h> 5235 /* 5236 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5237 of the OFF-DIAGONAL portion of local A 5238 5239 Collective on Mat 5240 5241 Input Parameters: 5242 + A,B - the matrices in mpiaij format 5243 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5244 5245 Output Parameter: 5246 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5247 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5248 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5249 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5250 5251 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5252 for this matrix. This is not desirable.. 
5253 5254 Level: developer 5255 5256 */ 5257 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5258 { 5259 VecScatter_MPI_General *gen_to,*gen_from; 5260 PetscErrorCode ierr; 5261 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5262 Mat_SeqAIJ *b_oth; 5263 VecScatter ctx; 5264 MPI_Comm comm; 5265 PetscMPIInt *rprocs,*sprocs,tag,rank; 5266 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5267 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5268 PetscScalar *b_otha,*bufa,*bufA,*vals; 5269 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5270 MPI_Request *rwaits = NULL,*swaits = NULL; 5271 MPI_Status *sstatus,rstatus; 5272 PetscMPIInt jj,size; 5273 VecScatterType type; 5274 PetscBool mpi1; 5275 5276 PetscFunctionBegin; 5277 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5278 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5279 5280 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5281 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5282 } 5283 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5284 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5285 5286 if (size == 1) { 5287 startsj_s = NULL; 5288 bufa_ptr = NULL; 5289 *B_oth = NULL; 5290 PetscFunctionReturn(0); 5291 } 5292 5293 ctx = a->Mvctx; 5294 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5295 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5296 if (!mpi1) { 5297 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5298 thus create a->Mvctx_mpi1 */ 5299 if (!a->Mvctx_mpi1) { 5300 a->Mvctx_mpi1_flg = PETSC_TRUE; 5301 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5302 } 5303 ctx = a->Mvctx_mpi1; 5304 } 5305 tag = ((PetscObject)ctx)->tag; 5306 5307 gen_to = (VecScatter_MPI_General*)ctx->todata; 5308 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5309 nrecvs = gen_from->n; 5310 nsends = gen_to->n; 5311 5312 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5313 srow = gen_to->indices; /* local row index to be sent */ 5314 sstarts = gen_to->starts; 5315 sprocs = gen_to->procs; 5316 sstatus = gen_to->sstatus; 5317 sbs = gen_to->bs; 5318 rstarts = gen_from->starts; 5319 rprocs = gen_from->procs; 5320 rbs = gen_from->bs; 5321 5322 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5323 if (scall == MAT_INITIAL_MATRIX) { 5324 /* i-array */ 5325 /*---------*/ 5326 /* post receives */ 5327 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5328 for (i=0; i<nrecvs; i++) { 5329 rowlen = rvalues + rstarts[i]*rbs; 5330 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5331 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5332 } 5333 5334 /* pack the outgoing message */ 5335 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5336 5337 sstartsj[0] = 0; 5338 rstartsj[0] = 0; 5339 len = 0; /* total length of j or a array to be sent */ 5340 k = 0; 5341 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5342 for (i=0; i<nsends; i++) { 5343 rowlen = svalues + sstarts[i]*sbs; 5344 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5345 for (j=0; j<nrows; j++) { 5346 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5347 for 
(l=0; l<sbs; l++) { 5348 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5349 5350 rowlen[j*sbs+l] = ncols; 5351 5352 len += ncols; 5353 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5354 } 5355 k++; 5356 } 5357 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5358 5359 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5360 } 5361 /* recvs and sends of i-array are completed */ 5362 i = nrecvs; 5363 while (i--) { 5364 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5365 } 5366 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5367 ierr = PetscFree(svalues);CHKERRQ(ierr); 5368 5369 /* allocate buffers for sending j and a arrays */ 5370 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5371 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5372 5373 /* create i-array of B_oth */ 5374 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5375 5376 b_othi[0] = 0; 5377 len = 0; /* total length of j or a array to be received */ 5378 k = 0; 5379 for (i=0; i<nrecvs; i++) { 5380 rowlen = rvalues + rstarts[i]*rbs; 5381 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5382 for (j=0; j<nrows; j++) { 5383 b_othi[k+1] = b_othi[k] + rowlen[j]; 5384 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5385 k++; 5386 } 5387 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5388 } 5389 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5390 5391 /* allocate space for j and a arrrays of B_oth */ 5392 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5393 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5394 5395 /* j-array */ 5396 /*---------*/ 5397 /* post receives of j-array */ 5398 for (i=0; i<nrecvs; i++) { 5399 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5400 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5401 } 5402 5403 /* pack the outgoing message j-array */ 5404 k = 0; 5405 for (i=0; i<nsends; i++) { 5406 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5407 bufJ = bufj+sstartsj[i]; 5408 for (j=0; j<nrows; j++) { 5409 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5410 for (ll=0; ll<sbs; ll++) { 5411 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5412 for (l=0; l<ncols; l++) { 5413 *bufJ++ = cols[l]; 5414 } 5415 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5416 } 5417 } 5418 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5419 } 5420 5421 /* recvs and sends of j-array are completed */ 5422 i = nrecvs; 5423 while (i--) { 5424 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5425 } 5426 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5427 } else if (scall == MAT_REUSE_MATRIX) { 5428 sstartsj = *startsj_s; 5429 rstartsj = *startsj_r; 5430 bufa = *bufa_ptr; 5431 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5432 b_otha = b_oth->a; 5433 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5434 5435 /* a-array */ 5436 /*---------*/ 5437 /* post receives of a-array */ 5438 for (i=0; i<nrecvs; i++) { 5439 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5440 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5441 } 5442 5443 /* pack the 
outgoing message a-array */ 5444 k = 0; 5445 for (i=0; i<nsends; i++) { 5446 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5447 bufA = bufa+sstartsj[i]; 5448 for (j=0; j<nrows; j++) { 5449 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5450 for (ll=0; ll<sbs; ll++) { 5451 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5452 for (l=0; l<ncols; l++) { 5453 *bufA++ = vals[l]; 5454 } 5455 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5456 } 5457 } 5458 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5459 } 5460 /* recvs and sends of a-array are completed */ 5461 i = nrecvs; 5462 while (i--) { 5463 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5464 } 5465 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5466 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5467 5468 if (scall == MAT_INITIAL_MATRIX) { 5469 /* put together the new matrix */ 5470 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5471 5472 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5473 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5474 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5475 b_oth->free_a = PETSC_TRUE; 5476 b_oth->free_ij = PETSC_TRUE; 5477 b_oth->nonew = 0; 5478 5479 ierr = PetscFree(bufj);CHKERRQ(ierr); 5480 if (!startsj_s || !bufa_ptr) { 5481 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5482 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5483 } else { 5484 *startsj_s = sstartsj; 5485 *startsj_r = rstartsj; 5486 *bufa_ptr = bufa; 5487 } 5488 } 5489 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5490 PetscFunctionReturn(0); 5491 } 5492 5493 /*@C 5494 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5495 5496 Not Collective 5497 5498 Input Parameters: 5499 . A - The matrix in mpiaij format 5500 5501 Output Parameter: 5502 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5503 . 
colmap - A map from global column index to local index into lvec 5504 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5505 5506 Level: developer 5507 5508 @*/ 5509 #if defined(PETSC_USE_CTABLE) 5510 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5511 #else 5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5513 #endif 5514 { 5515 Mat_MPIAIJ *a; 5516 5517 PetscFunctionBegin; 5518 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5519 PetscValidPointer(lvec, 2); 5520 PetscValidPointer(colmap, 3); 5521 PetscValidPointer(multScatter, 4); 5522 a = (Mat_MPIAIJ*) A->data; 5523 if (lvec) *lvec = a->lvec; 5524 if (colmap) *colmap = a->colmap; 5525 if (multScatter) *multScatter = a->Mvctx; 5526 PetscFunctionReturn(0); 5527 } 5528 5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5530 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5532 #if defined(PETSC_HAVE_MKL_SPARSE) 5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5534 #endif 5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5536 #if defined(PETSC_HAVE_ELEMENTAL) 5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5538 #endif 5539 #if defined(PETSC_HAVE_HYPRE) 5540 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5541 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5542 #endif 5543 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5544 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5545 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5546 5547 /* 5548 Computes (B'*A')' since computing B*A directly is untenable 5549 5550 n p p 5551 ( ) ( ) ( ) 5552 m ( A ) * n ( B ) = m ( C ) 5553 ( ) ( ) ( ) 5554 5555 */ 5556 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5557 { 5558 PetscErrorCode ierr; 5559 Mat At,Bt,Ct; 5560 5561 PetscFunctionBegin; 5562 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5563 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5564 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5565 ierr = MatDestroy(&At);CHKERRQ(ierr); 5566 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5567 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5568 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5569 PetscFunctionReturn(0); 5570 } 5571 5572 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5573 { 5574 PetscErrorCode ierr; 5575 PetscInt m=A->rmap->n,n=B->cmap->n; 5576 Mat Cmat; 5577 5578 PetscFunctionBegin; 5579 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5580 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5581 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5582 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5583 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5584 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5585 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5586 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5587 5588 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5589 5590 *C = Cmat; 5591 PetscFunctionReturn(0); 5592 } 5593 5594 /* ----------------------------------------------------------------*/ 5595 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5596 { 5597 PetscErrorCode ierr; 5598 5599 PetscFunctionBegin; 5600 if (scall == MAT_INITIAL_MATRIX) { 5601 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5602 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5603 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5604 } 5605 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5606 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5607 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5608 PetscFunctionReturn(0); 5609 } 5610 5611 /*MC 5612 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5613 5614 Options Database Keys: 5615 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5616 5617 Level: beginner 5618 5619 .seealso: MatCreateAIJ() 5620 M*/ 5621 5622 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5623 { 5624 Mat_MPIAIJ *b; 5625 PetscErrorCode ierr; 5626 PetscMPIInt size; 5627 5628 PetscFunctionBegin; 5629 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5630 5631 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5632 B->data = (void*)b; 5633 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5634 B->assembled = PETSC_FALSE; 5635 B->insertmode = NOT_SET_VALUES; 5636 b->size = size; 5637 5638 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5639 5640 /* build cache for off array entries formed */ 5641 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5642 5643 b->donotstash = PETSC_FALSE; 5644 b->colmap = 0; 5645 b->garray = 0; 5646 b->roworiented = PETSC_TRUE; 5647 5648 /* stuff used for matrix vector multiply */ 5649 b->lvec = NULL; 5650 b->Mvctx = NULL; 5651 5652 /* stuff for MatGetRow() */ 5653 b->rowindices = 0; 5654 b->rowvalues = 0; 5655 b->getrowactive = PETSC_FALSE; 5656 5657 /* flexible pointer used in CUSP/CUSPARSE classes */ 5658 b->spptr = NULL; 5659 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5668 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5670 #if defined(PETSC_HAVE_MKL_SPARSE) 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5672 #endif 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5675 #if defined(PETSC_HAVE_ELEMENTAL) 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5677 #endif 5678 #if defined(PETSC_HAVE_HYPRE) 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5680 #endif 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5686 #if defined(PETSC_HAVE_HYPRE) 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5688 #endif 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5690 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5691 PetscFunctionReturn(0); 5692 } 5693 5694 /*@C 5695 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5696 and "off-diagonal" part of the matrix in CSR format. 5697 5698 Collective on MPI_Comm 5699 5700 Input Parameters: 5701 + comm - MPI communicator 5702 . m - number of local rows (Cannot be PETSC_DECIDE) 5703 . n - This value should be the same as the local size used in creating the 5704 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5705 calculated if N is given) For square matrices n is almost always m. 5706 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5707 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5708 . i - row indices for "diagonal" portion of matrix 5709 . j - column indices 5710 . a - matrix values 5711 . oi - row indices for "off-diagonal" portion of matrix 5712 . oj - column indices 5713 - oa - matrix values 5714 5715 Output Parameter: 5716 . mat - the matrix 5717 5718 Level: advanced 5719 5720 Notes: 5721 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5722 must free the arrays once the matrix has been destroyed and not before. 
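   For illustration, the required ordering is sketched below (the array and variable
   names are placeholders; the arrays may come from any allocator, and how they are
   freed is up to the caller)

.vb
     MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,i,j,a,oi,oj,oa,&A);
     /* ... use A ... */
     MatDestroy(&A);
     /* only now may the caller free i, j, a, oi, oj, and oa */
.ve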
5723 5724 The i and j indices are 0 based 5725 5726 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5727 5728 This sets local rows and cannot be used to set off-processor values. 5729 5730 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5731 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5732 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5733 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5734 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5735 communication if it is known that only local entries will be set. 5736 5737 .keywords: matrix, aij, compressed row, sparse, parallel 5738 5739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5740 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5741 @*/ 5742 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5743 { 5744 PetscErrorCode ierr; 5745 Mat_MPIAIJ *maij; 5746 5747 PetscFunctionBegin; 5748 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5749 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5750 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5751 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5752 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5753 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5754 maij = (Mat_MPIAIJ*) (*mat)->data; 5755 5756 (*mat)->preallocated = PETSC_TRUE; 5757 5758 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5759 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5760 5761 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5762 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5763 5764 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5765 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5766 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5767 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5768 5769 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5770 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5773 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5774 PetscFunctionReturn(0); 5775 } 5776 5777 /* 5778 Special version for direct calls from Fortran 5779 */ 5780 #include <petsc/private/fortranimpl.h> 5781 5782 /* Change these macros so can be used in void function */ 5783 #undef CHKERRQ 5784 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5785 #undef SETERRQ2 5786 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5787 #undef SETERRQ3 5788 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5789 #undef SETERRQ 5790 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5791 
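/* matsetvaluesmpiaij_() below returns void so that it can be called directly from Fortran;
   any error therefore aborts via CHKERRABORT() instead of being returned to the caller */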
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
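/*
   For comparison with the Fortran fast path above: from C, the same insertion is an
   ordinary MatSetValues() call on the MPIAIJ matrix. Sketch only (kept in a comment);
   A is assumed to be a previously created and preallocated MPIAIJ matrix, and the
   indices and values are invented. The values array is interpreted row-oriented by
   default (see MAT_ROW_ORIENTED in MatSetOption()).

     PetscInt       rows[2] = {0,1}, cols[2] = {0,1};
     PetscScalar    vals[4] = {1.0,2.0,3.0,4.0};
     PetscErrorCode ierr;

     ierr = MatSetValues(A,2,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/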