#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
   the type also switches automatically to the inode routines when enough inodes exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
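  /*
     Illustrative sketch, not code from this file: the MATAIJ man page near the top of this file
     recommends calling both preallocation routines so the same user code runs on one process and
     on many.  A typical setup (M, N, d_nz and o_nz are hypothetical size and per-row nonzero
     estimates) would be

        MatCreate(comm,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
        MatSetType(A,MATAIJ);
        MatSetFromOptions(A);
        MatSeqAIJSetPreallocation(A,d_nz,NULL);              used on a single-process communicator
        MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);    used on a multi-process communicator
  */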
} 114 ok2:; 115 } 116 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = 
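  /*
     Reading aid for MatGetColumnNorms_MPIAIJ() above (a summary, not new behaviour): each process
     accumulates the contribution of its diagonal block (columns offset by cmap->rstart) and of its
     off-diagonal block (columns translated through garray) into a work array of global length n;
     one MPIU_Allreduce with MPIU_SUM (or MPIU_MAX for NORM_INFINITY) then combines the per-process
     results, and for NORM_2 the square root is taken afterwards.
  */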
ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = 
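  /*
     Illustrative sketch, not code from this file: as the header comment of MatDistribute_MPIAIJ()
     above says, the routine spreads a square MATSEQAIJ matrix held on rank 0 over the communicator;
     m is the number of rows the calling rank is to own (dmat is a hypothetical name).

        MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);    build the distributed matrix
        MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);      later: move over new values only

     For MAT_INITIAL_MATRIX rank 0 sends every other rank its row lengths, its nonzero count, the
     column indices and finally the numerical values, as the surrounding sends and receives show.
  */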
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 
382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] 
= value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for (j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 
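        /*
           Reading aid (summary of the logic below, not new behaviour): a column index in
           [cstart,cend) belongs to the diagonal block aij->A and is stored with the local index
           in[j]-cstart; any other column goes to the off-diagonal block aij->B.  Before the first
           assembly B still uses global column indices; after assembly they are translated through
           aij->colmap (see MatCreateColmap_MPIAIJ_Private()), e.g. a hypothetical global column
           gcol maps to the local B column aij->colmap[gcol]-1, or via PetscTableFind() when
           PETSC_USE_CTABLE is defined.
        */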
572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 
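        /*
           Reading aid: aij->colmap stores local off-diagonal column indices shifted by +1 so that
           a lookup result of 0 can mean "column not present"; hence the +1 on the key above and
           the col-- below, after which col < 0 identifies a column with no entry in this block.
        */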
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 
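    /*
       Illustrative sketch, not code from this file: passing x and b lets a caller keep a
       consistent system after zeroing rows; for every zeroed row r the code above sets
       b_r = diag * x_r.  A typical boundary-condition update (nbc and bcrows are hypothetical)
       would be

          MatZeroRows(A, nbc, bcrows, 1.0, x, b);    zeroed rows become identity rows, b matches x
    */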
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 954 PetscFunctionBegin; 955 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 967 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 973 { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 975 PetscErrorCode ierr; 976 PetscBool merged; 977 978 PetscFunctionBegin; 979 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 980 /* do nondiagonal part */ 981 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 982 if (!merged) { 983 /* send it on its way */ 984 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 985 /* do local 
part */ 986 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 987 /* receive remote parts: note this assumes the values are not actually */ 988 /* added in yy until the next line, */ 989 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 990 } else { 991 /* do local part */ 992 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 993 /* send it on its way */ 994 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 995 /* values actually were received in the Begin() but we need to call this nop */ 996 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1002 { 1003 MPI_Comm comm; 1004 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1005 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1006 IS Me,Notme; 1007 PetscErrorCode ierr; 1008 PetscInt M,N,first,last,*notme,i; 1009 PetscMPIInt size; 1010 1011 PetscFunctionBegin; 1012 /* Easy test: symmetric diagonal block */ 1013 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1014 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1015 if (!*f) PetscFunctionReturn(0); 1016 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1017 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1018 if (size == 1) PetscFunctionReturn(0); 1019 1020 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1021 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1022 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1024 for (i=0; i<first; i++) notme[i] = i; 1025 for (i=last; i<M; i++) notme[i-last+first] = i; 1026 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1027 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1028 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1029 Aoff = Aoffs[0]; 1030 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1031 Boff = Boffs[0]; 1032 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1033 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1034 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1035 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1036 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1037 ierr = PetscFree(notme);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1042 { 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1051 { 1052 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1053 PetscErrorCode ierr; 1054 1055 PetscFunctionBegin; 1056 /* do nondiagonal part */ 1057 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1058 /* send it on its way */ 1059 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1060 /* do local part */ 1061 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 /* receive remote parts */ 1063 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* 1068 This only works correctly for square matrices where the subblock A->A is the 1069 
diagonal block 1070 */ 1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1072 { 1073 PetscErrorCode ierr; 1074 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1075 1076 PetscFunctionBegin; 1077 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1078 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1079 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1084 { 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 1088 PetscFunctionBegin; 1089 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1090 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1095 { 1096 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 #if defined(PETSC_USE_LOG) 1101 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1102 #endif 1103 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1104 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1105 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1106 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1107 #if defined(PETSC_USE_CTABLE) 1108 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1109 #else 1110 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1111 #endif 1112 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1113 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1114 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1115 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1116 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1117 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1118 1119 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1127 #if defined(PETSC_HAVE_ELEMENTAL) 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1129 #endif 1130 #if defined(PETSC_HAVE_HYPRE) 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1133 #endif 1134 PetscFunctionReturn(0); 1135 } 1136 1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1138 { 1139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1140 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1141 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1142 PetscErrorCode ierr; 1143 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1144 int fd; 1145 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1146 PetscInt 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr =
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1216 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1217 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1218 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 } 1221 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222 } else { 1223 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1227 } 1228 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1229 1230 /* load up the local column values */ 1231 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1232 cnt = 0; 1233 for (i=0; i<mat->rmap->n; i++) { 1234 for (j=B->i[i]; j<B->i[i+1]; j++) { 1235 if (garray[B->j[j]] > cstart) break; 1236 column_values[cnt++] = B->a[j]; 1237 } 1238 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1239 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1240 } 1241 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1242 1243 /* store the column values to the file */ 1244 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1245 if (!rank) { 1246 MPI_Status status; 1247 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1248 for (i=1; i<size; i++) { 1249 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1250 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1251 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1252 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1253 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1254 } 1255 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1256 } else { 1257 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1258 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1260 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1261 } 1262 ierr = PetscFree(column_values);CHKERRQ(ierr); 1263 1264 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1265 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #include <petscdraw.h> 1270 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 PetscMPIInt rank = aij->rank,size = aij->size; 1275 PetscBool isdraw,iascii,isbinary; 1276 PetscViewer sviewer; 1277 PetscViewerFormat format; 1278 1279 PetscFunctionBegin; 1280 ierr = 
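  /*
     Reading aid for MatView_MPIAIJ_Binary() above (a summary, not new behaviour): the file written
     there is the standard PETSc binary matrix layout,
        header[4] = { MAT_FILE_CLASSID, global rows M, global cols N, global nonzero count }
     followed by one row length per global row, then all global column indices, then all values,
     with each section gathered to rank 0 under PetscViewerFlowControl pacing.
  */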
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1281 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1282 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1283 if (iascii) { 1284 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1285 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1286 MatInfo info; 1287 PetscBool inodes; 1288 1289 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1290 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1291 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1292 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1293 if (!inodes) { 1294 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1295 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1296 } else { 1297 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1298 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1299 } 1300 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1302 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1304 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1306 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1307 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1310 PetscInt inodecount,inodelimit,*inodes; 1311 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1312 if (inodes) { 1313 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1314 } else { 1315 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1316 } 1317 PetscFunctionReturn(0); 1318 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1319 PetscFunctionReturn(0); 1320 } 1321 } else if (isbinary) { 1322 if (size == 1) { 1323 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1324 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1325 } else { 1326 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1327 } 1328 PetscFunctionReturn(0); 1329 } else if (isdraw) { 1330 PetscDraw draw; 1331 PetscBool isnull; 1332 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1333 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1334 if (isnull) PetscFunctionReturn(0); 1335 } 1336 1337 { 1338 /* assemble the entire matrix onto first processor. 
*/ 1339 Mat A; 1340 Mat_SeqAIJ *Aloc; 1341 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1342 MatScalar *a; 1343 1344 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1345 if (!rank) { 1346 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1347 } else { 1348 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1349 } 1350 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1351 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1352 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1353 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1354 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1355 1356 /* copy over the A part */ 1357 Aloc = (Mat_SeqAIJ*)aij->A->data; 1358 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1359 row = mat->rmap->rstart; 1360 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1361 for (i=0; i<m; i++) { 1362 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1363 row++; 1364 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1365 } 1366 aj = Aloc->j; 1367 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1368 1369 /* copy over the B part */ 1370 Aloc = (Mat_SeqAIJ*)aij->B->data; 1371 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1372 row = mat->rmap->rstart; 1373 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1374 ct = cols; 1375 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1376 for (i=0; i<m; i++) { 1377 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1378 row++; 1379 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1380 } 1381 ierr = PetscFree(ct);CHKERRQ(ierr); 1382 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1383 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1384 /* 1385 Everyone has to call to draw the matrix since the graphics waits are 1386 synchronized across all processors that share the PetscDraw object 1387 */ 1388 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1389 if (!rank) { 1390 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1391 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1395 ierr = MatDestroy(&A);CHKERRQ(ierr); 1396 } 1397 PetscFunctionReturn(0); 1398 } 1399 1400 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1401 { 1402 PetscErrorCode ierr; 1403 PetscBool iascii,isdraw,issocket,isbinary; 1404 1405 PetscFunctionBegin; 1406 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1407 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1408 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1409 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1410 if (iascii || isdraw || isbinary || issocket) { 1411 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1417 { 1418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1419 PetscErrorCode ierr; 1420 Vec bb1 = 0; 1421 PetscBool hasop; 1422 
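  /*
     Reading aid for MatSOR_MPIAIJ() below (a summary, not new behaviour): only process-local
     sweeps are supported.  Each outer iteration scatters x into mat->lvec (the ghost values),
     forms the locally corrected right-hand side
        bb1 = bb - B * x_offproc
     and then runs 'lits' sweeps of sequential SOR on the diagonal block mat->A, so the global
     method is a block-Jacobi iteration with SOR as the local solver; truly parallel SOR is
     rejected with an error.  SOR_EISENSTAT is handled separately via Eisenstat's trick.
  */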
1423 PetscFunctionBegin; 1424 if (flag == SOR_APPLY_UPPER) { 1425 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1430 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1431 } 1432 1433 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1436 its--; 1437 } 1438 1439 while (its--) { 1440 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 1443 /* update rhs: bb1 = bb - B*x */ 1444 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1445 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1446 1447 /* local sweep */ 1448 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1449 } 1450 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1451 if (flag & SOR_ZERO_INITIAL_GUESS) { 1452 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1453 its--; 1454 } 1455 while (its--) { 1456 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1461 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1462 1463 /* local sweep */ 1464 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1465 } 1466 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1469 its--; 1470 } 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_EISENSTAT) { 1483 Vec xx1; 1484 1485 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1487 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 if (!mat->diag) { 1491 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1492 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1493 } 1494 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1495 if (hasop) { 1496 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1497 } else { 1498 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1499 } 1500 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1501 1502 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = 
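  /* Eisenstat's trick: the local backward sweep above produced an intermediate iterate in xx;
     bb1 was then assembled from bb, the scaled diagonal contribution ((omega-2)/omega)*D*xx,
     and the off-diagonal product B*lvec.  The local forward sweep below completes the
     rearranged SSOR application and xx1 is accumulated into xx.  The rearrangement avoids
     most of the extra matrix-vector work a naive SSOR-preconditioned iteration would do. */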
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1506 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1507 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1508 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1509 1510 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1511 1512 matin->factorerrortype = mat->A->factorerrortype; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1517 { 1518 Mat aA,aB,Aperm; 1519 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1520 PetscScalar *aa,*ba; 1521 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1522 PetscSF rowsf,sf; 1523 IS parcolp = NULL; 1524 PetscBool done; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1529 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1530 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1531 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1532 1533 /* Invert row permutation to find out where my rows should go */ 1534 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1535 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1536 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1537 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1538 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1539 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1540 1541 /* Invert column permutation to find out where my columns should go */ 1542 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1543 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1544 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1545 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1546 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1547 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1548 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1549 1550 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1551 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1552 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1553 1554 /* Find out where my gcols should go */ 1555 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1556 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1557 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1558 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1559 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1560 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1561 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1562 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1563 1564 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1565 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1566 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1567 for (i=0; i<m; i++) { 1568 PetscInt row = rdest[i],rowner; 1569 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1570 for (j=ai[i]; j<ai[i+1]; j++) { 1571 PetscInt cowner,col = cdest[aj[j]]; 1572 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1573 if (rowner == cowner) dnnz[i]++; 1574 else onnz[i]++; 1575 } 1576 for (j=bi[i]; j<bi[i+1]; j++) { 1577 PetscInt cowner,col = gcdest[bj[j]]; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = aij->garray; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1632 { 1633 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1634 Mat A = mat->A,B = mat->B; 1635 PetscErrorCode ierr; 1636 PetscReal isend[5],irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = 
isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 switch (op) { 1685 case MAT_NEW_NONZERO_LOCATIONS: 1686 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1687 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1688 case MAT_KEEP_NONZERO_PATTERN: 1689 case MAT_NEW_NONZERO_LOCATION_ERR: 1690 case MAT_USE_INODES: 1691 case MAT_IGNORE_ZERO_ENTRIES: 1692 MatCheckPreallocated(A,1); 1693 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1694 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1695 break; 1696 case MAT_ROW_ORIENTED: 1697 MatCheckPreallocated(A,1); 1698 a->roworiented = flg; 1699 1700 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1701 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1702 break; 1703 case MAT_NEW_DIAGONALS: 1704 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1705 break; 1706 case MAT_IGNORE_OFF_PROC_ENTRIES: 1707 a->donotstash = flg; 1708 break; 1709 case MAT_SPD: 1710 A->spd_set = PETSC_TRUE; 1711 A->spd = flg; 1712 if (flg) { 1713 A->symmetric = PETSC_TRUE; 1714 A->structurally_symmetric = PETSC_TRUE; 1715 A->symmetric_set = PETSC_TRUE; 1716 A->structurally_symmetric_set = PETSC_TRUE; 1717 } 1718 break; 1719 case MAT_SYMMETRIC: 1720 MatCheckPreallocated(A,1); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 break; 1723 case MAT_STRUCTURALLY_SYMMETRIC: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_HERMITIAN: 1728 MatCheckPreallocated(A,1); 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_SYMMETRY_ETERNAL: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_SUBMAT_SINGLEIS: 1736 A->submat_singleis = flg; 1737 break; 1738 case MAT_STRUCTURE_ONLY: 1739 /* The option is handled directly by MatSetOption() */ 1740 break; 1741 default: 1742 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1751 PetscErrorCode ierr; 1752 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1753 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1754 PetscInt *cmap,*idx_p; 1755 1756 PetscFunctionBegin; 1757 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1758 mat->getrowactive = PETSC_TRUE; 1759 1760 if (!mat->rowvalues && 
(idx || v)) { 1761 /* 1762 allocate enough space to hold information from the longest row. 1763 */ 1764 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1765 PetscInt max = 1,tmp; 1766 for (i=0; i<matin->rmap->n; i++) { 1767 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1768 if (max < tmp) max = tmp; 1769 } 1770 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1771 } 1772 1773 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1774 lrow = row - rstart; 1775 1776 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1777 if (!v) {pvA = 0; pvB = 0;} 1778 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1779 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1780 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1781 nztot = nzA + nzB; 1782 1783 cmap = mat->garray; 1784 if (v || idx) { 1785 if (nztot) { 1786 /* Sort by increasing column numbers, assuming A and B already sorted */ 1787 PetscInt imark = -1; 1788 if (v) { 1789 *v = v_p = mat->rowvalues; 1790 for (i=0; i<nzB; i++) { 1791 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1792 else break; 1793 } 1794 imark = i; 1795 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1796 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1797 } 1798 if (idx) { 1799 *idx = idx_p = mat->rowindices; 1800 if (imark > -1) { 1801 for (i=0; i<imark; i++) { 1802 idx_p[i] = cmap[cworkB[i]]; 1803 } 1804 } else { 1805 for (i=0; i<nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1812 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1813 } 1814 } else { 1815 if (idx) *idx = 0; 1816 if (v) *v = 0; 1817 } 1818 } 1819 *nz = nztot; 1820 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1821 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1828 1829 PetscFunctionBegin; 1830 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1831 aij->getrowactive = PETSC_FALSE; 1832 PetscFunctionReturn(0); 1833 } 1834 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1839 PetscErrorCode ierr; 1840 PetscInt i,j,cstart = mat->cmap->rstart; 1841 PetscReal sum = 0.0; 1842 MatScalar *v; 1843 1844 PetscFunctionBegin; 1845 if (aij->size == 1) { 1846 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1847 } else { 1848 if (type == NORM_FROBENIUS) { 1849 v = amat->a; 1850 for (i=0; i<amat->nz; i++) { 1851 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1852 } 1853 v = bmat->a; 1854 for (i=0; i<bmat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1858 *norm = PetscSqrtReal(*norm); 1859 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1860 } else if (type == NORM_1) { /* max column norm */ 1861 PetscReal *tmp,*tmp2; 1862 PetscInt *jj,*garray = aij->garray; 1863 ierr = 
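  /* 1-norm = max over columns j of sum_i |a_ij|.  Each process accumulates the absolute values
     of its local entries into a full-length array indexed by global column: diagonal-block
     entries at cstart + local column, off-diagonal entries at garray[local column].  The
     per-column sums are then added across processes with MPIU_Allreduce and the maximum is
     taken.  (Memory scales with the global number of columns, so this is simple rather than
     scalable.) */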
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1864 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1865 *norm = 0.0; 1866 v = amat->a; jj = amat->j; 1867 for (j=0; j<amat->nz; j++) { 1868 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1869 } 1870 v = bmat->a; jj = bmat->j; 1871 for (j=0; j<bmat->nz; j++) { 1872 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1873 } 1874 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1875 for (j=0; j<mat->cmap->N; j++) { 1876 if (tmp2[j] > *norm) *norm = tmp2[j]; 1877 } 1878 ierr = PetscFree(tmp);CHKERRQ(ierr); 1879 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1880 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1881 } else if (type == NORM_INFINITY) { /* max row norm */ 1882 PetscReal ntemp = 0.0; 1883 for (j=0; j<aij->A->rmap->n; j++) { 1884 v = amat->a + amat->i[j]; 1885 sum = 0.0; 1886 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1887 sum += PetscAbsScalar(*v); v++; 1888 } 1889 v = bmat->a + bmat->i[j]; 1890 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); v++; 1892 } 1893 if (sum > ntemp) ntemp = sum; 1894 } 1895 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1896 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1897 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1903 { 1904 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1905 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1906 PetscErrorCode ierr; 1907 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1908 PetscInt cstart = A->cmap->rstart,ncol; 1909 Mat B; 1910 MatScalar *array; 1911 1912 PetscFunctionBegin; 1913 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1914 1915 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1916 ai = Aloc->i; aj = Aloc->j; 1917 bi = Bloc->i; bj = Bloc->j; 1918 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1919 PetscInt *d_nnz,*g_nnz,*o_nnz; 1920 PetscSFNode *oloc; 1921 PETSC_UNUSED PetscSF sf; 1922 1923 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1924 /* compute d_nnz for preallocation */ 1925 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1926 for (i=0; i<ai[ma]; i++) { 1927 d_nnz[aj[i]]++; 1928 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1929 } 1930 /* compute local off-diagonal contributions */ 1931 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1933 /* map those to global */ 1934 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1935 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1936 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1937 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1938 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1939 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1940 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1941 1942 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1943 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1944 
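  /* Preallocation for the transpose, computed above: d_nnz[j] counts how often local column j
     appears in the diagonal block (those entries become local rows of B), and o_nnz[] is
     obtained by counting entries per ghost column of the off-diagonal block (g_nnz) and
     reducing those counts onto the owning processes through the PetscSF built on a->garray.
     B is preallocated just below with exactly these row lengths. */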
ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1945 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1946 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1947 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1948 } else { 1949 B = *matout; 1950 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1951 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1952 } 1953 1954 /* copy over the A part */ 1955 array = Aloc->a; 1956 row = A->rmap->rstart; 1957 for (i=0; i<ma; i++) { 1958 ncol = ai[i+1]-ai[i]; 1959 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1960 row++; 1961 array += ncol; aj += ncol; 1962 } 1963 aj = Aloc->j; 1964 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1965 1966 /* copy over the B part */ 1967 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1968 array = Bloc->a; 1969 row = A->rmap->rstart; 1970 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1971 cols_tmp = cols; 1972 for (i=0; i<mb; i++) { 1973 ncol = bi[i+1]-bi[i]; 1974 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1975 row++; 1976 array += ncol; cols_tmp += ncol; 1977 } 1978 ierr = PetscFree(cols);CHKERRQ(ierr); 1979 1980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1983 *matout = B; 1984 } else { 1985 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1991 { 1992 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1993 Mat a = aij->A,b = aij->B; 1994 PetscErrorCode ierr; 1995 PetscInt s1,s2,s3; 1996 1997 PetscFunctionBegin; 1998 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1999 if (rr) { 2000 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2001 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2002 /* Overlap communication with computation. 
*/ 2003 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 } 2005 if (ll) { 2006 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2007 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2008 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2009 } 2010 /* scale the diagonal block */ 2011 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2012 2013 if (rr) { 2014 /* Do a scatter end and then right scale the off-diagonal block */ 2015 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2017 } 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2022 { 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2032 { 2033 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2034 Mat a,b,c,d; 2035 PetscBool flg; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 a = matA->A; b = matA->B; 2040 c = matB->A; d = matB->B; 2041 2042 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2043 if (flg) { 2044 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2045 } 2046 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2051 { 2052 PetscErrorCode ierr; 2053 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2054 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2055 2056 PetscFunctionBegin; 2057 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2058 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2059 /* because of the column compression in the off-processor part of the matrix a->B, 2060 the number of columns in a->B and b->B may be different, hence we cannot call 2061 the MatCopy() directly on the two parts. If need be, we can provide a more 2062 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2063 then copying the submatrices */ 2064 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2065 } else { 2066 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2067 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2068 } 2069 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2074 { 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* 2083 Computes the number of nonzeros per row needed for preallocation when X and Y 2084 have different nonzero structure. 
2085 */ 2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2087 { 2088 PetscInt i,j,k,nzx,nzy; 2089 2090 PetscFunctionBegin; 2091 /* Set the number of nonzeros in the new matrix */ 2092 for (i=0; i<m; i++) { 2093 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2094 nzx = xi[i+1] - xi[i]; 2095 nzy = yi[i+1] - yi[i]; 2096 nnz[i] = 0; 2097 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2098 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2099 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2100 nnz[i]++; 2101 } 2102 for (; k<nzy; k++) nnz[i]++; 2103 } 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2109 { 2110 PetscErrorCode ierr; 2111 PetscInt m = Y->rmap->N; 2112 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2113 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2114 2115 PetscFunctionBegin; 2116 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2121 { 2122 PetscErrorCode ierr; 2123 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2124 PetscBLASInt bnz,one=1; 2125 Mat_SeqAIJ *x,*y; 2126 2127 PetscFunctionBegin; 2128 if (str == SAME_NONZERO_PATTERN) { 2129 PetscScalar alpha = a; 2130 x = (Mat_SeqAIJ*)xx->A->data; 2131 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2132 y = (Mat_SeqAIJ*)yy->A->data; 2133 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2134 x = (Mat_SeqAIJ*)xx->B->data; 2135 y = (Mat_SeqAIJ*)yy->B->data; 2136 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2137 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2138 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2139 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2140 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2141 } else { 2142 Mat B; 2143 PetscInt *nnz_d,*nnz_o; 2144 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2145 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2146 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2147 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2148 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2149 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2150 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2151 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2152 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2153 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2154 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2155 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2156 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2157 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2158 } 2159 PetscFunctionReturn(0); 2160 } 2161 2162 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2163 2164 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2165 { 2166 #if defined(PETSC_USE_COMPLEX) 2167 PetscErrorCode ierr; 2168 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2169 2170 PetscFunctionBegin; 2171 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2172 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2173 #else 2174 PetscFunctionBegin; 2175 #endif 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2180 { 2181 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2186 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2191 { 2192 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2193 PetscErrorCode ierr; 2194 2195 PetscFunctionBegin; 2196 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2197 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 PetscInt i,*idxb = 0; 2206 PetscScalar *va,*vb; 2207 Vec vtmp; 2208 2209 PetscFunctionBegin; 2210 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2211 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2212 if (idx) { 2213 for (i=0; i<A->rmap->n; i++) { 2214 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2215 } 2216 } 2217 2218 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2219 if (idx) { 2220 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2221 } 2222 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2223 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2224 2225 for (i=0; i<A->rmap->n; i++) { 2226 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2227 va[i] = vb[i]; 2228 if (idx) idx[i] = a->garray[idxb[i]]; 2229 } 2230 } 2231 2232 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2233 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2234 ierr = PetscFree(idxb);CHKERRQ(ierr); 2235 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2240 { 2241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2242 PetscErrorCode ierr; 2243 PetscInt i,*idxb = 0; 2244 PetscScalar *va,*vb; 2245 Vec vtmp; 2246 2247 PetscFunctionBegin; 2248 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2249 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2250 if (idx) { 2251 for (i=0; i<A->cmap->n; i++) { 2252 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2253 } 2254 } 2255 2256 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2257 if (idx) { 2258 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2259 } 2260 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2261 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2262 2263 for (i=0; i<A->rmap->n; i++) { 2264 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2265 va[i] = vb[i]; 2266 if (idx) idx[i] = a->garray[idxb[i]]; 2267 } 2268 } 2269 2270 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2271 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2272 ierr = PetscFree(idxb);CHKERRQ(ierr); 2273 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2278 { 2279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2280 PetscInt n = A->rmap->n; 2281 PetscInt cstart = A->cmap->rstart; 2282 PetscInt *cmap = mat->garray; 2283 PetscInt *diagIdx, *offdiagIdx; 2284 Vec diagV, offdiagV; 2285 PetscScalar *a, *diagA, *offdiagA; 2286 PetscInt r; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 
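  /* Strategy: compute row minima separately for the diagonal block (mat->A) and the
     off-diagonal block (mat->B), then merge the two results entry by entry.  Column indices
     coming from the diagonal block are shifted by cstart into global numbering; indices from
     the off-diagonal block are translated through the garray ghost-column map.  Note that,
     as in MatGetRowMax_MPIAIJ below, the comparison between the two candidates is made on
     their absolute values. */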
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2291 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2292 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2293 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2294 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2295 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2296 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2297 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2298 for (r = 0; r < n; ++r) { 2299 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2300 a[r] = diagA[r]; 2301 idx[r] = cstart + diagIdx[r]; 2302 } else { 2303 a[r] = offdiagA[r]; 2304 idx[r] = cmap[offdiagIdx[r]]; 2305 } 2306 } 2307 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2308 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2309 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2310 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2311 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2312 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2317 { 2318 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2319 PetscInt n = A->rmap->n; 2320 PetscInt cstart = A->cmap->rstart; 2321 PetscInt *cmap = mat->garray; 2322 PetscInt *diagIdx, *offdiagIdx; 2323 Vec diagV, offdiagV; 2324 PetscScalar *a, *diagA, *offdiagA; 2325 PetscInt r; 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2330 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2331 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2332 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2333 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2334 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2335 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2336 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2337 for (r = 0; r < n; ++r) { 2338 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2339 a[r] = diagA[r]; 2340 idx[r] = cstart + diagIdx[r]; 2341 } else { 2342 a[r] = offdiagA[r]; 2343 idx[r] = cmap[offdiagIdx[r]]; 2344 } 2345 } 2346 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2347 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2348 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2349 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2350 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2351 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2356 { 2357 PetscErrorCode ierr; 2358 Mat *dummy; 2359 2360 PetscFunctionBegin; 2361 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2362 *newmat = *dummy; 2363 ierr = PetscFree(dummy);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2368 { 2369 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2374 A->factorerrortype = a->A->factorerrortype; 2375 PetscFunctionReturn(0); 2376 } 2377 2378 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2379 { 2380 PetscErrorCode ierr; 2381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2382 2383 PetscFunctionBegin; 2384
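  /* Both the diagonal and off-diagonal blocks are filled with random values (within the
     already preallocated nonzero pattern) and the matrix is then reassembled.  A minimal
     usage sketch (illustrative; assumes A is a preallocated MATMPIAIJ matrix):

       PetscRandom rctx;
       ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
       ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */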
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2385 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2386 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2392 { 2393 PetscFunctionBegin; 2394 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2395 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 /*@ 2400 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2401 2402 Collective on Mat 2403 2404 Input Parameters: 2405 + A - the matrix 2406 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2407 2408 Level: advanced 2409 2410 @*/ 2411 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2421 { 2422 PetscErrorCode ierr; 2423 PetscBool sc = PETSC_FALSE,flg; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2427 ierr = PetscObjectOptionsBegin((PetscObject)A); 2428 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2429 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2430 if (flg) { 2431 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2432 } 2433 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2438 { 2439 PetscErrorCode ierr; 2440 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2441 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2442 2443 PetscFunctionBegin; 2444 if (!Y->preallocated) { 2445 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2446 } else if (!aij->nz) { 2447 PetscInt nonew = aij->nonew; 2448 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2449 aij->nonew = nonew; 2450 } 2451 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2456 { 2457 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2458 PetscErrorCode ierr; 2459 2460 PetscFunctionBegin; 2461 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2462 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2463 if (d) { 2464 PetscInt rstart; 2465 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2466 *d += rstart; 2467 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 2473 /* -------------------------------------------------------------------*/ 2474 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2475 MatGetRow_MPIAIJ, 2476 MatRestoreRow_MPIAIJ, 2477 MatMult_MPIAIJ, 2478 /* 4*/ MatMultAdd_MPIAIJ, 2479 MatMultTranspose_MPIAIJ, 2480 MatMultTransposeAdd_MPIAIJ, 2481 0, 2482 0, 2483 0, 2484 /*10*/ 0, 2485 0, 2486 0, 2487 MatSOR_MPIAIJ, 2488 MatTranspose_MPIAIJ, 2489 /*15*/ MatGetInfo_MPIAIJ, 2490 MatEqual_MPIAIJ, 2491 
MatGetDiagonal_MPIAIJ, 2492 MatDiagonalScale_MPIAIJ, 2493 MatNorm_MPIAIJ, 2494 /*20*/ MatAssemblyBegin_MPIAIJ, 2495 MatAssemblyEnd_MPIAIJ, 2496 MatSetOption_MPIAIJ, 2497 MatZeroEntries_MPIAIJ, 2498 /*24*/ MatZeroRows_MPIAIJ, 2499 0, 2500 0, 2501 0, 2502 0, 2503 /*29*/ MatSetUp_MPIAIJ, 2504 0, 2505 0, 2506 MatGetDiagonalBlock_MPIAIJ, 2507 0, 2508 /*34*/ MatDuplicate_MPIAIJ, 2509 0, 2510 0, 2511 0, 2512 0, 2513 /*39*/ MatAXPY_MPIAIJ, 2514 MatCreateSubMatrices_MPIAIJ, 2515 MatIncreaseOverlap_MPIAIJ, 2516 MatGetValues_MPIAIJ, 2517 MatCopy_MPIAIJ, 2518 /*44*/ MatGetRowMax_MPIAIJ, 2519 MatScale_MPIAIJ, 2520 MatShift_MPIAIJ, 2521 MatDiagonalSet_MPIAIJ, 2522 MatZeroRowsColumns_MPIAIJ, 2523 /*49*/ MatSetRandom_MPIAIJ, 2524 0, 2525 0, 2526 0, 2527 0, 2528 /*54*/ MatFDColoringCreate_MPIXAIJ, 2529 0, 2530 MatSetUnfactored_MPIAIJ, 2531 MatPermute_MPIAIJ, 2532 0, 2533 /*59*/ MatCreateSubMatrix_MPIAIJ, 2534 MatDestroy_MPIAIJ, 2535 MatView_MPIAIJ, 2536 0, 2537 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2538 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2539 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2540 0, 2541 0, 2542 0, 2543 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2544 MatGetRowMinAbs_MPIAIJ, 2545 0, 2546 0, 2547 0, 2548 0, 2549 /*75*/ MatFDColoringApply_AIJ, 2550 MatSetFromOptions_MPIAIJ, 2551 0, 2552 0, 2553 MatFindZeroDiagonals_MPIAIJ, 2554 /*80*/ 0, 2555 0, 2556 0, 2557 /*83*/ MatLoad_MPIAIJ, 2558 MatIsSymmetric_MPIAIJ, 2559 0, 2560 0, 2561 0, 2562 0, 2563 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2564 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2565 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2566 MatPtAP_MPIAIJ_MPIAIJ, 2567 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2568 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*99*/ 0, 2574 0, 2575 0, 2576 MatConjugate_MPIAIJ, 2577 0, 2578 /*104*/MatSetValuesRow_MPIAIJ, 2579 MatRealPart_MPIAIJ, 2580 MatImaginaryPart_MPIAIJ, 2581 0, 2582 0, 2583 /*109*/0, 2584 0, 2585 MatGetRowMin_MPIAIJ, 2586 0, 2587 MatMissingDiagonal_MPIAIJ, 2588 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2589 0, 2590 MatGetGhosts_MPIAIJ, 2591 0, 2592 0, 2593 /*119*/0, 2594 0, 2595 0, 2596 0, 2597 MatGetMultiProcBlock_MPIAIJ, 2598 /*124*/MatFindNonzeroRows_MPIAIJ, 2599 MatGetColumnNorms_MPIAIJ, 2600 MatInvertBlockDiagonal_MPIAIJ, 2601 0, 2602 MatCreateSubMatricesMPI_MPIAIJ, 2603 /*129*/0, 2604 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2605 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2606 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2607 0, 2608 /*134*/0, 2609 0, 2610 MatRARt_MPIAIJ_MPIAIJ, 2611 0, 2612 0, 2613 /*139*/MatSetBlockSizes_MPIAIJ, 2614 0, 2615 0, 2616 MatFDColoringSetUp_MPIXAIJ, 2617 MatFindOffBlockDiagonalEntries_MPIAIJ, 2618 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2619 }; 2620 2621 /* ----------------------------------------------------------------------------------------*/ 2622 2623 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2624 { 2625 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2626 PetscErrorCode ierr; 2627 2628 PetscFunctionBegin; 2629 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2630 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2631 PetscFunctionReturn(0); 2632 } 2633 2634 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2635 { 2636 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2637 PetscErrorCode ierr; 2638 2639 PetscFunctionBegin; 2640 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2641 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2646 { 2647 Mat_MPIAIJ *b; 2648 PetscErrorCode ierr; 2649 2650 PetscFunctionBegin; 2651 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2652 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2653 b = (Mat_MPIAIJ*)B->data; 2654 2655 #if defined(PETSC_USE_CTABLE) 2656 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2657 #else 2658 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2659 #endif 2660 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2661 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2662 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2663 2664 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2665 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2666 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2667 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2668 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2669 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2670 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2671 2672 if (!B->preallocated) { 2673 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2674 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2675 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2676 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2678 } 2679 2680 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2681 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2682 B->preallocated = PETSC_TRUE; 2683 B->was_assembled = PETSC_FALSE; 2684 B->assembled = PETSC_FALSE;; 2685 PetscFunctionReturn(0); 2686 } 2687 2688 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2689 { 2690 Mat mat; 2691 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 *newmat = 0; 2696 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2697 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2698 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2699 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2700 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2701 a = (Mat_MPIAIJ*)mat->data; 2702 2703 mat->factortype = matin->factortype; 2704 mat->assembled = PETSC_TRUE; 2705 mat->insertmode = NOT_SET_VALUES; 2706 mat->preallocated = PETSC_TRUE; 2707 2708 a->size = oldmat->size; 2709 a->rank = oldmat->rank; 2710 a->donotstash = oldmat->donotstash; 2711 a->roworiented = oldmat->roworiented; 2712 a->rowindices = 0; 2713 a->rowvalues = 0; 2714 a->getrowactive = PETSC_FALSE; 2715 2716 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2717 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2718 2719 if (oldmat->colmap) { 2720 #if defined(PETSC_USE_CTABLE) 2721 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2722 #else 2723 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2724 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2725 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2726 #endif 2727 } else a->colmap = 0; 2728 if (oldmat->garray) { 2729 PetscInt len; 2730 len = oldmat->B->cmap->n; 2731 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2732 ierr = 
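  /* The ghost-column array being rebuilt here, together with the column map copied above and
     the lvec/Mvctx/A/B objects duplicated below, means the duplicate carries the complete
     parallel support structure and is immediately usable (e.g. for MatMult()) without
     rebuilding the communication pattern. */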
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2733 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2734 } else a->garray = 0; 2735 2736 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2737 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2738 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2739 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2740 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2741 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2742 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2743 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2744 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2745 *newmat = mat; 2746 PetscFunctionReturn(0); 2747 } 2748 2749 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2750 { 2751 PetscScalar *vals,*svals; 2752 MPI_Comm comm; 2753 PetscErrorCode ierr; 2754 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2755 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2756 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2757 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2758 PetscInt cend,cstart,n,*rowners; 2759 int fd; 2760 PetscInt bs = newMat->rmap->bs; 2761 2762 PetscFunctionBegin; 2763 /* force binary viewer to load .info file if it has not yet done so */ 2764 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2765 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2766 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2768 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2769 if (!rank) { 2770 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2771 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2772 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2773 } 2774 2775 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2776 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2777 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2778 if (bs < 0) bs = 1; 2779 2780 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2781 M = header[1]; N = header[2]; 2782 2783 /* If global sizes are set, check if they are consistent with that given in the file */ 2784 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2785 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2786 2787 /* determine ownership of all (block) rows */ 2788 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2789 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2790 else m = newMat->rmap->n; /* Set by user */ 2791 2792 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2793 
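  /* rowners[] becomes the row-ownership ranges: each process contributes its local row count m
     via MPI_Allgather below, and the prefix sum that follows converts counts into offsets, so
     process i owns global rows [rowners[i], rowners[i+1]).  A minimal sketch of how MatLoad()
     is typically driven (illustrative; "matrix.dat" is a placeholder file name):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  */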
ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2794 2795 /* First process needs enough room for process with most rows */ 2796 if (!rank) { 2797 mmax = rowners[1]; 2798 for (i=2; i<=size; i++) { 2799 mmax = PetscMax(mmax, rowners[i]); 2800 } 2801 } else mmax = -1; /* unused, but compilers complain */ 2802 2803 rowners[0] = 0; 2804 for (i=2; i<=size; i++) { 2805 rowners[i] += rowners[i-1]; 2806 } 2807 rstart = rowners[rank]; 2808 rend = rowners[rank+1]; 2809 2810 /* distribute row lengths to all processors */ 2811 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2812 if (!rank) { 2813 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2814 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2815 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2816 for (j=0; j<m; j++) { 2817 procsnz[0] += ourlens[j]; 2818 } 2819 for (i=1; i<size; i++) { 2820 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2821 /* calculate the number of nonzeros on each processor */ 2822 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2823 procsnz[i] += rowlengths[j]; 2824 } 2825 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2828 } else { 2829 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2830 } 2831 2832 if (!rank) { 2833 /* determine max buffer needed and allocate it */ 2834 maxnz = 0; 2835 for (i=0; i<size; i++) { 2836 maxnz = PetscMax(maxnz,procsnz[i]); 2837 } 2838 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2839 2840 /* read in my part of the matrix column indices */ 2841 nz = procsnz[0]; 2842 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2843 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2844 2845 /* read in every one elses and ship off */ 2846 for (i=1; i<size; i++) { 2847 nz = procsnz[i]; 2848 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2849 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2850 } 2851 ierr = PetscFree(cols);CHKERRQ(ierr); 2852 } else { 2853 /* determine buffer space needed for message */ 2854 nz = 0; 2855 for (i=0; i<m; i++) { 2856 nz += ourlens[i]; 2857 } 2858 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2859 2860 /* receive message of column indices*/ 2861 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2862 } 2863 2864 /* determine column ownership if matrix is not square */ 2865 if (N != M) { 2866 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2867 else n = newMat->cmap->n; 2868 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2869 cstart = cend - n; 2870 } else { 2871 cstart = rstart; 2872 cend = rend; 2873 n = cend - cstart; 2874 } 2875 2876 /* loop over local rows, determining number of off diagonal entries */ 2877 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2878 jj = 0; 2879 for (i=0; i<m; i++) { 2880 for (j=0; j<ourlens[i]; j++) { 2881 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2882 jj++; 2883 } 2884 } 2885 2886 for (i=0; i<m; i++) { 2887 ourlens[i] -= offlens[i]; 2888 } 2889 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2890 2891 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2892 2893 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2894 2895 for (i=0; i<m; i++) { 2896 ourlens[i] += offlens[i]; 2897 } 2898 2899 if (!rank) { 2900 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2901 2902 /* read in 
my part of the matrix numerical values */ 2903 nz = procsnz[0]; 2904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2905 2906 /* insert into matrix */ 2907 jj = rstart; 2908 smycols = mycols; 2909 svals = vals; 2910 for (i=0; i<m; i++) { 2911 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2912 smycols += ourlens[i]; 2913 svals += ourlens[i]; 2914 jj++; 2915 } 2916 2917 /* read in other processors and ship out */ 2918 for (i=1; i<size; i++) { 2919 nz = procsnz[i]; 2920 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2921 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2922 } 2923 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2924 } else { 2925 /* receive numeric values */ 2926 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2927 2928 /* receive message of values*/ 2929 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2930 2931 /* insert into matrix */ 2932 jj = rstart; 2933 smycols = mycols; 2934 svals = vals; 2935 for (i=0; i<m; i++) { 2936 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2937 smycols += ourlens[i]; 2938 svals += ourlens[i]; 2939 jj++; 2940 } 2941 } 2942 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2943 ierr = PetscFree(vals);CHKERRQ(ierr); 2944 ierr = PetscFree(mycols);CHKERRQ(ierr); 2945 ierr = PetscFree(rowners);CHKERRQ(ierr); 2946 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2947 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 /* Not scalable because of ISAllGather() unless getting all columns. */ 2952 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2953 { 2954 PetscErrorCode ierr; 2955 IS iscol_local; 2956 PetscBool isstride; 2957 PetscMPIInt lisstride=0,gisstride; 2958 2959 PetscFunctionBegin; 2960 /* check if we are grabbing all columns*/ 2961 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2962 2963 if (isstride) { 2964 PetscInt start,len,mstart,mlen; 2965 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2966 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2967 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2968 if (mstart == start && mlen-mstart == len) lisstride = 1; 2969 } 2970 2971 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2972 if (gisstride) { 2973 PetscInt N; 2974 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 2975 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 2976 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 2977 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 2978 } else { 2979 PetscInt cbs; 2980 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2981 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2982 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 2983 } 2984 2985 *isseq = iscol_local; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 /* 2990 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 2991 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 2992 2993 Input Parameters: 2994 mat - matrix 2995 isrow - parallel row index set; its local indices are a subset of local columns of mat, 2996 i.e., mat->rstart <= isrow[i] < mat->rend 2997 iscol - parallel 
column index set; its local indices are a subset of local columns of mat, 2998 i.e., mat->cstart <= iscol[i] < mat->cend 2999 Output Parameter: 3000 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3001 iscol_o - sequential column index set for retrieving mat->B 3002 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3003 */ 3004 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3005 { 3006 PetscErrorCode ierr; 3007 Vec x,cmap; 3008 const PetscInt *is_idx; 3009 PetscScalar *xarray,*cmaparray; 3010 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3011 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3012 Mat B=a->B; 3013 Vec lvec=a->lvec,lcmap; 3014 PetscInt i,cstart,cend,Bn=B->cmap->N; 3015 MPI_Comm comm; 3016 3017 PetscFunctionBegin; 3018 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3019 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3020 3021 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3022 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3023 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3024 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3025 3026 /* Get start indices */ 3027 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3028 isstart -= ncols; 3029 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3030 3031 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3032 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3033 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3034 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3035 for (i=0; i<ncols; i++) { 3036 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3037 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3038 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3039 } 3040 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3041 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3042 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3043 3044 /* Get iscol_d */ 3045 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3046 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3047 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3048 3049 /* Get isrow_d */ 3050 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3051 rstart = mat->rmap->rstart; 3052 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3053 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3054 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3055 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3056 3057 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3058 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3059 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3060 3061 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3062 ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 3064 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3065 3066 ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3067 ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3068 ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 3070 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3071 /* off-process column indices */ 3072 count = 0; 3073 ierr = 
PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3074 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3075 3076 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3077 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3078 for (i=0; i<Bn; i++) { 3079 if (PetscRealPart(xarray[i]) > -1.0) { 3080 idx[count] = i; /* local column index in off-diagonal part B */ 3081 cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3082 } 3083 } 3084 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3085 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3086 3087 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3088 /* cannot ensure iscol_o has same blocksize as iscol! */ 3089 3090 ierr = PetscFree(idx);CHKERRQ(ierr); 3091 3092 *garray = cmap1; 3093 3094 ierr = VecDestroy(&x);CHKERRQ(ierr); 3095 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3096 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3101 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3102 { 3103 PetscErrorCode ierr; 3104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3105 Mat M = NULL; 3106 MPI_Comm comm; 3107 IS iscol_d,isrow_d,iscol_o; 3108 Mat Asub = NULL,Bsub = NULL; 3109 PetscInt n; 3110 3111 PetscFunctionBegin; 3112 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3113 3114 if (call == MAT_REUSE_MATRIX) { 3115 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3116 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3117 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3118 3119 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3120 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3121 3122 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3123 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3124 3125 /* Update diagonal and off-diagonal portions of submat */ 3126 asub = (Mat_MPIAIJ*)(*submat)->data; 3127 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3128 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3129 if (n) { 3130 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3131 } 3132 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 3135 } else { /* call == MAT_INITIAL_MATRIX) */ 3136 const PetscInt *garray; 3137 PetscInt BsubN; 3138 3139 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3140 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3141 3142 /* Create local submatrices Asub and Bsub */ 3143 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3144 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3145 3146 /* Create submatrix M */ 3147 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3148 3149 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3150 asub = (Mat_MPIAIJ*)M->data; 3151 3152 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3153 n = asub->B->cmap->N; 3154 if (BsubN > n) { 3155 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3156 const PetscInt *idx; 3157 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3158 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3159 3160 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3161 j = 0; 3162 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3163 for (i=0; i<n; i++) { 3164 if (j >= BsubN) break; 3165 while (subgarray[i] > garray[j]) j++; 3166 3167 if (subgarray[i] == garray[j]) { 3168 idx_new[i] = idx[j++]; 3169 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3170 } 3171 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3172 3173 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3174 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3175 3176 } else if (BsubN < n) { 3177 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3178 } 3179 3180 ierr = PetscFree(garray);CHKERRQ(ierr); 3181 *submat = M; 3182 3183 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3184 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3185 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3186 3187 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3188 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3189 3190 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3191 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3192 } 3193 PetscFunctionReturn(0); 3194 } 3195 3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3197 { 3198 PetscErrorCode ierr; 3199 IS iscol_local=NULL,isrow_d; 3200 PetscInt csize; 3201 PetscInt n,i,j,start,end; 3202 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3203 MPI_Comm comm; 3204 3205 PetscFunctionBegin; 3206 /* If isrow has same processor distribution as mat, 3207 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3208 if (call == MAT_REUSE_MATRIX) { 3209 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3210 if (isrow_d) { 3211 sameRowDist = PETSC_TRUE; 3212 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3213 } else { 3214 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3215 if (iscol_local) { 3216 sameRowDist = PETSC_TRUE; 3217 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3218 } 3219 } 3220 } else { 3221 /* Check if isrow has same processor distribution as mat */ 3222 sameDist[0] 
= PETSC_FALSE; 3223 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3224 if (!n) { 3225 sameDist[0] = PETSC_TRUE; 3226 } else { 3227 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3228 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3229 if (i >= start && j < end) { 3230 sameDist[0] = PETSC_TRUE; 3231 } 3232 } 3233 3234 /* Check if iscol has same processor distribution as mat */ 3235 sameDist[1] = PETSC_FALSE; 3236 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3237 if (!n) { 3238 sameDist[1] = PETSC_TRUE; 3239 } else { 3240 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3241 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3242 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3243 } 3244 3245 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3246 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3247 sameRowDist = tsameDist[0]; 3248 } 3249 3250 if (sameRowDist) { 3251 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3252 /* isrow and iscol have same processor distribution as mat */ 3253 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3254 PetscFunctionReturn(0); 3255 } else { /* sameRowDist */ 3256 /* isrow has same processor distribution as mat */ 3257 if (call == MAT_INITIAL_MATRIX) { 3258 PetscBool sorted; 3259 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3260 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3261 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3262 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3263 3264 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3265 if (sorted) { 3266 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3267 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3268 PetscFunctionReturn(0); 3269 } 3270 } else { /* call == MAT_REUSE_MATRIX */ 3271 IS iscol_sub; 3272 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3273 if (iscol_sub) { 3274 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3275 PetscFunctionReturn(0); 3276 } 3277 } 3278 } 3279 } 3280 3281 /* General case: iscol -> iscol_local which has global size of iscol */ 3282 if (call == MAT_REUSE_MATRIX) { 3283 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3284 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3285 } else { 3286 if (!iscol_local) { 3287 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3288 } 3289 } 3290 3291 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3292 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3293 3294 if (call == MAT_INITIAL_MATRIX) { 3295 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3296 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3297 } 3298 PetscFunctionReturn(0); 3299 } 3300 3301 /*@C 3302 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3303 and "off-diagonal" part of the matrix in CSR format. 3304 3305 Collective on MPI_Comm 3306 3307 Input Parameters: 3308 + comm - MPI communicator 3309 . 
A - "diagonal" portion of matrix 3310 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3311 - garray - global index of B columns 3312 3313 Output Parameter: 3314 . mat - the matrix, with input A as its local diagonal matrix 3315 Level: advanced 3316 3317 Notes: 3318 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3319 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3320 3321 .seealso: MatCreateMPIAIJWithSplitArrays() 3322 @*/ 3323 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3324 { 3325 PetscErrorCode ierr; 3326 Mat_MPIAIJ *maij; 3327 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3328 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3329 PetscScalar *oa=b->a; 3330 Mat Bnew; 3331 PetscInt m,n,N; 3332 3333 PetscFunctionBegin; 3334 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3335 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3336 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3337 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3338 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3339 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3340 3341 /* Get global columns of mat */ 3342 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3343 3344 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3345 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3346 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3347 maij = (Mat_MPIAIJ*)(*mat)->data; 3348 3349 (*mat)->preallocated = PETSC_TRUE; 3350 3351 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3352 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3353 3354 /* Set A as diagonal portion of *mat */ 3355 maij->A = A; 3356 3357 nz = oi[m]; 3358 for (i=0; i<nz; i++) { 3359 col = oj[i]; 3360 oj[i] = garray[col]; 3361 } 3362 3363 /* Set Bnew as off-diagonal portion of *mat */ 3364 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3365 bnew = (Mat_SeqAIJ*)Bnew->data; 3366 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3367 maij->B = Bnew; 3368 3369 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3370 3371 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3372 b->free_a = PETSC_FALSE; 3373 b->free_ij = PETSC_FALSE; 3374 ierr = MatDestroy(&B);CHKERRQ(ierr); 3375 3376 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3377 bnew->free_a = PETSC_TRUE; 3378 bnew->free_ij = PETSC_TRUE; 3379 3380 /* condense columns of maij->B */ 3381 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3382 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3383 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3384 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3385 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3386 PetscFunctionReturn(0); 3387 } 3388 3389 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3390 
3391 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3392 { 3393 PetscErrorCode ierr; 3394 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3395 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3396 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3397 Mat M,Msub,B=a->B; 3398 MatScalar *aa; 3399 Mat_SeqAIJ *aij; 3400 PetscInt *garray = a->garray,*colsub,Ncols; 3401 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3402 IS iscol_sub,iscmap; 3403 const PetscInt *is_idx,*cmap; 3404 PetscBool allcolumns=PETSC_FALSE; 3405 MPI_Comm comm; 3406 3407 PetscFunctionBegin; 3408 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3409 3410 if (call == MAT_REUSE_MATRIX) { 3411 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3412 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3413 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3414 3415 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3416 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3417 3418 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3419 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3420 3421 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3422 3423 } else { /* call == MAT_INITIAL_MATRIX) */ 3424 PetscBool flg; 3425 3426 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3427 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3428 3429 /* (1) iscol -> nonscalable iscol_local */ 3430 /* Check for special case: each processor gets entire matrix columns */ 3431 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3432 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3433 if (allcolumns) { 3434 iscol_sub = iscol_local; 3435 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3436 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3437 3438 } else { 3439 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3440 PetscInt *idx,*cmap1,k; 3441 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3442 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3443 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3444 count = 0; 3445 k = 0; 3446 for (i=0; i<Ncols; i++) { 3447 j = is_idx[i]; 3448 if (j >= cstart && j < cend) { 3449 /* diagonal part of mat */ 3450 idx[count] = j; 3451 cmap1[count++] = i; /* column index in submat */ 3452 } else if (Bn) { 3453 /* off-diagonal part of mat */ 3454 if (j == garray[k]) { 3455 idx[count] = j; 3456 cmap1[count++] = i; /* column index in submat */ 3457 } else if (j > garray[k]) { 3458 while (j > garray[k] && k < Bn-1) k++; 3459 if (j == garray[k]) { 3460 idx[count] = j; 3461 cmap1[count++] = i; /* column index in submat */ 3462 } 3463 } 3464 } 3465 } 3466 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3467 3468 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3469 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3470 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3471 3472 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3473 } 3474 3475 /* (3) Create sequential Msub */ 3476 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3477 } 3478 3479 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3480 aij = (Mat_SeqAIJ*)(Msub)->data; 3481 ii = aij->i; 3482 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3483 3484 /* 3485 m - number of local rows 3486 Ncols - number of columns (same on all processors) 3487 rstart - first row in new global matrix generated 3488 */ 3489 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3490 3491 if (call == MAT_INITIAL_MATRIX) { 3492 /* (4) Create parallel newmat */ 3493 PetscMPIInt rank,size; 3494 PetscInt csize; 3495 3496 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3497 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3498 3499 /* 3500 Determine the number of non-zeros in the diagonal and off-diagonal 3501 portions of the matrix in order to do correct preallocation 3502 */ 3503 3504 /* first get start and end of "diagonal" columns */ 3505 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3506 if (csize == PETSC_DECIDE) { 3507 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3508 if (mglobal == Ncols) { /* square matrix */ 3509 nlocal = m; 3510 } else { 3511 nlocal = Ncols/size + ((Ncols % size) > rank); 3512 } 3513 } else { 3514 nlocal = csize; 3515 } 3516 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3517 rstart = rend - nlocal; 3518 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3519 3520 /* next, compute all the lengths */ 3521 jj = aij->j; 3522 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3523 olens = dlens + m; 3524 for (i=0; i<m; i++) { 3525 jend = ii[i+1] - ii[i]; 3526 olen = 0; 3527 dlen = 0; 3528 for (j=0; j<jend; j++) { 3529 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3530 else dlen++; 3531 jj++; 3532 } 3533 olens[i] = olen; 3534 dlens[i] = dlen; 3535 } 3536 3537 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3538 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3539 3540 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3541 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3542 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3543 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3544 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3545 ierr = PetscFree(dlens);CHKERRQ(ierr); 3546 3547 } else { /* call == MAT_REUSE_MATRIX */ 3548 M = *newmat; 3549 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3550 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3551 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3552 /* 3553 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3554 rather than the slower MatSetValues(). 3555 */ 3556 M->was_assembled = PETSC_TRUE; 3557 M->assembled = PETSC_FALSE; 3558 } 3559 3560 /* (5) Set values of Msub to *newmat */ 3561 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3562 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3563 3564 jj = aij->j; 3565 aa = aij->a; 3566 for (i=0; i<m; i++) { 3567 row = rstart + i; 3568 nz = ii[i+1] - ii[i]; 3569 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3570 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3571 jj += nz; aa += nz; 3572 } 3573 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3574 3575 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3576 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3577 3578 ierr = PetscFree(colsub);CHKERRQ(ierr); 3579 3580 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3581 if (call == MAT_INITIAL_MATRIX) { 3582 *newmat = M; 3583 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3584 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3585 3586 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3587 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3588 3589 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3590 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3591 3592 if (iscol_local) { 3593 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3594 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3595 } 3596 } 3597 PetscFunctionReturn(0); 3598 } 3599 3600 /* 3601 Not great since it makes two copies of the submatrix, first an SeqAIJ 3602 in local and then by concatenating the local matrices the end result. 3603 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3604 3605 Note: This requires a sequential iscol with all indices. 
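      (The sequential iscol required here is typically the iscol_local produced by
       ISGetSeqIS_Private()/ISAllGather() in MatCreateSubMatrix_MPIAIJ(), so on every process its
       length equals the global number of selected columns; that gather is what makes this path
       non-scalable.)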
3606 */ 3607 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3608 { 3609 PetscErrorCode ierr; 3610 PetscMPIInt rank,size; 3611 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3612 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3613 Mat M,Mreuse; 3614 MatScalar *aa,*vwork; 3615 MPI_Comm comm; 3616 Mat_SeqAIJ *aij; 3617 PetscBool colflag,allcolumns=PETSC_FALSE; 3618 3619 PetscFunctionBegin; 3620 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3622 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3623 3624 /* Check for special case: each processor gets entire matrix columns */ 3625 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3626 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3627 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3628 3629 if (call == MAT_REUSE_MATRIX) { 3630 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3631 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3632 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3633 } else { 3634 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3635 } 3636 3637 /* 3638 m - number of local rows 3639 n - number of columns (same on all processors) 3640 rstart - first row in new global matrix generated 3641 */ 3642 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3643 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3644 if (call == MAT_INITIAL_MATRIX) { 3645 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3646 ii = aij->i; 3647 jj = aij->j; 3648 3649 /* 3650 Determine the number of non-zeros in the diagonal and off-diagonal 3651 portions of the matrix in order to do correct preallocation 3652 */ 3653 3654 /* first get start and end of "diagonal" columns */ 3655 if (csize == PETSC_DECIDE) { 3656 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3657 if (mglobal == n) { /* square matrix */ 3658 nlocal = m; 3659 } else { 3660 nlocal = n/size + ((n % size) > rank); 3661 } 3662 } else { 3663 nlocal = csize; 3664 } 3665 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3666 rstart = rend - nlocal; 3667 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3668 3669 /* next, compute all the lengths */ 3670 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3671 olens = dlens + m; 3672 for (i=0; i<m; i++) { 3673 jend = ii[i+1] - ii[i]; 3674 olen = 0; 3675 dlen = 0; 3676 for (j=0; j<jend; j++) { 3677 if (*jj < rstart || *jj >= rend) olen++; 3678 else dlen++; 3679 jj++; 3680 } 3681 olens[i] = olen; 3682 dlens[i] = dlen; 3683 } 3684 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3685 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3686 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3687 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3688 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3689 ierr = PetscFree(dlens);CHKERRQ(ierr); 3690 } else { 3691 PetscInt ml,nl; 3692 3693 M = *newmat; 3694 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3695 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3696 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3697 /* 3698 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3699 rather than the slower MatSetValues(). 3700 */ 3701 M->was_assembled = PETSC_TRUE; 3702 M->assembled = PETSC_FALSE; 3703 } 3704 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3705 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3706 ii = aij->i; 3707 jj = aij->j; 3708 aa = aij->a; 3709 for (i=0; i<m; i++) { 3710 row = rstart + i; 3711 nz = ii[i+1] - ii[i]; 3712 cwork = jj; jj += nz; 3713 vwork = aa; aa += nz; 3714 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3715 } 3716 3717 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3718 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3719 *newmat = M; 3720 3721 /* save submatrix used in processor for next request */ 3722 if (call == MAT_INITIAL_MATRIX) { 3723 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3724 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3725 } 3726 PetscFunctionReturn(0); 3727 } 3728 3729 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3730 { 3731 PetscInt m,cstart, cend,j,nnz,i,d; 3732 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3733 const PetscInt *JJ; 3734 PetscScalar *values; 3735 PetscErrorCode ierr; 3736 PetscBool nooffprocentries; 3737 3738 PetscFunctionBegin; 3739 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3740 3741 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3742 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3743 m = B->rmap->n; 3744 cstart = B->cmap->rstart; 3745 cend = B->cmap->rend; 3746 rstart = B->rmap->rstart; 3747 3748 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3749 3750 #if defined(PETSC_USE_DEBUG) 3751 for (i=0; i<m; i++) { 3752 nnz = Ii[i+1]- Ii[i]; 3753 JJ = J + Ii[i]; 3754 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3755 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3756 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3757 } 3758 #endif 3759 3760 for (i=0; i<m; i++) { 3761 nnz = Ii[i+1]- Ii[i]; 3762 JJ = J + Ii[i]; 3763 nnz_max = PetscMax(nnz_max,nnz); 3764 d = 0; 3765 for (j=0; j<nnz; j++) { 3766 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3767 } 3768 d_nnz[i] = d; 3769 o_nnz[i] = nnz - d; 3770 } 3771 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3772 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3773 3774 if (v) values = (PetscScalar*)v; 3775 else { 3776 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3777 } 3778 3779 for (i=0; i<m; i++) { 3780 ii = i + rstart; 3781 nnz = Ii[i+1]- Ii[i]; 3782 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3783 } 3784 nooffprocentries = B->nooffprocentries; 3785 B->nooffprocentries = PETSC_TRUE; 3786 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3788 B->nooffprocentries = nooffprocentries; 3789 3790 if (!v) { 3791 ierr = PetscFree(values);CHKERRQ(ierr); 3792 } 3793 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3794 PetscFunctionReturn(0); 3795 } 3796 3797 /*@ 3798 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3799 (the default parallel PETSc format). 3800 3801 Collective on MPI_Comm 3802 3803 Input Parameters: 3804 + B - the matrix 3805 . i - the indices into j for the start of each local row (starts with zero) 3806 . j - the column indices for each local row (starts with zero) 3807 - v - optional values in the matrix 3808 3809 Level: developer 3810 3811 Notes: 3812 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3813 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3814 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3815 3816 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3817 3818 The format which is used for the sparse matrix input, is equivalent to a 3819 row-major ordering.. i.e for the following matrix, the input data expected is 3820 as shown 3821 3822 $ 1 0 0 3823 $ 2 0 3 P0 3824 $ ------- 3825 $ 4 5 6 P1 3826 $ 3827 $ Process0 [P0]: rows_owned=[0,1] 3828 $ i = {0,1,3} [size = nrow+1 = 2+1] 3829 $ j = {0,0,2} [size = 3] 3830 $ v = {1,2,3} [size = 3] 3831 $ 3832 $ Process1 [P1]: rows_owned=[2] 3833 $ i = {0,3} [size = nrow+1 = 1+1] 3834 $ j = {0,1,2} [size = 3] 3835 $ v = {4,5,6} [size = 3] 3836 3837 .keywords: matrix, aij, compressed row, sparse, parallel 3838 3839 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3840 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3841 @*/ 3842 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3843 { 3844 PetscErrorCode ierr; 3845 3846 PetscFunctionBegin; 3847 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3848 PetscFunctionReturn(0); 3849 } 3850 3851 /*@C 3852 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3853 (the default parallel PETSc format). For good matrix assembly performance 3854 the user should preallocate the matrix storage by setting the parameters 3855 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3856 performance can be increased by more than a factor of 50. 3857 3858 Collective on MPI_Comm 3859 3860 Input Parameters: 3861 + B - the matrix 3862 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3863 (same value is used for all local rows) 3864 . d_nnz - array containing the number of nonzeros in the various rows of the 3865 DIAGONAL portion of the local submatrix (possibly different for each row) 3866 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3867 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set) 3869 the diagonal entry even if it is zero. 3870 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3871 submatrix (same value is used for all local rows). 3872 - o_nnz - array containing the number of nonzeros in the various rows of the 3873 OFF-DIAGONAL portion of the local submatrix (possibly different for 3874 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3875 structure. The size of this array is equal to the number 3876 of local rows, i.e., 'm'. 3877 3878 If the *_nnz parameter is given then the *_nz parameter is ignored. 3879 3880 The AIJ format (also called the Yale sparse matrix format or 3881 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3882 storage. The stored row and column indices begin with zero. 3883 See Users-Manual: ch_mat for details. 3884 3885 The parallel matrix is partitioned such that the first m0 rows belong to 3886 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3887 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3888 3889 The DIAGONAL portion of the local submatrix of a processor can be defined 3890 as the submatrix which is obtained by extracting the part corresponding to 3891 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3892 first row that belongs to the processor, r2 is the last row belonging to 3893 this processor, and c1-c2 is the range of indices of the local part of a 3894 vector suitable for applying the matrix to. This is an mxn matrix. In the 3895 common case of a square matrix, the row and column ranges are the same and 3896 the DIAGONAL part is also square. The remaining portion of the local 3897 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3898 3899 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3900 3901 You can call MatGetInfo() to get information on how effective the preallocation was; 3902 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3903 You can also run with the option -info and look for messages with the string 3904 malloc in them to see if additional memory allocation was needed. 3905 3906 Example usage: 3907 3908 Consider the following 8x8 matrix with 34 non-zero values, that is 3909 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3910 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3911 as follows: 3912 3913 .vb 3914 1 2 0 | 0 3 0 | 0 4 3915 Proc0 0 5 6 | 7 0 0 | 8 0 3916 9 0 10 | 11 0 0 | 12 0 3917 ------------------------------------- 3918 13 0 14 | 15 16 17 | 0 0 3919 Proc1 0 18 0 | 19 20 21 | 0 0 3920 0 0 0 | 22 23 0 | 24 0 3921 ------------------------------------- 3922 Proc2 25 26 27 | 0 0 28 | 29 0 3923 30 0 0 | 31 32 33 | 0 34 3924 .ve 3925 3926 This can be represented as a collection of submatrices as: 3927 3928 .vb 3929 A B C 3930 D E F 3931 G H I 3932 .ve 3933 3934 Where the submatrices A,B,C are owned by proc0, D,E,F are 3935 owned by proc1, G,H,I are owned by proc2. 3936 3937 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3938 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3939 The 'M','N' parameters are 8,8, and have the same values on all procs. 3940 3941 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3942 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3943 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
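   For example, for proc0 in the layout above, the DIAGONAL block [A] and the OFF-DIAGONAL block [BC] are

.vb
            1  2  0              0  3  0  0  4
     [A] =  0  5  6      [BC] =  7  0  0  8  0
            9  0 10             11  0  0 12  0
.ve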
3944 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3945 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3946 matrix, and [DF] as another SeqAIJ matrix. 3947 3948 When d_nz, o_nz parameters are specified, d_nz storage elements are 3949 allocated for every row of the local diagonal submatrix, and o_nz 3950 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3951 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3952 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3953 In this case, the values of d_nz,o_nz are: 3954 .vb 3955 proc0 : d_nz = 2, o_nz = 2 3956 proc1 : d_nz = 3, o_nz = 2 3957 proc2 : d_nz = 1, o_nz = 4 3958 .ve 3959 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3960 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3961 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3962 34 values. 3963 3964 When d_nnz, o_nnz parameters are specified, the storage is specified 3965 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3966 In the above case the values for d_nnz,o_nnz are: 3967 .vb 3968 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3969 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3970 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3971 .ve 3972 Here the space allocated is the sum of all the above values, i.e., 34, and 3973 hence the preallocation is perfect. 3974 3975 Level: intermediate 3976 3977 .keywords: matrix, aij, compressed row, sparse, parallel 3978 3979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3980 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3981 @*/ 3982 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3983 { 3984 PetscErrorCode ierr; 3985 3986 PetscFunctionBegin; 3987 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3988 PetscValidType(B,1); 3989 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3990 PetscFunctionReturn(0); 3991 } 3992 3993 /*@ 3994 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3995 in standard CSR format. 3996 3997 Collective on MPI_Comm 3998 3999 Input Parameters: 4000 + comm - MPI communicator 4001 . m - number of local rows (cannot be PETSC_DECIDE) 4002 . n - This value should be the same as the local size used in creating the 4003 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 4004 calculated if N is given). For square matrices n is almost always m. 4005 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4006 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4007 . i - row indices; the start of each local row in j (length m+1, with i[0] = 0) 4008 . j - column indices (global, 0-based) 4009 - a - matrix values 4010 4011 Output Parameter: 4012 . mat - the matrix 4013 4014 Level: intermediate 4015 4016 Notes: 4017 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4018 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4019 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4020 4021 The i and j indices are 0-based, and the i indices are offsets into the local j array (not global positions).
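   For the two-process layout shown below, a typical call on each process is (a sketch only; m is the
   number of locally owned rows and the arrays i, j, v hold that process's local rows):

$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);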
4022 4023 The format which is used for the sparse matrix input, is equivalent to a 4024 row-major ordering.. i.e for the following matrix, the input data expected is 4025 as shown 4026 4027 $ 1 0 0 4028 $ 2 0 3 P0 4029 $ ------- 4030 $ 4 5 6 P1 4031 $ 4032 $ Process0 [P0]: rows_owned=[0,1] 4033 $ i = {0,1,3} [size = nrow+1 = 2+1] 4034 $ j = {0,0,2} [size = 3] 4035 $ v = {1,2,3} [size = 3] 4036 $ 4037 $ Process1 [P1]: rows_owned=[2] 4038 $ i = {0,3} [size = nrow+1 = 1+1] 4039 $ j = {0,1,2} [size = 3] 4040 $ v = {4,5,6} [size = 3] 4041 4042 .keywords: matrix, aij, compressed row, sparse, parallel 4043 4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4045 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4046 @*/ 4047 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4048 { 4049 PetscErrorCode ierr; 4050 4051 PetscFunctionBegin; 4052 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4053 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4054 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4055 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4056 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4057 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4058 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4059 PetscFunctionReturn(0); 4060 } 4061 4062 /*@C 4063 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4064 (the default parallel PETSc format). For good matrix assembly performance 4065 the user should preallocate the matrix storage by setting the parameters 4066 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4067 performance can be increased by more than a factor of 50. 4068 4069 Collective on MPI_Comm 4070 4071 Input Parameters: 4072 + comm - MPI communicator 4073 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4074 This value should be the same as the local size used in creating the 4075 y vector for the matrix-vector product y = Ax. 4076 . n - This value should be the same as the local size used in creating the 4077 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4078 calculated if N is given) For square matrices n is almost always m. 4079 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4080 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4081 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4082 (same value is used for all local rows) 4083 . d_nnz - array containing the number of nonzeros in the various rows of the 4084 DIAGONAL portion of the local submatrix (possibly different for each row) 4085 or NULL, if d_nz is used to specify the nonzero structure. 4086 The size of this array is equal to the number of local rows, i.e 'm'. 4087 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4088 submatrix (same value is used for all local rows). 4089 - o_nnz - array containing the number of nonzeros in the various rows of the 4090 OFF-DIAGONAL portion of the local submatrix (possibly different for 4091 each row) or NULL, if o_nz is used to specify the nonzero 4092 structure. The size of this array is equal to the number 4093 of local rows, i.e 'm'. 
4094 4095 Output Parameter: 4096 . A - the matrix 4097 4098 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4099 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4100 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4101 4102 Notes: 4103 If the *_nnz parameter is given then the *_nz parameter is ignored. 4104 4105 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4106 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4107 storage requirements for this matrix. 4108 4109 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4110 processor then it must be used on all processors that share the object for 4111 that argument. 4112 4113 The user MUST specify either the local or global matrix dimensions 4114 (possibly both). 4115 4116 The parallel matrix is partitioned across processors such that the 4117 first m0 rows belong to process 0, the next m1 rows belong to 4118 process 1, the next m2 rows belong to process 2, etc., where 4119 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4120 values corresponding to an [m x N] submatrix. 4121 4122 The columns are logically partitioned with the n0 columns belonging 4123 to the 0th partition, the next n1 columns belonging to the next 4124 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4125 4126 The DIAGONAL portion of the local submatrix on any given processor 4127 is the submatrix corresponding to the rows and columns m,n 4128 owned by the given processor, i.e., the diagonal matrix on 4129 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4130 etc. The remaining portion of the local submatrix [m x (N-n)] 4131 constitutes the OFF-DIAGONAL portion. The example below 4132 illustrates this concept. 4133 4134 For a square global matrix we define each processor's diagonal portion 4135 to be its local rows and the corresponding columns (a square submatrix); 4136 each processor's off-diagonal portion encompasses the remainder of the 4137 local matrix (a rectangular submatrix). 4138 4139 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4140 4141 When calling this routine with a single process communicator, a matrix of 4142 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4143 type of communicator, use the construction mechanism: 4144 .vb 4145 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4146 .ve 4147 4152 4153 By default, this format uses inodes (identical nodes) when possible. 4154 We search for consecutive rows with the same nonzero structure, thereby 4155 reusing matrix information to achieve increased efficiency. 4156 4157 Options Database Keys: 4158 + -mat_no_inode - Do not use inodes 4159 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4160 - -mat_aij_oneindex - Internally use indexing starting at 1 4161 rather than 0. Note that when calling MatSetValues(), 4162 the user still MUST index entries starting at 0! 4163 4164 4165 Example usage: 4166 4167 Consider the following 8x8 matrix with 34 non-zero values, that is 4168 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4169 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4170 as follows: 4171 4172 .vb 4173 1 2 0 | 0 3 0 | 0 4 4174 Proc0 0 5 6 | 7 0 0 | 8 0 4175 9 0 10 | 11 0 0 | 12 0 4176 ------------------------------------- 4177 13 0 14 | 15 16 17 | 0 0 4178 Proc1 0 18 0 | 19 20 21 | 0 0 4179 0 0 0 | 22 23 0 | 24 0 4180 ------------------------------------- 4181 Proc2 25 26 27 | 0 0 28 | 29 0 4182 30 0 0 | 31 32 33 | 0 34 4183 .ve 4184 4185 This can be represented as a collection of submatrices as: 4186 4187 .vb 4188 A B C 4189 D E F 4190 G H I 4191 .ve 4192 4193 Where the submatrices A,B,C are owned by proc0, D,E,F are 4194 owned by proc1, G,H,I are owned by proc2. 4195 4196 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4197 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4198 The 'M','N' parameters are 8,8, and have the same values on all procs. 4199 4200 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4201 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4202 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4203 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4204 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4205 matrix, and [DF] as another SeqAIJ matrix. 4206 4207 When d_nz, o_nz parameters are specified, d_nz storage elements are 4208 allocated for every row of the local diagonal submatrix, and o_nz 4209 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4210 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4211 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4212 In this case, the values of d_nz,o_nz are: 4213 .vb 4214 proc0 : d_nz = 2, o_nz = 2 4215 proc1 : d_nz = 3, o_nz = 2 4216 proc2 : d_nz = 1, o_nz = 4 4217 .ve 4218 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4219 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4220 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4221 34 values. 4222 4223 When d_nnz, o_nnz parameters are specified, the storage is specified 4224 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4225 In the above case the values for d_nnz,o_nnz are: 4226 .vb 4227 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4228 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4229 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4230 .ve 4231 Here the space allocated is the sum of all the above values, i.e., 34, and 4232 hence the preallocation is perfect.
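   A matching call on proc0 for this example is then (a sketch; error handling omitted):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve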
4233 4234 Level: intermediate 4235 4236 .keywords: matrix, aij, compressed row, sparse, parallel 4237 4238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4239 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4240 @*/ 4241 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4242 { 4243 PetscErrorCode ierr; 4244 PetscMPIInt size; 4245 4246 PetscFunctionBegin; 4247 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4248 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4250 if (size > 1) { 4251 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4252 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4253 } else { 4254 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4255 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4256 } 4257 PetscFunctionReturn(0); 4258 } 4259 4260 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4261 { 4262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4263 PetscBool flg; 4264 PetscErrorCode ierr; 4265 4266 PetscFunctionBegin; 4267 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4268 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4269 if (Ad) *Ad = a->A; 4270 if (Ao) *Ao = a->B; 4271 if (colmap) *colmap = a->garray; 4272 PetscFunctionReturn(0); 4273 } 4274 4275 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4276 { 4277 PetscErrorCode ierr; 4278 PetscInt m,N,i,rstart,nnz,Ii; 4279 PetscInt *indx; 4280 PetscScalar *values; 4281 4282 PetscFunctionBegin; 4283 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4284 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4285 PetscInt *dnz,*onz,sum,bs,cbs; 4286 4287 if (n == PETSC_DECIDE) { 4288 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4289 } 4290 /* Check sum(n) = N */ 4291 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4292 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4293 4294 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4295 rstart -= m; 4296 4297 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4298 for (i=0; i<m; i++) { 4299 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4300 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4301 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4302 } 4303 4304 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4305 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4306 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4307 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4308 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4309 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4310 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4311 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4312 } 4313 4314 /* numeric phase */ 4315 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4316 for (i=0; i<m; i++) { 4317 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4318 Ii = i + rstart; 4319 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4320 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4321 } 4322 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4323 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4324 PetscFunctionReturn(0); 4325 } 4326 4327 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4328 { 4329 PetscErrorCode ierr; 4330 PetscMPIInt rank; 4331 PetscInt m,N,i,rstart,nnz; 4332 size_t len; 4333 const PetscInt *indx; 4334 PetscViewer out; 4335 char *name; 4336 Mat B; 4337 const PetscScalar *values; 4338 4339 PetscFunctionBegin; 4340 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4341 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4342 /* Should this be the type of the diagonal block of A? */ 4343 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4344 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4345 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4346 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4347 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4348 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4349 for (i=0; i<m; i++) { 4350 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4351 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4352 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4353 } 4354 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4355 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 4357 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4358 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4359 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4360 sprintf(name,"%s.%d",outfile,rank); 4361 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4362 ierr = PetscFree(name);CHKERRQ(ierr); 4363 ierr = MatView(B,out);CHKERRQ(ierr); 4364 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4365 ierr = MatDestroy(&B);CHKERRQ(ierr); 4366 PetscFunctionReturn(0); 4367 } 4368 4369 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4370 { 4371 PetscErrorCode ierr; 4372 Mat_Merge_SeqsToMPI *merge; 4373 PetscContainer container; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4377 if (container) { 4378 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4379 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4380 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4381 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4382 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4383 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4384 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4385 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4386 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4388 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4391 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4392 ierr = PetscFree(merge);CHKERRQ(ierr); 4393 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4394 } 4395 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #include <../src/mat/utils/freespace.h> 4400 #include <petscbt.h> 4401 4402 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4403 { 4404 PetscErrorCode ierr; 4405 MPI_Comm comm; 4406 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4407 PetscMPIInt 
size,rank,taga,*len_s; 4408 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4409 PetscInt proc,m; 4410 PetscInt **buf_ri,**buf_rj; 4411 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4412 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4413 MPI_Request *s_waits,*r_waits; 4414 MPI_Status *status; 4415 MatScalar *aa=a->a; 4416 MatScalar **abuf_r,*ba_i; 4417 Mat_Merge_SeqsToMPI *merge; 4418 PetscContainer container; 4419 4420 PetscFunctionBegin; 4421 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4422 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4423 4424 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4426 4427 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4428 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4429 4430 bi = merge->bi; 4431 bj = merge->bj; 4432 buf_ri = merge->buf_ri; 4433 buf_rj = merge->buf_rj; 4434 4435 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4436 owners = merge->rowmap->range; 4437 len_s = merge->len_s; 4438 4439 /* send and recv matrix values */ 4440 /*-----------------------------*/ 4441 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4442 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4443 4444 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4445 for (proc=0,k=0; proc<size; proc++) { 4446 if (!len_s[proc]) continue; 4447 i = owners[proc]; 4448 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4449 k++; 4450 } 4451 4452 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4453 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4454 ierr = PetscFree(status);CHKERRQ(ierr); 4455 4456 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4457 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4458 4459 /* insert mat values of mpimat */ 4460 /*----------------------------*/ 4461 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4462 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4463 4464 for (k=0; k<merge->nrecv; k++) { 4465 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4466 nrows = *(buf_ri_k[k]); 4467 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4468 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4469 } 4470 4471 /* set values of ba */ 4472 m = merge->rowmap->n; 4473 for (i=0; i<m; i++) { 4474 arow = owners[rank] + i; 4475 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4476 bnzi = bi[i+1] - bi[i]; 4477 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4478 4479 /* add local non-zero vals of this proc's seqmat into ba */ 4480 anzi = ai[arow+1] - ai[arow]; 4481 aj = a->j + ai[arow]; 4482 aa = a->a + ai[arow]; 4483 nextaj = 0; 4484 for (j=0; nextaj<anzi; j++) { 4485 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4486 ba_i[j] += aa[nextaj++]; 4487 } 4488 } 4489 4490 /* add received vals into ba */ 4491 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4492 /* i-th row */ 4493 if (i == *nextrow[k]) { 4494 anzi = *(nextai[k]+1) - *nextai[k]; 4495 aj = buf_rj[k] + *(nextai[k]); 4496 aa = abuf_r[k] + *(nextai[k]); 4497 nextaj = 0; 4498 for (j=0; nextaj<anzi; j++) { 4499 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4500 ba_i[j] += aa[nextaj++]; 4501 } 4502 } 4503 nextrow[k]++; nextai[k]++; 4504 } 4505 } 4506 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4507 } 4508 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4509 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4510 4511 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4512 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4513 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4514 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4515 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4516 PetscFunctionReturn(0); 4517 } 4518 4519 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4520 { 4521 PetscErrorCode ierr; 4522 Mat B_mpi; 4523 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4524 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4525 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4526 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4527 PetscInt len,proc,*dnz,*onz,bs,cbs; 4528 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4529 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4530 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4531 MPI_Status *status; 4532 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4533 PetscBT lnkbt; 4534 Mat_Merge_SeqsToMPI *merge; 4535 PetscContainer container; 4536 4537 PetscFunctionBegin; 4538 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4539 4540 /* make sure it is a PETSc comm */ 4541 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4544 4545 ierr = PetscNew(&merge);CHKERRQ(ierr); 4546 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4547 4548 /* determine row ownership */ 4549 /*---------------------------------------------------------*/ 4550 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4551 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4552 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4553 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4554 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4555 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4556 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4557 4558 m = merge->rowmap->n; 4559 owners = merge->rowmap->range; 4560 4561 /* determine the number of messages to send, their lengths */ 4562 /*---------------------------------------------------------*/ 4563 len_s = merge->len_s; 4564 4565 len = 0; /* length of buf_si[] */ 4566 merge->nsend = 0; 4567 for (proc=0; proc<size; proc++) { 4568 len_si[proc] = 0; 4569 if (proc == rank) { 4570 len_s[proc] = 0; 4571 } else { 4572 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4573 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4574 } 4575 if (len_s[proc]) { 4576 merge->nsend++; 4577 nrows = 0; 4578 for (i=owners[proc]; i<owners[proc+1]; i++) { 4579 if (ai[i+1] > ai[i]) nrows++; 4580 } 4581 len_si[proc] = 2*(nrows+1); 4582 len += len_si[proc]; 4583 } 4584 } 4585 4586 /* determine the number and length of messages to receive for ij-structure */ 4587 /*-------------------------------------------------------------------------*/ 4588 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4589 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4590 4591 /* post the Irecv of j-structure */ 4592 /*-------------------------------*/ 4593 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4594 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4595 4596 /* post the Isend of j-structure */ 4597 /*--------------------------------*/ 4598 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4599 4600 for (proc=0, k=0; proc<size; proc++) { 4601 if (!len_s[proc]) continue; 4602 i = owners[proc]; 4603 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4604 k++; 4605 } 4606 4607 /* receives and sends of j-structure are complete */ 4608 /*------------------------------------------------*/ 4609 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4610 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4611 4612 /* send and recv i-structure */ 4613 /*---------------------------*/ 4614 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4615 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4616 4617 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4618 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4619 for (proc=0,k=0; proc<size; proc++) { 4620 if (!len_s[proc]) continue; 4621 /* form outgoing message for i-structure: 4622 buf_si[0]: nrows to be sent 4623 [1:nrows]: row index (global) 4624 [nrows+1:2*nrows+1]: i-structure index 4625 */ 4626 /*-------------------------------------------*/ 4627 nrows = len_si[proc]/2 - 1; 4628 buf_si_i = buf_si + nrows+1; 4629 buf_si[0] = nrows; 4630 buf_si_i[0] = 0; 4631 nrows = 0; 4632 for (i=owners[proc]; i<owners[proc+1]; i++) { 4633 anzi = ai[i+1] - ai[i]; 4634 if (anzi) { 4635 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4636 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4637 nrows++; 4638 } 4639 } 4640 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4641 k++; 4642 buf_si += len_si[proc]; 4643 } 4644 4645 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4646 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4647 4648 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4649 for (i=0; i<merge->nrecv; i++) { 4650 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4651 } 4652 4653 ierr = PetscFree(len_si);CHKERRQ(ierr); 4654 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4655 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4656 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4657 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4658 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4659 ierr = PetscFree(status);CHKERRQ(ierr); 4660 4661 /* compute a local seq matrix in each processor */ 4662 /*----------------------------------------------*/ 4663 /* allocate bi array and free space for accumulating nonzero column info */ 4664 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4665 bi[0] = 0; 4666 4667 /* create and initialize a linked list */ 4668 nlnk = N+1; 4669 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4670 4671 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4672 len = ai[owners[rank+1]] - 
ai[owners[rank]];
4673 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4674
4675 current_space = free_space;
4676
4677 /* determine symbolic info for each local row */
4678 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4679
4680 for (k=0; k<merge->nrecv; k++) {
4681 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4682 nrows = *buf_ri_k[k];
4683 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4684 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4685 }
4686
4687 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4688 len = 0;
4689 for (i=0; i<m; i++) {
4690 bnzi = 0;
4691 /* add local non-zero cols of this proc's seqmat into lnk */
4692 arow = owners[rank] + i;
4693 anzi = ai[arow+1] - ai[arow];
4694 aj = a->j + ai[arow];
4695 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4696 bnzi += nlnk;
4697 /* add received col data into lnk */
4698 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4699 if (i == *nextrow[k]) { /* i-th row */
4700 anzi = *(nextai[k]+1) - *nextai[k];
4701 aj = buf_rj[k] + *nextai[k];
4702 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4703 bnzi += nlnk;
4704 nextrow[k]++; nextai[k]++;
4705 }
4706 }
4707 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4708
4709 /* if free space is not available, make more free space */
4710 if (current_space->local_remaining<bnzi) {
4711 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4712 nspacedouble++;
4713 }
4714 /* copy data into free space, then initialize lnk */
4715 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4716 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4717
4718 current_space->array += bnzi;
4719 current_space->local_used += bnzi;
4720 current_space->local_remaining -= bnzi;
4721
4722 bi[i+1] = bi[i] + bnzi;
4723 }
4724
4725 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4726
4727 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4728 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4729 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4730
4731 /* create symbolic parallel matrix B_mpi */
4732 /*---------------------------------------*/
4733 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4734 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4735 if (n==PETSC_DECIDE) {
4736 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4737 } else {
4738 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4739 }
4740 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4741 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4742 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4743 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4744 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4745
4746 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4747 B_mpi->assembled = PETSC_FALSE;
4748 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4749 merge->bi = bi;
4750 merge->bj = bj;
4751 merge->buf_ri = buf_ri;
4752 merge->buf_rj = buf_rj;
4753 merge->coi = NULL;
4754 merge->coj = NULL;
4755 merge->owners_co = NULL;
4756
4757 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4758
4759 /* attach the supporting struct to B_mpi for reuse */
4760 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4761 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4762 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4763 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4764 *mpimat = B_mpi;
4765
4766 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4767 PetscFunctionReturn(0);
4768 }
4769
4770 /*@C
4771 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4772 matrices from each processor
4773
4774 Collective on MPI_Comm
4775
4776 Input Parameters:
4777 + comm - the communicator the parallel matrix will live on
4778 . seqmat - the input sequential matrix
4779 . m - number of local rows (or PETSC_DECIDE)
4780 . n - number of local columns (or PETSC_DECIDE)
4781 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4782
4783 Output Parameter:
4784 . mpimat - the parallel matrix generated
4785
4786 Level: advanced
4787
4788 Notes:
4789 The dimensions of the sequential matrix in each processor MUST be the same.
4790 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4791 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4792 @*/
4793 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4794 {
4795 PetscErrorCode ierr;
4796 PetscMPIInt size;
4797
4798 PetscFunctionBegin;
4799 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4800 if (size == 1) {
4801 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4802 if (scall == MAT_INITIAL_MATRIX) {
4803 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4804 } else {
4805 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4806 }
4807 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4808 PetscFunctionReturn(0);
4809 }
4810 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4811 if (scall == MAT_INITIAL_MATRIX) {
4812 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4813 }
4814 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4815 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4816 PetscFunctionReturn(0);
4817 }
4818
4819 /*@
4820 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4821 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4822 with MatGetSize().
4823
4824 Not Collective
4825
4826 Input Parameters:
4827 + A - the matrix
4828 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4829
4830 Output Parameter:
4831 .
A_loc - the local sequential matrix generated 4832 4833 Level: developer 4834 4835 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4836 4837 @*/ 4838 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4839 { 4840 PetscErrorCode ierr; 4841 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4842 Mat_SeqAIJ *mat,*a,*b; 4843 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4844 MatScalar *aa,*ba,*cam; 4845 PetscScalar *ca; 4846 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4847 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4848 PetscBool match; 4849 MPI_Comm comm; 4850 PetscMPIInt size; 4851 4852 PetscFunctionBegin; 4853 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4854 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4855 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4856 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4857 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4858 4859 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4860 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4861 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4862 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4863 aa = a->a; ba = b->a; 4864 if (scall == MAT_INITIAL_MATRIX) { 4865 if (size == 1) { 4866 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4867 PetscFunctionReturn(0); 4868 } 4869 4870 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4871 ci[0] = 0; 4872 for (i=0; i<am; i++) { 4873 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4874 } 4875 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4876 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4877 k = 0; 4878 for (i=0; i<am; i++) { 4879 ncols_o = bi[i+1] - bi[i]; 4880 ncols_d = ai[i+1] - ai[i]; 4881 /* off-diagonal portion of A */ 4882 for (jo=0; jo<ncols_o; jo++) { 4883 col = cmap[*bj]; 4884 if (col >= cstart) break; 4885 cj[k] = col; bj++; 4886 ca[k++] = *ba++; 4887 } 4888 /* diagonal portion of A */ 4889 for (j=0; j<ncols_d; j++) { 4890 cj[k] = cstart + *aj++; 4891 ca[k++] = *aa++; 4892 } 4893 /* off-diagonal portion of A */ 4894 for (j=jo; j<ncols_o; j++) { 4895 cj[k] = cmap[*bj++]; 4896 ca[k++] = *ba++; 4897 } 4898 } 4899 /* put together the new matrix */ 4900 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4901 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4902 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4903 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4904 mat->free_a = PETSC_TRUE; 4905 mat->free_ij = PETSC_TRUE; 4906 mat->nonew = 0; 4907 } else if (scall == MAT_REUSE_MATRIX) { 4908 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4909 ci = mat->i; cj = mat->j; cam = mat->a; 4910 for (i=0; i<am; i++) { 4911 /* off-diagonal portion of A */ 4912 ncols_o = bi[i+1] - bi[i]; 4913 for (jo=0; jo<ncols_o; jo++) { 4914 col = cmap[*bj]; 4915 if (col >= cstart) break; 4916 *cam++ = *ba++; bj++; 4917 } 4918 /* diagonal portion of A */ 4919 ncols_d = ai[i+1] - ai[i]; 4920 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4921 /* off-diagonal portion of A */ 4922 for (j=jo; j<ncols_o; j++) { 4923 *cam++ = *ba++; bj++; 4924 } 4925 } 4926 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4927 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4928 PetscFunctionReturn(0); 4929 } 4930 4931 /*@C 4932 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4933 4934 Not Collective 4935 4936 Input Parameters: 4937 + A - the matrix 4938 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4939 - row, col - index sets of rows and columns to extract (or NULL) 4940 4941 Output Parameter: 4942 . A_loc - the local sequential matrix generated 4943 4944 Level: developer 4945 4946 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4947 4948 @*/ 4949 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4950 { 4951 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4952 PetscErrorCode ierr; 4953 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4954 IS isrowa,iscola; 4955 Mat *aloc; 4956 PetscBool match; 4957 4958 PetscFunctionBegin; 4959 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4960 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4961 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4962 if (!row) { 4963 start = A->rmap->rstart; end = A->rmap->rend; 4964 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4965 } else { 4966 isrowa = *row; 4967 } 4968 if (!col) { 4969 start = A->cmap->rstart; 4970 cmap = a->garray; 4971 nzA = a->A->cmap->n; 4972 nzB = a->B->cmap->n; 4973 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4974 ncols = 0; 4975 for (i=0; i<nzB; i++) { 4976 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4977 else break; 4978 } 4979 imark = i; 4980 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4981 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4982 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4983 } else { 4984 iscola = *col; 4985 } 4986 if (scall != MAT_INITIAL_MATRIX) { 4987 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4988 aloc[0] = *A_loc; 4989 } 4990 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4991 *A_loc = aloc[0]; 4992 ierr = PetscFree(aloc);CHKERRQ(ierr); 4993 if (!row) { 4994 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4995 } 4996 if (!col) { 4997 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4998 } 4999 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 5003 /*@C 5004 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5005 5006 Collective on Mat 5007 5008 Input Parameters: 5009 + A,B - the matrices in mpiaij format 5010 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5011 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5012 5013 Output Parameter: 5014 + rowb, colb - index sets of rows and columns of B to extract 5015 - B_seq - the sequential matrix generated 5016 5017 Level: developer 5018 5019 @*/ 5020 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5021 { 5022 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5023 PetscErrorCode ierr; 5024 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5025 IS isrowb,iscolb; 5026 Mat *bseq=NULL; 5027 5028 PetscFunctionBegin; 5029 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5030 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5031 } 5032 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5033 5034 if (scall == MAT_INITIAL_MATRIX) { 5035 start = A->cmap->rstart; 5036 cmap = a->garray; 5037 nzA = a->A->cmap->n; 5038 nzB = a->B->cmap->n; 5039 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5040 ncols = 0; 5041 for (i=0; i<nzB; i++) { /* row < local row index */ 5042 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5043 else break; 5044 } 5045 imark = i; 5046 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5047 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5048 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5049 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5050 } else { 5051 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5052 isrowb = *rowb; iscolb = *colb; 5053 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5054 bseq[0] = *B_seq; 5055 } 5056 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5057 *B_seq = bseq[0]; 5058 ierr = PetscFree(bseq);CHKERRQ(ierr); 5059 if (!rowb) { 5060 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5061 } else { 5062 *rowb = isrowb; 5063 } 5064 if (!colb) { 5065 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5066 } else { 5067 *colb = iscolb; 5068 } 5069 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5070 PetscFunctionReturn(0); 5071 } 5072 5073 /* 5074 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5075 of the OFF-DIAGONAL portion of local A 5076 5077 Collective on Mat 5078 5079 Input Parameters: 5080 + A,B - the matrices in mpiaij format 5081 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5082 5083 Output Parameter: 5084 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5085 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5086 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5087 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5088 5089 Level: developer 5090 5091 */ 5092 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5093 { 5094 VecScatter_MPI_General *gen_to,*gen_from; 5095 PetscErrorCode ierr; 5096 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5097 Mat_SeqAIJ *b_oth; 5098 VecScatter ctx =a->Mvctx; 5099 MPI_Comm comm; 5100 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5101 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5102 PetscInt *rvalues,*svalues; 5103 MatScalar *b_otha,*bufa,*bufA; 5104 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5105 MPI_Request *rwaits = NULL,*swaits = NULL; 5106 MPI_Status *sstatus,rstatus; 5107 PetscMPIInt jj,size; 5108 PetscInt *cols,sbs,rbs; 5109 PetscScalar *vals; 5110 5111 PetscFunctionBegin; 5112 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5113 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5114 5115 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5116 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5117 } 5118 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5119 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5120 5121 if (size == 1) { 5122 startsj_s = NULL; 5123 bufa_ptr = NULL; 5124 *B_oth = NULL; 5125 PetscFunctionReturn(0); 5126 } 5127 5128 gen_to = (VecScatter_MPI_General*)ctx->todata; 5129 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5130 nrecvs = gen_from->n; 5131 nsends = gen_to->n; 5132 5133 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5134 srow = gen_to->indices; /* local row index to be sent */ 5135 sstarts = gen_to->starts; 5136 sprocs = gen_to->procs; 5137 sstatus = gen_to->sstatus; 5138 sbs = gen_to->bs; 5139 rstarts = gen_from->starts; 5140 rprocs = gen_from->procs; 5141 rbs = gen_from->bs; 5142 5143 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5144 if (scall == MAT_INITIAL_MATRIX) { 5145 /* i-array */ 5146 /*---------*/ 5147 /* post receives */ 5148 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5149 for (i=0; i<nrecvs; i++) { 5150 rowlen = rvalues + rstarts[i]*rbs; 5151 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5152 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5153 } 5154 5155 /* pack the outgoing message */ 5156 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5157 5158 sstartsj[0] = 0; 5159 rstartsj[0] = 0; 5160 len = 0; /* total length of j or a array to be sent */ 5161 k = 0; 5162 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5163 for (i=0; i<nsends; i++) { 5164 rowlen = svalues + sstarts[i]*sbs; 5165 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5166 for (j=0; j<nrows; j++) { 5167 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5168 for (l=0; l<sbs; l++) { 5169 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5170 5171 rowlen[j*sbs+l] = ncols; 5172 5173 len += ncols; 5174 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5175 } 5176 k++; 5177 } 5178 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5179 5180 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5181 } 5182 /* recvs and sends of i-array are completed */ 5183 i = nrecvs; 5184 while (i--) { 5185 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5186 } 5187 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5188 ierr = PetscFree(svalues);CHKERRQ(ierr); 5189 5190 /* allocate buffers for sending j and a arrays */ 5191 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5192 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5193 5194 /* create i-array of B_oth */ 5195 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5196 5197 b_othi[0] = 0; 5198 len = 0; /* total length of j or a array to be received */ 5199 k = 0; 5200 for (i=0; i<nrecvs; i++) { 5201 rowlen = rvalues + rstarts[i]*rbs; 5202 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5203 for (j=0; j<nrows; j++) { 5204 b_othi[k+1] = b_othi[k] + rowlen[j]; 5205 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5206 k++; 5207 } 5208 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5209 } 5210 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5211 5212 /* allocate space for j and a arrrays of B_oth */ 5213 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5214 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5215 5216 /* j-array */ 5217 /*---------*/ 5218 /* post receives of j-array */ 5219 for (i=0; i<nrecvs; i++) { 5220 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5221 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5222 } 5223 5224 /* pack the outgoing message j-array */ 5225 k = 0; 5226 for (i=0; i<nsends; i++) { 5227 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5228 bufJ = bufj+sstartsj[i]; 5229 for (j=0; j<nrows; j++) { 5230 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5231 for (ll=0; ll<sbs; ll++) { 5232 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5233 for (l=0; l<ncols; l++) { 5234 *bufJ++ = cols[l]; 5235 } 5236 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5237 } 5238 } 5239 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5240 } 5241 5242 /* recvs and sends of j-array are completed */ 5243 i = nrecvs; 5244 while (i--) { 5245 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5246 } 5247 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5248 } else if (scall == MAT_REUSE_MATRIX) { 5249 sstartsj = *startsj_s; 5250 rstartsj = *startsj_r; 5251 bufa = *bufa_ptr; 5252 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5253 b_otha = b_oth->a; 5254 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5255 5256 /* a-array */ 5257 /*---------*/ 5258 /* post receives of a-array */ 5259 for (i=0; i<nrecvs; i++) { 5260 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5261 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5262 } 5263 5264 /* pack the outgoing message a-array */ 5265 k = 0; 5266 for (i=0; i<nsends; i++) { 5267 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5268 bufA = bufa+sstartsj[i]; 5269 for (j=0; j<nrows; j++) { 5270 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5271 for (ll=0; 
ll<sbs; ll++) { 5272 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5273 for (l=0; l<ncols; l++) { 5274 *bufA++ = vals[l]; 5275 } 5276 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5277 } 5278 } 5279 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5280 } 5281 /* recvs and sends of a-array are completed */ 5282 i = nrecvs; 5283 while (i--) { 5284 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5285 } 5286 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5287 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5288 5289 if (scall == MAT_INITIAL_MATRIX) { 5290 /* put together the new matrix */ 5291 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5292 5293 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5294 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5295 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5296 b_oth->free_a = PETSC_TRUE; 5297 b_oth->free_ij = PETSC_TRUE; 5298 b_oth->nonew = 0; 5299 5300 ierr = PetscFree(bufj);CHKERRQ(ierr); 5301 if (!startsj_s || !bufa_ptr) { 5302 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5303 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5304 } else { 5305 *startsj_s = sstartsj; 5306 *startsj_r = rstartsj; 5307 *bufa_ptr = bufa; 5308 } 5309 } 5310 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5311 PetscFunctionReturn(0); 5312 } 5313 5314 /*@C 5315 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5316 5317 Not Collective 5318 5319 Input Parameters: 5320 . A - The matrix in mpiaij format 5321 5322 Output Parameter: 5323 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5324 . 
colmap - A map from global column index to local index into lvec 5325 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5326 5327 Level: developer 5328 5329 @*/ 5330 #if defined(PETSC_USE_CTABLE) 5331 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5332 #else 5333 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5334 #endif 5335 { 5336 Mat_MPIAIJ *a; 5337 5338 PetscFunctionBegin; 5339 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5340 PetscValidPointer(lvec, 2); 5341 PetscValidPointer(colmap, 3); 5342 PetscValidPointer(multScatter, 4); 5343 a = (Mat_MPIAIJ*) A->data; 5344 if (lvec) *lvec = a->lvec; 5345 if (colmap) *colmap = a->colmap; 5346 if (multScatter) *multScatter = a->Mvctx; 5347 PetscFunctionReturn(0); 5348 } 5349 5350 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5351 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5352 #if defined(PETSC_HAVE_MKL_SPARSE) 5353 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5354 #endif 5355 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5356 #if defined(PETSC_HAVE_ELEMENTAL) 5357 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5358 #endif 5359 #if defined(PETSC_HAVE_HYPRE) 5360 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5361 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5362 #endif 5363 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5364 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIELL(Mat,MatType,MatReuse,Mat*); 5365 5366 /* 5367 Computes (B'*A')' since computing B*A directly is untenable 5368 5369 n p p 5370 ( ) ( ) ( ) 5371 m ( A ) * n ( B ) = m ( C ) 5372 ( ) ( ) ( ) 5373 5374 */ 5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5376 { 5377 PetscErrorCode ierr; 5378 Mat At,Bt,Ct; 5379 5380 PetscFunctionBegin; 5381 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5382 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5383 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5384 ierr = MatDestroy(&At);CHKERRQ(ierr); 5385 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5386 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5387 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5388 PetscFunctionReturn(0); 5389 } 5390 5391 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5392 { 5393 PetscErrorCode ierr; 5394 PetscInt m=A->rmap->n,n=B->cmap->n; 5395 Mat Cmat; 5396 5397 PetscFunctionBegin; 5398 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5399 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5400 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5401 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5402 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5403 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5404 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5405 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5406 5407 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5408 5409 *C = Cmat; 5410 PetscFunctionReturn(0); 5411 } 5412 5413 /* 
----------------------------------------------------------------*/ 5414 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5415 { 5416 PetscErrorCode ierr; 5417 5418 PetscFunctionBegin; 5419 if (scall == MAT_INITIAL_MATRIX) { 5420 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5421 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5422 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5423 } 5424 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5425 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5426 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5427 PetscFunctionReturn(0); 5428 } 5429 5430 /*MC 5431 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5432 5433 Options Database Keys: 5434 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5435 5436 Level: beginner 5437 5438 .seealso: MatCreateAIJ() 5439 M*/ 5440 5441 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5442 { 5443 Mat_MPIAIJ *b; 5444 PetscErrorCode ierr; 5445 PetscMPIInt size; 5446 5447 PetscFunctionBegin; 5448 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5449 5450 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5451 B->data = (void*)b; 5452 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5453 B->assembled = PETSC_FALSE; 5454 B->insertmode = NOT_SET_VALUES; 5455 b->size = size; 5456 5457 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5458 5459 /* build cache for off array entries formed */ 5460 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5461 5462 b->donotstash = PETSC_FALSE; 5463 b->colmap = 0; 5464 b->garray = 0; 5465 b->roworiented = PETSC_TRUE; 5466 5467 /* stuff used for matrix vector multiply */ 5468 b->lvec = NULL; 5469 b->Mvctx = NULL; 5470 5471 /* stuff for MatGetRow() */ 5472 b->rowindices = 0; 5473 b->rowvalues = 0; 5474 b->getrowactive = PETSC_FALSE; 5475 5476 /* flexible pointer used in CUSP/CUSPARSE classes */ 5477 b->spptr = NULL; 5478 5479 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5480 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5481 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5482 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5483 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5484 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5485 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5486 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5487 #if defined(PETSC_HAVE_MKL_SPARSE) 5488 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5489 #endif 5490 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5491 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5492 #if defined(PETSC_HAVE_ELEMENTAL) 5493 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5494 #endif 5495 #if defined(PETSC_HAVE_HYPRE) 5496 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5497 #endif 5498 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiell_C",MatConvert_MPIAIJ_MPIELL);CHKERRQ(ierr); 5500 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5501 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5502 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5503 #if defined(PETSC_HAVE_HYPRE) 5504 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5505 #endif 5506 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5507 PetscFunctionReturn(0); 5508 } 5509 5510 /*@C 5511 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5512 and "off-diagonal" part of the matrix in CSR format. 5513 5514 Collective on MPI_Comm 5515 5516 Input Parameters: 5517 + comm - MPI communicator 5518 . m - number of local rows (Cannot be PETSC_DECIDE) 5519 . n - This value should be the same as the local size used in creating the 5520 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5521 calculated if N is given) For square matrices n is almost always m. 5522 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5523 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5524 . i - row indices for "diagonal" portion of matrix 5525 . j - column indices 5526 . a - matrix values 5527 . oi - row indices for "off-diagonal" portion of matrix 5528 . oj - column indices 5529 - oa - matrix values 5530 5531 Output Parameter: 5532 . mat - the matrix 5533 5534 Level: advanced 5535 5536 Notes: 5537 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5538 must free the arrays once the matrix has been destroyed and not before. 5539 5540 The i and j indices are 0 based 5541 5542 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5543 5544 This sets local rows and cannot be used to set off-processor values. 5545 5546 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5547 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5548 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5549 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5550 keep track of the underlying array. 
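    For illustration only, a minimal sketch of the recommended MatSetValues()-based assembly (the loop bounds and the
    ncols, cols, and vals describing one locally owned row are placeholders, not arguments of this routine):
.vb
       MatCreate(comm,&A);
       MatSetSizes(A,m,n,M,N);
       MatSetType(A,MATAIJ);
       MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
       for (i=rstart; i<rend; i++) {
         MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
       }
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve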
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5551 communication if it is known that only local entries will be set. 5552 5553 .keywords: matrix, aij, compressed row, sparse, parallel 5554 5555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5556 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5557 @*/ 5558 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5559 { 5560 PetscErrorCode ierr; 5561 Mat_MPIAIJ *maij; 5562 5563 PetscFunctionBegin; 5564 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5565 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5566 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5567 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5568 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5569 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5570 maij = (Mat_MPIAIJ*) (*mat)->data; 5571 5572 (*mat)->preallocated = PETSC_TRUE; 5573 5574 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5575 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5576 5577 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5578 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5579 5580 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5581 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5582 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5583 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5584 5585 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5586 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5587 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5588 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5589 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5590 PetscFunctionReturn(0); 5591 } 5592 5593 /* 5594 Special version for direct calls from Fortran 5595 */ 5596 #include <petsc/private/fortranimpl.h> 5597 5598 /* Change these macros so can be used in void function */ 5599 #undef CHKERRQ 5600 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5601 #undef SETERRQ2 5602 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5603 #undef SETERRQ3 5604 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5605 #undef SETERRQ 5606 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5607 5608 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5609 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5610 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5611 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5612 #else 5613 #endif 5614 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5615 { 5616 Mat mat = *mmat; 5617 PetscInt m = *mm, n = *mn; 5618 InsertMode addv = *maddv; 5619 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5620 PetscScalar value; 5621 PetscErrorCode ierr; 5622 5623 MatCheckPreallocated(mat,1); 5624 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5625 5626 #if 
defined(PETSC_USE_DEBUG) 5627 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5628 #endif 5629 { 5630 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5631 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5632 PetscBool roworiented = aij->roworiented; 5633 5634 /* Some Variables required in the macro */ 5635 Mat A = aij->A; 5636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5637 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5638 MatScalar *aa = a->a; 5639 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5640 Mat B = aij->B; 5641 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5642 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5643 MatScalar *ba = b->a; 5644 5645 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5646 PetscInt nonew = a->nonew; 5647 MatScalar *ap1,*ap2; 5648 5649 PetscFunctionBegin; 5650 for (i=0; i<m; i++) { 5651 if (im[i] < 0) continue; 5652 #if defined(PETSC_USE_DEBUG) 5653 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5654 #endif 5655 if (im[i] >= rstart && im[i] < rend) { 5656 row = im[i] - rstart; 5657 lastcol1 = -1; 5658 rp1 = aj + ai[row]; 5659 ap1 = aa + ai[row]; 5660 rmax1 = aimax[row]; 5661 nrow1 = ailen[row]; 5662 low1 = 0; 5663 high1 = nrow1; 5664 lastcol2 = -1; 5665 rp2 = bj + bi[row]; 5666 ap2 = ba + bi[row]; 5667 rmax2 = bimax[row]; 5668 nrow2 = bilen[row]; 5669 low2 = 0; 5670 high2 = nrow2; 5671 5672 for (j=0; j<n; j++) { 5673 if (roworiented) value = v[i*n+j]; 5674 else value = v[i+j*m]; 5675 if (in[j] >= cstart && in[j] < cend) { 5676 col = in[j] - cstart; 5677 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5678 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5679 } else if (in[j] < 0) continue; 5680 #if defined(PETSC_USE_DEBUG) 5681 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5682 #endif 5683 else { 5684 if (mat->was_assembled) { 5685 if (!aij->colmap) { 5686 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5687 } 5688 #if defined(PETSC_USE_CTABLE) 5689 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5690 col--; 5691 #else 5692 col = aij->colmap[in[j]] - 1; 5693 #endif 5694 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5695 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5696 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5697 col = in[j]; 5698 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5699 B = aij->B; 5700 b = (Mat_SeqAIJ*)B->data; 5701 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5702 rp2 = bj + bi[row]; 5703 ap2 = ba + bi[row]; 5704 rmax2 = bimax[row]; 5705 nrow2 = bilen[row]; 5706 low2 = 0; 5707 high2 = nrow2; 5708 bm = aij->B->rmap->n; 5709 ba = b->a; 5710 } 5711 } else col = in[j]; 5712 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5713 } 5714 } 5715 } else if (!aij->donotstash) { 5716 if (roworiented) { 5717 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5718 } else { 5719 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5720 } 5721 } 5722 } 5723 } 5724 PetscFunctionReturnVoid(); 5725 } 5726 5727
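/*
   Illustrative note (a sketch only, not part of the PETSc source): a typical use of MatCreateMPIAIJSumSeqAIJ(),
   defined earlier in this file, is for every rank to assemble a sequential matrix of the full global size and then
   sum the per-rank contributions into one parallel matrix. The sequential assembly below is a placeholder; nnz is a
   hypothetical preallocation array.

      Mat seqmat,mpimat;
      MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,nnz,&seqmat);
      ... set this rank's entries with MatSetValues() and assemble seqmat ...
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
*/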