#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
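/*
   Example usage (a minimal sketch, not part of the manual pages above; the
   global sizes M, N and the per-row nonzero estimates 5 and 2 are
   illustrative assumptions):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Only the preallocation call matching the communicator size takes effect;
   the other is ignored, which is why calling both, as recommended above, is
   safe and keeps the code independent of the number of processes.
*/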
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices

  Used by a preconditioner, hence PETSC_EXTERN
*/
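/*
   Usage sketch (illustrative; assumes gmat is a square MATSEQAIJ matrix that
   is valid on rank 0 of PETSC_COMM_WORLD and m is this process's desired
   number of local rows):

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call moves only the numerical values from rank 0,
   reusing the nonzero structure created by the first call.
*/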
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
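/*
   Worked example of the colmap/garray relationship built above: suppose the
   off-diagonal block B has three nonzero columns whose global indices are
   garray[] = {3, 7, 12}.  Without PETSC_USE_CTABLE, colmap is a dense array
   of length mat->cmap->N+1 with colmap[3] = 1, colmap[7] = 2, colmap[12] = 3
   and every other entry 0, so local column = colmap[global] - 1 and a result
   of -1 means the global column is not present in B.  With PETSC_USE_CTABLE
   the same (global+1, local+1) pairs live in a hash table, trading access
   speed for scalable memory use.
*/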
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
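/*
   Sketch of the calling sequence the assembly routines support (user code,
   for illustration; row, ncols, cols, and vals are placeholders for the
   caller's data): MatSetValues_MPIAIJ() above stashes entries destined for
   rows owned by other processes, and only the assembly pair communicates
   them, so entries may be set from any process:

      ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/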
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
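/*
   Usage sketch (illustrative): applying Dirichlet boundary conditions with
   the routine above, assuming rows[] holds nrows global row indices and x
   holds the prescribed solution values,

      ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

   zeroes those rows, places 1.0 on their diagonal entries, and sets
   b[row] = 1.0*x[row] for each zeroed row, which is exactly the
   right-hand-side fix-up implemented above.
*/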
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
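/*
   The multiply routines below all use the standard MPIAIJ splitting: the
   local rows of A are stored as a diagonal block A_d (a->A) and an
   off-diagonal block A_o (a->B) whose columns are compressed through garray,
   so

      y = A_d * x_local + A_o * x_ghost

   where x_ghost (a->lvec) is filled by the Mvctx scatter.  The scatter is
   begun before the local product and completed after it, so the
   communication overlaps the on-process computation.
*/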
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
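/*
   Layout of the binary file produced by the routine below (summarized from
   its code, for reference):

      header[0] = MAT_FILE_CLASSID
      header[1] = M                 global number of rows
      header[2] = N                 global number of columns
      header[3] = nz                global number of nonzeros (MPI_SUM reduce)
      row lengths    : M  PetscInt values
      column indices : nz PetscInt values, global, in row order
      values         : nz PetscScalar values, matching the indices

   Each section is written in process order, with the flow-control helpers
   bounding the number of in-flight messages to rank 0.
*/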
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
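/*
   The SOR variants below run only process-local sweeps: each outer iteration
   refreshes the ghost values of xx through Mvctx and folds the off-process
   coupling into the right-hand side,

      bb1 = bb - B * x_ghost

   then applies the sequential SOR kernel of the diagonal block to bb1.  A
   true parallel SOR across process boundaries is not supported, as the final
   SETERRQ below states.
*/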
Everyone has to call to draw the matrix since the graphics waits are 1438 synchronized across all processors that share the PetscDraw object 1439 */ 1440 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1441 if (!rank) { 1442 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1443 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1446 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local sweep */ 1520 ierr = 
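/*
   Editor's sketch (not part of the original PETSc source): requesting the detailed
   per-rank ASCII summary handled above by pushing the PETSC_VIEWER_ASCII_INFO_DETAIL
   format around the MatView() call.
*/
PETSC_UNUSED static PetscErrorCode ExampleViewMPIAIJInfo(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_WORLD;

  PetscFunctionBegin;
  ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* prints per-rank rows/nonzeros and the Mvctx scatter info */
  ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}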
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->errortype = mat->A->errortype;
  PetscFunctionReturn(0);
}
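/*
   Editor's sketch (not part of the original PETSc source): one application of the
   local symmetric sweep implemented above. omega = 1.0 gives Gauss-Seidel and
   fshift = 0.0 applies no diagonal shift.
*/
PETSC_UNUSED static PetscErrorCode ExampleLocalSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}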
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIAIJ"
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
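/*
   Editor's sketch (not part of the original PETSc source): querying the global number
   of nonzeros, which sums the A (diagonal) and B (off-diagonal) parts as done above.
*/
PETSC_UNUSED static PetscErrorCode ExampleGlobalNonzeros(Mat A,PetscInt *nz)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  *nz  = (PetscInt)info.nz_used;
  PetscFunctionReturn(0);
}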
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
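/*
   Editor's sketch (not part of the original PETSc source): the standard traversal
   pattern served by MatGetRow_MPIAIJ()/MatRestoreRow_MPIAIJ() above. Only locally
   owned rows may be requested, and each row must be restored before the next fetch.
*/
PETSC_UNUSED static PetscErrorCode ExampleRowTraversal(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          r,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (r=rstart; r<rend; r++) {
    ierr = MatGetRow(A,r,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... use nz, cols[], vals[] ... */
    ierr = MatRestoreRow(A,r,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}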
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
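/*
   Editor's sketch (not part of the original PETSc source): out-of-place transpose
   through the preallocate-then-insert path implemented above.
*/
PETSC_UNUSED static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}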
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, a copy more efficient than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ(), except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X form a subset of the nonzeros of Y */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
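/*
   Editor's sketch (not part of the original PETSc source): Y += a*X. With
   SAME_NONZERO_PATTERN the update reduces to the two BLAS axpy calls above;
   DIFFERENT_NONZERO_PATTERN triggers the merged preallocation computed by the
   helpers above before the values are combined.
*/
PETSC_UNUSED static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}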
extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
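/*
   Editor's sketch (not part of the original PETSc source): largest entry in
   magnitude of each local row together with its global column index, combining
   the A and B parts as done above. The left vector from MatCreateVecs() conforms
   to the row layout, which is what MatGetRowMaxAbs() expects.
*/
PETSC_UNUSED static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       m,*idx;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
  /* ... use v and idx[] ... */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}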
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->errortype = a->A->errortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else    A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  ierr = PetscObjectOptionsBegin((PetscObject)A);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
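/*
   Editor's sketch (not part of the original PETSc source): the scalable overlap
   algorithm can be selected programmatically, as below, or at runtime with
   -mat_increase_overlap_scalable.
*/
PETSC_UNUSED static PetscErrorCode ExampleScalableOverlap(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}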
#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIAIJ"
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIAIJ,
  MatGetRow_MPIAIJ,
  MatRestoreRow_MPIAIJ,
  MatMult_MPIAIJ,
  /*  4*/ MatMultAdd_MPIAIJ,
  MatMultTranspose_MPIAIJ,
  MatMultTransposeAdd_MPIAIJ,
  0,
  0,
  0,
  /* 10*/ 0,
  0,
  0,
  MatSOR_MPIAIJ,
  MatTranspose_MPIAIJ,
  /* 15*/ MatGetInfo_MPIAIJ,
  MatEqual_MPIAIJ,
  MatGetDiagonal_MPIAIJ,
  MatDiagonalScale_MPIAIJ,
  MatNorm_MPIAIJ,
  /* 20*/ MatAssemblyBegin_MPIAIJ,
  MatAssemblyEnd_MPIAIJ,
  MatSetOption_MPIAIJ,
  MatZeroEntries_MPIAIJ,
  /* 24*/ MatZeroRows_MPIAIJ,
  0,
  0,
  0,
  0,
  /* 29*/ MatSetUp_MPIAIJ,
  0,
  0,
  0,
  0,
  /* 34*/ MatDuplicate_MPIAIJ,
  0,
  0,
  0,
  0,
  /* 39*/ MatAXPY_MPIAIJ,
  MatGetSubMatrices_MPIAIJ,
  MatIncreaseOverlap_MPIAIJ,
  MatGetValues_MPIAIJ,
  MatCopy_MPIAIJ,
  /* 44*/ MatGetRowMax_MPIAIJ,
  MatScale_MPIAIJ,
  MatShift_MPIAIJ,
  MatDiagonalSet_MPIAIJ,
  MatZeroRowsColumns_MPIAIJ,
  /* 49*/ MatSetRandom_MPIAIJ,
  0,
  0,
  0,
  0,
  /* 54*/ MatFDColoringCreate_MPIXAIJ,
  0,
  MatSetUnfactored_MPIAIJ,
  MatPermute_MPIAIJ,
  0,
  /* 59*/ MatGetSubMatrix_MPIAIJ,
  MatDestroy_MPIAIJ,
  MatView_MPIAIJ,
  0,
  MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
  /* 64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
  MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
  0,
  0,
  0,
  /* 69*/ MatGetRowMaxAbs_MPIAIJ,
  MatGetRowMinAbs_MPIAIJ,
  0,
  MatSetColoring_MPIAIJ,
  0,
  MatSetValuesAdifor_MPIAIJ,
  /* 75*/ MatFDColoringApply_AIJ,
  MatSetFromOptions_MPIAIJ,
  0,
  0,
  MatFindZeroDiagonals_MPIAIJ,
  /* 80*/ 0,
  0,
  0,
  /* 83*/ MatLoad_MPIAIJ,
  0,
  0,
  0,
  0,
  0,
  /* 89*/ MatMatMult_MPIAIJ_MPIAIJ,
  MatMatMultSymbolic_MPIAIJ_MPIAIJ,
  MatMatMultNumeric_MPIAIJ_MPIAIJ,
  MatPtAP_MPIAIJ_MPIAIJ,
  MatPtAPSymbolic_MPIAIJ_MPIAIJ,
  /* 94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
  0,
  0,
  0,
  0,
  /* 99*/ 0,
  0,
  0,
  MatConjugate_MPIAIJ,
  0,
  /*104*/ MatSetValuesRow_MPIAIJ,
  MatRealPart_MPIAIJ,
  MatImaginaryPart_MPIAIJ,
  0,
  0,
  /*109*/ 0,
  0,
  MatGetRowMin_MPIAIJ,
  0,
  MatMissingDiagonal_MPIAIJ,
  /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
  0,
  MatGetGhosts_MPIAIJ,
  0,
  0,
  /*119*/ 0,
  0,
  0,
  0,
  MatGetMultiProcBlock_MPIAIJ,
  /*124*/ MatFindNonzeroRows_MPIAIJ,
  MatGetColumnNorms_MPIAIJ,
  MatInvertBlockDiagonal_MPIAIJ,
  0,
  MatGetSubMatricesMPI_MPIAIJ,
  /*129*/ 0,
  MatTransposeMatMult_MPIAIJ_MPIAIJ,
  MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
  MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
  0,
  /*134*/ 0,
  0,
  0,
  0,
  0,
  /*139*/ 0,
  0,
  0,
  MatFDColoringSetUp_MPIXAIJ,
  MatFindOffBlockDiagonalEntries_MPIAIJ,
  /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
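/*
   Editor's sketch (not part of the original PETSc source): the store/retrieve pair
   above is used when the same nonzero pattern is refilled repeatedly, e.g. inside a
   nonlinear solve; MAT_NEW_NONZERO_LOCATIONS must be disabled first.
*/
PETSC_UNUSED static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);    /* snapshot the values of both the A and B parts */
  /* ... modify the matrix values, solve, ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr); /* restore the snapshot */
  PetscFunctionReturn(0);
}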
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
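/*
   Editor's sketch (not part of the original PETSc source): typical preallocation of
   an MPIAIJ matrix, which creates the sequential A (diagonal) and B (off-diagonal)
   blocks above. The estimate of at most 5 nonzeros per row per block is an assumed
   placeholder; exact d_nnz/o_nnz arrays perform better.
*/
PETSC_UNUSED static PetscErrorCode ExamplePreallocate(MPI_Comm comm,PetscInt mlocal,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,5,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}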
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
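/*
   Editor's sketch (not part of the original PETSc source): duplicating a matrix
   including its numerical values; MAT_DO_NOT_COPY_VALUES would copy only the
   nonzero structure and layouts set up above.
*/
PETSC_UNUSED static PetscErrorCode ExampleDuplicate(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}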
2923 procsnz[0] += ourlens[j]; 2924 } 2925 for (i=1; i<size; i++) { 2926 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2927 /* calculate the number of nonzeros on each processor */ 2928 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2929 procsnz[i] += rowlengths[j]; 2930 } 2931 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2932 } 2933 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2934 } else { 2935 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2936 } 2937 2938 if (!rank) { 2939 /* determine max buffer needed and allocate it */ 2940 maxnz = 0; 2941 for (i=0; i<size; i++) { 2942 maxnz = PetscMax(maxnz,procsnz[i]); 2943 } 2944 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2945 2946 /* read in my part of the matrix column indices */ 2947 nz = procsnz[0]; 2948 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2949 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2950 2951 /* read in every one elses and ship off */ 2952 for (i=1; i<size; i++) { 2953 nz = procsnz[i]; 2954 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2955 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2956 } 2957 ierr = PetscFree(cols);CHKERRQ(ierr); 2958 } else { 2959 /* determine buffer space needed for message */ 2960 nz = 0; 2961 for (i=0; i<m; i++) { 2962 nz += ourlens[i]; 2963 } 2964 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2965 2966 /* receive message of column indices*/ 2967 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2968 } 2969 2970 /* determine column ownership if matrix is not square */ 2971 if (N != M) { 2972 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2973 else n = newMat->cmap->n; 2974 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2975 cstart = cend - n; 2976 } else { 2977 cstart = rstart; 2978 cend = rend; 2979 n = cend - cstart; 2980 } 2981 2982 /* loop over local rows, determining number of off diagonal entries */ 2983 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2984 jj = 0; 2985 for (i=0; i<m; i++) { 2986 for (j=0; j<ourlens[i]; j++) { 2987 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2988 jj++; 2989 } 2990 } 2991 2992 for (i=0; i<m; i++) { 2993 ourlens[i] -= offlens[i]; 2994 } 2995 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2996 2997 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2998 2999 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3000 3001 for (i=0; i<m; i++) { 3002 ourlens[i] += offlens[i]; 3003 } 3004 3005 if (!rank) { 3006 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3007 3008 /* read in my part of the matrix numerical values */ 3009 nz = procsnz[0]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 3012 /* insert into matrix */ 3013 jj = rstart; 3014 smycols = mycols; 3015 svals = vals; 3016 for (i=0; i<m; i++) { 3017 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3018 smycols += ourlens[i]; 3019 svals += ourlens[i]; 3020 jj++; 3021 } 3022 3023 /* read in other processors and ship out */ 3024 for (i=1; i<size; i++) { 3025 nz = procsnz[i]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3028 } 3029 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3030 } else { 3031 /* receive numeric values */ 3032 ierr = 
PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3033 3034 /* receive message of values */ 3035 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3036 3037 /* insert into matrix */ 3038 jj = rstart; 3039 smycols = mycols; 3040 svals = vals; 3041 for (i=0; i<m; i++) { 3042 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3043 smycols += ourlens[i]; 3044 svals += ourlens[i]; 3045 jj++; 3046 } 3047 } 3048 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3049 ierr = PetscFree(vals);CHKERRQ(ierr); 3050 ierr = PetscFree(mycols);CHKERRQ(ierr); 3051 ierr = PetscFree(rowners);CHKERRQ(ierr); 3052 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3059 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3060 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3061 { 3062 PetscErrorCode ierr; 3063 IS iscol_local; 3064 PetscInt csize; 3065 3066 PetscFunctionBegin; 3067 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3068 if (call == MAT_REUSE_MATRIX) { 3069 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3070 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3071 } else { 3072 /* check if we are grabbing all columns */ 3073 PetscBool isstride; 3074 PetscMPIInt lisstride = 0,gisstride; 3075 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3076 if (isstride) { 3077 PetscInt start,len,mstart,mlen; 3078 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3079 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3080 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3081 if (mstart == start && mlen-mstart == len) lisstride = 1; 3082 } 3083 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3084 if (gisstride) { 3085 PetscInt N; 3086 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3087 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3088 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3089 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3090 } else { 3091 PetscInt cbs; 3092 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3093 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3094 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3095 } 3096 } 3097 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3098 if (call == MAT_INITIAL_MATRIX) { 3099 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3100 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3101 } 3102 PetscFunctionReturn(0); 3103 } 3104 3105 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3106 #undef __FUNCT__ 3107 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3108 /* 3109 Not great since it makes two copies of the submatrix: first a SeqAIJ 3110 locally, and then the end result by concatenating the local matrices.
3111 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3112 3113 Note: This requires a sequential iscol with all indices. 3114 */ 3115 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3116 { 3117 PetscErrorCode ierr; 3118 PetscMPIInt rank,size; 3119 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3120 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3121 PetscBool allcolumns, colflag; 3122 Mat M,Mreuse; 3123 MatScalar *vwork,*aa; 3124 MPI_Comm comm; 3125 Mat_SeqAIJ *aij; 3126 3127 PetscFunctionBegin; 3128 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3129 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3131 3132 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3133 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3134 if (colflag && ncol == mat->cmap->N) { 3135 allcolumns = PETSC_TRUE; 3136 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3137 } else { 3138 allcolumns = PETSC_FALSE; 3139 } 3140 if (call == MAT_REUSE_MATRIX) { 3141 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3142 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3143 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3144 } else { 3145 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3146 } 3147 3148 /* 3149 m - number of local rows 3150 n - number of columns (same on all processors) 3151 rstart - first row in new global matrix generated 3152 */ 3153 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3154 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3155 if (call == MAT_INITIAL_MATRIX) { 3156 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3157 ii = aij->i; 3158 jj = aij->j; 3159 3160 /* 3161 Determine the number of non-zeros in the diagonal and off-diagonal 3162 portions of the matrix in order to do correct preallocation 3163 */ 3164 3165 /* first get start and end of "diagonal" columns */ 3166 if (csize == PETSC_DECIDE) { 3167 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3168 if (mglobal == n) { /* square matrix */ 3169 nlocal = m; 3170 } else { 3171 nlocal = n/size + ((n % size) > rank); 3172 } 3173 } else { 3174 nlocal = csize; 3175 } 3176 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3177 rstart = rend - nlocal; 3178 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3179 3180 /* next, compute all the lengths */ 3181 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3182 olens = dlens + m; 3183 for (i=0; i<m; i++) { 3184 jend = ii[i+1] - ii[i]; 3185 olen = 0; 3186 dlen = 0; 3187 for (j=0; j<jend; j++) { 3188 if (*jj < rstart || *jj >= rend) olen++; 3189 else dlen++; 3190 jj++; 3191 } 3192 olens[i] = olen; 3193 dlens[i] = dlen; 3194 } 3195 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3196 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3197 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3198 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3199 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3200 ierr = PetscFree(dlens);CHKERRQ(ierr); 3201 } else { 3202 PetscInt ml,nl; 3203 3204 M = *newmat; 3205 
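/* Reuse path: the matrix passed back in must have the same local size and layout as the requested submatrix; this is verified below before its values are overwritten. */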
ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3206 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3207 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3208 /* 3209 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3210 rather than the slower MatSetValues(). 3211 */ 3212 M->was_assembled = PETSC_TRUE; 3213 M->assembled = PETSC_FALSE; 3214 } 3215 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3216 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3217 ii = aij->i; 3218 jj = aij->j; 3219 aa = aij->a; 3220 for (i=0; i<m; i++) { 3221 row = rstart + i; 3222 nz = ii[i+1] - ii[i]; 3223 cwork = jj; jj += nz; 3224 vwork = aa; aa += nz; 3225 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3226 } 3227 3228 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3229 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3230 *newmat = M; 3231 3232 /* save submatrix used in processor for next request */ 3233 if (call == MAT_INITIAL_MATRIX) { 3234 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3235 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3236 } 3237 PetscFunctionReturn(0); 3238 } 3239 3240 #undef __FUNCT__ 3241 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3242 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3243 { 3244 PetscInt m,cstart, cend,j,nnz,i,d; 3245 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3246 const PetscInt *JJ; 3247 PetscScalar *values; 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3252 3253 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3254 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3255 m = B->rmap->n; 3256 cstart = B->cmap->rstart; 3257 cend = B->cmap->rend; 3258 rstart = B->rmap->rstart; 3259 3260 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3261 3262 #if defined(PETSC_USE_DEBUG) 3263 for (i=0; i<m; i++) { 3264 nnz = Ii[i+1]- Ii[i]; 3265 JJ = J + Ii[i]; 3266 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3267 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3268 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3269 } 3270 #endif 3271 3272 for (i=0; i<m; i++) { 3273 nnz = Ii[i+1]- Ii[i]; 3274 JJ = J + Ii[i]; 3275 nnz_max = PetscMax(nnz_max,nnz); 3276 d = 0; 3277 for (j=0; j<nnz; j++) { 3278 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3279 } 3280 d_nnz[i] = d; 3281 o_nnz[i] = nnz - d; 3282 } 3283 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3284 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3285 3286 if (v) values = (PetscScalar*)v; 3287 else { 3288 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3289 } 3290 3291 for (i=0; i<m; i++) { 3292 ii = i + rstart; 3293 nnz = Ii[i+1]- Ii[i]; 3294 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3295 } 3296 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3298 3299 if (!v) { 3300 ierr = PetscFree(values);CHKERRQ(ierr); 3301 } 3302 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3303 PetscFunctionReturn(0); 3304 } 3305 3306 #undef __FUNCT__ 3307 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3308 /*@ 3309 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3310 (the default parallel PETSc format). 3311 3312 Collective on MPI_Comm 3313 3314 Input Parameters: 3315 + B - the matrix 3316 . i - the indices into j for the start of each local row (starts with zero) 3317 . j - the column indices for each local row (starts with zero) 3318 - v - optional values in the matrix 3319 3320 Level: developer 3321 3322 Notes: 3323 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3324 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3325 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3326 3327 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3328 3329 The format used for the sparse matrix input is equivalent to a 3330 row-major ordering, i.e., for the following matrix the expected input data is 3331 as shown 3332 3333 $ 1 0 0 3334 $ 2 0 3 P0 3335 $ ------- 3336 $ 4 5 6 P1 3337 $ 3338 $ Process0 [P0]: rows_owned=[0,1] 3339 $ i = {0,1,3} [size = nrow+1 = 2+1] 3340 $ j = {0,0,2} [size = 3] 3341 $ v = {1,2,3} [size = 3] 3342 $ 3343 $ Process1 [P1]: rows_owned=[2] 3344 $ i = {0,3} [size = nrow+1 = 1+1] 3345 $ j = {0,1,2} [size = 3] 3346 $ v = {4,5,6} [size = 3] 3347 3348 .keywords: matrix, aij, compressed row, sparse, parallel 3349 3350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3351 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3352 @*/ 3353 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3354 { 3355 PetscErrorCode ierr; 3356 3357 PetscFunctionBegin; 3358 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3359 PetscFunctionReturn(0); 3360 } 3361 3362 #undef __FUNCT__ 3363 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3364 /*@C 3365 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3366 (the default parallel PETSc format). For good matrix assembly performance 3367 the user should preallocate the matrix storage by setting the parameters 3368 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3369 performance can be increased by more than a factor of 50. 3370 3371 Collective on MPI_Comm 3372 3373 Input Parameters: 3374 + B - the matrix 3375 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3376 (same value is used for all local rows) 3377 . d_nnz - array containing the number of nonzeros in the various rows of the 3378 DIAGONAL portion of the local submatrix (possibly different for each row) 3379 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3380 The size of this array is equal to the number of local rows, i.e., 'm'.
3381 For matrices that will be factored, you must leave room for (and set) 3382 the diagonal entry even if it is zero. 3383 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3384 submatrix (same value is used for all local rows). 3385 - o_nnz - array containing the number of nonzeros in the various rows of the 3386 OFF-DIAGONAL portion of the local submatrix (possibly different for 3387 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3388 structure. The size of this array is equal to the number 3389 of local rows, i.e., 'm'. 3390 3391 If the *_nnz parameter is given, then the *_nz parameter is ignored. 3392 3393 The AIJ format (also called the Yale sparse matrix format or 3394 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3395 storage. The stored row and column indices begin with zero. 3396 See Users-Manual: ch_mat for details. 3397 3398 The parallel matrix is partitioned such that the first m0 rows belong to 3399 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3400 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 3401 3402 The DIAGONAL portion of the local submatrix of a processor can be defined 3403 as the submatrix which is obtained by extracting the part corresponding to 3404 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3405 first row that belongs to the processor, r2 is the last row belonging to 3406 this processor, and c1-c2 is the range of indices of the local part of a 3407 vector suitable for applying the matrix to. This is an mxn matrix. In the 3408 common case of a square matrix, the row and column ranges are the same and 3409 the DIAGONAL part is also square. The remaining portion of the local 3410 submatrix (of size m x (N-n)) constitutes the OFF-DIAGONAL portion. 3411 3412 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3413 3414 You can call MatGetInfo() to get information on how effective the preallocation was; 3415 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3416 You can also run with the option -info and look for messages with the string 3417 malloc in them to see if additional memory allocation was needed. 3418 3419 Example usage: 3420 3421 Consider the following 8x8 matrix with 34 non-zero values that is 3422 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3423 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3424 as follows: 3425 3426 .vb 3427 1 2 0 | 0 3 0 | 0 4 3428 Proc0 0 5 6 | 7 0 0 | 8 0 3429 9 0 10 | 11 0 0 | 12 0 3430 ------------------------------------- 3431 13 0 14 | 15 16 17 | 0 0 3432 Proc1 0 18 0 | 19 20 21 | 0 0 3433 0 0 0 | 22 23 0 | 24 0 3434 ------------------------------------- 3435 Proc2 25 26 27 | 0 0 28 | 29 0 3436 30 0 0 | 31 32 33 | 0 34 3437 .ve 3438 3439 This can be represented as a collection of submatrices as: 3440 3441 .vb 3442 A B C 3443 D E F 3444 G H I 3445 .ve 3446 3447 Where the submatrices A,B,C are owned by proc0, D,E,F are 3448 owned by proc1, G,H,I are owned by proc2. 3449 3450 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3451 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3452 The 'M','N' parameters are 8,8, and have the same values on all procs. 3453 3454 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3455 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3456 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
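   As a minimal calling sketch for proc0 in the example above (an illustrative assumption: B already has its type and sizes set; the per-row counts are read off the picture):

.vb
     PetscInt d_nnz[3] = {2,2,2};   /* per-row nonzeros of the DIAGONAL block [A] */
     PetscInt o_nnz[3] = {2,2,2};   /* per-row nonzeros of the OFF-DIAGONAL block [BC] */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve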
3457 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3458 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3459 matrix and [DF] as another SeqAIJ matrix. 3460 3461 When d_nz, o_nz parameters are specified, d_nz storage elements are 3462 allocated for every row of the local diagonal submatrix, and o_nz 3463 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3464 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3465 row of the local DIAGONAL and OFF-DIAGONAL submatrices. 3466 In this case, the values of d_nz,o_nz are: 3467 .vb 3468 proc0 : d_nz = 2, o_nz = 2 3469 proc1 : d_nz = 3, o_nz = 2 3470 proc2 : d_nz = 1, o_nz = 4 3471 .ve 3472 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3473 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3474 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3475 34 values. 3476 3477 When d_nnz, o_nnz parameters are specified, the storage is specified 3478 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3479 In the above case, the values for d_nnz,o_nnz are: 3480 .vb 3481 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3482 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3483 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3484 .ve 3485 Here the space allocated is the sum of all the above values, i.e., 34, and 3486 hence the preallocation is perfect. 3487 3488 Level: intermediate 3489 3490 .keywords: matrix, aij, compressed row, sparse, parallel 3491 3492 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3493 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3494 @*/ 3495 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3496 { 3497 PetscErrorCode ierr; 3498 3499 PetscFunctionBegin; 3500 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3501 PetscValidType(B,1); 3502 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3503 PetscFunctionReturn(0); 3504 } 3505 3506 #undef __FUNCT__ 3507 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3508 /*@ 3509 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3510 in standard CSR format. 3511 3512 Collective on MPI_Comm 3513 3514 Input Parameters: 3515 + comm - MPI communicator 3516 . m - number of local rows (Cannot be PETSC_DECIDE) 3517 . n - This value should be the same as the local size used in creating the 3518 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3519 it calculated if N is given) For square matrices n is almost always m. 3520 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3521 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3522 . i - row indices 3523 . j - column indices 3524 - a - matrix values 3525 3526 Output Parameter: 3527 . mat - the matrix 3528 3529 Level: intermediate 3530 3531 Notes: 3532 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3533 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3534 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3535 3536 The i and j indices are 0 based, and the i indices are offsets into the local j array.
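   As a minimal calling sketch on process 0 of the two-process example laid out below (comm and the variable names are illustrative assumptions):

.vb
     PetscInt    i[] = {0,1,3};        /* row offsets of the 2 locally owned rows */
     PetscInt    j[] = {0,0,2};        /* global column indices, row by row */
     PetscScalar v[] = {1.0,2.0,3.0};  /* the corresponding values */
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve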
3537 3538 The format used for the sparse matrix input is equivalent to a 3539 row-major ordering, i.e., for the following matrix the expected input data is 3540 as shown 3541 3542 $ 1 0 0 3543 $ 2 0 3 P0 3544 $ ------- 3545 $ 4 5 6 P1 3546 $ 3547 $ Process0 [P0]: rows_owned=[0,1] 3548 $ i = {0,1,3} [size = nrow+1 = 2+1] 3549 $ j = {0,0,2} [size = 3] 3550 $ v = {1,2,3} [size = 3] 3551 $ 3552 $ Process1 [P1]: rows_owned=[2] 3553 $ i = {0,3} [size = nrow+1 = 1+1] 3554 $ j = {0,1,2} [size = 3] 3555 $ v = {4,5,6} [size = 3] 3556 3557 .keywords: matrix, aij, compressed row, sparse, parallel 3558 3559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3560 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3561 @*/ 3562 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3563 { 3564 PetscErrorCode ierr; 3565 3566 PetscFunctionBegin; 3567 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3568 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3569 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3570 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3571 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3572 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3573 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3574 PetscFunctionReturn(0); 3575 } 3576 3577 #undef __FUNCT__ 3578 #define __FUNCT__ "MatCreateAIJ" 3579 /*@C 3580 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3581 (the default parallel PETSc format). For good matrix assembly performance 3582 the user should preallocate the matrix storage by setting the parameters 3583 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3584 performance can be increased by more than a factor of 50. 3585 3586 Collective on MPI_Comm 3587 3588 Input Parameters: 3589 + comm - MPI communicator 3590 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 3591 This value should be the same as the local size used in creating the 3592 y vector for the matrix-vector product y = Ax. 3593 . n - This value should be the same as the local size used in creating the 3594 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3595 it calculated if N is given) For square matrices n is almost always m. 3596 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3597 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 3598 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3599 (same value is used for all local rows) 3600 . d_nnz - array containing the number of nonzeros in the various rows of the 3601 DIAGONAL portion of the local submatrix (possibly different for each row) 3602 or NULL, if d_nz is used to specify the nonzero structure. 3603 The size of this array is equal to the number of local rows, i.e., 'm'. 3604 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3605 submatrix (same value is used for all local rows). 3606 - o_nnz - array containing the number of nonzeros in the various rows of the 3607 OFF-DIAGONAL portion of the local submatrix (possibly different for 3608 each row) or NULL, if o_nz is used to specify the nonzero 3609 structure.
The size of this array is equal to the number 3610 of local rows, i.e., 'm'. 3611 3612 Output Parameter: 3613 . A - the matrix 3614 3615 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3616 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3617 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3618 3619 Notes: 3620 If the *_nnz parameter is given, then the *_nz parameter is ignored. 3621 3622 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3623 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3624 storage requirements for this matrix. 3625 3626 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3627 processor then it must be used on all processors that share the object for 3628 that argument. 3629 3630 The user MUST specify either the local or global matrix dimensions 3631 (possibly both). 3632 3633 The parallel matrix is partitioned across processors such that the 3634 first m0 rows belong to process 0, the next m1 rows belong to 3635 process 1, the next m2 rows belong to process 2, etc., where 3636 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 3637 values corresponding to an [m x N] submatrix. 3638 3639 The columns are logically partitioned with the n0 columns belonging 3640 to the 0th partition, the next n1 columns belonging to the next 3641 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3642 3643 The DIAGONAL portion of the local submatrix on any given processor 3644 is the submatrix corresponding to the rows and columns m,n 3645 corresponding to the given processor, i.e., the diagonal matrix on 3646 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 3647 etc. The remaining portion of the local submatrix [m x (N-n)] 3648 constitutes the OFF-DIAGONAL portion. The example below better 3649 illustrates this concept. 3650 3651 For a square global matrix we define each processor's diagonal portion 3652 to be its local rows and the corresponding columns (a square submatrix); 3653 each processor's off-diagonal portion encompasses the remainder of the 3654 local matrix (a rectangular submatrix). 3655 3656 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3657 3658 When calling this routine with a single process communicator, a matrix of 3659 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3660 type of communicator, use the construction mechanism: 3661 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3662 3663 By default, this format uses inodes (identical nodes) when possible. 3664 We search for consecutive rows with the same nonzero structure, thereby 3665 reusing matrix information to achieve increased efficiency. 3666 3667 Options Database Keys: 3668 + -mat_no_inode - Do not use inodes 3669 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3670 - -mat_aij_oneindex - Internally use indexing starting at 1 3671 rather than 0. Note that when calling MatSetValues(), 3672 the user still MUST index entries starting at 0! 3673 3674 3675 Example usage: 3676 3677 Consider the following 8x8 matrix with 34 non-zero values that is 3678 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3679 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3680 as follows: 3681 3682 .vb 3683 1 2 0 | 0 3 0 | 0 4 3684 Proc0 0 5 6 | 7 0 0 | 8 0 3685 9 0 10 | 11 0 0 | 12 0 3686 ------------------------------------- 3687 13 0 14 | 15 16 17 | 0 0 3688 Proc1 0 18 0 | 19 20 21 | 0 0 3689 0 0 0 | 22 23 0 | 24 0 3690 ------------------------------------- 3691 Proc2 25 26 27 | 0 0 28 | 29 0 3692 30 0 0 | 31 32 33 | 0 34 3693 .ve 3694 3695 This can be represented as a collection of submatrices as: 3696 3697 .vb 3698 A B C 3699 D E F 3700 G H I 3701 .ve 3702 3703 Where the submatrices A,B,C are owned by proc0, D,E,F are 3704 owned by proc1, G,H,I are owned by proc2. 3705 3706 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3707 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3708 The 'M','N' parameters are 8,8, and have the same values on all procs. 3709 3710 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3711 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3712 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3713 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3714 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3715 matrix and [DF] as another SeqAIJ matrix. 3716 3717 When d_nz, o_nz parameters are specified, d_nz storage elements are 3718 allocated for every row of the local diagonal submatrix, and o_nz 3719 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3720 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3721 row of the local DIAGONAL and OFF-DIAGONAL submatrices. 3722 In this case, the values of d_nz,o_nz are: 3723 .vb 3724 proc0 : d_nz = 2, o_nz = 2 3725 proc1 : d_nz = 3, o_nz = 2 3726 proc2 : d_nz = 1, o_nz = 4 3727 .ve 3728 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3729 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3730 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3731 34 values. 3732 3733 When d_nnz, o_nnz parameters are specified, the storage is specified 3734 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3735 In the above case, the values for d_nnz,o_nnz are: 3736 .vb 3737 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3738 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3739 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3740 .ve 3741 Here the space allocated is the sum of all the above values, i.e., 34, and 3742 hence the preallocation is perfect.
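   As a minimal creation sketch, the call on proc1 for the example above could look like the following (comm is an illustrative assumption; the per-row counts are those listed above):

.vb
     PetscInt d_nnz[] = {3,3,2};   /* proc1's DIAGONAL per-row counts */
     PetscInt o_nnz[] = {2,1,1};   /* proc1's OFF-DIAGONAL per-row counts */
     Mat      A;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve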
3743 3744 Level: intermediate 3745 3746 .keywords: matrix, aij, compressed row, sparse, parallel 3747 3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3749 MPIAIJ, MatCreateMPIAIJWithArrays() 3750 @*/ 3751 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3752 { 3753 PetscErrorCode ierr; 3754 PetscMPIInt size; 3755 3756 PetscFunctionBegin; 3757 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3758 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3759 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3760 if (size > 1) { 3761 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3762 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3763 } else { 3764 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3765 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3766 } 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3772 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3773 { 3774 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3775 PetscBool flg; 3776 PetscErrorCode ierr; 3777 3778 PetscFunctionBegin; 3779 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3780 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3781 if (Ad) *Ad = a->A; 3782 if (Ao) *Ao = a->B; 3783 if (colmap) *colmap = a->garray; 3784 PetscFunctionReturn(0); 3785 } 3786 3787 #undef __FUNCT__ 3788 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3789 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3790 { 3791 PetscErrorCode ierr; 3792 PetscInt i; 3793 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3794 3795 PetscFunctionBegin; 3796 if (coloring->ctype == IS_COLORING_GLOBAL) { 3797 ISColoringValue *allcolors,*colors; 3798 ISColoring ocoloring; 3799 3800 /* set coloring for diagonal portion */ 3801 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3802 3803 /* set coloring for off-diagonal portion */ 3804 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3805 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3806 for (i=0; i<a->B->cmap->n; i++) { 3807 colors[i] = allcolors[a->garray[i]]; 3808 } 3809 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3810 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3811 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3812 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3813 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3814 ISColoringValue *colors; 3815 PetscInt *larray; 3816 ISColoring ocoloring; 3817 3818 /* set coloring for diagonal portion */ 3819 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3820 for (i=0; i<a->A->cmap->n; i++) { 3821 larray[i] = i + A->cmap->rstart; 3822 } 3823 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3824 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3825 for (i=0; i<a->A->cmap->n; i++) { 3826 colors[i] = coloring->colors[larray[i]]; 3827 } 3828 ierr = PetscFree(larray);CHKERRQ(ierr); 3829 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3830 ierr = 
MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3831 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3832 3833 /* set coloring for off-diagonal portion */ 3834 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3835 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3836 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3837 for (i=0; i<a->B->cmap->n; i++) { 3838 colors[i] = coloring->colors[larray[i]]; 3839 } 3840 ierr = PetscFree(larray);CHKERRQ(ierr); 3841 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3842 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3843 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3844 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3845 PetscFunctionReturn(0); 3846 } 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3850 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3851 { 3852 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3853 PetscErrorCode ierr; 3854 3855 PetscFunctionBegin; 3856 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3857 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3858 PetscFunctionReturn(0); 3859 } 3860 3861 #undef __FUNCT__ 3862 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3863 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3864 { 3865 PetscErrorCode ierr; 3866 PetscInt m,N,i,rstart,nnz,Ii; 3867 PetscInt *indx; 3868 PetscScalar *values; 3869 3870 PetscFunctionBegin; 3871 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3872 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3873 PetscInt *dnz,*onz,sum,bs,cbs; 3874 3875 if (n == PETSC_DECIDE) { 3876 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3877 } 3878 /* Check sum(n) = N */ 3879 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3880 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3881 3882 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3883 rstart -= m; 3884 3885 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3886 for (i=0; i<m; i++) { 3887 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3888 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3889 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3890 } 3891 3892 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3893 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3894 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3895 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3896 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3897 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3898 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3899 } 3900 3901 /* numeric phase */ 3902 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3903 for (i=0; i<m; i++) { 3904 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3905 Ii = i + rstart; 3906 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3907 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3908 } 3909 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3910 ierr = 
MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "MatFileSplit" 3916 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3917 { 3918 PetscErrorCode ierr; 3919 PetscMPIInt rank; 3920 PetscInt m,N,i,rstart,nnz; 3921 size_t len; 3922 const PetscInt *indx; 3923 PetscViewer out; 3924 char *name; 3925 Mat B; 3926 const PetscScalar *values; 3927 3928 PetscFunctionBegin; 3929 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3930 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3931 /* Should this be the type of the diagonal block of A? */ 3932 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3933 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3934 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3935 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3936 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3937 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3938 for (i=0; i<m; i++) { 3939 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3940 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3941 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3942 } 3943 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3944 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3945 3946 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3947 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3948 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3949 sprintf(name,"%s.%d",outfile,rank); 3950 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3951 ierr = PetscFree(name);CHKERRQ(ierr); 3952 ierr = MatView(B,out);CHKERRQ(ierr); 3953 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3954 ierr = MatDestroy(&B);CHKERRQ(ierr); 3955 PetscFunctionReturn(0); 3956 } 3957 3958 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3959 #undef __FUNCT__ 3960 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3961 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3962 { 3963 PetscErrorCode ierr; 3964 Mat_Merge_SeqsToMPI *merge; 3965 PetscContainer container; 3966 3967 PetscFunctionBegin; 3968 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3969 if (container) { 3970 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3971 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3972 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3973 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3974 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3975 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3976 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3977 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3978 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3979 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3980 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3983 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3984 ierr = PetscFree(merge);CHKERRQ(ierr); 3985 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3986 } 3987 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3988 PetscFunctionReturn(0); 3989 } 3990 3991 #include <../src/mat/utils/freespace.h> 3992 #include <petscbt.h> 3993 3994 #undef __FUNCT__ 3995 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3996 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3997 { 3998 
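/* Numeric phase of the sequential-to-MPI merge: each rank ships the values of its seqmat rows that are owned by other ranks, then accumulates its own and the received values into mpimat, reusing the symbolic data stored in the attached "MatMergeSeqsToMPI" container. */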
PetscErrorCode ierr; 3999 MPI_Comm comm; 4000 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4001 PetscMPIInt size,rank,taga,*len_s; 4002 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4003 PetscInt proc,m; 4004 PetscInt **buf_ri,**buf_rj; 4005 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4006 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4007 MPI_Request *s_waits,*r_waits; 4008 MPI_Status *status; 4009 MatScalar *aa=a->a; 4010 MatScalar **abuf_r,*ba_i; 4011 Mat_Merge_SeqsToMPI *merge; 4012 PetscContainer container; 4013 4014 PetscFunctionBegin; 4015 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4016 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4017 4018 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4019 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4020 4021 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4022 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4023 4024 bi = merge->bi; 4025 bj = merge->bj; 4026 buf_ri = merge->buf_ri; 4027 buf_rj = merge->buf_rj; 4028 4029 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4030 owners = merge->rowmap->range; 4031 len_s = merge->len_s; 4032 4033 /* send and recv matrix values */ 4034 /*-----------------------------*/ 4035 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4036 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4037 4038 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4039 for (proc=0,k=0; proc<size; proc++) { 4040 if (!len_s[proc]) continue; 4041 i = owners[proc]; 4042 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4043 k++; 4044 } 4045 4046 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4047 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4048 ierr = PetscFree(status);CHKERRQ(ierr); 4049 4050 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4051 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4052 4053 /* insert mat values of mpimat */ 4054 /*----------------------------*/ 4055 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4056 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4057 4058 for (k=0; k<merge->nrecv; k++) { 4059 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4060 nrows = *(buf_ri_k[k]); 4061 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */ 4062 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4063 } 4064 4065 /* set values of ba */ 4066 m = merge->rowmap->n; 4067 for (i=0; i<m; i++) { 4068 arow = owners[rank] + i; 4069 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4070 bnzi = bi[i+1] - bi[i]; 4071 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4072 4073 /* add local non-zero vals of this proc's seqmat into ba */ 4074 anzi = ai[arow+1] - ai[arow]; 4075 aj = a->j + ai[arow]; 4076 aa = a->a + ai[arow]; 4077 nextaj = 0; 4078 for (j=0; nextaj<anzi; j++) { 4079 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4080 ba_i[j] += aa[nextaj++]; 4081 } 4082 } 4083 4084 /* add received vals into ba */ 4085 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4086 /* i-th row */ 4087 if (i == *nextrow[k]) { 4088 anzi = *(nextai[k]+1) - *nextai[k]; 4089 aj = buf_rj[k] + *(nextai[k]); 4090 aa = abuf_r[k] +
*(nextai[k]); 4091 nextaj = 0; 4092 for (j=0; nextaj<anzi; j++) { 4093 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4094 ba_i[j] += aa[nextaj++]; 4095 } 4096 } 4097 nextrow[k]++; nextai[k]++; 4098 } 4099 } 4100 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4101 } 4102 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4103 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4104 4105 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4106 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4107 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4108 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4109 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4110 PetscFunctionReturn(0); 4111 } 4112 4113 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4114 4115 #undef __FUNCT__ 4116 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4117 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4118 { 4119 PetscErrorCode ierr; 4120 Mat B_mpi; 4121 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4122 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4123 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4124 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4125 PetscInt len,proc,*dnz,*onz,bs,cbs; 4126 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4127 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4128 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4129 MPI_Status *status; 4130 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4131 PetscBT lnkbt; 4132 Mat_Merge_SeqsToMPI *merge; 4133 PetscContainer container; 4134 4135 PetscFunctionBegin; 4136 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4137 4138 /* make sure it is a PETSc comm */ 4139 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4140 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4141 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4142 4143 ierr = PetscNew(&merge);CHKERRQ(ierr); 4144 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4145 4146 /* determine row ownership */ 4147 /*---------------------------------------------------------*/ 4148 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4149 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4150 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4151 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4152 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4153 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4154 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4155 4156 m = merge->rowmap->n; 4157 owners = merge->rowmap->range; 4158 4159 /* determine the number of messages to send, their lengths */ 4160 /*---------------------------------------------------------*/ 4161 len_s = merge->len_s; 4162 4163 len = 0; /* length of buf_si[] */ 4164 merge->nsend = 0; 4165 for (proc=0; proc<size; proc++) { 4166 len_si[proc] = 0; 4167 if (proc == rank) { 4168 len_s[proc] = 0; 4169 } else { 4170 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4171 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 4172 } 4173 if (len_s[proc]) { 4174 merge->nsend++; 4175 nrows = 0; 4176 for (i=owners[proc]; i<owners[proc+1]; i++) { 4177 if (ai[i+1] > ai[i]) nrows++; 4178 } 4179 len_si[proc] = 2*(nrows+1); 4180 len += len_si[proc]; 4181 } 4182 } 4183 4184 /* determine the number and length of messages to receive for
ij-structure */ 4185 /*-------------------------------------------------------------------------*/ 4186 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4187 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4188 4189 /* post the Irecv of j-structure */ 4190 /*-------------------------------*/ 4191 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4192 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4193 4194 /* post the Isend of j-structure */ 4195 /*--------------------------------*/ 4196 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4197 4198 for (proc=0, k=0; proc<size; proc++) { 4199 if (!len_s[proc]) continue; 4200 i = owners[proc]; 4201 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4202 k++; 4203 } 4204 4205 /* receives and sends of j-structure are complete */ 4206 /*------------------------------------------------*/ 4207 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4208 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4209 4210 /* send and recv i-structure */ 4211 /*---------------------------*/ 4212 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4213 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4214 4215 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4216 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4217 for (proc=0,k=0; proc<size; proc++) { 4218 if (!len_s[proc]) continue; 4219 /* form outgoing message for i-structure: 4220 buf_si[0]: nrows to be sent 4221 [1:nrows]: row index (local to the receiving process) 4222 [nrows+1:2*nrows+1]: i-structure index 4223 */ 4224 /*-------------------------------------------*/ 4225 nrows = len_si[proc]/2 - 1; 4226 buf_si_i = buf_si + nrows+1; 4227 buf_si[0] = nrows; 4228 buf_si_i[0] = 0; 4229 nrows = 0; 4230 for (i=owners[proc]; i<owners[proc+1]; i++) { 4231 anzi = ai[i+1] - ai[i]; 4232 if (anzi) { 4233 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4234 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4235 nrows++; 4236 } 4237 } 4238 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4239 k++; 4240 buf_si += len_si[proc]; 4241 } 4242 4243 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4244 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4245 4246 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4247 for (i=0; i<merge->nrecv; i++) { 4248 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4249 } 4250 4251 ierr = PetscFree(len_si);CHKERRQ(ierr); 4252 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4253 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4254 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4255 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4256 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4257 ierr = PetscFree(status);CHKERRQ(ierr); 4258 4259 /* compute a local seq matrix in each processor */ 4260 /*----------------------------------------------*/ 4261 /* allocate bi array and free space for accumulating nonzero column info */ 4262 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4263 bi[0] = 0; 4264 4265 /* create and initialize a
linked list */ 4266 nlnk = N+1; 4267 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4268 4269 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4270 len = ai[owners[rank+1]] - ai[owners[rank]]; 4271 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4272 4273 current_space = free_space; 4274 4275 /* determine symbolic info for each local row */ 4276 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4277 4278 for (k=0; k<merge->nrecv; k++) { 4279 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4280 nrows = *buf_ri_k[k]; 4281 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4282 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4283 } 4284 4285 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4286 len = 0; 4287 for (i=0; i<m; i++) { 4288 bnzi = 0; 4289 /* add local non-zero cols of this proc's seqmat into lnk */ 4290 arow = owners[rank] + i; 4291 anzi = ai[arow+1] - ai[arow]; 4292 aj = a->j + ai[arow]; 4293 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4294 bnzi += nlnk; 4295 /* add received col data into lnk */ 4296 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4297 if (i == *nextrow[k]) { /* i-th row */ 4298 anzi = *(nextai[k]+1) - *nextai[k]; 4299 aj = buf_rj[k] + *nextai[k]; 4300 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4301 bnzi += nlnk; 4302 nextrow[k]++; nextai[k]++; 4303 } 4304 } 4305 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4306 4307 /* if free space is not available, make more free space */ 4308 if (current_space->local_remaining<bnzi) { 4309 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4310 nspacedouble++; 4311 } 4312 /* copy data into free space, then initialize lnk */ 4313 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4314 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4315 4316 current_space->array += bnzi; 4317 current_space->local_used += bnzi; 4318 current_space->local_remaining -= bnzi; 4319 4320 bi[i+1] = bi[i] + bnzi; 4321 } 4322 4323 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4324 4325 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4326 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4327 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4328 4329 /* create symbolic parallel matrix B_mpi */ 4330 /*---------------------------------------*/ 4331 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4332 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4333 if (n==PETSC_DECIDE) { 4334 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4335 } else { 4336 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4337 } 4338 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4339 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4340 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4341 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4342 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4343 4344 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4345 B_mpi->assembled = PETSC_FALSE; 4346 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4347 merge->bi = bi; 4348 merge->bj = bj; 4349 merge->buf_ri =
buf_ri; 4350 merge->buf_rj = buf_rj; 4351 merge->coi = NULL; 4352 merge->coj = NULL; 4353 merge->owners_co = NULL; 4354 4355 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4356 4357 /* attach the supporting struct to B_mpi for reuse */ 4358 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4359 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4360 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4361 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4362 *mpimat = B_mpi; 4363 4364 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4365 PetscFunctionReturn(0); 4366 } 4367 4368 #undef __FUNCT__ 4369 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4370 /*@C 4371 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4372 matrices from each processor 4373 4374 Collective on MPI_Comm 4375 4376 Input Parameters: 4377 + comm - the communicator the parallel matrix will live on 4378 . seqmat - the input sequential matrix 4379 . m - number of local rows (or PETSC_DECIDE) 4380 . n - number of local columns (or PETSC_DECIDE) 4381 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4382 4383 Output Parameter: 4384 . mpimat - the parallel matrix generated 4385 4386 Level: advanced 4387 4388 Notes: 4389 The dimensions of the sequential matrix in each processor MUST be the same. 4390 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4391 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4392 @*/ 4393 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscMPIInt size; 4397 4398 PetscFunctionBegin; 4399 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4400 if (size == 1) { 4401 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4402 if (scall == MAT_INITIAL_MATRIX) { 4403 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4404 } else { 4405 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4406 } 4407 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4411 if (scall == MAT_INITIAL_MATRIX) { 4412 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4413 } 4414 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4415 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4416 PetscFunctionReturn(0); 4417 } 4418 4419 #undef __FUNCT__ 4420 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4421 /*@ 4422 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4423 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4424 with MatGetSize() 4425 4426 Not Collective 4427 4428 Input Parameters: 4429 + A - the matrix 4430 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4431 4432 Output Parameter: 4433 .
A_loc - the local sequential matrix generated 4434 4435 Level: developer 4436 4437 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4438 4439 @*/ 4440 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4441 { 4442 PetscErrorCode ierr; 4443 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4444 Mat_SeqAIJ *mat,*a,*b; 4445 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4446 MatScalar *aa,*ba,*cam; 4447 PetscScalar *ca; 4448 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4449 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4450 PetscBool match; 4451 MPI_Comm comm; 4452 PetscMPIInt size; 4453 4454 PetscFunctionBegin; 4455 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4456 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4457 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4458 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4459 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4460 4461 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4462 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4463 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4464 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4465 aa = a->a; ba = b->a; 4466 if (scall == MAT_INITIAL_MATRIX) { 4467 if (size == 1) { 4468 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4473 ci[0] = 0; 4474 for (i=0; i<am; i++) { 4475 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4476 } 4477 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4478 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4479 k = 0; 4480 for (i=0; i<am; i++) { 4481 ncols_o = bi[i+1] - bi[i]; 4482 ncols_d = ai[i+1] - ai[i]; 4483 /* off-diagonal portion of A */ 4484 for (jo=0; jo<ncols_o; jo++) { 4485 col = cmap[*bj]; 4486 if (col >= cstart) break; 4487 cj[k] = col; bj++; 4488 ca[k++] = *ba++; 4489 } 4490 /* diagonal portion of A */ 4491 for (j=0; j<ncols_d; j++) { 4492 cj[k] = cstart + *aj++; 4493 ca[k++] = *aa++; 4494 } 4495 /* off-diagonal portion of A */ 4496 for (j=jo; j<ncols_o; j++) { 4497 cj[k] = cmap[*bj++]; 4498 ca[k++] = *ba++; 4499 } 4500 } 4501 /* put together the new matrix */ 4502 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4503 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4504 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4505 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4506 mat->free_a = PETSC_TRUE; 4507 mat->free_ij = PETSC_TRUE; 4508 mat->nonew = 0; 4509 } else if (scall == MAT_REUSE_MATRIX) { 4510 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4511 ci = mat->i; cj = mat->j; cam = mat->a; 4512 for (i=0; i<am; i++) { 4513 /* off-diagonal portion of A */ 4514 ncols_o = bi[i+1] - bi[i]; 4515 for (jo=0; jo<ncols_o; jo++) { 4516 col = cmap[*bj]; 4517 if (col >= cstart) break; 4518 *cam++ = *ba++; bj++; 4519 } 4520 /* diagonal portion of A */ 4521 ncols_d = ai[i+1] - ai[i]; 4522 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4523 /* off-diagonal portion of A */ 4524 for (j=jo; j<ncols_o; j++) { 4525 *cam++ = *ba++; bj++; 4526 } 4527 } 4528 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4529 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4530 PetscFunctionReturn(0); 4531 } 4532 4533 #undef __FUNCT__ 4534 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4535 /*@C 4536 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4537 4538 Not Collective 4539 4540 Input Parameters: 4541 + A - the matrix 4542 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4543 - row, col - index sets of rows and columns to extract (or NULL) 4544 4545 Output Parameter: 4546 . A_loc - the local sequential matrix generated 4547 4548 Level: developer 4549 4550 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4551 4552 @*/ 4553 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4554 { 4555 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4556 PetscErrorCode ierr; 4557 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4558 IS isrowa,iscola; 4559 Mat *aloc; 4560 PetscBool match; 4561 4562 PetscFunctionBegin; 4563 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4564 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4565 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4566 if (!row) { 4567 start = A->rmap->rstart; end = A->rmap->rend; 4568 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4569 } else { 4570 isrowa = *row; 4571 } 4572 if (!col) { 4573 start = A->cmap->rstart; 4574 cmap = a->garray; 4575 nzA = a->A->cmap->n; 4576 nzB = a->B->cmap->n; 4577 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4578 ncols = 0; 4579 for (i=0; i<nzB; i++) { 4580 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4581 else break; 4582 } 4583 imark = i; 4584 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4585 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4586 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4587 } else { 4588 iscola = *col; 4589 } 4590 if (scall != MAT_INITIAL_MATRIX) { 4591 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4592 aloc[0] = *A_loc; 4593 } 4594 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4595 *A_loc = aloc[0]; 4596 ierr = PetscFree(aloc);CHKERRQ(ierr); 4597 if (!row) { 4598 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4599 } 4600 if (!col) { 4601 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4602 } 4603 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4604 PetscFunctionReturn(0); 4605 } 4606 4607 #undef __FUNCT__ 4608 #define __FUNCT__ "MatGetBrowsOfAcols" 4609 /*@C 4610 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
columns of local A 4611 4612 Collective on Mat 4613 4614 Input Parameters: 4615 + A,B - the matrices in mpiaij format 4616 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4617 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4618 4619 Output Parameter: 4620 + rowb, colb - index sets of rows and columns of B to extract 4621 - B_seq - the sequential matrix generated 4622 4623 Level: developer 4624 4625 @*/ 4626 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4627 { 4628 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4629 PetscErrorCode ierr; 4630 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4631 IS isrowb,iscolb; 4632 Mat *bseq=NULL; 4633 4634 PetscFunctionBegin; 4635 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4636 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4637 } 4638 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4639 4640 if (scall == MAT_INITIAL_MATRIX) { 4641 start = A->cmap->rstart; 4642 cmap = a->garray; 4643 nzA = a->A->cmap->n; 4644 nzB = a->B->cmap->n; 4645 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4646 ncols = 0; 4647 for (i=0; i<nzB; i++) { /* row < local row index */ 4648 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4649 else break; 4650 } 4651 imark = i; 4652 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4653 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4654 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4655 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4656 } else { 4657 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4658 isrowb = *rowb; iscolb = *colb; 4659 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4660 bseq[0] = *B_seq; 4661 } 4662 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4663 *B_seq = bseq[0]; 4664 ierr = PetscFree(bseq);CHKERRQ(ierr); 4665 if (!rowb) { 4666 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4667 } else { 4668 *rowb = isrowb; 4669 } 4670 if (!colb) { 4671 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4672 } else { 4673 *colb = iscolb; 4674 } 4675 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4676 PetscFunctionReturn(0); 4677 } 4678 4679 #undef __FUNCT__ 4680 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4681 /* 4682 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 4683 of the OFF-DIAGONAL portion of local A 4684 4685 Collective on Mat 4686 4687 Input Parameters: 4688 + A,B - the matrices in mpiaij format 4689 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4690 4691 Output Parameter: 4692 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4693 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4694 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4695 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4696 4697 Level: developer 4698 4699 */ 4700 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4701 { 4702 VecScatter_MPI_General *gen_to,*gen_from; 4703 PetscErrorCode ierr; 4704 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4705 Mat_SeqAIJ *b_oth; 4706 VecScatter ctx =a->Mvctx; 4707 MPI_Comm comm; 4708 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4709 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4710 PetscScalar *rvalues,*svalues; 4711 MatScalar *b_otha,*bufa,*bufA; 4712 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4713 MPI_Request *rwaits = NULL,*swaits = NULL; 4714 MPI_Status *sstatus,rstatus; 4715 PetscMPIInt jj,size; 4716 PetscInt *cols,sbs,rbs; 4717 PetscScalar *vals; 4718 4719 PetscFunctionBegin; 4720 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4721 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4722 4723 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4724 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4725 } 4726 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4727 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4728 4729 gen_to = (VecScatter_MPI_General*)ctx->todata; 4730 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4731 rvalues = gen_from->values; /* holds the length of receiving row */ 4732 svalues = gen_to->values; /* holds the length of sending row */ 4733 nrecvs = gen_from->n; 4734 nsends = gen_to->n; 4735 4736 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4737 srow = gen_to->indices; /* local row index to be sent */ 4738 sstarts = gen_to->starts; 4739 sprocs = gen_to->procs; 4740 sstatus = gen_to->sstatus; 4741 sbs = gen_to->bs; 4742 rstarts = gen_from->starts; 4743 rprocs = gen_from->procs; 4744 rbs = gen_from->bs; 4745 4746 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4747 if (scall == MAT_INITIAL_MATRIX) { 4748 /* i-array */ 4749 /*---------*/ 4750 /* post receives */ 4751 for (i=0; i<nrecvs; i++) { 4752 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4753 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4754 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4755 } 4756 4757 /* pack the outgoing message */ 4758 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4759 4760 sstartsj[0] = 0; 4761 rstartsj[0] = 0; 4762 len = 0; /* total length of j or a array to be sent */ 4763 k = 0; 4764 for (i=0; i<nsends; i++) { 4765 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4766 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4767 for (j=0; j<nrows; j++) { 4768 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4769 for (l=0; l<sbs; l++) { 4770 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4771 4772 rowlen[j*sbs+l] = ncols; 4773 4774 len += ncols; 4775 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4776 } 4777 k++; 4778 } 4779 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4780 4781 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4782 } 4783 /* recvs and sends of i-array are completed */ 4784 i = nrecvs; 4785 while (i--) { 4786 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4787 } 4788 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4789 4790 /* allocate buffers for sending j and a arrays */ 4791 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4793 4794 /* create i-array of B_oth */ 4795 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4796 4797 b_othi[0] = 0; 4798 len = 0; /* total length of j or a array to be received */ 4799 k = 0; 4800 for (i=0; i<nrecvs; i++) { 4801 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4802 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4803 for (j=0; j<nrows; j++) { 4804 b_othi[k+1] = b_othi[k] + rowlen[j]; 4805 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4806 k++; 4807 } 4808 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4809 } 4810 4811 /* allocate space for j and a arrays of B_oth */ 4812 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4813 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4814 4815 /* j-array */ 4816 /*---------*/ 4817 /* post receives of j-array */ 4818 for (i=0; i<nrecvs; i++) { 4819 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4820 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4821 } 4822 4823 /* pack the outgoing message j-array */ 4824 k = 0; 4825 for (i=0; i<nsends; i++) { 4826 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4827 bufJ = bufj+sstartsj[i]; 4828 for (j=0; j<nrows; j++) { 4829 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4830 for (ll=0; ll<sbs; ll++) { 4831 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4832 for (l=0; l<ncols; l++) { 4833 *bufJ++ = cols[l]; 4834 } 4835 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4836 } 4837 } 4838 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4839 } 4840 4841 /* recvs and sends of j-array are completed */ 4842 i = nrecvs; 4843 while (i--) { 4844 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4845 } 4846 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4847 } else if (scall == MAT_REUSE_MATRIX) { 4848 sstartsj = *startsj_s; 4849 rstartsj = *startsj_r; 4850 bufa = *bufa_ptr; 4851 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4852 b_otha = b_oth->a; 4853 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 4854 4855 /* a-array */ 4856 /*---------*/ 4857 /* post receives of a-array */ 4858 for (i=0; i<nrecvs; i++) { 4859 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4860 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4861 } 4862 4863 /* pack the outgoing message a-array */ 4864 k = 0; 4865 for (i=0; i<nsends; i++) { 4866 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4867 bufA = bufa+sstartsj[i]; 4868 for (j=0; j<nrows; j++) { 4869 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4870 for (ll=0; ll<sbs; ll++) { 4871 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4872 for (l=0; l<ncols; l++) { 4873 *bufA++ = vals[l]; 4874 } 4875 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4876 } 4877 } 4878 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4879 } 4880 /* recvs and sends of a-array are completed */ 4881 i = nrecvs; 4882 while (i--) { 4883 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4884 } 4885 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4886 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4887 4888 if (scall == MAT_INITIAL_MATRIX) { 4889 /* put together the new matrix */ 4890 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4891 4892 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4893 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4894 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4895 b_oth->free_a = PETSC_TRUE; 4896 b_oth->free_ij = PETSC_TRUE; 4897 b_oth->nonew = 0; 4898 4899 ierr = PetscFree(bufj);CHKERRQ(ierr); 4900 if (!startsj_s || !bufa_ptr) { 4901 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4902 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4903 } else { 4904 *startsj_s = sstartsj; 4905 *startsj_r = rstartsj; 4906 *bufa_ptr = bufa; 4907 } 4908 } 4909 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 #undef __FUNCT__ 4914 #define __FUNCT__ "MatGetCommunicationStructs" 4915 /*@C 4916 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4917 4918 Not Collective 4919 4920 Input Parameters: 4921 . A - The matrix in mpiaij format 4922 4923 Output Parameter: 4924 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4925 . 
colmap - A map from global column index to local index into lvec 4926 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4927 4928 Level: developer 4929 4930 @*/ 4931 #if defined(PETSC_USE_CTABLE) 4932 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4933 #else 4934 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4935 #endif 4936 { 4937 Mat_MPIAIJ *a; 4938 4939 PetscFunctionBegin; 4940 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4941 PetscValidPointer(lvec, 2); 4942 PetscValidPointer(colmap, 3); 4943 PetscValidPointer(multScatter, 4); 4944 a = (Mat_MPIAIJ*) A->data; 4945 if (lvec) *lvec = a->lvec; 4946 if (colmap) *colmap = a->colmap; 4947 if (multScatter) *multScatter = a->Mvctx; 4948 PetscFunctionReturn(0); 4949 } 4950 4951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4953 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4954 #if defined(PETSC_HAVE_ELEMENTAL) 4955 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4956 #endif 4957 4958 #undef __FUNCT__ 4959 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4960 /* 4961 Computes (B'*A')' since computing B*A directly is untenable 4962 4963 n p p 4964 ( ) ( ) ( ) 4965 m ( A ) * n ( B ) = m ( C ) 4966 ( ) ( ) ( ) 4967 4968 */ 4969 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4970 { 4971 PetscErrorCode ierr; 4972 Mat At,Bt,Ct; 4973 4974 PetscFunctionBegin; 4975 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4976 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4977 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4978 ierr = MatDestroy(&At);CHKERRQ(ierr); 4979 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4980 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4981 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4987 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4988 { 4989 PetscErrorCode ierr; 4990 PetscInt m=A->rmap->n,n=B->cmap->n; 4991 Mat Cmat; 4992 4993 PetscFunctionBegin; 4994 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4995 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4996 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4997 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4998 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4999 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5000 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5001 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5002 5003 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5004 5005 *C = Cmat; 5006 PetscFunctionReturn(0); 5007 } 5008 5009 /* ----------------------------------------------------------------*/ 5010 #undef __FUNCT__ 5011 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5012 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5013 { 5014 PetscErrorCode ierr; 5015 5016 PetscFunctionBegin; 5017 if (scall == MAT_INITIAL_MATRIX) { 5018 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5019 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5020 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5021 } 5022 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5023 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5024 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5025 PetscFunctionReturn(0); 5026 } 5027 5028 /*MC 5029 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5030 5031 Options Database Keys: 5032 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5033 5034 Level: beginner 5035 5036 .seealso: MatCreateAIJ() 5037 M*/ 5038 5039 #undef __FUNCT__ 5040 #define __FUNCT__ "MatCreate_MPIAIJ" 5041 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5042 { 5043 Mat_MPIAIJ *b; 5044 PetscErrorCode ierr; 5045 PetscMPIInt size; 5046 5047 PetscFunctionBegin; 5048 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5049 5050 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5051 B->data = (void*)b; 5052 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5053 B->assembled = PETSC_FALSE; 5054 B->insertmode = NOT_SET_VALUES; 5055 b->size = size; 5056 5057 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5058 5059 /* build cache for off array entries formed */ 5060 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5061 5062 b->donotstash = PETSC_FALSE; 5063 b->colmap = 0; 5064 b->garray = 0; 5065 b->roworiented = PETSC_TRUE; 5066 5067 /* stuff used for matrix vector multiply */ 5068 b->lvec = NULL; 5069 b->Mvctx = NULL; 5070 5071 /* stuff for MatGetRow() */ 5072 b->rowindices = 0; 5073 b->rowvalues = 0; 5074 b->getrowactive = PETSC_FALSE; 5075 5076 /* flexible pointer used in CUSP/CUSPARSE classes */ 5077 b->spptr = NULL; 5078 5079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5082 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5090 #if defined(PETSC_HAVE_ELEMENTAL) 5091 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5092 #endif 5093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5096 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 #undef __FUNCT__ 5101 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5102 /*@C 5103 MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal" 5104 and "off-diagonal" part of the matrix in CSR format. 5105 5106 Collective on MPI_Comm 5107 5108 Input Parameters: 5109 + comm - MPI communicator 5110 . m - number of local rows (Cannot be PETSC_DECIDE) 5111 . n - This value should be the same as the local size used in creating the 5112 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5113 it calculated if N is given). For square matrices n is almost always m. 5114 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5115 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5116 . i - row indices for "diagonal" portion of matrix 5117 . j - column indices 5118 . a - matrix values 5119 . oi - row indices for "off-diagonal" portion of matrix 5120 . oj - column indices 5121 - oa - matrix values 5122 5123 Output Parameter: 5124 . mat - the matrix 5125 5126 Level: advanced 5127 5128 Notes: 5129 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5130 must free the arrays once the matrix has been destroyed and not before. 5131 5132 The i and j indices are 0 based 5133 5134 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5135 5136 This sets local rows and cannot be used to set off-processor values. 5137 5138 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5139 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5140 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5141 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5142 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5143 communication if it is known that only local entries will be set.
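
   The following is only an illustrative calling sketch (the variables rank, ierr and the array names di, dj, da,
   oi, oj, oa are placeholders, not part of the API). It builds the 2x2 matrix [2 -1; -1 2] on two processes, with
   one row, one "diagonal" entry, and one "off-diagonal" entry owned by each process; as in the calls to
   MatCreateSeqAIJWithArrays() in the implementation below, dj is a column index local to the diagonal block
   while oj is a global column index.

.vb
    PetscInt    di[2] = {0,1}, dj[1] = {0};            /* diagonal block: one entry in local column 0 */
    PetscScalar da[1] = {2.0};
    PetscInt    oi[2] = {0,1}, oj[1] = {rank ? 0 : 1}; /* off-diagonal block: one entry, global column index */
    PetscScalar oa[1] = {-1.0};
    Mat         A;

    ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
    /* ... use A ... */
    ierr = MatDestroy(&A);CHKERRQ(ierr);               /* the six arrays remain owned by the caller */
.ve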
5144 5145 .keywords: matrix, aij, compressed row, sparse, parallel 5146 5147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5148 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5149 @*/ 5150 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5151 { 5152 PetscErrorCode ierr; 5153 Mat_MPIAIJ *maij; 5154 5155 PetscFunctionBegin; 5156 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5157 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5158 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5159 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5160 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5161 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5162 maij = (Mat_MPIAIJ*) (*mat)->data; 5163 5164 (*mat)->preallocated = PETSC_TRUE; 5165 5166 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5167 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5168 5169 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5170 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5171 5172 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5173 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5174 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5175 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5176 5177 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5178 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5179 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5180 PetscFunctionReturn(0); 5181 } 5182 5183 /* 5184 Special version for direct calls from Fortran 5185 */ 5186 #include <petsc/private/fortranimpl.h> 5187 5188 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5189 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5190 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5191 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5192 #endif 5193 5194 /* Change these macros so can be used in void function */ 5195 #undef CHKERRQ 5196 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5197 #undef SETERRQ2 5198 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5199 #undef SETERRQ3 5200 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5201 #undef SETERRQ 5202 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5203 5204 #undef __FUNCT__ 5205 #define __FUNCT__ "matsetvaluesmpiaij_" 5206 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5207 { 5208 Mat mat = *mmat; 5209 PetscInt m = *mm, n = *mn; 5210 InsertMode addv = *maddv; 5211 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5212 PetscScalar value; 5213 PetscErrorCode ierr; 5214 5215 MatCheckPreallocated(mat,1); 5216 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5217 5218 #if defined(PETSC_USE_DEBUG) 5219 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5220 #endif 5221 { 5222 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5223 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5224 PetscBool roworiented = aij->roworiented; 5225 5226 /* Some Variables required in the macro */ 5227 Mat A = aij->A; 5228 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5229 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5230 MatScalar *aa = a->a; 5231 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5232 Mat B = aij->B; 5233 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5234 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5235 MatScalar *ba = b->a; 5236 5237 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5238 PetscInt nonew = a->nonew; 5239 MatScalar *ap1,*ap2; 5240 5241 PetscFunctionBegin; 5242 for (i=0; i<m; i++) { 5243 if (im[i] < 0) continue; 5244 #if defined(PETSC_USE_DEBUG) 5245 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5246 #endif 5247 if (im[i] >= rstart && im[i] < rend) { 5248 row = im[i] - rstart; 5249 lastcol1 = -1; 5250 rp1 = aj + ai[row]; 5251 ap1 = aa + ai[row]; 5252 rmax1 = aimax[row]; 5253 nrow1 = ailen[row]; 5254 low1 = 0; 5255 high1 = nrow1; 5256 lastcol2 = -1; 5257 rp2 = bj + bi[row]; 5258 ap2 = ba + bi[row]; 5259 rmax2 = bimax[row]; 5260 nrow2 = bilen[row]; 5261 low2 = 0; 5262 high2 = nrow2; 5263 5264 for (j=0; j<n; j++) { 5265 if (roworiented) value = v[i*n+j]; 5266 else value = v[i+j*m]; 5267 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5268 if (in[j] >= cstart && in[j] < cend) { 5269 col = in[j] - cstart; 5270 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5271 } else if (in[j] < 0) continue; 5272 #if defined(PETSC_USE_DEBUG) 5273 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5274 #endif 5275 else { 5276 if (mat->was_assembled) { 5277 if (!aij->colmap) { 5278 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5279 } 5280 #if defined(PETSC_USE_CTABLE) 5281 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5282 col--; 5283 #else 5284 col = aij->colmap[in[j]] - 1; 5285 #endif 5286 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5287 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5288 col = in[j]; 5289 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5290 B = aij->B; 5291 b = (Mat_SeqAIJ*)B->data; 5292 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5293 rp2 = bj + bi[row]; 5294 ap2 = ba + bi[row]; 5295 rmax2 = bimax[row]; 5296 nrow2 = bilen[row]; 5297 low2 = 0; 5298 high2 = nrow2; 5299 bm = aij->B->rmap->n; 5300 ba = b->a; 5301 } 5302 } else col = in[j]; 5303 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5304 } 5305 } 5306 } else if (!aij->donotstash) { 5307 if (roworiented) { 5308 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5309 } else { 5310 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5311 } 5312 } 5313 } 5314 } 5315 PetscFunctionReturnVoid(); 5316 } 5317 5318
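
/*
   Illustrative calling sketch for MatCreateMPIAIJSumSeqAIJ() (seqmat, mpimat, m, n, and ierr are placeholder
   variables; this comment is documentation only, not compiled code). The parallel matrix is built once with
   MAT_INITIAL_MATRIX; if the per-process sequential matrices later change numerically but keep the same nonzero
   pattern, the merge can be repeated with MAT_REUSE_MATRIX, which skips the symbolic phase (see
   MatCreateMPIAIJSumSeqAIJ() above).

      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... update the numerical values of seqmat ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/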