1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*); 17 #endif 18 19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 22 PetscErrorCode ierr; 23 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 24 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 25 PetscReal shift = 0.0; 26 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 27 28 PetscFunctionBegin; 29 allowzeropivot = PetscNot(A->erroriffailure); 30 31 if (a->idiagvalid) { 32 if (values) *values = a->idiag; 33 PetscFunctionReturn(0); 34 } 35 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 36 diag_offset = a->diag; 37 if (!a->idiag) { 38 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 39 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 40 } 41 diag = a->idiag; 42 mdiag = a->idiag+bs2*mbs; 43 if (values) *values = a->idiag; 44 /* factor and invert each block */ 45 switch (bs) { 46 case 1: 47 for (i=0; i<mbs; i++) { 48 odiag = v + 1*diag_offset[i]; 49 diag[0] = odiag[0]; 50 mdiag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 mdiag += 1; 64 } 65 break; 66 case 2: 67 for (i=0; i<mbs; i++) { 68 odiag = v + 4*diag_offset[i]; 69 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 70 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 71 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 72 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 73 diag += 4; 74 mdiag += 4; 75 } 76 break; 77 case 3: 78 for (i=0; i<mbs; i++) { 79 odiag = v + 9*diag_offset[i]; 80 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 81 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 82 diag[8] = odiag[8]; 83 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 84 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 85 mdiag[8] = odiag[8]; 86 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 87 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 88 diag += 9; 89 mdiag += 9; 90 } 91 break; 92 case 4: 93 for (i=0; i<mbs; i++) { 94 odiag = v + 16*diag_offset[i]; 95 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 96 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99 diag += 16; 100 mdiag += 16; 101 } 102 break; 103 case 5: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 25*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 108 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 109 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 110 diag += 25; 111 mdiag += 25; 112 } 113 break; 114 case 6: 115 for (i=0; i<mbs; i++) { 116 odiag = v + 36*diag_offset[i]; 117 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 118 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 119 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 120 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 121 diag += 36; 122 mdiag += 36; 123 } 124 break; 125 case 7: 126 for (i=0; i<mbs; i++) { 127 odiag = v + 49*diag_offset[i]; 128 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 129 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 130 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 131 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 132 diag += 49; 133 mdiag += 49; 134 } 135 break; 136 default: 137 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 138 for (i=0; i<mbs; i++) { 139 odiag = v + bs2*diag_offset[i]; 140 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 141 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 142 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 143 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 144 diag += bs2; 145 mdiag += bs2; 146 } 147 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 148 } 149 a->idiagvalid = PETSC_TRUE; 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 156 PetscScalar *x,*work,*w,*workt,*t; 157 const MatScalar *v,*aa = a->a, *idiag; 158 const PetscScalar *b,*xb; 159 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 160 PetscErrorCode ierr; 161 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 162 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 163 164 PetscFunctionBegin; 165 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 166 its = its*lits; 167 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 168 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 169 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 170 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 171 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 172 173 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 174 175 if (!m) PetscFunctionReturn(0); 176 diag = a->diag; 177 idiag = a->idiag; 178 k = PetscMax(A->rmap->n,A->cmap->n); 179 if (!a->mult_work) { 180 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 181 } 182 if (!a->sor_workt) { 183 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 184 } 185 if (!a->sor_work) { 186 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 187 } 188 work = a->mult_work; 189 t = a->sor_workt; 190 w = a->sor_work; 191 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 194 195 if (flag & SOR_ZERO_INITIAL_GUESS) { 196 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 197 switch (bs) { 198 case 1: 199 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 200 t[0] = b[0]; 201 i2 = 1; 202 idiag += 1; 203 for (i=1; i<m; i++) { 204 v = aa + ai[i]; 205 vi = aj + ai[i]; 206 nz = diag[i] - ai[i]; 207 s[0] = b[i2]; 208 for (j=0; j<nz; j++) { 209 xw[0] = x[vi[j]]; 210 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 211 } 212 t[i2] = s[0]; 213 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 214 x[i2] = xw[0]; 215 idiag += 1; 216 i2 += 1; 217 } 218 break; 219 case 2: 220 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 221 t[0] = b[0]; t[1] = b[1]; 222 i2 = 2; 223 idiag += 4; 224 for (i=1; i<m; i++) { 225 v = aa + 4*ai[i]; 226 vi = aj + ai[i]; 227 nz = diag[i] - ai[i]; 228 s[0] = b[i2]; s[1] = b[i2+1]; 229 for (j=0; j<nz; j++) { 230 idx = 2*vi[j]; 231 it = 4*j; 232 xw[0] = x[idx]; xw[1] = x[1+idx]; 233 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 234 } 235 t[i2] = s[0]; t[i2+1] = s[1]; 236 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 237 x[i2] = xw[0]; x[i2+1] = xw[1]; 238 idiag += 4; 239 i2 += 2; 240 } 241 break; 242 case 3: 243 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 244 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 245 i2 = 3; 246 idiag += 9; 247 for (i=1; i<m; i++) { 248 v = aa + 9*ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 252 while (nz--) { 253 idx = 3*(*vi++); 254 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 255 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 256 v += 9; 257 } 258 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 259 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 260 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 261 idiag += 9; 262 i2 += 3; 263 } 264 break; 265 case 4: 266 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 267 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 268 i2 = 4; 269 idiag += 16; 270 for (i=1; i<m; i++) { 271 v = aa + 16*ai[i]; 272 vi = aj + ai[i]; 273 nz = diag[i] - ai[i]; 274 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 275 while (nz--) { 276 idx = 4*(*vi++); 277 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 278 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 279 v += 16; 280 } 281 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 282 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 283 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 284 idiag += 16; 285 i2 += 4; 286 } 287 break; 288 case 5: 289 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 290 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 291 i2 = 5; 292 idiag += 25; 293 for (i=1; i<m; i++) { 294 v = aa + 25*ai[i]; 295 vi = aj + ai[i]; 296 nz = diag[i] - ai[i]; 297 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 298 while (nz--) { 299 idx = 5*(*vi++); 300 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 301 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 302 v += 25; 303 } 304 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 305 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 306 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 307 idiag += 25; 308 i2 += 5; 309 } 310 break; 311 case 6: 312 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 313 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 314 i2 = 6; 315 idiag += 36; 316 for (i=1; i<m; i++) { 317 v = aa + 36*ai[i]; 318 vi = aj + ai[i]; 319 nz = diag[i] - ai[i]; 320 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 321 while (nz--) { 322 idx = 6*(*vi++); 323 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 324 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 325 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 326 v += 36; 327 } 328 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 329 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 330 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 331 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 332 idiag += 36; 333 i2 += 6; 334 } 335 break; 336 case 7: 337 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 338 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 339 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 340 i2 = 7; 341 idiag += 49; 342 for (i=1; i<m; i++) { 343 v = aa + 49*ai[i]; 344 vi = aj + ai[i]; 345 nz = diag[i] - ai[i]; 346 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 347 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 348 while (nz--) { 349 idx = 7*(*vi++); 350 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 352 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 353 v += 49; 354 } 355 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 357 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 358 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 359 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 360 idiag += 49; 361 i2 += 7; 362 } 363 break; 364 default: 365 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 366 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 367 i2 = bs; 368 idiag += bs2; 369 for (i=1; i<m; i++) { 370 v = aa + bs2*ai[i]; 371 vi = aj + ai[i]; 372 nz = diag[i] - ai[i]; 373 374 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 /* copy all rows of x that are needed into contiguous space */ 376 workt = work; 377 for (j=0; j<nz; j++) { 378 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 workt += bs; 380 } 381 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 382 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 383 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 384 385 idiag += bs2; 386 i2 += bs; 387 } 388 break; 389 } 390 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 391 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 392 xb = t; 393 } 394 else xb = b; 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 396 idiag = a->idiag+bs2*(a->mbs-1); 397 i2 = bs * (m-1); 398 switch (bs) { 399 case 1: 400 s[0] = xb[i2]; 401 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 402 x[i2] = xw[0]; 403 i2 -= 1; 404 for (i=m-2; i>=0; i--) { 405 v = aa + (diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s[0] = xb[i2]; 409 for (j=0; j<nz; j++) { 410 xw[0] = x[vi[j]]; 411 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 412 } 413 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 414 x[i2] = xw[0]; 415 idiag -= 1; 416 i2 -= 1; 417 } 418 break; 419 case 2: 420 s[0] = xb[i2]; s[1] = xb[i2+1]; 421 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 422 x[i2] = xw[0]; x[i2+1] = xw[1]; 423 i2 -= 2; 424 idiag -= 4; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 4*(diag[i] + 1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s[0] = xb[i2]; s[1] = xb[i2+1]; 430 for (j=0; j<nz; j++) { 431 idx = 2*vi[j]; 432 it = 4*j; 433 xw[0] = x[idx]; xw[1] = x[1+idx]; 434 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 435 } 436 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 437 x[i2] = xw[0]; x[i2+1] = xw[1]; 438 idiag -= 4; 439 i2 -= 2; 440 } 441 break; 442 case 3: 443 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 444 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 445 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 446 i2 -= 3; 447 idiag -= 9; 448 for (i=m-2; i>=0; i--) { 449 v = aa + 9*(diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 453 while (nz--) { 454 idx = 3*(*vi++); 455 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 456 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 457 v += 9; 458 } 459 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 460 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 461 idiag -= 9; 462 i2 -= 3; 463 } 464 break; 465 case 4: 466 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 467 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 468 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 469 i2 -= 4; 470 idiag -= 16; 471 for (i=m-2; i>=0; i--) { 472 v = aa + 16*(diag[i]+1); 473 vi = aj + diag[i] + 1; 474 nz = ai[i+1] - diag[i] - 1; 475 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 476 while (nz--) { 477 idx = 4*(*vi++); 478 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 479 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 480 v += 16; 481 } 482 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 483 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 484 idiag -= 16; 485 i2 -= 4; 486 } 487 break; 488 case 5: 489 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 490 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 491 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 492 i2 -= 5; 493 idiag -= 25; 494 for (i=m-2; i>=0; i--) { 495 v = aa + 25*(diag[i]+1); 496 vi = aj + diag[i] + 1; 497 nz = ai[i+1] - diag[i] - 1; 498 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 499 while (nz--) { 500 idx = 5*(*vi++); 501 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 502 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 503 v += 25; 504 } 505 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 506 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 507 idiag -= 25; 508 i2 -= 5; 509 } 510 break; 511 case 6: 512 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 513 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 514 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 515 i2 -= 6; 516 idiag -= 36; 517 for (i=m-2; i>=0; i--) { 518 v = aa + 36*(diag[i]+1); 519 vi = aj + diag[i] + 1; 520 nz = ai[i+1] - diag[i] - 1; 521 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 522 while (nz--) { 523 idx = 6*(*vi++); 524 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 525 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 526 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 527 v += 36; 528 } 529 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 530 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 531 idiag -= 36; 532 i2 -= 6; 533 } 534 break; 535 case 7: 536 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 537 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 538 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 i2 -= 7; 542 idiag -= 49; 543 for (i=m-2; i>=0; i--) { 544 v = aa + 49*(diag[i]+1); 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 548 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 549 while (nz--) { 550 idx = 7*(*vi++); 551 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 553 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 554 v += 49; 555 } 556 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 557 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 558 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 559 idiag -= 49; 560 i2 -= 7; 561 } 562 break; 563 default: 564 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 565 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 566 i2 -= bs; 567 idiag -= bs2; 568 for (i=m-2; i>=0; i--) { 569 v = aa + bs2*(diag[i]+1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i+1] - diag[i] - 1; 572 573 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 /* copy all rows of x that are needed into contiguous space */ 575 workt = work; 576 for (j=0; j<nz; j++) { 577 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 578 workt += bs; 579 } 580 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 581 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 582 583 idiag -= bs2; 584 i2 -= bs; 585 } 586 break; 587 } 588 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 589 } 590 its--; 591 } 592 while (its--) { 593 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 594 idiag = a->idiag; 595 i2 = 0; 596 switch (bs) { 597 case 1: 598 for (i=0; i<m; i++) { 599 v = aa + ai[i]; 600 vi = aj + ai[i]; 601 nz = ai[i+1] - ai[i]; 602 s[0] = b[i2]; 603 for (j=0; j<nz; j++) { 604 xw[0] = x[vi[j]]; 605 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 606 } 607 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 608 x[i2] += xw[0]; 609 idiag += 1; 610 i2 += 1; 611 } 612 break; 613 case 2: 614 for (i=0; i<m; i++) { 615 v = aa + 4*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; 619 for (j=0; j<nz; j++) { 620 idx = 2*vi[j]; 621 it = 4*j; 622 xw[0] = x[idx]; xw[1] = x[1+idx]; 623 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 624 } 625 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; 627 idiag += 4; 628 i2 += 2; 629 } 630 break; 631 case 3: 632 for (i=0; i<m; i++) { 633 v = aa + 9*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 637 while (nz--) { 638 idx = 3*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 641 v += 9; 642 } 643 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 645 idiag += 9; 646 i2 += 3; 647 } 648 break; 649 case 4: 650 for (i=0; i<m; i++) { 651 v = aa + 16*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 655 while (nz--) { 656 idx = 4*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 659 v += 16; 660 } 661 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 663 idiag += 16; 664 i2 += 4; 665 } 666 break; 667 case 5: 668 for (i=0; i<m; i++) { 669 v = aa + 25*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 673 while (nz--) { 674 idx = 5*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 677 v += 25; 678 } 679 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 681 idiag += 25; 682 i2 += 5; 683 } 684 break; 685 case 6: 686 for (i=0; i<m; i++) { 687 v = aa + 36*ai[i]; 688 vi = aj + ai[i]; 689 nz = ai[i+1] - ai[i]; 690 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 691 while (nz--) { 692 idx = 6*(*vi++); 693 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 694 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 695 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 696 v += 36; 697 } 698 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 699 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 700 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 701 idiag += 36; 702 i2 += 6; 703 } 704 break; 705 case 7: 706 for (i=0; i<m; i++) { 707 v = aa + 49*ai[i]; 708 vi = aj + ai[i]; 709 nz = ai[i+1] - ai[i]; 710 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 711 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 712 while (nz--) { 713 idx = 7*(*vi++); 714 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 715 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 716 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 717 v += 49; 718 } 719 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 720 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 721 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 722 idiag += 49; 723 i2 += 7; 724 } 725 break; 726 default: 727 for (i=0; i<m; i++) { 728 v = aa + bs2*ai[i]; 729 vi = aj + ai[i]; 730 nz = ai[i+1] - ai[i]; 731 732 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 /* copy all rows of x that are needed into contiguous space */ 734 workt = work; 735 for (j=0; j<nz; j++) { 736 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 737 workt += bs; 738 } 739 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 740 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 741 742 idiag += bs2; 743 i2 += bs; 744 } 745 break; 746 } 747 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 748 } 749 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 750 idiag = a->idiag+bs2*(a->mbs-1); 751 i2 = bs * (m-1); 752 switch (bs) { 753 case 1: 754 for (i=m-1; i>=0; i--) { 755 v = aa + ai[i]; 756 vi = aj + ai[i]; 757 nz = ai[i+1] - ai[i]; 758 s[0] = b[i2]; 759 for (j=0; j<nz; j++) { 760 xw[0] = x[vi[j]]; 761 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 762 } 763 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 764 x[i2] += xw[0]; 765 idiag -= 1; 766 i2 -= 1; 767 } 768 break; 769 case 2: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 4*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; 775 for (j=0; j<nz; j++) { 776 idx = 2*vi[j]; 777 it = 4*j; 778 xw[0] = x[idx]; xw[1] = x[1+idx]; 779 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 780 } 781 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; 783 idiag -= 4; 784 i2 -= 2; 785 } 786 break; 787 case 3: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 9*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 793 while (nz--) { 794 idx = 3*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 797 v += 9; 798 } 799 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 801 idiag -= 9; 802 i2 -= 3; 803 } 804 break; 805 case 4: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 16*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 811 while (nz--) { 812 idx = 4*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 815 v += 16; 816 } 817 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 819 idiag -= 16; 820 i2 -= 4; 821 } 822 break; 823 case 5: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 25*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 829 while (nz--) { 830 idx = 5*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 833 v += 25; 834 } 835 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 837 idiag -= 25; 838 i2 -= 5; 839 } 840 break; 841 case 6: 842 for (i=m-1; i>=0; i--) { 843 v = aa + 36*ai[i]; 844 vi = aj + ai[i]; 845 nz = ai[i+1] - ai[i]; 846 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 847 while (nz--) { 848 idx = 6*(*vi++); 849 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 850 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 851 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 852 v += 36; 853 } 854 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 855 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 856 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 857 idiag -= 36; 858 i2 -= 6; 859 } 860 break; 861 case 7: 862 for (i=m-1; i>=0; i--) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = ai[i+1] - ai[i]; 866 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 867 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 868 while (nz--) { 869 idx = 7*(*vi++); 870 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 871 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 872 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 873 v += 49; 874 } 875 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 876 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 877 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 878 idiag -= 49; 879 i2 -= 7; 880 } 881 break; 882 default: 883 for (i=m-1; i>=0; i--) { 884 v = aa + bs2*ai[i]; 885 vi = aj + ai[i]; 886 nz = ai[i+1] - ai[i]; 887 888 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 /* copy all rows of x that are needed into contiguous space */ 890 workt = work; 891 for (j=0; j<nz; j++) { 892 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 893 workt += bs; 894 } 895 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 896 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 897 898 idiag -= bs2; 899 i2 -= bs; 900 } 901 break; 902 } 903 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 904 } 905 } 906 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 907 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 912 /* 913 Special version for direct calls from Fortran (Used in PETSc-fun3d) 914 */ 915 #if defined(PETSC_HAVE_FORTRAN_CAPS) 916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 918 #define matsetvaluesblocked4_ matsetvaluesblocked4 919 #endif 920 921 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 922 { 923 Mat A = *AA; 924 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 925 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 926 PetscInt *ai =a->i,*ailen=a->ilen; 927 PetscInt *aj =a->j,stepval,lastcol = -1; 928 const PetscScalar *value = v; 929 MatScalar *ap,*aa = a->a,*bap; 930 931 PetscFunctionBegin; 932 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 933 stepval = (n-1)*4; 934 for (k=0; k<m; k++) { /* loop over added rows */ 935 row = im[k]; 936 rp = aj + ai[row]; 937 ap = aa + 16*ai[row]; 938 nrow = ailen[row]; 939 low = 0; 940 high = nrow; 941 for (l=0; l<n; l++) { /* loop over added columns */ 942 col = in[l]; 943 if (col <= lastcol) low = 0; 944 else high = nrow; 945 lastcol = col; 946 value = v + k*(stepval+4 + l)*4; 947 while (high-low > 7) { 948 t = (low+high)/2; 949 if (rp[t] > col) high = t; 950 else low = t; 951 } 952 for (i=low; i<high; i++) { 953 if (rp[i] > col) break; 954 if (rp[i] == col) { 955 bap = ap + 16*i; 956 for (ii=0; ii<4; ii++,value+=stepval) { 957 for (jj=ii; jj<16; jj+=4) { 958 bap[jj] += *value++; 959 } 960 } 961 goto noinsert2; 962 } 963 } 964 N = nrow++ - 1; 965 high++; /* added new column index thus must search to one higher than before */ 966 /* shift up all the later entries in this row */ 967 for (ii=N; ii>=i; ii--) { 968 rp[ii+1] = rp[ii]; 969 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 970 } 971 if (N >= i) { 972 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 973 } 974 rp[i] = col; 975 bap = ap + 16*i; 976 for (ii=0; ii<4; ii++,value+=stepval) { 977 for (jj=ii; jj<16; jj+=4) { 978 bap[jj] = *value++; 979 } 980 } 981 noinsert2:; 982 low = i; 983 } 984 ailen[row] = nrow; 985 } 986 PetscFunctionReturnVoid(); 987 } 988 989 #if defined(PETSC_HAVE_FORTRAN_CAPS) 990 #define matsetvalues4_ MATSETVALUES4 991 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 992 #define matsetvalues4_ matsetvalues4 993 #endif 994 995 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 996 { 997 Mat A = *AA; 998 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 999 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1000 PetscInt *ai=a->i,*ailen=a->ilen; 1001 PetscInt *aj=a->j,brow,bcol; 1002 PetscInt ridx,cidx,lastcol = -1; 1003 MatScalar *ap,value,*aa=a->a,*bap; 1004 1005 PetscFunctionBegin; 1006 for (k=0; k<m; k++) { /* loop over added rows */ 1007 row = im[k]; brow = row/4; 1008 rp = aj + ai[brow]; 1009 ap = aa + 16*ai[brow]; 1010 nrow = ailen[brow]; 1011 low = 0; 1012 high = nrow; 1013 for (l=0; l<n; l++) { /* loop over added columns */ 1014 col = in[l]; bcol = col/4; 1015 ridx = row % 4; cidx = col % 4; 1016 value = v[l + k*n]; 1017 if (col <= lastcol) low = 0; 1018 else high = nrow; 1019 lastcol = col; 1020 while (high-low > 7) { 1021 t = (low+high)/2; 1022 if (rp[t] > bcol) high = t; 1023 else low = t; 1024 } 1025 for (i=low; i<high; i++) { 1026 if (rp[i] > bcol) break; 1027 if (rp[i] == bcol) { 1028 bap = ap + 16*i + 4*cidx + ridx; 1029 *bap += value; 1030 goto noinsert1; 1031 } 1032 } 1033 N = nrow++ - 1; 1034 high++; /* added new column thus must search to one higher than before */ 1035 /* shift up all the later entries in this row */ 1036 for (ii=N; ii>=i; ii--) { 1037 rp[ii+1] = rp[ii]; 1038 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1039 } 1040 if (N>=i) { 1041 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1042 } 1043 rp[i] = bcol; 1044 ap[16*i + 4*cidx + ridx] = value; 1045 noinsert1:; 1046 low = i; 1047 } 1048 ailen[brow] = nrow; 1049 } 1050 PetscFunctionReturnVoid(); 1051 } 1052 1053 /* 1054 Checks for missing diagonals 1055 */ 1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1057 { 1058 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt *diag,*ii = a->i,i; 1061 1062 PetscFunctionBegin; 1063 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1064 *missing = PETSC_FALSE; 1065 if (A->rmap->n > 0 && !ii) { 1066 *missing = PETSC_TRUE; 1067 if (d) *d = 0; 1068 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1069 } else { 1070 diag = a->diag; 1071 for (i=0; i<a->mbs; i++) { 1072 if (diag[i] >= ii[i+1]) { 1073 *missing = PETSC_TRUE; 1074 if (d) *d = i; 1075 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1076 break; 1077 } 1078 } 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1084 { 1085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 PetscInt i,j,m = a->mbs; 1088 1089 PetscFunctionBegin; 1090 if (!a->diag) { 1091 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1092 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1093 a->free_diag = PETSC_TRUE; 1094 } 1095 for (i=0; i<m; i++) { 1096 a->diag[i] = a->i[i+1]; 1097 for (j=a->i[i]; j<a->i[i+1]; j++) { 1098 if (a->j[j] == i) { 1099 a->diag[i] = j; 1100 break; 1101 } 1102 } 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 1108 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1109 { 1110 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1113 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1114 1115 PetscFunctionBegin; 1116 *nn = n; 1117 if (!ia) PetscFunctionReturn(0); 1118 if (symmetric) { 1119 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1120 nz = tia[n]; 1121 } else { 1122 tia = a->i; tja = a->j; 1123 } 1124 1125 if (!blockcompressed && bs > 1) { 1126 (*nn) *= bs; 1127 /* malloc & create the natural set of indices */ 1128 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1129 if (n) { 1130 (*ia)[0] = oshift; 1131 for (j=1; j<bs; j++) { 1132 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1133 } 1134 } 1135 1136 for (i=1; i<n; i++) { 1137 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1138 for (j=1; j<bs; j++) { 1139 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1140 } 1141 } 1142 if (n) { 1143 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1144 } 1145 1146 if (inja) { 1147 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1148 cnt = 0; 1149 for (i=0; i<n; i++) { 1150 for (j=0; j<bs; j++) { 1151 for (k=tia[i]; k<tia[i+1]; k++) { 1152 for (l=0; l<bs; l++) { 1153 (*ja)[cnt++] = bs*tja[k] + l; 1154 } 1155 } 1156 } 1157 } 1158 } 1159 1160 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1161 ierr = PetscFree(tia);CHKERRQ(ierr); 1162 ierr = PetscFree(tja);CHKERRQ(ierr); 1163 } 1164 } else if (oshift == 1) { 1165 if (symmetric) { 1166 nz = tia[A->rmap->n/bs]; 1167 /* add 1 to i and j indices */ 1168 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1169 *ia = tia; 1170 if (ja) { 1171 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1172 *ja = tja; 1173 } 1174 } else { 1175 nz = a->i[A->rmap->n/bs]; 1176 /* malloc space and add 1 to i and j indices */ 1177 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1178 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1179 if (ja) { 1180 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1181 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1182 } 1183 } 1184 } else { 1185 *ia = tia; 1186 if (ja) *ja = tja; 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1192 { 1193 PetscErrorCode ierr; 1194 1195 PetscFunctionBegin; 1196 if (!ia) PetscFunctionReturn(0); 1197 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1198 ierr = PetscFree(*ia);CHKERRQ(ierr); 1199 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1205 { 1206 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 #if defined(PETSC_USE_LOG) 1211 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1212 #endif 1213 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1214 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1215 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1216 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1217 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1218 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1219 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1220 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1221 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1222 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1223 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1224 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1225 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1226 1227 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1228 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1229 ierr = PetscFree(A->data);CHKERRQ(ierr); 1230 1231 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1242 #if defined(PETSC_HAVE_HYPRE) 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1244 #endif 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1249 { 1250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1251 PetscErrorCode ierr; 1252 1253 PetscFunctionBegin; 1254 switch (op) { 1255 case MAT_ROW_ORIENTED: 1256 a->roworiented = flg; 1257 break; 1258 case MAT_KEEP_NONZERO_PATTERN: 1259 a->keepnonzeropattern = flg; 1260 break; 1261 case MAT_NEW_NONZERO_LOCATIONS: 1262 a->nonew = (flg ? 0 : 1); 1263 break; 1264 case MAT_NEW_NONZERO_LOCATION_ERR: 1265 a->nonew = (flg ? -1 : 0); 1266 break; 1267 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1268 a->nonew = (flg ? -2 : 0); 1269 break; 1270 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1271 a->nounused = (flg ? -1 : 0); 1272 break; 1273 case MAT_NEW_DIAGONALS: 1274 case MAT_IGNORE_OFF_PROC_ENTRIES: 1275 case MAT_USE_HASH_TABLE: 1276 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1277 break; 1278 case MAT_SPD: 1279 case MAT_SYMMETRIC: 1280 case MAT_STRUCTURALLY_SYMMETRIC: 1281 case MAT_HERMITIAN: 1282 case MAT_SYMMETRY_ETERNAL: 1283 case MAT_SUBMAT_SINGLEIS: 1284 case MAT_STRUCTURE_ONLY: 1285 /* These options are handled directly by MatSetOption() */ 1286 break; 1287 default: 1288 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1289 } 1290 PetscFunctionReturn(0); 1291 } 1292 1293 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1294 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1295 { 1296 PetscErrorCode ierr; 1297 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1298 MatScalar *aa_i; 1299 PetscScalar *v_i; 1300 1301 PetscFunctionBegin; 1302 bs = A->rmap->bs; 1303 bs2 = bs*bs; 1304 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1305 1306 bn = row/bs; /* Block number */ 1307 bp = row % bs; /* Block Position */ 1308 M = ai[bn+1] - ai[bn]; 1309 *nz = bs*M; 1310 1311 if (v) { 1312 *v = 0; 1313 if (*nz) { 1314 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1315 for (i=0; i<M; i++) { /* for each block in the block row */ 1316 v_i = *v + i*bs; 1317 aa_i = aa + bs2*(ai[bn] + i); 1318 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1319 } 1320 } 1321 } 1322 1323 if (idx) { 1324 *idx = 0; 1325 if (*nz) { 1326 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1327 for (i=0; i<M; i++) { /* for each block in the block row */ 1328 idx_i = *idx + i*bs; 1329 itmp = bs*aj[ai[bn] + i]; 1330 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1331 } 1332 } 1333 } 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1338 { 1339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1344 PetscFunctionReturn(0); 1345 } 1346 1347 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1348 { 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1353 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1354 PetscFunctionReturn(0); 1355 } 1356 1357 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1358 { 1359 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1360 Mat C; 1361 PetscErrorCode ierr; 1362 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1363 PetscInt *rows,*cols,bs2=a->bs2; 1364 MatScalar *array; 1365 1366 PetscFunctionBegin; 1367 if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1368 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1369 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1370 1371 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1372 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1373 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1374 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1375 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1376 ierr = PetscFree(col);CHKERRQ(ierr); 1377 } else { 1378 C = *B; 1379 } 1380 1381 array = a->a; 1382 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1383 for (i=0; i<mbs; i++) { 1384 cols[0] = i*bs; 1385 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1386 len = ai[i+1] - ai[i]; 1387 for (j=0; j<len; j++) { 1388 rows[0] = (*aj++)*bs; 1389 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1390 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1391 array += bs2; 1392 } 1393 } 1394 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1395 1396 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1397 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1398 1399 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1400 *B = C; 1401 } else { 1402 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1403 } 1404 PetscFunctionReturn(0); 1405 } 1406 1407 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1408 { 1409 PetscErrorCode ierr; 1410 Mat Btrans; 1411 1412 PetscFunctionBegin; 1413 *f = PETSC_FALSE; 1414 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1415 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1416 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1421 { 1422 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1423 PetscErrorCode ierr; 1424 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1425 int fd; 1426 PetscScalar *aa; 1427 FILE *file; 1428 1429 PetscFunctionBegin; 1430 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1431 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1432 col_lens[0] = MAT_FILE_CLASSID; 1433 1434 col_lens[1] = A->rmap->N; 1435 col_lens[2] = A->cmap->n; 1436 col_lens[3] = a->nz*bs2; 1437 1438 /* store lengths of each row and write (including header) to file */ 1439 count = 0; 1440 for (i=0; i<a->mbs; i++) { 1441 for (j=0; j<bs; j++) { 1442 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1443 } 1444 } 1445 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1446 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1447 1448 /* store column indices (zero start index) */ 1449 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1450 count = 0; 1451 for (i=0; i<a->mbs; i++) { 1452 for (j=0; j<bs; j++) { 1453 for (k=a->i[i]; k<a->i[i+1]; k++) { 1454 for (l=0; l<bs; l++) { 1455 jj[count++] = bs*a->j[k] + l; 1456 } 1457 } 1458 } 1459 } 1460 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1461 ierr = PetscFree(jj);CHKERRQ(ierr); 1462 1463 /* store nonzero values */ 1464 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1465 count = 0; 1466 for (i=0; i<a->mbs; i++) { 1467 for (j=0; j<bs; j++) { 1468 for (k=a->i[i]; k<a->i[i+1]; k++) { 1469 for (l=0; l<bs; l++) { 1470 aa[count++] = a->a[bs2*k + l*bs + j]; 1471 } 1472 } 1473 } 1474 } 1475 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1476 ierr = PetscFree(aa);CHKERRQ(ierr); 1477 1478 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1479 if (file) { 1480 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1486 { 1487 PetscErrorCode ierr; 1488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1489 PetscInt i,bs = A->rmap->bs,k; 1490 1491 PetscFunctionBegin; 1492 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1493 for (i=0; i<a->mbs; i++) { 1494 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1495 for (k=a->i[i]; k<a->i[i+1]; k++) { 1496 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1497 } 1498 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1499 } 1500 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1505 { 1506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1507 PetscErrorCode ierr; 1508 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1509 PetscViewerFormat format; 1510 1511 PetscFunctionBegin; 1512 if (A->structure_only) { 1513 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1514 PetscFunctionReturn(0); 1515 } 1516 1517 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1518 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1519 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1520 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1521 const char *matname; 1522 Mat aij; 1523 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1524 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1525 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1526 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1527 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1528 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1529 PetscFunctionReturn(0); 1530 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1531 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1532 for (i=0; i<a->mbs; i++) { 1533 for (j=0; j<bs; j++) { 1534 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1535 for (k=a->i[i]; k<a->i[i+1]; k++) { 1536 for (l=0; l<bs; l++) { 1537 #if defined(PETSC_USE_COMPLEX) 1538 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1543 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1544 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1545 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1546 } 1547 #else 1548 if (a->a[bs2*k + l*bs + j] != 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1550 } 1551 #endif 1552 } 1553 } 1554 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1555 } 1556 } 1557 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1558 } else { 1559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1560 for (i=0; i<a->mbs; i++) { 1561 for (j=0; j<bs; j++) { 1562 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1563 for (k=a->i[i]; k<a->i[i+1]; k++) { 1564 for (l=0; l<bs; l++) { 1565 #if defined(PETSC_USE_COMPLEX) 1566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1568 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1571 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1572 } else { 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1574 } 1575 #else 1576 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1577 #endif 1578 } 1579 } 1580 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1581 } 1582 } 1583 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1584 } 1585 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #include <petscdraw.h> 1590 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1591 { 1592 Mat A = (Mat) Aa; 1593 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1594 PetscErrorCode ierr; 1595 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1596 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1597 MatScalar *aa; 1598 PetscViewer viewer; 1599 PetscViewerFormat format; 1600 1601 PetscFunctionBegin; 1602 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1603 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1604 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1605 1606 /* loop over matrix elements drawing boxes */ 1607 1608 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1609 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1610 /* Blue for negative, Cyan for zero and Red for positive */ 1611 color = PETSC_DRAW_BLUE; 1612 for (i=0,row=0; i<mbs; i++,row+=bs) { 1613 for (j=a->i[i]; j<a->i[i+1]; j++) { 1614 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1615 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1616 aa = a->a + j*bs2; 1617 for (k=0; k<bs; k++) { 1618 for (l=0; l<bs; l++) { 1619 if (PetscRealPart(*aa++) >= 0.) continue; 1620 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1621 } 1622 } 1623 } 1624 } 1625 color = PETSC_DRAW_CYAN; 1626 for (i=0,row=0; i<mbs; i++,row+=bs) { 1627 for (j=a->i[i]; j<a->i[i+1]; j++) { 1628 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1629 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1630 aa = a->a + j*bs2; 1631 for (k=0; k<bs; k++) { 1632 for (l=0; l<bs; l++) { 1633 if (PetscRealPart(*aa++) != 0.) continue; 1634 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1635 } 1636 } 1637 } 1638 } 1639 color = PETSC_DRAW_RED; 1640 for (i=0,row=0; i<mbs; i++,row+=bs) { 1641 for (j=a->i[i]; j<a->i[i+1]; j++) { 1642 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1644 aa = a->a + j*bs2; 1645 for (k=0; k<bs; k++) { 1646 for (l=0; l<bs; l++) { 1647 if (PetscRealPart(*aa++) <= 0.) continue; 1648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1649 } 1650 } 1651 } 1652 } 1653 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1654 } else { 1655 /* use contour shading to indicate magnitude of values */ 1656 /* first determine max of all nonzero values */ 1657 PetscReal minv = 0.0, maxv = 0.0; 1658 PetscDraw popup; 1659 1660 for (i=0; i<a->nz*a->bs2; i++) { 1661 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1662 } 1663 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1664 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1665 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1666 1667 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1668 for (i=0,row=0; i<mbs; i++,row+=bs) { 1669 for (j=a->i[i]; j<a->i[i+1]; j++) { 1670 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1671 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1672 aa = a->a + j*bs2; 1673 for (k=0; k<bs; k++) { 1674 for (l=0; l<bs; l++) { 1675 MatScalar v = *aa++; 1676 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1678 } 1679 } 1680 } 1681 } 1682 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1688 { 1689 PetscErrorCode ierr; 1690 PetscReal xl,yl,xr,yr,w,h; 1691 PetscDraw draw; 1692 PetscBool isnull; 1693 1694 PetscFunctionBegin; 1695 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1696 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1697 if (isnull) PetscFunctionReturn(0); 1698 1699 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1700 xr += w; yr += h; xl = -w; yl = -h; 1701 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1702 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1705 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1710 { 1711 PetscErrorCode ierr; 1712 PetscBool iascii,isbinary,isdraw; 1713 1714 PetscFunctionBegin; 1715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1716 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1717 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1718 if (iascii) { 1719 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1720 } else if (isbinary) { 1721 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1722 } else if (isdraw) { 1723 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1724 } else { 1725 Mat B; 1726 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1727 ierr = MatView(B,viewer);CHKERRQ(ierr); 1728 ierr = MatDestroy(&B);CHKERRQ(ierr); 1729 } 1730 PetscFunctionReturn(0); 1731 } 1732 1733 1734 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1735 { 1736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1737 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1738 PetscInt *ai = a->i,*ailen = a->ilen; 1739 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1740 MatScalar *ap,*aa = a->a; 1741 1742 PetscFunctionBegin; 1743 for (k=0; k<m; k++) { /* loop over rows */ 1744 row = im[k]; brow = row/bs; 1745 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1746 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1747 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1748 nrow = ailen[brow]; 1749 for (l=0; l<n; l++) { /* loop over columns */ 1750 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1751 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1752 col = in[l]; 1753 bcol = col/bs; 1754 cidx = col%bs; 1755 ridx = row%bs; 1756 high = nrow; 1757 low = 0; /* assume unsorted */ 1758 while (high-low > 5) { 1759 t = (low+high)/2; 1760 if (rp[t] > bcol) high = t; 1761 else low = t; 1762 } 1763 for (i=low; i<high; i++) { 1764 if (rp[i] > bcol) break; 1765 if (rp[i] == bcol) { 1766 *v++ = ap[bs2*i+bs*cidx+ridx]; 1767 goto finished; 1768 } 1769 } 1770 *v++ = 0.0; 1771 finished:; 1772 } 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1778 { 1779 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1780 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1781 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1782 PetscErrorCode ierr; 1783 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1784 PetscBool roworiented=a->roworiented; 1785 const PetscScalar *value = v; 1786 MatScalar *ap=NULL,*aa = a->a,*bap; 1787 1788 PetscFunctionBegin; 1789 if (roworiented) { 1790 stepval = (n-1)*bs; 1791 } else { 1792 stepval = (m-1)*bs; 1793 } 1794 for (k=0; k<m; k++) { /* loop over added rows */ 1795 row = im[k]; 1796 if (row < 0) continue; 1797 #if defined(PETSC_USE_DEBUG) 1798 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1799 #endif 1800 rp = aj + ai[row]; 1801 if (!A->structure_only) ap = aa + bs2*ai[row]; 1802 rmax = imax[row]; 1803 nrow = ailen[row]; 1804 low = 0; 1805 high = nrow; 1806 for (l=0; l<n; l++) { /* loop over added columns */ 1807 if (in[l] < 0) continue; 1808 #if defined(PETSC_USE_DEBUG) 1809 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1810 #endif 1811 col = in[l]; 1812 if (!A->structure_only) { 1813 if (roworiented) { 1814 value = v + (k*(stepval+bs) + l)*bs; 1815 } else { 1816 value = v + (l*(stepval+bs) + k)*bs; 1817 } 1818 } 1819 if (col <= lastcol) low = 0; 1820 else high = nrow; 1821 lastcol = col; 1822 while (high-low > 7) { 1823 t = (low+high)/2; 1824 if (rp[t] > col) high = t; 1825 else low = t; 1826 } 1827 for (i=low; i<high; i++) { 1828 if (rp[i] > col) break; 1829 if (rp[i] == col) { 1830 if (A->structure_only) goto noinsert2; 1831 bap = ap + bs2*i; 1832 if (roworiented) { 1833 if (is == ADD_VALUES) { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] += *value++; 1837 } 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=stepval) { 1841 for (jj=ii; jj<bs2; jj+=bs) { 1842 bap[jj] = *value++; 1843 } 1844 } 1845 } 1846 } else { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1849 for (jj=0; jj<bs; jj++) { 1850 bap[jj] += value[jj]; 1851 } 1852 bap += bs; 1853 } 1854 } else { 1855 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1856 for (jj=0; jj<bs; jj++) { 1857 bap[jj] = value[jj]; 1858 } 1859 bap += bs; 1860 } 1861 } 1862 } 1863 goto noinsert2; 1864 } 1865 } 1866 if (nonew == 1) goto noinsert2; 1867 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1868 if (A->structure_only) { 1869 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1870 } else { 1871 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1872 } 1873 N = nrow++ - 1; high++; 1874 /* shift up all the later entries in this row */ 1875 for (ii=N; ii>=i; ii--) { 1876 rp[ii+1] = rp[ii]; 1877 if (!A->structure_only) { 1878 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1879 } 1880 } 1881 if (N >= i && !A->structure_only) { 1882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1883 } 1884 1885 rp[i] = col; 1886 if (!A->structure_only) { 1887 bap = ap + bs2*i; 1888 if (roworiented) { 1889 for (ii=0; ii<bs; ii++,value+=stepval) { 1890 for (jj=ii; jj<bs2; jj+=bs) { 1891 bap[jj] = *value++; 1892 } 1893 } 1894 } else { 1895 for (ii=0; ii<bs; ii++,value+=stepval) { 1896 for (jj=0; jj<bs; jj++) { 1897 *bap++ = *value++; 1898 } 1899 } 1900 } 1901 } 1902 noinsert2:; 1903 low = i; 1904 } 1905 ailen[row] = nrow; 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1911 { 1912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1913 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1914 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1915 PetscErrorCode ierr; 1916 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1917 MatScalar *aa = a->a,*ap; 1918 PetscReal ratio=0.6; 1919 1920 PetscFunctionBegin; 1921 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1922 1923 if (m) rmax = ailen[0]; 1924 for (i=1; i<mbs; i++) { 1925 /* move each row back by the amount of empty slots (fshift) before it*/ 1926 fshift += imax[i-1] - ailen[i-1]; 1927 rmax = PetscMax(rmax,ailen[i]); 1928 if (fshift) { 1929 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1930 N = ailen[i]; 1931 for (j=0; j<N; j++) { 1932 ip[j-fshift] = ip[j]; 1933 if (!A->structure_only) { 1934 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1935 } 1936 } 1937 } 1938 ai[i] = ai[i-1] + ailen[i-1]; 1939 } 1940 if (mbs) { 1941 fshift += imax[mbs-1] - ailen[mbs-1]; 1942 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1943 } 1944 1945 /* reset ilen and imax for each row */ 1946 a->nonzerorowcnt = 0; 1947 if (A->structure_only) { 1948 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1949 } else { /* !A->structure_only */ 1950 for (i=0; i<mbs; i++) { 1951 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1952 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1953 } 1954 } 1955 a->nz = ai[mbs]; 1956 1957 /* diagonals may have moved, so kill the diagonal pointers */ 1958 a->idiagvalid = PETSC_FALSE; 1959 if (fshift && a->diag) { 1960 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1961 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1962 a->diag = 0; 1963 } 1964 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1965 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1966 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1967 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1968 1969 A->info.mallocs += a->reallocs; 1970 a->reallocs = 0; 1971 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1972 a->rmax = rmax; 1973 1974 if (!A->structure_only) { 1975 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1976 } 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* 1981 This function returns an array of flags which indicate the locations of contiguous 1982 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1983 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1984 Assume: sizes should be long enough to hold all the values. 1985 */ 1986 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1987 { 1988 PetscInt i,j,k,row; 1989 PetscBool flg; 1990 1991 PetscFunctionBegin; 1992 for (i=0,j=0; i<n; j++) { 1993 row = idx[i]; 1994 if (row%bs!=0) { /* Not the begining of a block */ 1995 sizes[j] = 1; 1996 i++; 1997 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1998 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1999 i++; 2000 } else { /* Begining of the block, so check if the complete block exists */ 2001 flg = PETSC_TRUE; 2002 for (k=1; k<bs; k++) { 2003 if (row+k != idx[i+k]) { /* break in the block */ 2004 flg = PETSC_FALSE; 2005 break; 2006 } 2007 } 2008 if (flg) { /* No break in the bs */ 2009 sizes[j] = bs; 2010 i += bs; 2011 } else { 2012 sizes[j] = 1; 2013 i++; 2014 } 2015 } 2016 } 2017 *bs_max = j; 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2022 { 2023 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2024 PetscErrorCode ierr; 2025 PetscInt i,j,k,count,*rows; 2026 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2027 PetscScalar zero = 0.0; 2028 MatScalar *aa; 2029 const PetscScalar *xx; 2030 PetscScalar *bb; 2031 2032 PetscFunctionBegin; 2033 /* fix right hand side if needed */ 2034 if (x && b) { 2035 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2036 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2037 for (i=0; i<is_n; i++) { 2038 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2039 } 2040 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2041 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2042 } 2043 2044 /* Make a copy of the IS and sort it */ 2045 /* allocate memory for rows,sizes */ 2046 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2047 2048 /* copy IS values to rows, and sort them */ 2049 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2050 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2051 2052 if (baij->keepnonzeropattern) { 2053 for (i=0; i<is_n; i++) sizes[i] = 1; 2054 bs_max = is_n; 2055 } else { 2056 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2057 A->nonzerostate++; 2058 } 2059 2060 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2061 row = rows[j]; 2062 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2063 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2064 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2065 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2066 if (diag != (PetscScalar)0.0) { 2067 if (baij->ilen[row/bs] > 0) { 2068 baij->ilen[row/bs] = 1; 2069 baij->j[baij->i[row/bs]] = row/bs; 2070 2071 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2072 } 2073 /* Now insert all the diagonal values for this bs */ 2074 for (k=0; k<bs; k++) { 2075 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2076 } 2077 } else { /* (diag == 0.0) */ 2078 baij->ilen[row/bs] = 0; 2079 } /* end (diag == 0.0) */ 2080 } else { /* (sizes[i] != bs) */ 2081 #if defined(PETSC_USE_DEBUG) 2082 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2083 #endif 2084 for (k=0; k<count; k++) { 2085 aa[0] = zero; 2086 aa += bs; 2087 } 2088 if (diag != (PetscScalar)0.0) { 2089 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2090 } 2091 } 2092 } 2093 2094 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2100 { 2101 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2102 PetscErrorCode ierr; 2103 PetscInt i,j,k,count; 2104 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2105 PetscScalar zero = 0.0; 2106 MatScalar *aa; 2107 const PetscScalar *xx; 2108 PetscScalar *bb; 2109 PetscBool *zeroed,vecs = PETSC_FALSE; 2110 2111 PetscFunctionBegin; 2112 /* fix right hand side if needed */ 2113 if (x && b) { 2114 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2115 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2116 vecs = PETSC_TRUE; 2117 } 2118 2119 /* zero the columns */ 2120 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2121 for (i=0; i<is_n; i++) { 2122 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2123 zeroed[is_idx[i]] = PETSC_TRUE; 2124 } 2125 for (i=0; i<A->rmap->N; i++) { 2126 if (!zeroed[i]) { 2127 row = i/bs; 2128 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2129 for (k=0; k<bs; k++) { 2130 col = bs*baij->j[j] + k; 2131 if (zeroed[col]) { 2132 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2133 if (vecs) bb[i] -= aa[0]*xx[col]; 2134 aa[0] = 0.0; 2135 } 2136 } 2137 } 2138 } else if (vecs) bb[i] = diag*xx[i]; 2139 } 2140 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2141 if (vecs) { 2142 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2143 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2144 } 2145 2146 /* zero the rows */ 2147 for (i=0; i<is_n; i++) { 2148 row = is_idx[i]; 2149 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2150 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2151 for (k=0; k<count; k++) { 2152 aa[0] = zero; 2153 aa += bs; 2154 } 2155 if (diag != (PetscScalar)0.0) { 2156 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2157 } 2158 } 2159 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2164 { 2165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2166 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2167 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2168 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2169 PetscErrorCode ierr; 2170 PetscInt ridx,cidx,bs2=a->bs2; 2171 PetscBool roworiented=a->roworiented; 2172 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2173 2174 PetscFunctionBegin; 2175 for (k=0; k<m; k++) { /* loop over added rows */ 2176 row = im[k]; 2177 brow = row/bs; 2178 if (row < 0) continue; 2179 #if defined(PETSC_USE_DEBUG) 2180 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2181 #endif 2182 rp = aj + ai[brow]; 2183 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2184 rmax = imax[brow]; 2185 nrow = ailen[brow]; 2186 low = 0; 2187 high = nrow; 2188 for (l=0; l<n; l++) { /* loop over added columns */ 2189 if (in[l] < 0) continue; 2190 #if defined(PETSC_USE_DEBUG) 2191 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2192 #endif 2193 col = in[l]; bcol = col/bs; 2194 ridx = row % bs; cidx = col % bs; 2195 if (!A->structure_only) { 2196 if (roworiented) { 2197 value = v[l + k*n]; 2198 } else { 2199 value = v[k + l*m]; 2200 } 2201 } 2202 if (col <= lastcol) low = 0; else high = nrow; 2203 lastcol = col; 2204 while (high-low > 7) { 2205 t = (low+high)/2; 2206 if (rp[t] > bcol) high = t; 2207 else low = t; 2208 } 2209 for (i=low; i<high; i++) { 2210 if (rp[i] > bcol) break; 2211 if (rp[i] == bcol) { 2212 bap = ap + bs2*i + bs*cidx + ridx; 2213 if (!A->structure_only) { 2214 if (is == ADD_VALUES) *bap += value; 2215 else *bap = value; 2216 } 2217 goto noinsert1; 2218 } 2219 } 2220 if (nonew == 1) goto noinsert1; 2221 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2222 if (A->structure_only) { 2223 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2224 } else { 2225 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2226 } 2227 N = nrow++ - 1; high++; 2228 /* shift up all the later entries in this row */ 2229 for (ii=N; ii>=i; ii--) { 2230 rp[ii+1] = rp[ii]; 2231 if (!A->structure_only) { 2232 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2233 } 2234 } 2235 if (N>=i && !A->structure_only) { 2236 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2237 } 2238 rp[i] = bcol; 2239 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2240 a->nz++; 2241 A->nonzerostate++; 2242 noinsert1:; 2243 low = i; 2244 } 2245 ailen[brow] = nrow; 2246 } 2247 PetscFunctionReturn(0); 2248 } 2249 2250 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2251 { 2252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2253 Mat outA; 2254 PetscErrorCode ierr; 2255 PetscBool row_identity,col_identity; 2256 2257 PetscFunctionBegin; 2258 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2259 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2260 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2261 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2262 2263 outA = inA; 2264 inA->factortype = MAT_FACTOR_LU; 2265 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2266 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2267 2268 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2269 2270 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2271 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2272 a->row = row; 2273 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2274 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2275 a->col = col; 2276 2277 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2278 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2279 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2280 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2281 2282 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2283 if (!a->solve_work) { 2284 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2285 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2286 } 2287 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2292 { 2293 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2294 PetscInt i,nz,mbs; 2295 2296 PetscFunctionBegin; 2297 nz = baij->maxnz; 2298 mbs = baij->mbs; 2299 for (i=0; i<nz; i++) { 2300 baij->j[i] = indices[i]; 2301 } 2302 baij->nz = nz; 2303 for (i=0; i<mbs; i++) { 2304 baij->ilen[i] = baij->imax[i]; 2305 } 2306 PetscFunctionReturn(0); 2307 } 2308 2309 /*@ 2310 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2311 in the matrix. 2312 2313 Input Parameters: 2314 + mat - the SeqBAIJ matrix 2315 - indices - the column indices 2316 2317 Level: advanced 2318 2319 Notes: 2320 This can be called if you have precomputed the nonzero structure of the 2321 matrix and want to provide it to the matrix object to improve the performance 2322 of the MatSetValues() operation. 2323 2324 You MUST have set the correct numbers of nonzeros per row in the call to 2325 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2326 2327 MUST be called before any calls to MatSetValues(); 2328 2329 @*/ 2330 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2331 { 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2336 PetscValidPointer(indices,2); 2337 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2342 { 2343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2344 PetscErrorCode ierr; 2345 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2346 PetscReal atmp; 2347 PetscScalar *x,zero = 0.0; 2348 MatScalar *aa; 2349 PetscInt ncols,brow,krow,kcol; 2350 2351 PetscFunctionBegin; 2352 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2353 bs = A->rmap->bs; 2354 aa = a->a; 2355 ai = a->i; 2356 aj = a->j; 2357 mbs = a->mbs; 2358 2359 ierr = VecSet(v,zero);CHKERRQ(ierr); 2360 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2361 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2362 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2363 for (i=0; i<mbs; i++) { 2364 ncols = ai[1] - ai[0]; ai++; 2365 brow = bs*i; 2366 for (j=0; j<ncols; j++) { 2367 for (kcol=0; kcol<bs; kcol++) { 2368 for (krow=0; krow<bs; krow++) { 2369 atmp = PetscAbsScalar(*aa);aa++; 2370 row = brow + krow; /* row index */ 2371 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2372 } 2373 } 2374 aj++; 2375 } 2376 } 2377 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 /* If the two matrices have the same copy implementation, use fast copy. */ 2387 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2389 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2390 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2391 2392 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2393 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2394 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2395 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2396 } else { 2397 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2398 } 2399 PetscFunctionReturn(0); 2400 } 2401 2402 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2403 { 2404 PetscErrorCode ierr; 2405 2406 PetscFunctionBegin; 2407 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2412 { 2413 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2414 2415 PetscFunctionBegin; 2416 *array = a->a; 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2421 { 2422 PetscFunctionBegin; 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2427 { 2428 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2429 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2430 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 /* Set the number of nonzeros in the new matrix */ 2435 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2440 { 2441 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2442 PetscErrorCode ierr; 2443 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2444 PetscBLASInt one=1; 2445 2446 PetscFunctionBegin; 2447 if (str == SAME_NONZERO_PATTERN) { 2448 PetscScalar alpha = a; 2449 PetscBLASInt bnz; 2450 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2451 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2452 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2453 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2454 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2455 } else { 2456 Mat B; 2457 PetscInt *nnz; 2458 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2459 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2460 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2461 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2462 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2463 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2464 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2465 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2466 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2467 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2468 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2469 ierr = PetscFree(nnz);CHKERRQ(ierr); 2470 } 2471 PetscFunctionReturn(0); 2472 } 2473 2474 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2475 { 2476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2477 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2478 MatScalar *aa = a->a; 2479 2480 PetscFunctionBegin; 2481 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2486 { 2487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2488 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2489 MatScalar *aa = a->a; 2490 2491 PetscFunctionBegin; 2492 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2493 PetscFunctionReturn(0); 2494 } 2495 2496 /* 2497 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2498 */ 2499 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2500 { 2501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2502 PetscErrorCode ierr; 2503 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2504 PetscInt nz = a->i[m],row,*jj,mr,col; 2505 2506 PetscFunctionBegin; 2507 *nn = n; 2508 if (!ia) PetscFunctionReturn(0); 2509 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2510 else { 2511 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2512 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2513 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2514 jj = a->j; 2515 for (i=0; i<nz; i++) { 2516 collengths[jj[i]]++; 2517 } 2518 cia[0] = oshift; 2519 for (i=0; i<n; i++) { 2520 cia[i+1] = cia[i] + collengths[i]; 2521 } 2522 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2523 jj = a->j; 2524 for (row=0; row<m; row++) { 2525 mr = a->i[row+1] - a->i[row]; 2526 for (i=0; i<mr; i++) { 2527 col = *jj++; 2528 2529 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2530 } 2531 } 2532 ierr = PetscFree(collengths);CHKERRQ(ierr); 2533 *ia = cia; *ja = cja; 2534 } 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2539 { 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 if (!ia) PetscFunctionReturn(0); 2544 ierr = PetscFree(*ia);CHKERRQ(ierr); 2545 ierr = PetscFree(*ja);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /* 2550 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2551 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2552 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2553 */ 2554 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2555 { 2556 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2557 PetscErrorCode ierr; 2558 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2559 PetscInt nz = a->i[m],row,*jj,mr,col; 2560 PetscInt *cspidx; 2561 2562 PetscFunctionBegin; 2563 *nn = n; 2564 if (!ia) PetscFunctionReturn(0); 2565 2566 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2567 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2568 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2569 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2570 jj = a->j; 2571 for (i=0; i<nz; i++) { 2572 collengths[jj[i]]++; 2573 } 2574 cia[0] = oshift; 2575 for (i=0; i<n; i++) { 2576 cia[i+1] = cia[i] + collengths[i]; 2577 } 2578 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2579 jj = a->j; 2580 for (row=0; row<m; row++) { 2581 mr = a->i[row+1] - a->i[row]; 2582 for (i=0; i<mr; i++) { 2583 col = *jj++; 2584 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2585 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2586 } 2587 } 2588 ierr = PetscFree(collengths);CHKERRQ(ierr); 2589 *ia = cia; *ja = cja; 2590 *spidx = cspidx; 2591 PetscFunctionReturn(0); 2592 } 2593 2594 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2595 { 2596 PetscErrorCode ierr; 2597 2598 PetscFunctionBegin; 2599 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2600 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2601 PetscFunctionReturn(0); 2602 } 2603 2604 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2605 { 2606 PetscErrorCode ierr; 2607 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2608 2609 PetscFunctionBegin; 2610 if (!Y->preallocated || !aij->nz) { 2611 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2612 } 2613 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 /* -------------------------------------------------------------------*/ 2618 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2619 MatGetRow_SeqBAIJ, 2620 MatRestoreRow_SeqBAIJ, 2621 MatMult_SeqBAIJ_N, 2622 /* 4*/ MatMultAdd_SeqBAIJ_N, 2623 MatMultTranspose_SeqBAIJ, 2624 MatMultTransposeAdd_SeqBAIJ, 2625 0, 2626 0, 2627 0, 2628 /* 10*/ 0, 2629 MatLUFactor_SeqBAIJ, 2630 0, 2631 0, 2632 MatTranspose_SeqBAIJ, 2633 /* 15*/ MatGetInfo_SeqBAIJ, 2634 MatEqual_SeqBAIJ, 2635 MatGetDiagonal_SeqBAIJ, 2636 MatDiagonalScale_SeqBAIJ, 2637 MatNorm_SeqBAIJ, 2638 /* 20*/ 0, 2639 MatAssemblyEnd_SeqBAIJ, 2640 MatSetOption_SeqBAIJ, 2641 MatZeroEntries_SeqBAIJ, 2642 /* 24*/ MatZeroRows_SeqBAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /* 29*/ MatSetUp_SeqBAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /* 34*/ MatDuplicate_SeqBAIJ, 2653 0, 2654 0, 2655 MatILUFactor_SeqBAIJ, 2656 0, 2657 /* 39*/ MatAXPY_SeqBAIJ, 2658 MatCreateSubMatrices_SeqBAIJ, 2659 MatIncreaseOverlap_SeqBAIJ, 2660 MatGetValues_SeqBAIJ, 2661 MatCopy_SeqBAIJ, 2662 /* 44*/ 0, 2663 MatScale_SeqBAIJ, 2664 MatShift_SeqBAIJ, 2665 0, 2666 MatZeroRowsColumns_SeqBAIJ, 2667 /* 49*/ 0, 2668 MatGetRowIJ_SeqBAIJ, 2669 MatRestoreRowIJ_SeqBAIJ, 2670 MatGetColumnIJ_SeqBAIJ, 2671 MatRestoreColumnIJ_SeqBAIJ, 2672 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2673 0, 2674 0, 2675 0, 2676 MatSetValuesBlocked_SeqBAIJ, 2677 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2678 MatDestroy_SeqBAIJ, 2679 MatView_SeqBAIJ, 2680 0, 2681 0, 2682 /* 64*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2688 0, 2689 MatConvert_Basic, 2690 0, 2691 0, 2692 /* 74*/ 0, 2693 MatFDColoringApply_BAIJ, 2694 0, 2695 0, 2696 0, 2697 /* 79*/ 0, 2698 0, 2699 0, 2700 0, 2701 MatLoad_SeqBAIJ, 2702 /* 84*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /* 89*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /* 94*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 99*/ 0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /*104*/ 0, 2723 MatRealPart_SeqBAIJ, 2724 MatImaginaryPart_SeqBAIJ, 2725 0, 2726 0, 2727 /*109*/ 0, 2728 0, 2729 0, 2730 0, 2731 MatMissingDiagonal_SeqBAIJ, 2732 /*114*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*119*/ 0, 2738 0, 2739 MatMultHermitianTranspose_SeqBAIJ, 2740 MatMultHermitianTransposeAdd_SeqBAIJ, 2741 0, 2742 /*124*/ 0, 2743 0, 2744 MatInvertBlockDiagonal_SeqBAIJ, 2745 0, 2746 0, 2747 /*129*/ 0, 2748 0, 2749 0, 2750 0, 2751 0, 2752 /*134*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*139*/ MatSetBlockSizes_Default, 2758 0, 2759 0, 2760 MatFDColoringSetUp_SeqXAIJ, 2761 0, 2762 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2763 MatDestroySubMatrices_SeqBAIJ 2764 }; 2765 2766 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2767 { 2768 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2769 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2770 PetscErrorCode ierr; 2771 2772 PetscFunctionBegin; 2773 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2774 2775 /* allocate space for values if not already there */ 2776 if (!aij->saved_values) { 2777 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2778 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2779 } 2780 2781 /* copy values over */ 2782 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2787 { 2788 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2789 PetscErrorCode ierr; 2790 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2791 2792 PetscFunctionBegin; 2793 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2794 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2795 2796 /* copy values over */ 2797 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2798 PetscFunctionReturn(0); 2799 } 2800 2801 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2803 2804 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2805 { 2806 Mat_SeqBAIJ *b; 2807 PetscErrorCode ierr; 2808 PetscInt i,mbs,nbs,bs2; 2809 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2810 2811 PetscFunctionBegin; 2812 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2813 if (nz == MAT_SKIP_ALLOCATION) { 2814 skipallocation = PETSC_TRUE; 2815 nz = 0; 2816 } 2817 2818 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2822 2823 B->preallocated = PETSC_TRUE; 2824 2825 mbs = B->rmap->n/bs; 2826 nbs = B->cmap->n/bs; 2827 bs2 = bs*bs; 2828 2829 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2830 2831 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2832 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2833 if (nnz) { 2834 for (i=0; i<mbs; i++) { 2835 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2836 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2837 } 2838 } 2839 2840 b = (Mat_SeqBAIJ*)B->data; 2841 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2842 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2843 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2844 2845 if (!flg) { 2846 switch (bs) { 2847 case 1: 2848 B->ops->mult = MatMult_SeqBAIJ_1; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2850 break; 2851 case 2: 2852 B->ops->mult = MatMult_SeqBAIJ_2; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2854 break; 2855 case 3: 2856 B->ops->mult = MatMult_SeqBAIJ_3; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2858 break; 2859 case 4: 2860 B->ops->mult = MatMult_SeqBAIJ_4; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2862 break; 2863 case 5: 2864 B->ops->mult = MatMult_SeqBAIJ_5; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2866 break; 2867 case 6: 2868 B->ops->mult = MatMult_SeqBAIJ_6; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2870 break; 2871 case 7: 2872 B->ops->mult = MatMult_SeqBAIJ_7; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2874 break; 2875 case 11: 2876 B->ops->mult = MatMult_SeqBAIJ_11; 2877 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2878 break; 2879 case 15: 2880 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2881 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2882 break; 2883 default: 2884 B->ops->mult = MatMult_SeqBAIJ_N; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2886 break; 2887 } 2888 } 2889 B->ops->sor = MatSOR_SeqBAIJ; 2890 b->mbs = mbs; 2891 b->nbs = nbs; 2892 if (!skipallocation) { 2893 if (!b->imax) { 2894 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2895 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2896 2897 b->free_imax_ilen = PETSC_TRUE; 2898 } 2899 /* b->ilen will count nonzeros in each block row so far. */ 2900 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2901 if (!nnz) { 2902 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2903 else if (nz < 0) nz = 1; 2904 for (i=0; i<mbs; i++) b->imax[i] = nz; 2905 nz = nz*mbs; 2906 } else { 2907 nz = 0; 2908 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2909 } 2910 2911 /* allocate the matrix space */ 2912 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2913 if (B->structure_only) { 2914 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2915 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2916 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2917 } else { 2918 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2919 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2920 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2921 } 2922 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2923 2924 if (B->structure_only) { 2925 b->singlemalloc = PETSC_FALSE; 2926 b->free_a = PETSC_FALSE; 2927 } else { 2928 b->singlemalloc = PETSC_TRUE; 2929 b->free_a = PETSC_TRUE; 2930 } 2931 b->free_ij = PETSC_TRUE; 2932 2933 b->i[0] = 0; 2934 for (i=1; i<mbs+1; i++) { 2935 b->i[i] = b->i[i-1] + b->imax[i-1]; 2936 } 2937 2938 } else { 2939 b->free_a = PETSC_FALSE; 2940 b->free_ij = PETSC_FALSE; 2941 } 2942 2943 b->bs2 = bs2; 2944 b->mbs = mbs; 2945 b->nz = 0; 2946 b->maxnz = nz; 2947 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2948 B->was_assembled = PETSC_FALSE; 2949 B->assembled = PETSC_FALSE; 2950 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2951 PetscFunctionReturn(0); 2952 } 2953 2954 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2955 { 2956 PetscInt i,m,nz,nz_max=0,*nnz; 2957 PetscScalar *values=0; 2958 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2959 PetscErrorCode ierr; 2960 2961 PetscFunctionBegin; 2962 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2963 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2964 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2965 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2966 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2967 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2968 m = B->rmap->n/bs; 2969 2970 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2971 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2972 for (i=0; i<m; i++) { 2973 nz = ii[i+1]- ii[i]; 2974 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2975 nz_max = PetscMax(nz_max, nz); 2976 nnz[i] = nz; 2977 } 2978 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2979 ierr = PetscFree(nnz);CHKERRQ(ierr); 2980 2981 values = (PetscScalar*)V; 2982 if (!values) { 2983 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2984 } 2985 for (i=0; i<m; i++) { 2986 PetscInt ncols = ii[i+1] - ii[i]; 2987 const PetscInt *icols = jj + ii[i]; 2988 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2989 if (!roworiented) { 2990 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2991 } else { 2992 PetscInt j; 2993 for (j=0; j<ncols; j++) { 2994 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2995 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2996 } 2997 } 2998 } 2999 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3000 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3003 PetscFunctionReturn(0); 3004 } 3005 3006 /*MC 3007 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3008 block sparse compressed row format. 3009 3010 Options Database Keys: 3011 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3012 3013 Level: beginner 3014 3015 .seealso: MatCreateSeqBAIJ() 3016 M*/ 3017 3018 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3019 3020 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3021 { 3022 PetscErrorCode ierr; 3023 PetscMPIInt size; 3024 Mat_SeqBAIJ *b; 3025 3026 PetscFunctionBegin; 3027 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3028 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3029 3030 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3031 B->data = (void*)b; 3032 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3033 3034 b->row = 0; 3035 b->col = 0; 3036 b->icol = 0; 3037 b->reallocs = 0; 3038 b->saved_values = 0; 3039 3040 b->roworiented = PETSC_TRUE; 3041 b->nonew = 0; 3042 b->diag = 0; 3043 B->spptr = 0; 3044 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3045 b->keepnonzeropattern = PETSC_FALSE; 3046 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3056 #if defined(PETSC_HAVE_HYPRE) 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3058 #endif 3059 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3064 { 3065 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3066 PetscErrorCode ierr; 3067 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3068 3069 PetscFunctionBegin; 3070 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3071 3072 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3073 c->imax = a->imax; 3074 c->ilen = a->ilen; 3075 c->free_imax_ilen = PETSC_FALSE; 3076 } else { 3077 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3078 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3079 for (i=0; i<mbs; i++) { 3080 c->imax[i] = a->imax[i]; 3081 c->ilen[i] = a->ilen[i]; 3082 } 3083 c->free_imax_ilen = PETSC_TRUE; 3084 } 3085 3086 /* allocate the matrix space */ 3087 if (mallocmatspace) { 3088 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3089 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3090 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3091 3092 c->i = a->i; 3093 c->j = a->j; 3094 c->singlemalloc = PETSC_FALSE; 3095 c->free_a = PETSC_TRUE; 3096 c->free_ij = PETSC_FALSE; 3097 c->parent = A; 3098 C->preallocated = PETSC_TRUE; 3099 C->assembled = PETSC_TRUE; 3100 3101 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3102 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3103 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3106 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3107 3108 c->singlemalloc = PETSC_TRUE; 3109 c->free_a = PETSC_TRUE; 3110 c->free_ij = PETSC_TRUE; 3111 3112 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3113 if (mbs > 0) { 3114 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3115 if (cpvalues == MAT_COPY_VALUES) { 3116 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3117 } else { 3118 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3119 } 3120 } 3121 C->preallocated = PETSC_TRUE; 3122 C->assembled = PETSC_TRUE; 3123 } 3124 } 3125 3126 c->roworiented = a->roworiented; 3127 c->nonew = a->nonew; 3128 3129 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3130 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3131 3132 c->bs2 = a->bs2; 3133 c->mbs = a->mbs; 3134 c->nbs = a->nbs; 3135 3136 if (a->diag) { 3137 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3138 c->diag = a->diag; 3139 c->free_diag = PETSC_FALSE; 3140 } else { 3141 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3142 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3143 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3144 c->free_diag = PETSC_TRUE; 3145 } 3146 } else c->diag = 0; 3147 3148 c->nz = a->nz; 3149 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3150 c->solve_work = NULL; 3151 c->mult_work = NULL; 3152 c->sor_workt = NULL; 3153 c->sor_work = NULL; 3154 3155 c->compressedrow.use = a->compressedrow.use; 3156 c->compressedrow.nrows = a->compressedrow.nrows; 3157 if (a->compressedrow.use) { 3158 i = a->compressedrow.nrows; 3159 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3160 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3161 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3162 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3163 } else { 3164 c->compressedrow.use = PETSC_FALSE; 3165 c->compressedrow.i = NULL; 3166 c->compressedrow.rindex = NULL; 3167 } 3168 C->nonzerostate = A->nonzerostate; 3169 3170 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3171 PetscFunctionReturn(0); 3172 } 3173 3174 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3175 { 3176 PetscErrorCode ierr; 3177 3178 PetscFunctionBegin; 3179 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3180 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3181 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3182 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3183 PetscFunctionReturn(0); 3184 } 3185 3186 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3187 { 3188 Mat_SeqBAIJ *a; 3189 PetscErrorCode ierr; 3190 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3191 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3192 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3193 PetscInt *masked,nmask,tmp,bs2,ishift; 3194 PetscMPIInt size; 3195 int fd; 3196 PetscScalar *aa; 3197 MPI_Comm comm; 3198 3199 PetscFunctionBegin; 3200 /* force binary viewer to load .info file if it has not yet done so */ 3201 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3202 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3203 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3204 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3205 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3206 if (bs < 0) bs = 1; 3207 bs2 = bs*bs; 3208 3209 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3210 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3211 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3212 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3213 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3214 M = header[1]; N = header[2]; nz = header[3]; 3215 3216 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3217 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3218 3219 /* 3220 This code adds extra rows to make sure the number of rows is 3221 divisible by the blocksize 3222 */ 3223 mbs = M/bs; 3224 extra_rows = bs - M + bs*(mbs); 3225 if (extra_rows == bs) extra_rows = 0; 3226 else mbs++; 3227 if (extra_rows) { 3228 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3229 } 3230 3231 /* Set global sizes if not already set */ 3232 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3233 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3234 } else { /* Check if the matrix global sizes are correct */ 3235 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3236 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3237 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3238 } 3239 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3240 } 3241 3242 /* read in row lengths */ 3243 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3246 3247 /* read in column indices */ 3248 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3249 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3250 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3251 3252 /* loop over row lengths determining block row lengths */ 3253 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3254 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3255 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3256 rowcount = 0; 3257 nzcount = 0; 3258 for (i=0; i<mbs; i++) { 3259 nmask = 0; 3260 for (j=0; j<bs; j++) { 3261 kmax = rowlengths[rowcount]; 3262 for (k=0; k<kmax; k++) { 3263 tmp = jj[nzcount++]/bs; 3264 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3265 } 3266 rowcount++; 3267 } 3268 browlengths[i] += nmask; 3269 /* zero out the mask elements we set */ 3270 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3271 } 3272 3273 /* Do preallocation */ 3274 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3275 a = (Mat_SeqBAIJ*)newmat->data; 3276 3277 /* set matrix "i" values */ 3278 a->i[0] = 0; 3279 for (i=1; i<= mbs; i++) { 3280 a->i[i] = a->i[i-1] + browlengths[i-1]; 3281 a->ilen[i-1] = browlengths[i-1]; 3282 } 3283 a->nz = 0; 3284 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3285 3286 /* read in nonzero values */ 3287 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3288 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3289 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3290 3291 /* set "a" and "j" values into matrix */ 3292 nzcount = 0; jcount = 0; 3293 for (i=0; i<mbs; i++) { 3294 nzcountb = nzcount; 3295 nmask = 0; 3296 for (j=0; j<bs; j++) { 3297 kmax = rowlengths[i*bs+j]; 3298 for (k=0; k<kmax; k++) { 3299 tmp = jj[nzcount++]/bs; 3300 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3301 } 3302 } 3303 /* sort the masked values */ 3304 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3305 3306 /* set "j" values into matrix */ 3307 maskcount = 1; 3308 for (j=0; j<nmask; j++) { 3309 a->j[jcount++] = masked[j]; 3310 mask[masked[j]] = maskcount++; 3311 } 3312 /* set "a" values into matrix */ 3313 ishift = bs2*a->i[i]; 3314 for (j=0; j<bs; j++) { 3315 kmax = rowlengths[i*bs+j]; 3316 for (k=0; k<kmax; k++) { 3317 tmp = jj[nzcountb]/bs; 3318 block = mask[tmp] - 1; 3319 point = jj[nzcountb] - bs*tmp; 3320 idx = ishift + bs2*block + j + bs*point; 3321 a->a[idx] = (MatScalar)aa[nzcountb++]; 3322 } 3323 } 3324 /* zero out the mask elements we set */ 3325 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3326 } 3327 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3328 3329 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3330 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3331 ierr = PetscFree(aa);CHKERRQ(ierr); 3332 ierr = PetscFree(jj);CHKERRQ(ierr); 3333 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3334 3335 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3336 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 3340 /*@C 3341 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3342 compressed row) format. For good matrix assembly performance the 3343 user should preallocate the matrix storage by setting the parameter nz 3344 (or the array nnz). By setting these parameters accurately, performance 3345 during matrix assembly can be increased by more than a factor of 50. 3346 3347 Collective on MPI_Comm 3348 3349 Input Parameters: 3350 + comm - MPI communicator, set to PETSC_COMM_SELF 3351 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3352 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3353 . m - number of rows 3354 . n - number of columns 3355 . nz - number of nonzero blocks per block row (same for all rows) 3356 - nnz - array containing the number of nonzero blocks in the various block rows 3357 (possibly different for each block row) or NULL 3358 3359 Output Parameter: 3360 . A - the matrix 3361 3362 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3363 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3364 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3365 3366 Options Database Keys: 3367 . -mat_no_unroll - uses code that does not unroll the loops in the 3368 block calculations (much slower) 3369 . -mat_block_size - size of the blocks to use 3370 3371 Level: intermediate 3372 3373 Notes: 3374 The number of rows and columns must be divisible by blocksize. 3375 3376 If the nnz parameter is given then the nz parameter is ignored 3377 3378 A nonzero block is any block that as 1 or more nonzeros in it 3379 3380 The block AIJ format is fully compatible with standard Fortran 77 3381 storage. That is, the stored row and column indices can begin at 3382 either one (as in Fortran) or zero. See the users' manual for details. 3383 3384 Specify the preallocated storage with either nz or nnz (not both). 3385 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3386 allocation. See Users-Manual: ch_mat for details. 3387 matrices. 3388 3389 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3390 @*/ 3391 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3392 { 3393 PetscErrorCode ierr; 3394 3395 PetscFunctionBegin; 3396 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3397 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3398 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3399 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 3403 /*@C 3404 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3405 per row in the matrix. For good matrix assembly performance the 3406 user should preallocate the matrix storage by setting the parameter nz 3407 (or the array nnz). By setting these parameters accurately, performance 3408 during matrix assembly can be increased by more than a factor of 50. 3409 3410 Collective on MPI_Comm 3411 3412 Input Parameters: 3413 + B - the matrix 3414 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3415 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3416 . nz - number of block nonzeros per block row (same for all rows) 3417 - nnz - array containing the number of block nonzeros in the various block rows 3418 (possibly different for each block row) or NULL 3419 3420 Options Database Keys: 3421 . -mat_no_unroll - uses code that does not unroll the loops in the 3422 block calculations (much slower) 3423 . -mat_block_size - size of the blocks to use 3424 3425 Level: intermediate 3426 3427 Notes: 3428 If the nnz parameter is given then the nz parameter is ignored 3429 3430 You can call MatGetInfo() to get information on how effective the preallocation was; 3431 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3432 You can also run with the option -info and look for messages with the string 3433 malloc in them to see if additional memory allocation was needed. 3434 3435 The block AIJ format is fully compatible with standard Fortran 77 3436 storage. That is, the stored row and column indices can begin at 3437 either one (as in Fortran) or zero. See the users' manual for details. 3438 3439 Specify the preallocated storage with either nz or nnz (not both). 3440 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3441 allocation. See Users-Manual: ch_mat for details. 3442 3443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3444 @*/ 3445 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3446 { 3447 PetscErrorCode ierr; 3448 3449 PetscFunctionBegin; 3450 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3451 PetscValidType(B,1); 3452 PetscValidLogicalCollectiveInt(B,bs,2); 3453 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 3457 /*@C 3458 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3459 (the default sequential PETSc format). 3460 3461 Collective on MPI_Comm 3462 3463 Input Parameters: 3464 + B - the matrix 3465 . i - the indices into j for the start of each local row (starts with zero) 3466 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3467 - v - optional values in the matrix 3468 3469 Level: developer 3470 3471 Notes: 3472 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3473 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3474 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3475 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3476 block column and the second index is over columns within a block. 3477 3478 .keywords: matrix, aij, compressed row, sparse 3479 3480 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3481 @*/ 3482 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3483 { 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3488 PetscValidType(B,1); 3489 PetscValidLogicalCollectiveInt(B,bs,2); 3490 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 3495 /*@ 3496 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3497 3498 Collective on MPI_Comm 3499 3500 Input Parameters: 3501 + comm - must be an MPI communicator of size 1 3502 . bs - size of block 3503 . m - number of rows 3504 . n - number of columns 3505 . i - row indices 3506 . j - column indices 3507 - a - matrix values 3508 3509 Output Parameter: 3510 . mat - the matrix 3511 3512 Level: advanced 3513 3514 Notes: 3515 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3516 once the matrix is destroyed 3517 3518 You cannot set new nonzero locations into this matrix, that will generate an error. 3519 3520 The i and j indices are 0 based 3521 3522 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3523 3524 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3525 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3526 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3527 with column-major ordering within blocks. 3528 3529 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3530 3531 @*/ 3532 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3533 { 3534 PetscErrorCode ierr; 3535 PetscInt ii; 3536 Mat_SeqBAIJ *baij; 3537 3538 PetscFunctionBegin; 3539 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3540 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3541 3542 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3543 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3544 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3545 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3546 baij = (Mat_SeqBAIJ*)(*mat)->data; 3547 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3548 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3549 3550 baij->i = i; 3551 baij->j = j; 3552 baij->a = a; 3553 3554 baij->singlemalloc = PETSC_FALSE; 3555 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3556 baij->free_a = PETSC_FALSE; 3557 baij->free_ij = PETSC_FALSE; 3558 3559 for (ii=0; ii<m; ii++) { 3560 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3561 #if defined(PETSC_USE_DEBUG) 3562 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3563 #endif 3564 } 3565 #if defined(PETSC_USE_DEBUG) 3566 for (ii=0; ii<baij->i[m]; ii++) { 3567 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3568 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3569 } 3570 #endif 3571 3572 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3573 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3574 PetscFunctionReturn(0); 3575 } 3576 3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3578 { 3579 PetscErrorCode ierr; 3580 PetscMPIInt size; 3581 3582 PetscFunctionBegin; 3583 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3584 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3585 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3586 } else { 3587 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3588 } 3589 PetscFunctionReturn(0); 3590 } 3591